xcb/
lat1_str.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::str;
4
/// Error that Latin-1 string operations can produce.
///
/// Implements [`std::error::Error`], so it can be propagated with `?`
/// into `Box<dyn std::error::Error>` or wrapped by other error types.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Lat1Error {
    /// Some non-ASCII characters were encountered.
    ///
    /// This error is generated when attempting to borrow
    /// a Latin-1 string out of a UTF-8 string.
    /// For such a borrow, only the ASCII character set is allowed.
    /// See [Lat1Str::try_from_ascii].
    NonAscii,
}

impl fmt::Display for Lat1Error {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive match on purpose: adding a variant must force a new message.
        match self {
            Lat1Error::NonAscii => f.write_str("string contains non-ASCII characters"),
        }
    }
}

impl std::error::Error for Lat1Error {}
15
16/// A slice to a Latin-1 (aka. ISO 8859-1) string.
17///
18/// It is usually seen in its borrowed form, `&Lat1Str`.
19/// Lat1Str contains a slice of bytes and is by definition always
20/// valid Latin-1.
21///
22/// This type is useful for XCB because strings in the X protocol are
23/// expected to be Latin-1 encoded.
24/// Although the X strings are Latin-1, in reality ASCII can be
25/// expected without too much risk, hence all the ASCII related functions.
26///
27/// This does not account for strings passed as raw bytes
28/// to [x::ChangeProperty](crate::x::ChangeProperty) (e.g. to set a window title).
29/// These strings are passed as-is by the X server to the window compositor and
30/// encoding is implied by the property itself
31/// (e.g. UTF-8 for `_NET_WM_NAME` aka. window title).
32pub struct Lat1Str {
33    data: [u8],
34}
35
36impl Lat1Str {
37    /// Returns a reference to a Lat1Str that borrows the passed bytes
38    pub fn from_bytes(bytes: &[u8]) -> &Self {
39        unsafe { &*(bytes as *const [u8] as *const Self) }
40    }
41
42    /// Returns a reference to a `Lat1Str` that borrows the passed string bytes
43    /// only if `str` is pure ASCII.
44    /// Otherwise, a `Lat1Error::NonAscii` is returned.
45    pub fn try_from_ascii(str: &str) -> Result<&Self, Lat1Error> {
46        if str.is_ascii() {
47            Ok(Self::from_bytes(str.as_bytes()))
48        } else {
49            Err(Lat1Error::NonAscii)
50        }
51    }
52
53    /// Returns a reference to a `Lat1Str` that borrows the passed string bytes
54    /// only if `str` is pure ASCII.
55    ///
56    /// # Panics
57    /// This function panics if `str` contains non-ASCII chars.
58    pub fn from_ascii(str: &str) -> &Self {
59        Self::try_from_ascii(str).unwrap()
60    }
61
62    /// Returns a reference to a `Lat1Str` that borrows the passed string bytes.
63    ///
64    /// # Safety
65    /// If `str` contains non-ASCII characters, the returned string will not correspond
66    /// to the passed string (the latin-1 will contain utf-8 encoding).
67    pub unsafe fn from_ascii_unchecked(str: &str) -> &Self {
68        Self::from_bytes(str.as_bytes())
69    }
70
71    /// Returns a Latin-1 string built from a UTF-8 string
72    ///
73    /// `Cow::Borrowed` is returned if `str` contains only ASCII,
74    /// otherwise, a conversion from UTF-8 is performed and `Cow::Owned` is returned.
75    pub fn from_utf8(str: &str) -> Cow<Lat1Str> {
76        if str.is_ascii() {
77            Cow::Borrowed(Lat1Str::from_bytes(str.as_bytes()))
78        } else {
79            Cow::Owned(Lat1String::from_utf8(str))
80        }
81    }
82
83    /// Checks whether the slice only contains ASCII characters.
84    pub fn is_ascii(&self) -> bool {
85        self.data.is_ascii()
86    }
87
88    /// Returns the number of characters in the string.
89    pub fn len(&self) -> usize {
90        self.data.len()
91    }
92
93    /// Returns the string as slice of bytes.
94    pub fn as_bytes(&self) -> &[u8] {
95        &self.data
96    }
97
98    /// Returns the string in UTF-8 encoding, only if the string is pure ASCII.
99    /// Otherwise, a `Lat1Error::NonAscii` is returned.
100    pub fn try_as_ascii(&self) -> Result<&str, Lat1Error> {
101        if self.is_ascii() {
102            Ok(unsafe { str::from_utf8_unchecked(&self.data) })
103        } else {
104            Err(Lat1Error::NonAscii)
105        }
106    }
107
108    /// Returns the string in UTF-8 encoding, only if the string is pure ASCII.
109    ///
110    /// # Panics
111    /// This function panics if the string contains non-ASCII chars.
112    pub fn as_ascii(&self) -> &str {
113        self.try_as_ascii().unwrap()
114    }
115
116    /// Returns the string in UTF-8 encoding.
117    ///
118    /// # Safety
119    /// If the string contains non-ASCII characters, the returned string will be
120    /// invalid UTF-8.
121    pub unsafe fn as_ascii_unchecked(&self) -> &str {
122        str::from_utf8_unchecked(&self.data)
123    }
124
125    /// Returns the string converted to UTF-8.
126    ///
127    /// `Cow::Borrowed` is returned if the string is pure ASCII,
128    /// otherwise a conversion to UTF-8 is performed and `Cow::Owned` is returned.
129    pub fn to_utf8(&self) -> Cow<str> {
130        if self.is_ascii() {
131            Cow::Borrowed(unsafe { self.as_ascii_unchecked() })
132        } else {
133            Cow::Owned(self.data.iter().map(|c| *c as char).collect())
134        }
135    }
136}
137
138impl std::borrow::ToOwned for Lat1Str {
139    type Owned = Lat1String;
140    fn to_owned(&self) -> Self::Owned {
141        Lat1String {
142            data: self.as_bytes().to_vec(),
143        }
144    }
145}
146
147impl fmt::Display for Lat1Str {
148    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
149        let s = self.to_utf8();
150        f.write_str(&s)
151    }
152}
153
154impl fmt::Debug for Lat1Str {
155    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
156        let s = self.to_utf8();
157        f.write_fmt(format_args!("Lat1(\"{}\")", s))
158    }
159}
160
/// A struct owning a Latin-1 (aka. ISO 8859-1) string.
///
/// See [Lat1Str] for details.
#[derive(Clone)]
pub struct Lat1String {
    data: Vec<u8>,
}

impl Lat1String {
    /// Construct a [Lat1String] from a slice of bytes.
    ///
    /// The bytes are copied as-is, without any validation.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Lat1String {
            data: bytes.to_vec(),
        }
    }

    /// Construct a [Lat1String] from UTF-8 (a conversion to Latin-1 is performed).
    ///
    /// The conversion is lossy: Latin-1 can only represent the code points
    /// U+0000 to U+00FF. Any other character of `str` is replaced by `?`.
    pub fn from_utf8(str: &str) -> Self {
        // Substitute for characters that have no Latin-1 representation.
        const REPLACEMENT: u8 = b'?';

        let data = str
            .chars()
            .map(|c| match c {
                // In this range the code point is the Latin-1 byte value,
                // so the cast cannot truncate.
                '\u{00}'..='\u{FF}' => c as u8,
                // A bare `c as u8` here would keep only the low byte and
                // yield an unrelated character (e.g. '€' U+20AC -> 0xAC '¬').
                _ => REPLACEMENT,
            })
            .collect();
        Lat1String { data }
    }
}
184
185impl std::ops::Deref for Lat1String {
186    type Target = Lat1Str;
187    fn deref(&self) -> &Self::Target {
188        Lat1Str::from_bytes(self.data.as_slice())
189    }
190}
191
192impl std::borrow::Borrow<Lat1Str> for Lat1String {
193    fn borrow(&self) -> &Lat1Str {
194        Lat1Str::from_bytes(self.data.as_slice())
195    }
196}
197
198impl fmt::Display for Lat1String {
199    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200        let s = self.to_utf8();
201        f.write_str(&s)
202    }
203}
204
205impl fmt::Debug for Lat1String {
206    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207        let s = self.to_utf8();
208        f.write_fmt(format_args!("Lat1(\"{}\")", s))
209    }
210}
211
/// Latin-1 (aka. ISO 8859-1) string of fixed size.
///
/// The `N` bytes of the string are stored inline in the struct.
/// See [Lat1Str] for details about Latin-1 strings.
#[derive(Copy, Clone)]
pub struct Lat1StrF<const N: usize> {
    data: [u8; N],
}

impl<const N: usize> Lat1StrF<N> {
    /// Construct a [Lat1StrF] that takes ownership of the `N` passed bytes.
    ///
    /// The bytes are stored as-is, without any validation.
    pub fn from_bytes(bytes: [u8; N]) -> Self {
        Lat1StrF { data: bytes }
    }
}
223
224impl<const N: usize> std::ops::Deref for Lat1StrF<N> {
225    type Target = Lat1Str;
226    fn deref(&self) -> &Self::Target {
227        Lat1Str::from_bytes(self.data.as_slice())
228    }
229}
230
231impl<const N: usize> fmt::Display for Lat1StrF<N> {
232    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
233        let s = self.to_utf8();
234        f.write_str(&s)
235    }
236}
237
238impl<const N: usize> fmt::Debug for Lat1StrF<N> {
239    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240        let s = self.to_utf8();
241        f.write_fmt(format_args!("Lat1(\"{}\")", s))
242    }
243}
244
/// Checks the conversion of a sentence with accented characters in both
/// directions: UTF-8 to Latin-1 and Latin-1 back to UTF-8.
#[test]
fn test_latin_str() {
    let utf8 = "Mon frère est là.";
    // Same sentence in Latin-1: 'è' is 0xE8 and 'à' is 0xE0.
    let latin1: &[u8] = b"Mon fr\xE8re est l\xE0.";

    let owned = Lat1String::from_utf8(utf8);
    assert_eq!(owned.as_bytes(), latin1);

    let borrowed = Lat1Str::from_bytes(latin1);
    assert_eq!(borrowed.to_utf8(), utf8);
}