1
//! Parser for an Accept-Language HTTP header.
2

            
3
use language_tags::{LanguageTag, ParseError};
4
use locale_config::{LanguageRange, Locale};
5

            
6
use std::error;
7
use std::fmt;
8
use std::str::FromStr;
9

            
10
#[cfg(doc)]
11
use crate::api::CairoRenderer;
12

            
13
/// Used to set the language for rendering.
14
///
15
/// SVG documents can use the `<switch>` element, whose children have a `systemLanguage`
16
/// attribute; only the first child which has a `systemLanguage` that matches the
17
/// preferred languages will be rendered.
18
///
19
/// This enum, used with [`CairoRenderer::with_language`], configures how to obtain the
20
/// user's prefererred languages.
21
pub enum Language {
22
    /// Use the Unix environment variables `LANGUAGE`, `LC_ALL`, `LC_MESSAGES` and `LANG` to obtain the
23
    /// user's language.
24
    ///
25
    /// This uses [`g_get_language_names()`][ggln] underneath.
26
    ///
27
    /// [ggln]: https://docs.gtk.org/glib/func.get_language_names.html
28
    FromEnvironment,
29

            
30
    /// Use a list of languages in the form of an HTTP Accept-Language header, like `es, en;q=0.8`.
31
    ///
32
    /// This is convenient when you want to select an explicit set of languages, instead of
33
    /// assuming that the Unix environment has the language you want.
34
3
    AcceptLanguage(AcceptLanguage),
35
}
36

            
37
/// `Language` but with the environment's locale converted to something we can use.
38
51878
#[derive(Clone)]
39
pub enum UserLanguage {
40
51875
    LanguageTags(LanguageTags),
41
3
    AcceptLanguage(AcceptLanguage),
42
}
43

            
44
36
/// Quality value (`q=`) attached to one language in an Accept-Language list.
///
/// `None` means that the list element carried no explicit weight.
#[derive(Clone, Debug, PartialEq)]
struct Weight(Option<f32>);

impl Weight {
    /// Returns the weight as a number; an element without an explicit weight counts as `1.0`.
    fn numeric(&self) -> f32 {
        self.0.unwrap_or(1.0)
    }
}
52

            
53
30
#[derive(Clone, Debug, PartialEq)]
54
struct Item {
55
18
    tag: LanguageTag,
56
18
    weight: Weight,
57
}
58

            
59
/// Stores a parsed version of an HTTP Accept-Language header.
60
///
61
/// RFC 7231: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
62
32
#[derive(Clone, Debug, PartialEq)]
63
16
pub struct AcceptLanguage(Box<[Item]>);
64

            
65
/// Errors when parsing an `AcceptLanguage`.
66
#[derive(Debug, PartialEq)]
67
enum AcceptLanguageError {
68
    NoElements,
69
    InvalidCharacters,
70
2
    InvalidLanguageTag(ParseError),
71
    InvalidWeight,
72
}
73

            
74
// Empty impl: the trait's default methods are used as-is.  The `Debug` derive and the
// `Display` impl on `AcceptLanguageError` provide the formatting that `Error` requires.
impl error::Error for AcceptLanguageError {}
75

            
76
impl fmt::Display for AcceptLanguageError {
77
1
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78
1
        match self {
79
            Self::NoElements => write!(f, "no language tags in list"),
80
            Self::InvalidCharacters => write!(f, "invalid characters in language list"),
81
1
            Self::InvalidLanguageTag(e) => write!(f, "invalid language tag: {e}"),
82
            Self::InvalidWeight => write!(f, "invalid q= weight"),
83
        }
84
1
    }
85
}
86

            
87
/// Optional whitespace, Space or Tab, per RFC 7230.
///
/// Kept as a `char` array so that a slice of it can be used directly as a pattern
/// for the `str::trim_*_matches` family of functions.
///
/// RFC 7230: <https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3>
const OWS: [char; 2] = ['\x20', '\x09'];
91

            
92
impl AcceptLanguage {
93
    /// Parses the payload of an HTTP Accept-Language header.
94
    ///
95
    /// For example, a valid header looks like `es, en;q=0.8`, and means, "I prefer Spanish,
96
    /// but will also accept English".
97
    ///
98
    /// Use this function to construct a [`Language::AcceptLanguage`]
99
    /// variant to pass to the [`CairoRenderer::with_language`] function.
100
    ///
101
    /// See RFC 7231 for details: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
102
4
    pub fn parse(s: &str) -> Result<AcceptLanguage, String> {
103
5
        AcceptLanguage::parse_internal(s).map_err(|e| format!("{}", e))
104
4
    }
105

            
106
    /// Internal constructor.  We don't expose [`AcceptLanguageError`] in the public API;
107
    /// there we just use a [`String`].
108
30
    fn parse_internal(s: &str) -> Result<AcceptLanguage, AcceptLanguageError> {
109
30
        if !s.is_ascii() {
110
1
            return Err(AcceptLanguageError::InvalidCharacters);
111
        }
112

            
113
29
        let mut items = Vec::new();
114

            
115
58
        for val in s.split(',') {
116
39
            let trimmed = val.trim_matches(&OWS[..]);
117
39
            if trimmed.is_empty() {
118
                continue;
119
            }
120

            
121
27
            items.push(Item::parse(trimmed)?);
122
        }
123

            
124
31
        if items.is_empty() {
125
3
            Err(AcceptLanguageError::NoElements)
126
        } else {
127
14
            Ok(AcceptLanguage(items.into_boxed_slice()))
128
        }
129
28
    }
130

            
131
6
    fn iter(&self) -> impl Iterator<Item = (&LanguageTag, f32)> {
132
13
        self.0.iter().map(|item| (&item.tag, item.weight.numeric()))
133
6
    }
134

            
135
5
    fn any_matches(&self, tag: &LanguageTag) -> bool {
136
10
        self.iter().any(|(self_tag, _weight)| tag.matches(self_tag))
137
5
    }
138
}
139

            
140
impl Item {
141
27
    fn parse(s: &str) -> Result<Item, AcceptLanguageError> {
142
27
        let semicolon_pos = s.find(';');
143

            
144
27
        let (before_semicolon, after_semicolon) = if let Some(semi) = semicolon_pos {
145
19
            (&s[..semi], Some(&s[semi + 1..]))
146
        } else {
147
8
            (s, None)
148
        };
149

            
150
27
        let tag = LanguageTag::parse(before_semicolon)
151
2
            .map_err(AcceptLanguageError::InvalidLanguageTag)?;
152

            
153
36
        let weight = if let Some(quality) = after_semicolon {
154
19
            let quality = quality.trim_start_matches(&OWS[..]);
155

            
156
19
            let number = if let Some(qvalue) = quality.strip_prefix("q=") {
157
17
                if qvalue.starts_with(&['0', '1'][..]) {
158
15
                    let first_digit = qvalue.chars().next().unwrap();
159

            
160
24
                    if let Some(decimals) = qvalue[1..].strip_prefix('.') {
161
13
                        if (first_digit == '0'
162
5
                            && decimals.len() <= 3
163
9
                            && decimals.chars().all(|c| c.is_ascii_digit()))
164
12
                            || (first_digit == '1'
165
8
                                && decimals.len() <= 3
166
20
                                && decimals.chars().all(|c| c == '0'))
167
                        {
168
9
                            qvalue
169
                        } else {
170
4
                            return Err(AcceptLanguageError::InvalidWeight);
171
                        }
172
                    } else {
173
2
                        qvalue
174
                    }
175
                } else {
176
2
                    return Err(AcceptLanguageError::InvalidWeight);
177
                }
178
            } else {
179
2
                return Err(AcceptLanguageError::InvalidWeight);
180
            };
181

            
182
11
            Weight(Some(
183
11
                f32::from_str(number).map_err(|_| AcceptLanguageError::InvalidWeight)?,
184
            ))
185
        } else {
186
6
            Weight(None)
187
        };
188

            
189
17
        Ok(Item { tag, weight })
190
27
    }
191
}
192

            
193
/// A list of BCP47 language tags.
194
///
195
/// RFC 5664: <https://www.rfc-editor.org/info/rfc5664>
196
103752
#[derive(Debug, Clone, PartialEq)]
197
51876
pub struct LanguageTags(Vec<LanguageTag>);
198

            
199
impl LanguageTags {
200
    pub fn empty() -> Self {
201
        LanguageTags(Vec::new())
202
    }
203

            
204
    /// Converts a `Locale` to a set of language tags.
205
8302
    pub fn from_locale(locale: &Locale) -> Result<LanguageTags, String> {
206
8302
        let mut tags = Vec::new();
207

            
208
8302
        for locale_range in locale.tags_for("messages") {
209
3531
            if locale_range == LanguageRange::invariant() {
210
                continue;
211
            }
212

            
213
2303
            let str_locale_range = locale_range.as_ref();
214

            
215
2303
            let locale_tag = LanguageTag::from_str(str_locale_range).map_err(|e| {
216
                format!("invalid language tag \"{str_locale_range}\" in locale: {e}")
217
            })?;
218

            
219
2303
            if !locale_tag.is_language_range() {
220
                return Err(format!(
221
                    "language tag \"{locale_tag}\" is not a language range"
222
                ));
223
            }
224

            
225
2304
            tags.push(locale_tag);
226
3532
        }
227

            
228
1232
        Ok(LanguageTags(tags))
229
1232
    }
230

            
231
20
    pub fn from(tags: Vec<LanguageTag>) -> LanguageTags {
232
20
        LanguageTags(tags)
233
20
    }
234

            
235
18
    pub fn iter(&self) -> impl Iterator<Item = &LanguageTag> {
236
18
        self.0.iter()
237
18
    }
238

            
239
14
    pub fn any_matches(&self, language_tag: &LanguageTag) -> bool {
240
41
        self.0.iter().any(|tag| tag.matches(language_tag))
241
14
    }
242
}
243

            
244
impl UserLanguage {
245
18
    pub fn any_matches(&self, tags: &LanguageTags) -> bool {
246
18
        match *self {
247
13
            UserLanguage::LanguageTags(ref language_tags) => {
248
27
                tags.iter().any(|tag| language_tags.any_matches(tag))
249
            }
250
5
            UserLanguage::AcceptLanguage(ref accept_language) => {
251
10
                tags.iter().any(|tag| accept_language.any_matches(tag))
252
            }
253
        }
254
18
    }
255
}
256

            
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `AcceptLanguage` that a successful parse is expected to produce,
    /// from `(language tag, optional weight)` pairs.
    fn expected(items: &[(&str, Option<f32>)]) -> AcceptLanguage {
        AcceptLanguage(
            items
                .iter()
                .map(|&(tag, weight)| Item {
                    tag: LanguageTag::parse(tag).unwrap(),
                    weight: Weight(weight),
                })
                .collect(),
        )
    }

    /// Asserts that `input` parses successfully into exactly `items`.
    #[track_caller]
    fn assert_parses(input: &str, items: &[(&str, Option<f32>)]) {
        assert_eq!(
            AcceptLanguage::parse_internal(input).unwrap(),
            expected(items)
        );
    }

    /// Asserts that `input` is rejected because it has no language tags.
    #[track_caller]
    fn assert_no_elements(input: &str) {
        assert!(matches!(
            AcceptLanguage::parse_internal(input),
            Err(AcceptLanguageError::NoElements)
        ));
    }

    /// Asserts that `input` is rejected because of a malformed `q=` weight.
    #[track_caller]
    fn assert_invalid_weight(input: &str) {
        assert!(matches!(
            AcceptLanguage::parse_internal(input),
            Err(AcceptLanguageError::InvalidWeight)
        ));
    }

    #[test]
    fn parses_accept_language() {
        // plain tag
        assert_parses("es-MX", &[("es-MX", None)]);

        // with quality
        assert_parses("es-MX;q=1", &[("es-MX", Some(1.0))]);

        // with quality
        assert_parses("es-MX;q=0", &[("es-MX", Some(0.0))]);

        // zero decimals are allowed
        assert_parses("es-MX;q=0.", &[("es-MX", Some(0.0))]);

        // zero decimals are allowed
        assert_parses("es-MX;q=1.", &[("es-MX", Some(1.0))]);

        // one decimal
        assert_parses("es-MX;q=1.0", &[("es-MX", Some(1.0))]);

        // two decimals
        assert_parses("es-MX;q=1.00", &[("es-MX", Some(1.0))]);

        // three decimals
        assert_parses("es-MX;q=1.000", &[("es-MX", Some(1.0))]);

        // multiple elements
        assert_parses(
            "es-MX, en; q=0.5",
            &[("es-MX", None), ("en", Some(0.5))],
        );

        // superfluous whitespace
        assert_parses(
            ",es-MX;q=1.000  , en; q=0.125  ,  ,",
            &[("es-MX", Some(1.0)), ("en", Some(0.125))],
        );
    }

    #[test]
    fn empty_lists() {
        assert_no_elements("");
        assert_no_elements(",");
        assert_no_elements(", , ,,,");
    }

    #[test]
    fn ascii_only() {
        assert!(matches!(
            AcceptLanguage::parse_internal("ës"),
            Err(AcceptLanguageError::InvalidCharacters)
        ));
    }

    #[test]
    fn invalid_tag() {
        assert!(matches!(
            AcceptLanguage::parse_internal("no_underscores"),
            Err(AcceptLanguageError::InvalidLanguageTag(_))
        ));
    }

    #[test]
    fn invalid_weight() {
        assert_invalid_weight("es;");
        assert_invalid_weight("es;q");
        assert_invalid_weight("es;q=");
        assert_invalid_weight("es;q=2");
        assert_invalid_weight("es;q=1.1");
        assert_invalid_weight("es;q=1.12");
        assert_invalid_weight("es;q=1.123");

        // Up to three decimals allowed per RFC 7231
        assert_invalid_weight("es;q=0.1234");
    }

    #[test]
    fn iter() {
        let accept_language = AcceptLanguage::parse_internal("es-MX, en; q=0.5").unwrap();
        let mut iter = accept_language.iter();

        // An element without an explicit weight reports 1.0.
        let (tag, weight) = iter.next().unwrap();
        assert_eq!(*tag, LanguageTag::parse("es-MX").unwrap());
        assert_eq!(weight, 1.0);

        let (tag, weight) = iter.next().unwrap();
        assert_eq!(*tag, LanguageTag::parse("en").unwrap());
        assert_eq!(weight, 0.5);

        assert!(iter.next().is_none());
    }
}