1
//! The main XML parser.
2

            
3
use encoding_rs::Encoding;
4
use gio::{
5
    prelude::BufferedInputStreamExt, BufferedInputStream, Cancellable, ConverterInputStream,
6
    InputStream, ZlibCompressorFormat, ZlibDecompressor,
7
};
8
use glib::object::Cast;
9
use markup5ever::{
10
    expanded_name, local_name, namespace_url, ns, ExpandedName, LocalName, Namespace, QualName,
11
};
12
use std::cell::RefCell;
13
use std::collections::HashMap;
14
use std::rc::Rc;
15
use std::str;
16
use std::string::ToString;
17
use std::sync::Arc;
18
use xml5ever::buffer_queue::BufferQueue;
19
use xml5ever::tendril::format_tendril;
20
use xml5ever::tokenizer::{TagKind, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts};
21

            
22
use crate::borrow_element_as;
23
use crate::css::{Origin, Stylesheet};
24
use crate::document::{Document, DocumentBuilder, LoadOptions};
25
use crate::error::{ImplementationLimit, LoadingError};
26
use crate::io::{self, IoError};
27
use crate::limits::{MAX_LOADED_ELEMENTS, MAX_XINCLUDE_DEPTH};
28
use crate::node::{Node, NodeBorrow};
29
use crate::rsvg_log;
30
use crate::session::Session;
31
use crate::style::StyleType;
32
use crate::url_resolver::AllowedUrl;
33

            
34
use xml2_load::Xml2Parser;
35

            
36
mod attributes;
37
mod xml2;
38
mod xml2_load;
39

            
40
pub use attributes::Attributes;
41

            
42
3079999
#[derive(Clone)]
43
enum Context {
44
    // Starting state
45
    Start,
46

            
47
    // Creating nodes for elements under the current node
48
    ElementCreation,
49

            
50
    // Inside <style>; accumulate text to include in a stylesheet
51
    Style,
52

            
53
    // An unsupported element inside a `<style>` element, to be ignored
54
    UnsupportedStyleChild,
55

            
56
    // Inside <xi:include>
57
11
    XInclude(XIncludeContext),
58

            
59
    // An unsupported element inside a <xi:include> context, to be ignored
60
    UnsupportedXIncludeChild,
61

            
62
    // Insie <xi::fallback>
63
4
    XIncludeFallback(XIncludeContext),
64

            
65
    // An XML parsing error was found.  We will no-op upon any further XML events.
66
262
    FatalError(LoadingError),
67
}
68

            
69
34
/// State carried by the `XInclude` and `XIncludeFallback` contexts.
#[derive(Clone)]
struct XIncludeContext {
    /// Set when the resource named by the `xi:include` could not be acquired;
    /// only then are the contents of an `xi:fallback` child processed.
    need_fallback: bool,
}
73

            
74
// This is to hold an xmlEntityPtr from libxml2; we just hold an opaque pointer
75
// that is freed in XmlState::build_document(), or in XmlState::entity_insert() when replaced
76
type XmlEntityPtr = *mut libc::c_void;
77

            
78
extern "C" {
79
    // The original function takes an xmlNodePtr, but that is compatible
80
    // with xmlEntityPtr for the purposes of this function.
81
    fn xmlFreeNode(node: XmlEntityPtr);
82
}
83

            
84
// Builds an ExpandedName in the XInclude namespace for the given local name.
//
// markup5ever has no built-in namespace for XInclude, so the namespace is
// constructed here from its URL.
macro_rules! xinclude_name {
    ($local:expr) => {
        ExpandedName {
            ns: &Namespace::from("http://www.w3.org/2001/XInclude"),
            local: &LocalName::from($local),
        }
    };
}
96

            
97
/// Holds the state used for XML processing
98
///
99
/// These methods are called when an XML event is parsed out of the XML stream: `start_element`,
100
/// `end_element`, `characters`.
101
///
102
/// When an element starts, we push a corresponding `Context` into the `context_stack`.  Within
103
/// that context, all XML events will be forwarded to it, and processed in one of the `XmlHandler`
104
/// trait objects. Normally the context refers to a `NodeCreationContext` implementation which is
105
/// what creates normal graphical elements.
106
struct XmlStateInner {
107
    document_builder: DocumentBuilder,
108
    num_loaded_elements: usize,
109
    xinclude_depth: usize,
110
    context_stack: Vec<Context>,
111
    current_node: Option<Node>,
112

            
113
    // Note that neither XmlStateInner nor Xmlstate implement Drop.
114
    //
115
    // An XmlState is finally consumed in XmlState::build_document(), and that
116
    // function is responsible for freeing all the XmlEntityPtr from this field.
117
    //
118
    // (The structs cannot impl Drop because build_document()
119
    // destructures and consumes them at the same time.)
120
    entities: HashMap<String, XmlEntityPtr>,
121
}
122

            
123
pub struct XmlState {
124
    inner: RefCell<XmlStateInner>,
125

            
126
    session: Session,
127
    load_options: Arc<LoadOptions>,
128
}
129

            
130
/// Ways in which `XmlState::acquire()` can fail.
///
/// The terminology comes from <https://www.w3.org/TR/xinclude/#terminology>
enum AcquireError {
    /// The resource could not be acquired (file not found), or there was an I/O error.
    /// The `xi:fallback` can be used in this case, if present.
    ResourceError,

    /// The resource could not be parsed/decoded; the string describes the problem.
    FatalError(String),
}
141

            
142
impl XmlStateInner {
143
3080006
    fn context(&self) -> Context {
144
        // We can unwrap since the stack is never empty
145
3080006
        self.context_stack.last().unwrap().clone()
146
3080006
    }
147
}
148

            
149
impl XmlState {
150
1091
    fn new(
151
        session: Session,
152
        document_builder: DocumentBuilder,
153
        load_options: Arc<LoadOptions>,
154
    ) -> XmlState {
155
1091
        XmlState {
156
1091
            inner: RefCell::new(XmlStateInner {
157
1137
                document_builder,
158
                num_loaded_elements: 0,
159
                xinclude_depth: 0,
160
1091
                context_stack: vec![Context::Start],
161
1091
                current_node: None,
162
1091
                entities: HashMap::new(),
163
            }),
164

            
165
1091
            session,
166
1091
            load_options,
167
        }
168
1091
    }
169

            
170
1149
    fn check_last_error(&self) -> Result<(), LoadingError> {
171
1149
        let inner = self.inner.borrow();
172

            
173
1149
        match inner.context() {
174
48
            Context::FatalError(e) => Err(e),
175
1101
            _ => Ok(()),
176
        }
177
1149
    }
178

            
179
1022344
    fn check_limits(&self) -> Result<(), ()> {
180
1022344
        if self.inner.borrow().num_loaded_elements > MAX_LOADED_ELEMENTS {
181
1
            self.error(LoadingError::LimitExceeded(
182
1
                ImplementationLimit::TooManyLoadedElements,
183
            ));
184
1
            Err(())
185
        } else {
186
1022343
            Ok(())
187
        }
188
1022344
    }
189

            
190
1022395
    pub fn start_element(&self, name: QualName, attrs: Attributes) -> Result<(), ()> {
191
1022395
        self.check_limits()?;
192

            
193
1022336
        let context = self.inner.borrow().context();
194

            
195
1022325
        if let Context::FatalError(_) = context {
196
11
            return Err(());
197
        }
198

            
199
1022322
        self.inner.borrow_mut().num_loaded_elements += 1;
200

            
201
1022325
        let new_context = match context {
202
1101
            Context::Start => self.element_creation_start_element(&name, attrs),
203
1021222
            Context::ElementCreation => self.element_creation_start_element(&name, attrs),
204

            
205
            Context::Style => self.inside_style_start_element(&name),
206
            Context::UnsupportedStyleChild => self.unsupported_style_start_element(&name),
207

            
208
2
            Context::XInclude(ref ctx) => self.inside_xinclude_start_element(ctx, &name),
209
            Context::UnsupportedXIncludeChild => self.unsupported_xinclude_start_element(&name),
210
            Context::XIncludeFallback(ref ctx) => {
211
                self.xinclude_fallback_start_element(ctx, &name, attrs)
212
            }
213

            
214
            Context::FatalError(_) => unreachable!(),
215
        };
216

            
217
1022292
        self.inner.borrow_mut().context_stack.push(new_context);
218

            
219
1022314
        Ok(())
220
1022296
    }
221

            
222
1022358
    pub fn end_element(&self, _name: QualName) {
223
1022358
        let context = self.inner.borrow().context();
224

            
225
1022330
        match context {
226
            Context::Start => panic!("end_element: XML handler stack is empty!?"),
227
1022193
            Context::ElementCreation => self.element_creation_end_element(),
228

            
229
42
            Context::Style => self.style_end_element(),
230
            Context::UnsupportedStyleChild => (),
231

            
232
            Context::XInclude(_) => (),
233
            Context::UnsupportedXIncludeChild => (),
234
            Context::XIncludeFallback(_) => (),
235

            
236
            Context::FatalError(_) => return,
237
        }
238

            
239
        // We can unwrap since start_element() always adds a context to the stack
240
1022225
        self.inner.borrow_mut().context_stack.pop().unwrap();
241
1022289
    }
242

            
243
1034233
    pub fn characters(&self, text: &str) {
244
1034233
        let context = self.inner.borrow().context();
245

            
246
1034211
        match context {
247
            Context::Start => {
248
                // This is character data before the first element, i.e. something like
249
                //  <?xml version="1.0" encoding="UTF-8"?><svg xmlns="http://www.w3.org/2000/svg"/>
250
                // ^ note the space here
251
                // libxml2 is not finished reading the file yet; it will emit an error
252
                // on its own when it finishes.  So, ignore this condition.
253
            }
254

            
255
1034033
            Context::ElementCreation => self.element_creation_characters(text),
256

            
257
61
            Context::Style => self.element_creation_characters(text),
258
            Context::UnsupportedStyleChild => (),
259

            
260
            Context::XInclude(_) => (),
261
            Context::UnsupportedXIncludeChild => (),
262
2
            Context::XIncludeFallback(ref ctx) => self.xinclude_fallback_characters(ctx, text),
263
            Context::FatalError(_) => (),
264
        }
265
1034189
    }
266

            
267
3
    pub fn processing_instruction(&self, target: &str, data: &str) {
268
17
        if target != "xml-stylesheet" {
269
            return;
270
        }
271

            
272
10
        if let Ok(pairs) = parse_xml_stylesheet_processing_instruction(data) {
273
10
            let mut alternate = None;
274
10
            let mut type_ = None;
275
10
            let mut href = None;
276

            
277
10
            for (att, value) in pairs {
278
4
                match att.as_str() {
279
4
                    "alternate" => alternate = Some(value),
280
4
                    "type" => type_ = Some(value),
281
2
                    "href" => href = Some(value),
282
                    _ => (),
283
                }
284
4
            }
285

            
286
2
            let mut inner = self.inner.borrow_mut();
287

            
288
2
            if type_.as_deref() != Some("text/css")
289
2
                || (alternate.is_some() && alternate.as_deref() != Some("no"))
290
            {
291
                rsvg_log!(
292
                    self.session,
293
                    "invalid parameters in XML processing instruction for stylesheet",
294
                );
295
                return;
296
            }
297

            
298
2
            if let Some(href) = href {
299
2
                if let Ok(aurl) = self.load_options.url_resolver.resolve_href(&href) {
300
2
                    if let Ok(stylesheet) =
301
1
                        Stylesheet::from_href(&aurl, Origin::Author, self.session.clone())
302
                    {
303
1
                        inner.document_builder.append_stylesheet(stylesheet);
304
1
                    } else {
305
                        // FIXME: https://www.w3.org/TR/xml-stylesheet/ does not seem to specify
306
                        // what to do if the stylesheet cannot be loaded, so here we ignore the error.
307
                        rsvg_log!(
308
                            self.session,
309
                            "could not create stylesheet from {} in XML processing instruction",
310
                            href
311
                        );
312
                    }
313
1
                } else {
314
1
                    rsvg_log!(
315
                        self.session,
316
                        "{} not allowed for xml-stylesheet in XML processing instruction",
317
                        href
318
                    );
319
                }
320
2
            } else {
321
                rsvg_log!(
322
                    self.session,
323
                    "xml-stylesheet processing instruction does not have href; ignoring"
324
                );
325
            }
326
2
        } else {
327
            self.error(LoadingError::XmlParseError(String::from(
328
                "invalid processing instruction data in xml-stylesheet",
329
            )));
330
        }
331
9
    }
332

            
333
7
    pub fn error(&self, e: LoadingError) {
334
7
        self.inner
335
            .borrow_mut()
336
            .context_stack
337
7
            .push(Context::FatalError(e));
338
7
    }
339

            
340
62
    /// Returns the entity pointer stored under `entity_name`, if there is one.
    pub fn entity_lookup(&self, entity_name: &str) -> Option<XmlEntityPtr> {
        let inner = self.inner.borrow();
        inner.entities.get(entity_name).copied()
    }
343

            
344
7
    pub fn entity_insert(&self, entity_name: &str, entity: XmlEntityPtr) {
345
7
        let mut inner = self.inner.borrow_mut();
346

            
347
7
        let old_value = inner.entities.insert(entity_name.to_string(), entity);
348

            
349
7
        if let Some(v) = old_value {
350
            unsafe {
351
                xmlFreeNode(v);
352
            }
353
        }
354
7
    }
355

            
356
1022320
    fn element_creation_start_element(&self, name: &QualName, attrs: Attributes) -> Context {
357
1022320
        if name.expanded() == xinclude_name!("include") {
358
49
            self.xinclude_start_element(name, attrs)
359
        } else {
360
1022296
            let mut inner = self.inner.borrow_mut();
361

            
362
1022330
            let parent = inner.current_node.clone();
363
1022330
            let node = inner.document_builder.append_element(name, attrs, parent);
364
1022259
            inner.current_node = Some(node);
365

            
366
1022259
            if name.expanded() == expanded_name!(svg "style") {
367
42
                Context::Style
368
            } else {
369
1022196
                Context::ElementCreation
370
            }
371
1022238
        }
372
1022218
    }
373

            
374
1022234
    fn element_creation_end_element(&self) {
375
1022234
        let mut inner = self.inner.borrow_mut();
376
1022234
        let node = inner.current_node.take().unwrap();
377
1022226
        inner.current_node = node.parent();
378
1022226
    }
379

            
380
1034093
    fn element_creation_characters(&self, text: &str) {
381
1034111
        let mut inner = self.inner.borrow_mut();
382

            
383
1034093
        let mut parent = inner.current_node.clone().unwrap();
384
1034095
        inner.document_builder.append_characters(text, &mut parent);
385
1034081
    }
386

            
387
42
    fn style_end_element(&self) {
388
42
        self.add_inline_stylesheet();
389
42
        self.element_creation_end_element()
390
42
    }
391

            
392
42
    fn add_inline_stylesheet(&self) {
393
42
        let mut inner = self.inner.borrow_mut();
394
42
        let current_node = inner.current_node.as_ref().unwrap();
395

            
396
84
        let style_type = borrow_element_as!(current_node, Style).style_type();
397

            
398
84
        if style_type == StyleType::TextCss {
399
42
            let stylesheet_text = current_node
400
                .children()
401
42
                .map(|child| {
402
                    // Note that here we assume that the only children of <style>
403
                    // are indeed text nodes.
404
42
                    let child_borrow = child.borrow_chars();
405
42
                    child_borrow.get_string()
406
42
                })
407
                .collect::<String>();
408

            
409
84
            if let Ok(stylesheet) = Stylesheet::from_data(
410
42
                &stylesheet_text,
411
42
                &self.load_options.url_resolver,
412
42
                Origin::Author,
413
42
                self.session.clone(),
414
42
            ) {
415
42
                inner.document_builder.append_stylesheet(stylesheet);
416
42
            } else {
417
                rsvg_log!(self.session, "invalid inline stylesheet");
418
            }
419
42
        }
420
42
    }
421

            
422
    /// An element that starts inside `<style>` is handled as an unsupported
    /// style child.
    fn inside_style_start_element(&self, name: &QualName) -> Context {
        self.unsupported_style_start_element(name)
    }
425

            
426
    fn unsupported_style_start_element(&self, _name: &QualName) -> Context {
427
        Context::UnsupportedStyleChild
428
    }
429

            
430
245
    fn xinclude_start_element(&self, _name: &QualName, attrs: Attributes) -> Context {
431
245
        let mut href = None;
432
245
        let mut parse = None;
433
245
        let mut encoding = None;
434

            
435
245
        let ln_parse = LocalName::from("parse");
436

            
437
245
        for (attr, value) in attrs.iter() {
438
101
            match attr.expanded() {
439
49
                expanded_name!("", "href") => href = Some(value),
440
49
                ref v
441
98
                    if *v
442
49
                        == ExpandedName {
443
49
                            ns: &ns!(),
444
                            local: &ln_parse,
445
49
                        } =>
446
                {
447
46
                    parse = Some(value)
448
46
                }
449
3
                expanded_name!("", "encoding") => encoding = Some(value),
450
                _ => (),
451
            }
452
98
        }
453

            
454
49
        let need_fallback = match self.acquire(href, parse, encoding) {
455
3
            Ok(()) => false,
456
2
            Err(AcquireError::ResourceError) => true,
457
44
            Err(AcquireError::FatalError(s)) => {
458
44
                return Context::FatalError(LoadingError::XmlParseError(s))
459
            }
460
49
        };
461

            
462
5
        Context::XInclude(XIncludeContext { need_fallback })
463
49
    }
464

            
465
2
    fn inside_xinclude_start_element(&self, ctx: &XIncludeContext, name: &QualName) -> Context {
466
2
        if name.expanded() == xinclude_name!("fallback") {
467
2
            Context::XIncludeFallback(ctx.clone())
468
        } else {
469
            // https://www.w3.org/TR/xinclude/#include_element
470
            //
471
            // "Other content (text, processing instructions,
472
            // comments, elements not in the XInclude namespace,
473
            // descendants of child elements) is not constrained by
474
            // this specification and is ignored by the XInclude
475
            // processor"
476

            
477
            self.unsupported_xinclude_start_element(name)
478
        }
479
2
    }
480

            
481
    fn xinclude_fallback_start_element(
482
        &self,
483
        ctx: &XIncludeContext,
484
        name: &QualName,
485
        attrs: Attributes,
486
    ) -> Context {
487
        if ctx.need_fallback {
488
            if name.expanded() == xinclude_name!("include") {
489
                self.xinclude_start_element(name, attrs)
490
            } else {
491
                self.element_creation_start_element(name, attrs)
492
            }
493
        } else {
494
            Context::UnsupportedXIncludeChild
495
        }
496
    }
497

            
498
2
    fn xinclude_fallback_characters(&self, ctx: &XIncludeContext, text: &str) {
499
2
        if ctx.need_fallback && self.inner.borrow().current_node.is_some() {
500
            // We test for is_some() because with a bad "SVG" file like this:
501
            //
502
            //    <xi:include href="blah"><xi:fallback>foo</xi:fallback></xi:include>
503
            //
504
            // at the point we get "foo" here, there is no current_node because
505
            // no nodes have been created before the xi:include.
506
2
            self.element_creation_characters(text);
507
        }
508
2
    }
509

            
510
49
    fn acquire(
511
        &self,
512
        href: Option<&str>,
513
        parse: Option<&str>,
514
        encoding: Option<&str>,
515
    ) -> Result<(), AcquireError> {
516
49
        if let Some(href) = href {
517
147
            let aurl = self
518
                .load_options
519
                .url_resolver
520
49
                .resolve_href(href)
521
51
                .map_err(|e| {
522
                    // FIXME: should AlloweUrlError::UrlParseError be a fatal error,
523
                    // not a resource error?
524
2
                    rsvg_log!(self.session, "could not acquire \"{}\": {}", href, e);
525
2
                    AcquireError::ResourceError
526
4
                })?;
527

            
528
            // https://www.w3.org/TR/xinclude/#include_element
529
            //
530
            // "When omitted, the value of "xml" is implied (even in
531
            // the absence of a default value declaration). Values
532
            // other than "xml" and "text" are a fatal error."
533
47
            match parse {
534
47
                None | Some("xml") => self.include_xml(&aurl),
535

            
536
1
                Some("text") => self.acquire_text(&aurl, encoding),
537

            
538
                Some(v) => Err(AcquireError::FatalError(format!(
539
                    "unknown 'parse' attribute value: \"{v}\""
540
                ))),
541
            }
542
47
        } else {
543
            // The href attribute is not present.  Per
544
            // https://www.w3.org/TR/xinclude/#include_element we
545
            // should use the xpointer attribute, but we do not
546
            // support that yet.  So, we'll just say, "OK" and not
547
            // actually include anything.
548
            Ok(())
549
        }
550
49
    }
551

            
552
46
    /// Includes `aurl` as XML, tracking the xinclude nesting depth around the
    /// nested parse.
    fn include_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
        self.increase_xinclude_depth(aurl)?;

        let outcome = self.acquire_xml(aurl);

        // The depth is restored whether or not the nested parse succeeded.
        self.decrease_xinclude_depth();

        outcome
    }
561

            
562
46
    fn increase_xinclude_depth(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
563
46
        let mut inner = self.inner.borrow_mut();
564

            
565
92
        if inner.xinclude_depth == MAX_XINCLUDE_DEPTH {
566
2
            Err(AcquireError::FatalError(format!(
567
                "exceeded maximum level of nested xinclude in {aurl}"
568
            )))
569
        } else {
570
44
            inner.xinclude_depth += 1;
571
44
            Ok(())
572
        }
573
46
    }
574

            
575
44
    fn decrease_xinclude_depth(&self) {
576
44
        let mut inner = self.inner.borrow_mut();
577
44
        inner.xinclude_depth -= 1;
578
44
    }
579

            
580
1
    fn acquire_text(&self, aurl: &AllowedUrl, encoding: Option<&str>) -> Result<(), AcquireError> {
581
1
        let binary = io::acquire_data(aurl, None).map_err(|e| {
582
            rsvg_log!(self.session, "could not acquire \"{}\": {}", aurl, e);
583
            AcquireError::ResourceError
584
        })?;
585

            
586
1
        let encoding = encoding.unwrap_or("utf-8");
587

            
588
1
        let encoder = Encoding::for_label_no_replacement(encoding.as_bytes()).ok_or_else(|| {
589
            AcquireError::FatalError(format!("unknown encoding \"{encoding}\" for \"{aurl}\""))
590
        })?;
591

            
592
1
        let utf8_data = encoder
593
1
            .decode_without_bom_handling_and_without_replacement(&binary.data)
594
1
            .ok_or_else(|| {
595
                AcquireError::FatalError(format!("could not convert contents of \"{aurl}\" from character encoding \"{encoding}\""))
596
            })?;
597

            
598
1
        self.element_creation_characters(&utf8_data);
599
1
        Ok(())
600
1
    }
601

            
602
44
    fn acquire_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
603
        // FIXME: distinguish between "file not found" and "invalid XML"
604

            
605
46
        let stream = io::acquire_stream(aurl, None).map_err(|e| match e {
606
2
            IoError::BadDataUrl => AcquireError::FatalError(String::from("malformed data: URL")),
607
            _ => AcquireError::ResourceError,
608
4
        })?;
609

            
610
        // FIXME: pass a cancellable
611
82
        self.parse_from_stream(&stream, None).map_err(|e| match e {
612
            LoadingError::Io(_) => AcquireError::ResourceError,
613
40
            LoadingError::XmlParseError(s) => AcquireError::FatalError(s),
614
            _ => AcquireError::FatalError(String::from("unknown error")),
615
40
        })
616
44
    }
617

            
618
    // Parses XML from a stream into an XmlState.
619
    //
620
    // This can be called "in the middle" of an XmlState's processing status,
621
    // for example, when including another XML file via xi:include.
622
1150
    fn parse_from_stream(
623
        &self,
624
        stream: &gio::InputStream,
625
        cancellable: Option<&gio::Cancellable>,
626
    ) -> Result<(), LoadingError> {
627
2300
        Xml2Parser::from_stream(self, self.load_options.unlimited_size, stream, cancellable)
628
1145
            .and_then(|parser| parser.parse())
629
2299
            .and_then(|_: ()| self.check_last_error())
630
1150
    }
631

            
632
    fn unsupported_xinclude_start_element(&self, _name: &QualName) -> Context {
633
        Context::UnsupportedXIncludeChild
634
    }
635

            
636
1127
    fn build_document(
637
        self,
638
        stream: &gio::InputStream,
639
        cancellable: Option<&gio::Cancellable>,
640
    ) -> Result<Document, LoadingError> {
641
1127
        self.parse_from_stream(stream, cancellable)?;
642

            
643
        // consume self, then consume inner, then consume document_builder by calling .build()
644

            
645
1129
        let XmlState { inner, .. } = self;
646
1099
        let mut inner = inner.into_inner();
647

            
648
        // Free the hash of XmlEntityPtr.  We cannot do this in Drop because we will
649
        // consume inner by destructuring it after the for() loop.
650
1114
        for (_key, entity) in inner.entities.drain() {
651
            unsafe {
652
7
                xmlFreeNode(entity);
653
            }
654
7
        }
655

            
656
        let XmlStateInner {
657
1099
            document_builder, ..
658
        } = inner;
659
1099
        document_builder.build()
660
1109
    }
661
}
662

            
663
/// Scratch space filled in while tokenizing the data of an XML processing instruction
#[derive(Default)]
struct ProcessingInstructionData {
    /// `(name, value)` pairs, in the order the tokenizer reported them.
    attributes: Vec<(String, String)>,

    /// Set when the tokenizer reported a parse error.
    error: bool,
}
669

            
670
/// Token sink that records attributes and parse errors into shared
/// `ProcessingInstructionData`.
struct ProcessingInstructionSink(Rc<RefCell<ProcessingInstructionData>>);
671

            
672
impl TokenSink for ProcessingInstructionSink {
673
3
    fn process_token(&mut self, token: Token) {
674
3
        let mut data = self.0.borrow_mut();
675

            
676
3
        match token {
677
3
            Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => {
678
9
                for a in &tag.attrs {
679
6
                    let name = a.name.local.as_ref().to_string();
680
6
                    let value = a.value.to_string();
681

            
682
6
                    data.attributes.push((name, value));
683
6
                }
684
3
            }
685

            
686
            Token::ParseError(_) => data.error = true,
687

            
688
            _ => (),
689
        }
690
3
    }
691
}
692

            
693
// https://www.w3.org/TR/xml-stylesheet/
694
//
695
// The syntax for the xml-stylesheet processing instruction we support
696
// is this:
697
//
698
//   <?xml-stylesheet href="uri" alternate="no" type="text/css"?>
699
//
700
// XML parsers just feed us the raw data after the target name
701
// ("xml-stylesheet"), so we'll create a mini-parser with a hackish
702
// element just to extract the data as attributes.
703
3
fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
704
6
    let pi_data = Rc::new(RefCell::new(ProcessingInstructionData {
705
3
        attributes: Vec::new(),
706
        error: false,
707
    }));
708

            
709
3
    let mut queue = BufferQueue::new();
710
3
    queue.push_back(format_tendril!("<rsvg-hack {} />", data));
711

            
712
3
    let sink = ProcessingInstructionSink(pi_data.clone());
713

            
714
3
    let mut tokenizer = XmlTokenizer::new(sink, XmlTokenizerOpts::default());
715
3
    tokenizer.run(&mut queue);
716

            
717
3
    let pi_data = pi_data.borrow();
718

            
719
6
    if pi_data.error {
720
        Err(())
721
    } else {
722
3
        Ok(pi_data.attributes.clone())
723
    }
724
3
}
725

            
726
1115
pub fn xml_load_from_possibly_compressed_stream(
727
    session: Session,
728
    document_builder: DocumentBuilder,
729
    load_options: Arc<LoadOptions>,
730
    stream: &gio::InputStream,
731
    cancellable: Option<&gio::Cancellable>,
732
) -> Result<Document, LoadingError> {
733
1115
    let state = XmlState::new(session, document_builder, load_options);
734

            
735
1115
    let stream = get_input_stream_for_loading(stream, cancellable)?;
736

            
737
1109
    state.build_document(&stream, cancellable)
738
1113
}
739

            
740
// Header of a gzip data stream
741
const GZ_MAGIC_0: u8 = 0x1f;
742
const GZ_MAGIC_1: u8 = 0x8b;
743

            
744
1118
fn get_input_stream_for_loading(
745
    stream: &InputStream,
746
    cancellable: Option<&Cancellable>,
747
) -> Result<InputStream, LoadingError> {
748
    // detect gzipped streams (svgz)
749

            
750
1118
    let buffered = BufferedInputStream::new(stream);
751
1118
    let num_read = buffered.fill(2, cancellable)?;
752
1114
    if num_read < 2 {
753
        // FIXME: this string was localized in the original; localize it
754
4
        return Err(LoadingError::XmlParseError(String::from(
755
            "Input file is too short",
756
        )));
757
    }
758

            
759
1110
    let buf = buffered.peek_buffer();
760
1109
    assert!(buf.len() >= 2);
761
2213
    if buf[0..2] == [GZ_MAGIC_0, GZ_MAGIC_1] {
762
5
        let decomp = ZlibDecompressor::new(ZlibCompressorFormat::Gzip);
763
5
        let converter = ConverterInputStream::new(&buffered, &decomp);
764
5
        Ok(converter.upcast::<InputStream>())
765
5
    } else {
766
1104
        Ok(buffered.upcast::<InputStream>())
767
    }
768
1111
}
769

            
770
#[cfg(test)]
mod tests {
    use super::*;

    /// The pseudo-attributes come back as name/value pairs.
    #[test]
    fn parses_processing_instruction_data() {
        let mut pairs =
            parse_xml_stylesheet_processing_instruction("foo=\"bar\" baz=\"beep\"").unwrap();

        // Sort by name so the comparison does not depend on the tokenizer's order.
        pairs.sort_by(|a, b| a.0.cmp(&b.0));

        let expected = vec![
            (String::from("baz"), String::from("beep")),
            (String::from("foo"), String::from("bar")),
        ];

        assert_eq!(pairs, expected);
    }
}