StreamReader.vala 13.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/* StreamReader.vala
 *
 * Copyright (C) 2019  Daniel Espinosa <esodan@gmail.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *      Daniel Espinosa <esodan@gmail.com>
 */
21
22
using Gee;

23
24
25
26
/**
 * Errors raised by {@link GXml.StreamReader} while scanning a stream.
 */
public errordomain GXml.StreamReaderError {
  /** The content read from the stream is not a document this reader accepts. */
  INVALID_DOCUMENT_ERROR
}

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/**
 * Parser using an on-the-fly, post-parsing technique.
 *
 * This parser takes or creates a {@link Document}, searches for
 * and adds the root XML element as an {@link Element},
 * then searches for and adds all child {@link Element}s.
 *
 * The root element is added without any attributes and
 * without any content or children; the same happens with all
 * of its children.
 *
 * After calling {@link read} or {@link read_document},
 * the root's and its children's content and attributes
 * are stored as a string in a {@link GLib.MemoryOutputStream}
 * object at {@link GXml.Element.read_buffer}.
 *
 * If you want all attributes and the children's children,
 * you should call {@link GXml.Element.parse_buffer},
 * which executes the children's {@link GXml.Element.parse_buffer}
 * asynchronously.
 */
48
public class GXml.StreamReader : GLib.Object {
49
  uint8[] buf = new uint8[2];
50
  Gee.HashMap<string,GXml.Collection> root_collections = new Gee.HashMap<string,GXml.Collection> ();
51
52
  DataInputStream _stream = null;
  DomDocument _document = null;
53
  bool start = true;
54
55
56
57
  /**
   * The stream where data is read from
   * to parse and fill {@link GXml.Element.read_buffer}
   */
58
  public DataInputStream stream { get { return _stream; } }
59
60
61
  /**
   * Use it to cancel the parse and fill process
   */
62
  public Cancellable? cancellable { get; set; }
63
64
65
  /**
   * The {@link DomDocument} that parsed nodes are added to.
   */
66
  public DomDocument document { get { return _document; } }
67
  /**
   * Creates a new {@link StreamReader} that reads from //istream//.
   *
   * The given stream is wrapped in a {@link GLib.DataInputStream}; this
   * creation method does not set {@link document}.
   *
   * @param istream source of the data to parse
   */
  public StreamReader (InputStream istream) {
    // Reset the current-byte buffer before any byte has been read.
    for (int i = 0; i < buf.length; i++) {
      buf[i] = '\0';
    }
    _stream = new DataInputStream (istream);
  }
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  /**
   * Creates a new {@link StreamReader} that reads from //istream// and
   * uses the given document as {@link document}.
   *
   * @param istream source of the data to parse
   * @param document document the parsed nodes are added to
   */
  public StreamReader.for_document (InputStream istream, DomDocument document) {
    // Stream wrapping and buffer reset are shared with the default creation method.
    this (istream);
    _document = document;
  }
  /**
   * Parses the content of the stream into {@link document}.
   *
   * If {@link document} has not been set, a new {@link Document} is
   * created first.
   *
   * @return the value of {@link document} after parsing
   * @throws GLib.Error if reading from the stream or parsing fails
   */
  public DomDocument read () throws GLib.Error {
    bool missing = (_document == null);
    if (missing) {
      _document = new Document ();
    }
    internal_read ();
    DomDocument result = _document;
    return result;
  }
  /**
   * Uses //doc// as {@link document} and parses the content of the
   * stream into it.
   *
   * @param doc document that replaces the current {@link document}
   * @throws GLib.Error if reading from the stream or parsing fails
   */
  public void read_document (DomDocument doc) throws GLib.Error {
    _document = doc;
    internal_read ();
  }
108
109
110
111
  // Reads the next byte from the stream, stores it as the current byte
  // (buf[0]) and returns it. Errors raised by the stream are propagated,
  // in which case the current byte is left untouched.
  private inline uint8 read_byte () throws GLib.Error {
    uint8 next = stream.read_byte (cancellable);
    buf[0] = next;
    return next;
  }
112
  // Reads from the stream up to, but not including, the first occurrence of
  // any character in str and returns what was read.
  //
  // DataInputStream.read_upto () returns null when the stream is already
  // exhausted; an empty string is returned in that case so callers never
  // receive null through this non-nullable return type.
  private inline string read_upto (string str) throws GLib.Error {
    string? chunk = stream.read_upto (str, -1, null, cancellable);
    if (chunk == null) {
      return "";
    }
    return chunk;
  }
  // Returns the current byte (buf[0], last set by read_byte ()) cast to a char.
  private inline char cur_char () {
    return (char) buf[0];
  }
  // Returns the current byte, i.e. the value held in buf[0].
  private inline uint8 cur_byte () {
    uint8 current = buf[0];
    return current;
  }
122
  // Parses the whole stream into the current document: the nodes before the
  // root element, the root element itself and any nodes following it.
  private void internal_read () throws GLib.Error {
    start = true;
    parse_doc_nodes ();
    read_root_element ();
    // parse_doc_nodes () always begins by reading one byte and requires it
    // to be '<'. White space following the root element is therefore
    // consumed here, peeking at the stream's buffer rather than reading, so
    // that the '<' of a following node is still available to
    // parse_doc_nodes ().
    while (true) {
      if (_stream.get_available () == 0 && _stream.fill (-1, cancellable) == 0) {
        // End of stream: there is nothing after the root element.
        return;
      }
      unowned uint8[] pending = _stream.peek_buffer ();
      char next = (char) pending[0];
      if (!is_space (next) && next != '\r') {
        break;
      }
      read_byte ();
    }
    parse_doc_nodes ();
  }

134
135
136
    /**
     * Parses the document-level nodes found outside the root element:
     * the XML declaration, processing instructions and comments.
     *
     * One byte is read first; it must be '<'. Nodes are then consumed until
     * the start of an element is found or the stream ends. When an element
     * start is found, the method returns with the first character of the
     * element's name as the current byte.
     *
     * @throws GLib.Error if the content is malformed or a node can't be
     * added to {@link document}
     */
    public void parse_doc_nodes () throws GLib.Error
    {
        try {
            read_byte ();
        } catch {
            // Nothing left to read.
            return;
        }
        while (true) {
            if (cur_char () != '<') {
                throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: expected '<' character"));
            }
            try {
                read_byte ();
            } catch {
                return;
            }
            if (is_space (cur_char ())) {
                throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected space character before node's name"));
            }
            if (cur_char () == '?') {
                // The first "<?" construct handled while 'start' is set is
                // treated as the XML declaration; later ones are processing
                // instructions.
                if (start) {
                    parse_xml_dec ();
                    start = false;
                } else {
                    parse_pi_dec ();
                }
            } else if (cur_char () == '!') {
                parse_comment_dec ();
            } else {
                // Start of an element: leave it to the caller.
                return;
            }
            read_text_node ();
            debug ("Stopped at: %c", cur_char ());
            // When read_text_node () runs out of input it returns without
            // updating the current byte. Detect that case here instead of
            // reporting the stale byte as a malformed document.
            if (cur_char () != '<'
                && _stream.get_available () == 0
                && _stream.fill (-1, cancellable) == 0) {
                return;
            }
        }
    }
177

178
  // Reads the root element; read_element () is asked to handle its child
  // elements as well (children = true, no parent).
  private GXml.Element read_root_element () throws GLib.Error {
    GXml.Element root = read_element (true);
    return root;
  }
181
  // Reads one element from the stream.
  //
  // On entry the '<' has already been consumed and the current byte is the
  // first character of the element's name. The raw text of the element
  // (start tag, content and close tag) is copied into a memory stream that
  // is assigned to the element's read_buffer.
  //
  // children: when true and parent is null (the root element), every child
  //           element found is read through a nested call and appended to
  //           the element, through a matching collection or element
  //           property when one exists. The children's text is not copied
  //           into this element's buffer.
  // parent:   element the one being read belongs to, or null for the root;
  //           must implement GXml.Object when given.
  //
  // Returns the element read. Throws DomError.INVALID_NODE_TYPE_ERROR for an
  // invalid parent or a badly constructed collection, and propagates stream
  // errors (including reaching the end of the stream before the close tag).
  //
  // NOTE: no nesting depth is tracked; the scan stops at the first close tag
  // whose name equals this element's name.
  private GXml.Element read_element (bool children, GXml.Element? parent = null) throws GLib.Error {
    if (parent != null && !(parent is GXml.Object)) {
      throw new DomError.INVALID_NODE_TYPE_ERROR
                  (_("Parent '%s' is not implementing GXml.Object interface"), parent.get_type ().name ());
    }
    GXml.Element e = null;
    var raw = new MemoryOutputStream.resizable ();
    var dbuf = new DataOutputStream (raw);
    var oname_buf = new MemoryOutputStream (new uint8[1024]);
    var name_buf = new DataOutputStream (oname_buf);

    dbuf.put_byte ('<');
    dbuf.put_byte (cur_byte ());

    name_buf.put_byte (cur_byte ());
    dbuf.put_byte (read_byte ());
    bool is_empty = false;
    // Collect the element's name. Every byte read here has already been
    // copied to dbuf at the moment it was read.
    while (cur_char () != '>') {
      if (is_space (cur_char ()) || cur_char () == '\r') {
        break;
      }
      if (cur_char () == '/') {
        // Empty element without attributes. The '/' is already in dbuf, so
        // only the remainder up to and including '>' is copied.
        string? rest = stream.read_upto (">", -1, null, cancellable);
        if (rest != null) {
          dbuf.put_string (rest);
        }
        dbuf.put_byte (read_byte ());
        is_empty = true;
        break;
      }
      name_buf.put_byte (cur_byte (), cancellable);
      dbuf.put_byte (read_byte ());
    }
    // NUL-terminate so the name buffer can be read as a string.
    name_buf.put_byte ('\0', cancellable);
    string name = (string) oname_buf.get_data ();

    if (document.document_element == null) {
      e = ((GXml.Document) document).search_root_element_property ();
    }
    if (e == null) {
      e = (GXml.Element) document.create_element (name);
      if (document.document_element == null) {
        document.append_child (e);
      }
    }
    if (document.document_element == e && parent == null) {
      // Root element: make sure every collection property exists and
      // register it under the lower-cased name of its items.
      foreach (ParamSpec pspec in
                ((GXml.Object) e).get_property_element_list ()) {
        if (!(pspec.value_type.is_a (typeof (Collection)))) continue;
        Collection col;
        Value vc = Value (pspec.value_type);
        e.get_property (pspec.name, ref vc);
        col = vc.get_object () as Collection;
        if (col == null) {
          col = GLib.Object.new (pspec.value_type,
                            "element", e) as Collection;
          vc.set_object (col);
          e.set_property (pspec.name, vc);
        }
        if (col.items_type == GLib.Type.INVALID
            || !(col.items_type.is_a (typeof (GXml.Object)))) {
          throw new DomError.INVALID_NODE_TYPE_ERROR
                      (_("Collection '%s' hasn't been constructed properly: items' type property was not set at construction time or set to invalid type"), col.get_type ().name ());
        }
        if (col.items_name == "" || col.items_name == null) {
          throw new DomError.INVALID_NODE_TYPE_ERROR
                      (_("Collection '%s' hasn't been constructed properly: items' name property was not set at construction time"), col.get_type ().name ());
        }
        if (col.element == null || !(col.element is GXml.Object)) {
          throw new DomError.INVALID_NODE_TYPE_ERROR
                      (_("Collection '%s' hasn't been constructed properly: element property was not set at construction time"), col.get_type ().name ());
        }
        root_collections.set (col.items_name.down (), col);
      }
    }
    e.read_buffer = raw;
    if (!is_empty && cur_char () != '>') {
      // The name ended at white space, so the start tag carries attributes.
      // Copy the rest of the start tag and detect a "/>" terminator; a '>'
      // inside a quoted attribute value does not end the tag.
      char quote = '\0';
      while (true) {
        uint8 prev = cur_byte ();
        dbuf.put_byte (read_byte ());
        char c = cur_char ();
        if (quote != '\0') {
          if (c == quote) {
            quote = '\0';
          }
        } else if (c == '"' || c == '\'') {
          quote = c;
        } else if (c == '>') {
          is_empty = (prev == '/');
          break;
        }
      }
    }
    if (is_empty) {
      return e;
    }
    while (true) {
      read_byte ();
      if (cur_char () != '<') {
        dbuf.put_byte (cur_byte (), cancellable);
        continue;
      }
      read_byte ();
      if (cur_char () == '/') {
        dbuf.put_byte ('<');
        dbuf.put_byte (cur_byte ());
        string? closetag = stream.read_upto (">", -1, null, cancellable);
        if (closetag != null) {
          dbuf.put_string (closetag);
        }
        dbuf.put_byte (read_byte ());
        // White space is allowed between the name and '>' in a close tag.
        if (closetag != null && closetag.strip () == name) {
          return e;
        }
      } else if (children && parent == null) {
        GXml.Element ce = read_element (false, e);
        var col = root_collections.get (ce.local_name.down ());
        if (col != null) {
          // The child belongs to a collection: create an item of the
          // collection's type and hand over the text just read.
          var cobj = GLib.Object.new (col.items_type,
                                "owner-document", document) as Element;
          cobj.read_buffer = ce.read_buffer;
          e.append_child (cobj);
          col.append (cobj);
        } else {
          // Otherwise look for a non-collection element property whose
          // element name matches the child's name.
          foreach (ParamSpec pspec in
                ((GXml.Object) e).get_property_element_list ()) {
            if (pspec.value_type.is_a (typeof (Collection))) continue;
            var obj = GLib.Object.new (pspec.value_type,
                                  "owner-document", document) as Element;
            if (obj == null) continue;
            if (obj.local_name.down ()
                   == ce.local_name.down ()) {
              Value v = Value (pspec.value_type);
              v.set_object (obj);
              e.set_property (pspec.name, v);
              obj.read_buffer = ce.read_buffer;
              ce = obj;
            }
          }
          e.append_child (ce);
        }
      } else {
        // Nested markup is kept verbatim as part of this element's text.
        dbuf.put_byte ('<', cancellable);
        dbuf.put_byte (cur_byte (), cancellable);
      }
    }
  }
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
  // Skips over the XML declaration: advances to its closing '>' and then
  // reads one more byte. Running out of input at any point ends the method
  // silently.
  //
  // NOTE(review): parse_pi_dec () and parse_comment_dec () stop ON the '>',
  // whereas this method stops one byte past it; confirm this is intended,
  // since the caller's read_text_node () reads a further byte afterwards.
  private void parse_xml_dec () throws GLib.Error  {
    try {
      while (cur_char () != '>') {
        read_byte ();
      }
      read_byte ();
    } catch {
      return;
    }
  }
  // Parses a comment and appends it to the document.
  //
  // On entry the current byte is the '!' of "<!--"; on a normal return it is
  // the '>' closing the comment. The comment ends only at "-->", so a '>'
  // inside the comment text is kept as text, and "<!---->" yields an empty
  // comment.
  //
  // Throws StreamReaderError.INVALID_DOCUMENT_ERROR when "<!" is not
  // followed by "--" or when three consecutive '-' are found; stream errors
  // while reading the comment are propagated.
  private void parse_comment_dec () throws GLib.Error  {
    try {
      read_byte ();
    } catch {
        return;
    }
    if (cur_char () != '-') {
        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
    }
    read_byte ();
    if (cur_char () != '-') {
        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
    }
    var comment = new GLib.StringBuilder ("");
    // Number of consecutive '-' seen but not yet added to the comment text.
    int dashes = 0;
    while (true) {
      read_byte ();
      char c = cur_char ();
      if (c == '-') {
        dashes++;
        if (dashes > 2) {
          throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
        }
        continue;
      }
      if (c == '>' && dashes == 2) {
        break;
      }
      // The pending dashes were not part of the terminator: keep them.
      for (; dashes > 0; dashes--) {
        comment.append_c ('-');
      }
      comment.append_c (c);
    }
    var c = document.create_comment (comment.str);
    document.append_child (c);
  }
  // Parses a processing instruction and appends it to the document.
  //
  // On entry the current byte is the '?' of "<?"; on a normal return it is
  // the '>' of the closing "?>". The target runs up to the first white space
  // or '?'; everything after it, up to "?>", is the data. A '?' that is not
  // followed by '>' is kept as part of the data, and a target directly
  // followed by "?>" yields empty data.
  //
  // NOTE(review): the white space separating target and data is kept at the
  // start of the data, as before; confirm whether callers expect it trimmed.
  //
  // Running out of input ends the method silently without creating a node.
  // Throws StreamReaderError.INVALID_DOCUMENT_ERROR when the target is empty.
  private void parse_pi_dec () throws GLib.Error
  {
    try {
      read_byte ();
    } catch {
        return;
    }
    var str = new GLib.StringBuilder ("");
    while (!is_space (cur_char ()) && cur_char () != '?') {
      str.append_c (cur_char ());
      try {
        read_byte ();
      } catch {
          return;
      }
    }
    if (str.len == 0) {
      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid Processing Instruccion's target declaration"));
    }
    string target = str.str;
    str.assign ("");
    while (true) {
      if (cur_char () == '?') {
        try {
          read_byte ();
        } catch {
            return;
        }
        if (cur_char () == '>') {
          break;
        }
        // Not the terminator: the '?' belongs to the data. The byte just
        // read is examined on the next iteration.
        str.append_c ('?');
        continue;
      }
      str.append_c (cur_char ());
      try {
        read_byte ();
      } catch {
          return;
      }
    }
    var pi = document.create_processing_instruction (target, str.str);
    document.append_child (pi);
  }
  // Reads the next byte and, if it starts a run of white space, collects the
  // whole run and appends it to the document as a text node. On a normal
  // return the current byte is the first non-space byte found. If the stream
  // ends at any point the method returns without creating a node.
  private void read_text_node () throws GLib.Error  {
    try {
      read_byte ();
    } catch {
      return;
    }
    if (!is_space (cur_char ())) {
      return;
    }
    var blanks = new GLib.StringBuilder ("");
    do {
      blanks.append_c (cur_char ());
      try {
        read_byte ();
      } catch {
        return;
      }
    } while (is_space (cur_char ()));

    var node = document.create_text_node (blanks.str);
    document.append_child (node);
  }
421
  // Tells whether c is XML white space: space (0x20), tab (0x9),
  // line feed (0xA) or carriage return (0xD).
  private bool is_space (char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
  }
}