1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 """
16 This module defines three classes, extending the xml.etree.ElementTree
17 module:
18
19 - Tree: Linguistic trees, with support for:
20 convenient nested indexing t[1,1,1] instead of t[1][1][1];
21 leaves() to return the tree's leaf nodes;
22 ...
23
24 - ParentedTree: Each element keeps track of a single parent
25 pointer, returned by the element's `parent()` method. Elements
26 may have at most one parent; i.e., subtrees may not be shared.
27 Any attempt to use a single element as a child for multiple
28 parents will generate a ValueError.
29
30 - MultiParentedTree: Each element keeps track of a list of
31 parent pointers, returned by the element's `parents()` method.
32 Elements may have zero or more parents; i.e., subtrees may be
33 shared. If a single Element is used as multiple children of the
34 same parent, then that parent will appear multiple times in the
35 parents list.
36
37 Note: Mixing of etree implementations is not supported. I.e., you
38 should never construct a tree that combines elements from ParentedTree
39 with elements from MultiParentedTree, or elements from either of these
40 implementations with elements from any other implementation. Doing so
41 may result in incorrect parent pointers and ValueError exceptions.
42 """
43
44 from nltk_lite.etree import ElementTree as ET
45
46 __all__ = ['Tree',
47 'ParentedTree',
48 'MultiParentedTree']
49
50
51
52
53
55 """
56 An abstract base class for Elements.
57 - Adds original NLTK functionality.
58 """
59
61 return "<%s %s at %x>" % (self.__class__.__name__,
62 self.tag, id(self))
63
64
65
66
67
78
90
101
102
103
104
105
122
124 raise NotImplementedError
125
127 """
128 @return: The height of this tree. The height of a tree
129 containing no children is 1; the height of a tree
130 containing only leaves is 2; and the height of any other
131 tree is one plus the maximum of its children's
132 heights.
133 @rtype: C{int}
134 """
135 max_child_height = 0
136 for child in self:
137 try:
138 max_child_height = max(max_child_height, child.height())
139 except AttributeError:
140 max_child_height = max(max_child_height, 1)
141 return 1 + max_child_height
142
144 raise NotImplementedError
145
147 raise NotImplementedError
148
150 """
151 Generate all the subtrees of this tree, optionally restricted
152 to trees matching the filter function.
153 @type filter: C{function}
154 @param filter: the function to filter all local trees
155 """
156 if not filter or filter(self):
157 yield self
158 for child in self:
159 try:
160 for subtree in child.subtrees(filter):
161 yield subtree
162 except AttributeError:
163 pass
164
165 - def copy(self, deep=False):
166 raise NotImplementedError
167
168 - def freeze(self, leaf_freezer=None):
169 raise NotImplementedError
170
171
172
173
174
176 """
177 An abstract base class for (multi)parented element tree Elements.
178
179 - Whenever a new child is added, L{_setparent} is called,
180 which should update that child's parent pointer to point
181 at self.
182
183 - Whenever a child is removed, L{_delparent} is called, which
184 should remove the child's parent pointer to self.
185 """
187 return "<%s %s at %x>" % (self.__class__.__name__,
188 self.tag, id(self))
189
190
191
192
193
195 """
196 Update C{child}'s parent pointer to point to self.
197 """
198 raise AssertionError, 'Abstract base class'
199
201 """
202 Remove self from C{child}'s parent pointer.
203 """
204 raise AssertionError, 'Abstract base class'
205
206
207
208
209
210
211
215
219
224
229
233
237
238 - def insert(self, index, element):
241
245
250
251
253 """
254 A specialized version of etree.ElementTree.Element that keeps
255 track of a single parent pointer per element.
256
257 Each _ParentedElement may have at most one parent. In particular,
258 subtrees may not be shared. Any attempt to reuse a single
259 _ParentedElement as a child of more than one parent (or as
260 multiple children of the same parent) will cause a ValueError
261 exception to be raised.
262
263 _ParentedElements should never be used in the same tree as other
264 Element implementation classes. Mixing Element implementations
265 may result in incorrect parent pointers and in C{ValueError}
266 exceptions.
267 """
271
274
276 assert is_parented_element(element)
277 if element._parent is not None:
278 raise ValueError, '%r already has a parent' % element
279 element._parent = self
280
282 assert is_parented_element(element)
283 assert element._parent == self
284 element._parent = None
285
288
290 return ET.iselement(element) and hasattr(element, '_parent')
291
293 """
294 A specialized version of etree.ElementTree.Element that keeps
295 track of a list of parent pointers for each element.
296
297 Each _MultiParentedElement may have zero or more parents. In
298 particular, subtrees may be shared. If a single
299 _MultiParentedElement is used as multiple children of the same
300 parent, then that parent will appear multiple times in the parents
301 list.
302
303 _MultiParentedElements should never be used in the same tree as
304 other Element implementation classes. Mixing Element
305 implementations may result in incorrect parent pointers and in
306 C{ValueError} exceptions.
307 """
311
313 return tuple(self._parents)
314
318
323
326
328 return ET.iselement(element) and hasattr(element, '_parents')
329
330
331
332
334 """
335 Instances of this class can be used as drop-in replacements for
336 the xml.etree.ElementTree module.
337 """
339 self._Element = ElementClass
340
341 - def Element(self, tag, attrib={}, **extra):
345
346
347
348
349
350
351
352
359
360
361
362
363
364
365
366
367 SubElement = staticmethod(ET.SubElement)
368 QName = ET.QName
369 iselement = staticmethod(ET.iselement)
370 dump = staticmethod(ET.dump)
371 tostring= staticmethod(ET.tostring)
372
373
374
375
376
377
378
379
381 if element_factory is None:
382 element_factory = self._Element
383 return ET.TreeBuilder(element_factory)
384
389
390 XMLParser = XMLTreeBuilder
391
392
393
394
395
396
397
398
399
400
404 - def __init__(self, etbase, element, file):
407 - def parse(self, source, parser=None):
408 if not parser:
409 parser = self.__default_parser_class()
410 ET.ElementTree.parse(self, source, parser)
411
412
413
414
415
416
417
418
419
420
421
422
427
434
435 PI = ProcessingInstruction
436
437 - def parse(self, source, parser=None):
441
442 - def XML(self, text):
446
457
458 fromstring = XML
459
460
461 Tree = ElementTreeImplementation(_AbstractElement)
462 ParentedTree = ElementTreeImplementation(_ParentedElement)
463 MultiParentedTree = ElementTreeImplementation(_MultiParentedElement)
464
465
488 print spine(table)
489
490 def spine_with_attribs(elt):
491 if elt is None: return []
492 label = elt.tag
493 if elt.attrib:
494 attrib = ['%s=%s' % i for i in elt.attrib.items()]
495 label += '<%s>' % ', '.join(attrib)
496 return [label] + spine_with_attribs(elt.parent())
497 print spine_with_attribs(table)
498
499 print PT.tostring(sent)
500 del sent[1][1][1]
501
502 print PT.tostring(sent)
503
504
505 if __name__ == '__main__':
506 demo()
507