# Source code for udapi.core.bundle

"""Bundle class represents one sentence."""

import re

from udapi.core.root import Root
from udapi.block.write.textmodetrees import TextModeTrees

# A zone label is a (possibly empty) run of lowercase letters and hyphens,
# optionally followed by an underscore and one or more alphanumerics or hyphens
# (for instance "en" or "en_udpipe"). The empty string is a valid zone.
VALID_ZONE_REGEX = re.compile(r"^[a-z-]*(_[A-Za-z0-9-]+)?$")


class Bundle(object):
    """Bundle represents one sentence in a UD document.

    A bundle contains one or more trees.
    More trees are needed e.g. in case of parallel treebanks
    where each tree represents a translation of the sentence in a different language.
    Trees in one bundle are distinguished by a zone label.
    """

    # NOTE: the "number" slot is not initialized in __init__; within this class
    # it is only assigned by remove(), which renumbers the document's bundles.
    __slots__ = ["trees", "number", "_bundle_id", "_document"]

    def __init__(self, bundle_id=None, document=None):
        """Create an empty bundle.

        Args:
            bundle_id: identifier of this bundle (may be None).
            document: the document this bundle belongs to (may be None).
        """
        self.trees = []
        self._bundle_id = bundle_id
        self._document = document

    @property
    def bundle_id(self):
        """ID of this bundle."""
        return self._bundle_id

    @bundle_id.setter
    def bundle_id(self, bundle_id):
        """Set the bundle ID and refresh the sent_id of every tree in the bundle.

        A tree's sent_id is the bundle ID, followed by '/' and the zone only
        when the zone is non-empty. This is the same rule add_tree() applies,
        so a tree with the empty zone never ends up with a trailing slash,
        no matter how many trees the bundle holds.
        """
        self._bundle_id = bundle_id
        for tree in self.trees:
            # pylint: disable=protected-access
            if tree.zone:
                tree._sent_id = bundle_id + '/' + tree.zone
            else:
                tree._sent_id = bundle_id

    def __str__(self):
        if self._bundle_id is None:
            return 'bundle without id'
        return f"bundle id='{self._bundle_id}'"

    def __iter__(self):
        """Iterate over the trees of this bundle."""
        return iter(self.trees)

    @property
    def document(self):
        """Returns the document in which the bundle is contained."""
        return self._document

    def get_tree(self, zone=''):
        """Returns the tree root whose zone is equal to zone.

        Raises:
            Exception: if no tree or more than one tree has the given zone.
        """
        matching = [tree for tree in self.trees if tree.zone == zone]
        if len(matching) == 1:
            return matching[0]
        if not matching:
            raise Exception("No tree with zone=" + zone + " in the bundle")
        raise Exception("More than one tree with zone=" + zone + " in the bundle")

    def has_tree(self, zone=''):
        """Does this bundle contain a tree with a given zone?"""
        return any(tree.zone == zone for tree in self.trees)

    def create_tree(self, zone=None):
        """Return the root of a newly added tree with a given zone."""
        root = Root()
        root._zone = zone  # pylint: disable=protected-access
        self.add_tree(root)
        return root

    def check_zone(self, new_zone):
        """Raise an exception if the zone is invalid or already exists.

        Raises:
            ValueError: if new_zone does not match VALID_ZONE_REGEX or is 'all'.
            Exception: if a tree with this zone is already in the bundle.
        """
        # fullmatch() rather than match(): "$" also matches just before a
        # trailing newline, so match() would accept a zone such as "en\n".
        if not VALID_ZONE_REGEX.fullmatch(new_zone):
            raise ValueError(
                f"'{new_zone}' is not a valid zone name ({VALID_ZONE_REGEX.pattern})")
        if new_zone == 'all':
            raise ValueError("'all' cannot be used as a zone name")
        if self.has_tree(new_zone):
            raise Exception(f"Tree with zone '{new_zone}' already exists in {self}")

    def add_tree(self, root):
        """Add an existing tree to the bundle.

        A zone of None is normalized to the empty string. The zone is validated
        with check_zone() before the bundle is modified. If the bundle has a
        (truthy) ID, the tree's sent_id is derived from it.

        Returns:
            The root that was added.
        """
        # pylint: disable=protected-access
        if root.zone is None:
            root._zone = ''
        self.check_zone(root.zone)
        if self._bundle_id:
            root._sent_id = self._bundle_id
            if root.zone:
                root._sent_id += '/' + root.zone
        root.bundle = self
        self.trees.append(root)
        # Document-level data stored on the tree under '__doc__' is moved
        # into the document's json; this requires self._document to be set.
        doc_json = root.json.get('__doc__')
        if doc_json:
            self._document.json.update(doc_json)
            del root.json['__doc__']
        return root

    def remove(self):
        """Remove a bundle from the document.

        The remaining bundles of the document are renumbered from 0.
        """
        self._document.bundles = [bundle for bundle in self._document.bundles if bundle != self]
        for i, bundle in enumerate(self._document.bundles):
            bundle.number = i

    def address(self):
        """Return bundle_id or '?' if missing."""
        return self._bundle_id if self._bundle_id is not None else '?'

    def draw(self, **kwargs):
        """Pretty print the trees using TextModeTrees.

        All keyword arguments are passed to the TextModeTrees constructor.
        """
        TextModeTrees(**kwargs).process_bundle(self)

    @property
    def nodes(self):
        """An iterator over all nodes (excluding empty nodes) in all trees in this bundle."""
        for tree in self:
            # tree.descendants is slightly slower than tree._descendants,
            # but it seems safer, see the comment in udapi.core.block.Block.process_tree().
            yield from tree.descendants