# Source code for udapi.block.transform.proj

"""Block Proj for (pseudo-)projectivization of trees à la Nivre & Nilsson (2005).

See http://www.aclweb.org/anthology/P/P05/P05-1013.pdf.
This block tries to replicate Malt parser's projectivization:
http://www.maltparser.org/userguide.html#singlemalt_proj
http://www.maltparser.org/optiondesc.html#pproj-marking_strategy

TODO: also implement the path and head+path strategies.

TODO: Sometimes it would be better (intuitively)
to lower the gap-node (if its whole subtree is in the gap
and if this does not cause more non-projectivities)
rather than to lift several nodes whose parent-edge crosses this gap.
We would need another label value (usually the lowering is of depth 1),
but the advantage is that reconstruction of lowered edges
during deprojectivization is simple and needs no heuristics.
"""
from udapi.core.block import Block


class Proj(Block):
    """Projectivize the trees à la Nivre & Nilsson (2005).

    Every non-projectively attached node is lifted (re-attached to its
    grandparent, great-grandparent, ...) until its attachment is projective.
    The ``udeprel`` of the original parent is then recorded on the lifted node,
    either in ``node.misc['pproj']`` or encoded into ``node.deprel``.
    """

    # Values of the ``label`` parameter understood by :meth:`mark`.
    _KNOWN_LABELS = ('misc', 'deprel')

    def __init__(self, strategy='head', lifting_order='deepest', label='misc', **kwargs):
        """Create the Proj block object.

        Args:
            strategy: marking strategy. It is only stored on the instance;
                none of the methods of this class reads it, so the behaviour
                is always the one described above.
            lifting_order: ``'shortest'`` orders the lifts by the word-order
                distance between a node and its parent (shortest first);
                any other value orders them by the number of lifts each
                node needs (most lifts first).
            label: where to store the mark: ``'misc'`` or ``'deprel'``.
            **kwargs: passed on to ``Block.__init__``.

        Raises:
            ValueError: if ``label`` is neither ``'misc'`` nor ``'deprel'``.
        """
        super().__init__(**kwargs)
        self.lifting_order = lifting_order
        self.strategy = strategy
        self.label = label
        # Fail fast: mark() is called only after lift() has re-attached a node,
        # so detecting a bad label there would leave the tree half-modified.
        if label not in self._KNOWN_LABELS:
            raise ValueError('Unknown parameter label=%s' % label)

    def process_tree(self, tree):
        """Lift all non-projectively attached nodes of ``tree``.

        The candidates are collected first and then processed in ascending
        order of the key computed by :meth:`nonproj_info`. A node that has
        become projective in the meantime is left untouched by :meth:`lift`.
        """
        nonprojs = [self.nonproj_info(n) for n in tree.descendants if n.is_nonprojective()]
        # Sort on the numeric key only, so that nodes are never compared.
        for _, node in sorted(nonprojs, key=lambda info: info[0]):
            self.lift(node)

    def nonproj_info(self, node):
        """Return a ``(sort_key, node)`` pair for a non-projective ``node``.

        With ``lifting_order='shortest'`` the key is the absolute difference
        of ``ord`` between the node and its parent. Otherwise the key is the
        negated number of lifts needed to make the node projective, so that
        nodes needing the most lifts sort first. In that case the node is
        temporarily re-attached to measure the depth and its original parent
        is restored before returning.
        """
        if self.lifting_order == 'shortest':
            return (abs(node.ord - node.parent.ord), node)
        orig_parent = node.parent
        try:
            # NOTE(review): assumes the caller passes only non-projective
            # nodes, which presumably guarantees that a grandparent exists.
            node.parent = node.parent.parent
            depth = 1
            while node.is_nonprojective():
                node.parent = node.parent.parent
                depth += 1
        finally:
            # Always undo the trial lifting, even if something above failed.
            node.parent = orig_parent
        return (-depth, node)

    def lift(self, node):
        """Re-attach ``node`` to successive ancestors until it is projective.

        If at least one lift was needed, the node is marked (see :meth:`mark`)
        with the ``udeprel`` of its original parent. A node that is already
        projective is neither moved nor marked.
        """
        orig_parent = node.parent
        depth = 0
        while node.is_nonprojective():
            node.parent = node.parent.parent
            depth += 1
        if depth == 0:
            return
        self.mark(node, orig_parent.udeprel)

    def mark(self, node, label):
        """Store ``label`` on ``node`` as configured by ``self.label``.

        * ``'misc'``: ``node.misc['pproj']`` is set to ``label``.
        * ``'deprel'``: ``node.deprel`` becomes ``udeprel:sdeprel+label``,
          built from the node's current ``udeprel`` and ``sdeprel``. The colon
          and plus sign are always emitted, whatever ``sdeprel`` contains.

        Raises:
            ValueError: if ``self.label`` has any other value (possible only
                if the attribute was changed after construction).
        """
        if self.label == 'misc':
            node.misc['pproj'] = label
        elif self.label == 'deprel':
            node.deprel = '%s:%s+%s' % (node.udeprel, node.sdeprel, label)
        else:
            raise ValueError('Unknown parameter label=%s' % self.label)