# Source code for udapi.block.ud.gl.to2

"""Block ud.gl.To2 UD_Galician-specific conversion of UDv1 to UDv2

Author: Martin Popel
"""
from udapi.core.block import Block

# Ranking of UPOS tags used when one child of a node has to be promoted to
# become the new head: the child with the highest score wins.  Tags that are
# not listed here are looked up with a default score of 0.
ADP_HEAD_PREFERENCES = dict(
    NOUN=10,    # most preferred new head
    PRON=9,
    ADJ=8,
    VERB=8,     # same rank as ADJ
    PUNCT=-10,  # ranked below every unlisted tag (default 0)
)


class To2(Block):
    """Block for fixing the remaining cases (before ud.Convert1to2) in UD_Galician."""

    def process_node(self, node):
        """Repair UD_Galician v1.4 annotation around a single node.

        Three fixes are attempted in sequence, each guarded by its own
        condition on the node's current ``deprel``:

        1. ``cop``: either swap the copula and its complement (lemmas
           "ser"/"estar") or relabel the complement as ``xcomp``.
        2. ``case`` with children: make the adposition depend on its
           best-ranked child, or flag the node in ``misc['ToDo']`` when
           its UPOS is not ADP/CONJ/PART.
        3. ``punct`` with children: make the punctuation depend on its
           best-ranked child so that it is left without children.

        Args:
            node: the node to inspect and possibly modify in place; the
                method reads/writes its ``deprel``, ``parent`` and ``misc``
                and reads its ``upos`` and ``children``.
        """
        # UD_Galician v1.4 incorrectly uses deprel=cop not for the copula verb,
        # but for its complement (typically ADJ), and the copula is the head.
        if node.deprel == 'cop':
            copula = node.parent
            # In UDv2 discussions it has been decided that only a limited set of
            # verbs can be annotated as copula. For Spanish, "estar" was
            # questionable, but accepted. I guess in Galician it is the same.
            # The rest (considerar, resultar, quedar,...) should not be
            # annotated as copulas. Luckily, in UD_Galician v1.4 they are
            # governing the clause, so no change of topology is needed,
            # just deprel=xcomp.
            if copula.lemma in ('ser', 'estar'):
                # The complement takes over the copula's position first, so
                # that attaching the copula below it cannot create a cycle.
                node.parent = copula.parent
                for cop_child in copula.children:
                    cop_child.parent = node
                copula.parent = node
                node.deprel = copula.deprel
                copula.deprel = 'cop'
            else:
                node.deprel = 'xcomp'

        # Prepositions should depend on the noun, not vice versa.
        # This is easy to fix, but unfortunately, there are many nodes with
        # deprel=case which are not actually prepositions or case markers,
        # but standard NOUNs, VERBs etc. These are left as ToDo.
        if node.deprel == 'case' and node.children:
            if node.upos not in ('ADP', 'CONJ', 'PART'):
                node.misc['ToDo'] = 'case-upos'
            else:
                self._promote_best_child(node)

        # Punctuation should have no children.
        if node.deprel == 'punct' and node.children and node.upos == 'PUNCT':
            self._promote_best_child(node)

    @staticmethod
    def _promote_best_child(node):
        """Make the best-ranked child of ``node`` the new head of ``node``.

        Children are ranked by ``ADP_HEAD_PREFERENCES`` (UPOS tags not listed
        there score 0); among equally ranked children the one that comes
        first in ``node.children`` wins.  The winner is attached to the
        former parent of ``node``; ``node`` and all its remaining children
        are then attached to the winner.  No ``deprel`` is modified.

        Args:
            node: a node with at least one child (callers check this).
        """
        # reverse=True keeps the sort stable, so ties retain their order.
        ranked = sorted(
            node.children,
            key=lambda child: ADP_HEAD_PREFERENCES.get(child.upos, 0),
            reverse=True,
        )
        new_head, *siblings = ranked
        # Lift the winner above `node` before re-attaching `node` under it.
        new_head.parent = node.parent
        node.parent = new_head
        for sibling in siblings:
            sibling.parent = new_head