# Source code for udapi.block.ud.fixpseudocop

"""Block to fix annotation of verbs that are currently treated as copulas
   but should be treated as normal verbs (with secondary predication).
"""
from udapi.core.block import Block
import logging
import re

class FixPseudoCop(Block):
    """Re-annotate pseudo-copulas (optionally also spurious auxiliaries) as verbs.

    A word whose lemma is on the configured list and which is attached as
    ``cop`` takes over its parent's position and relation; the former parent
    is re-attached under it as ``xcomp``.
    """

    def __init__(self, lemmas, noncopaux=False, **kwargs):
        """Create the ud.FixPseudoCop block instance.

        Args:
            lemmas: comma-separated list of lemmas of the pseudocopulas
                that should be fixed
            noncopaux: do the same for non-copula auxiliaries with the
                given lemma
            **kwargs: passed on unchanged to the parent class constructor
        """
        super().__init__(**kwargs)
        self.lemmas = lemmas.split(',')
        self.noncopaux = noncopaux

    def process_node(self, node):
        """Fix one node if its lemma is among the configured pseudocopulas.

        Args:
            node: the node to inspect; its ``lemma``, ``udeprel``, ``deprel``,
                ``upos`` and ``parent`` attributes are read and may be
                modified in place.
        """
        # Words with other lemmas are never touched.
        if node.lemma not in self.lemmas:
            return

        relation = node.udeprel
        # Besides spurious copulas, spurious auxiliaries are handled as well
        # when noncopaux is set.
        needs_promotion = relation == 'cop' or (self.noncopaux and relation == 'aux')

        if not needs_promotion:
            # The word may still be wrongly tagged AUX even though it is not
            # attached as "cop" or "aux".
            if self.noncopaux and node.upos == 'AUX':
                node.upos = 'VERB'
            return

        # Swap the two words: the pseudocopula moves up to where its parent
        # was attached and inherits the parent's relation; the former parent
        # becomes the secondary predicate (xcomp) of the pseudocopula.
        former_head = node.parent
        node.parent = former_head.parent
        node.deprel = former_head.deprel
        former_head.parent = node
        former_head.deprel = "xcomp"
        # TODO: DEPS (if they exist) are not updated here.

        # It was tagged AUX as a copula; as a full predicate it is a VERB.
        node.upos = "VERB"

        # Dependents of the former head that modify the whole clause move to
        # the promoted verb; dependents that modify the word itself (noun,
        # adjective) stay where they are.
        # obl is borderline: it could modify an adjective rather than a clause.
        # obj and iobj should not occur in copular clauses but sometimes do
        # with pseudocopulas: "I declare him handsome."
        # NOTE(review): re.match anchors only at the start of the string, so
        # the alternative "cc" also accepts any relation that merely begins
        # with "cc" (e.g. "ccomp") -- confirm that this is intended.
        clause_level = "(nsubj|csubj|advmod|advcl|obj|iobj|obl|aux|mark|punct|cc|expl|dislocated|vocative|discourse|parataxis)"
        for dependent in former_head.children:
            if re.match(clause_level, dependent.udeprel):
                dependent.parent = node