# Source code for udapi.block.corefud.guessspan

from udapi.core.block import Block

class GuessSpan(Block):
    """Block corefud.GuessSpan heuristically fills mention spans, while keeping mention.head"""

    def process_coref_mention(self, mention):
        """Rebuild ``mention.words`` from the subtree of ``mention.head``.

        The span starts as the head plus all its descendants.  Empty nodes
        that would otherwise leave a gap in that span are then added:

        * an empty node lying within the span (including the boundary)
          whose enhanced parent (``deps[i]['parent']``) is already a
          mention word;
        * an empty node strictly inside the span whose neighbouring
          non-empty nodes on both sides are mention words.

        Empty nodes sitting in a gap caused by non-empty nodes are not added.
        The result is written to ``mention.words`` in sorted order; nothing
        is returned.
        """
        head = mention.head
        span = head.descendants(add_self=True)
        # TODO add heuristics from corefud.PrintMentions almost_forest=1

        # Boundaries of the region in which empty nodes are considered.
        first, last = span[0], span[-1]
        if first.is_empty():
            lower = int(first.ord)
        else:
            lower = first.ord - 1
        upper = int(last.ord) + 1

        tree_root = head.root
        extras = []
        for node in tree_root.empty_nodes:
            if node in span:
                continue
            if node.ord > upper:
                # Past the span: stop scanning the remaining empty nodes.
                break
            if node.ord <= lower:
                continue

            # Case 1: some enhanced parent of the empty node is in the mention.
            if any(dep['parent'] in span for dep in node.deps):
                extras.append(node)
                continue

            # Case 2: only for nodes strictly inside the span; both
            # neighbouring non-empty nodes must belong to the mention.
            if not (lower + 1 < node.ord < upper - 1):
                continue
            position = int(node.ord)
            nonempty = tree_root.descendants
            left_neighbour = nonempty[position - 1]
            right_neighbour = nonempty[position]
            if left_neighbour in span and right_neighbour in span:
                extras.append(node)

        mention.words = sorted(span + extras)