Source code for udapi.block.corefud.marksamesubspan

from udapi.core.block import Block
import udapi.core.coref
import itertools

class MarkSameSubSpan(Block):
    """Find pairs of mentions in a tree whose spans share a subspan.

    A mention's ``span`` string is split on commas; a pair of mentions is
    reported when at least one of the resulting pieces is identical in both.

    Args:
        same_entity_only: Skip pairs whose ``entity`` attributes differ.
        both_discontinuous: Skip pairs unless both span strings contain
            a comma.
        print_form: Describe a mention by the forms of its words instead of
            by its span string.
        nested_only: Skip pairs unless the word set of one mention is a
            subset of the word set of the other.
        log: Print one line to stdout for every reported pair.
        mark: Set ``misc['Mark']`` on the words of every reported pair.
        **kwargs: Passed on to ``Block.__init__``.
    """

    def __init__(self, same_entity_only=False, both_discontinuous=False,
                 print_form=False, nested_only=False, log=True, mark=True,
                 **kwargs):
        super().__init__(**kwargs)
        self.same_entity_only = same_entity_only
        self.both_discontinuous = both_discontinuous
        self.nested_only = nested_only
        self.print_form = print_form
        self.log = log
        self.mark = mark

    def _print(self, mention):
        """Return ``"<eid>:<description>"`` for `mention`.

        The description is the space-joined word forms when ``print_form``
        is set, otherwise the mention's span string.
        """
        if self.print_form:
            description = ' '.join(w.form for w in mention.words)
        else:
            description = mention.span
        return f"{mention.entity.eid}:{description}"

    def process_tree(self, tree):
        """Report (and optionally mark) mention pairs sharing a subspan.

        Mentions are collected from ``coref_mentions`` of every node in
        ``tree.descendants_and_empty`` and every unordered pair is checked
        against the configured filters.
        """
        # De-duplicate with a dict rather than a set: dicts keep insertion
        # order, so the pairs (and therefore the log lines and the choice of
        # which mention carries the descriptive Mark) follow the order in
        # which mentions were encountered instead of an arbitrary hash order.
        mentions = list(dict.fromkeys(
            mention
            for node in tree.descendants_and_empty
            for mention in node.coref_mentions
        ))

        # combinations() yields nothing for fewer than two mentions.
        for mention_a, mention_b in itertools.combinations(mentions, 2):
            if self.same_entity_only and mention_a.entity != mention_b.entity:
                continue
            if self.both_discontinuous and (
                    ',' not in mention_a.span or ',' not in mention_b.span):
                continue
            if self.nested_only:
                words_a, words_b = set(mention_a.words), set(mention_b.words)
                if not (words_a <= words_b or words_b <= words_a):
                    continue
            # No comma-separated piece of the span in common -> not a match.
            if set(mention_a.span.split(',')).isdisjoint(mention_b.span.split(',')):
                continue

            if self.mark:
                for word in mention_a.words + mention_b.words:
                    word.misc['Mark'] = 1
                # The first word of the first mention gets the pair
                # description, overwriting the plain 1 set just above.
                mention_a.words[0].misc['Mark'] = (
                    f"{self._print(mention_a)}+{self._print(mention_b)}")
            if self.log:
                print(f"same-subspan mentions at {tree.sent_id}: "
                      f"{self._print(mention_a)} + {self._print(mention_b)}")