Source code for udapi.block.corefud.fixcorefud02

from udapi.core.block import Block
import udapi.core.coref
import itertools

NEW_ETYPE = {
    "misc": "other",
    "date": "time",
    "loc": "place",
    "location": "place",
    "per": "person",
    "org": "organization",
    "_": "",
    }

[docs] class FixCorefUD02(Block): """Fix errors in CorefUD 0.2 for release of CorefUD 1.0."""
[docs] def process_document(self, doc): # For GUM if doc.meta['global.Entity'] == 'entity-GRP-infstat-MIN-coref_type-identity': doc.meta['global.Entity'] = 'eid-etype-head-other-infstat-minspan-identity' for entity in doc.coref_entities: if entity.etype: # Harmonize etype. # If gen/spec is distinguished, store it in all mentions' other['gstype']. etype = entity.etype.lower() if etype.startswith('spec') or etype.startswith('gen'): gstype = 'gen' if etype.startswith('gen') else 'spec' for m in entity.mentions: m.other['gstype'] = gstype if etype == 'spec': etype = 'other' etype = etype.replace('gen', '').replace('spec', '').replace('.', '') etype = NEW_ETYPE.get(etype, etype) # etype="APPOS" is used only in NONPUBL-CorefUD_English-OntoNotes. # Apposition is a mention-based rather than entity-based attribute. # We don't know which of the mentions it should be assigned, but let's expect all non-first. # UD marks appositions with deprel appos, so once someone checks it is really redunant, # TODO we can delete the appos mention attribute. if etype == 'appos': etype = '' for mention in entity.mentions[1:]: mention.other['appos'] = '1' entity.etype = etype for mention in entity.mentions: # Harmonize bridge relation labels for bridge in mention.bridging: rel = bridge.relation.lower() if rel.endswith('-inv'): rel = 'i' + rel.replace('-inv', '') rel = rel.replace('-', '') rel = rel.replace('indirect_', '') bridge.relation = rel