Source code for udapi.core.tests.test_coref

#!/usr/bin/env python3

import os
import unittest
import udapi
from udapi.block.read.conllu import Conllu as ConlluReader


class TestCoref(unittest.TestCase):
    """Tests for loading and editing coreference annotation in udapi documents."""

    @staticmethod
    def _sample_path():
        """Return the path of the sample CoNLL-U file in ``data`` next to this module."""
        here = os.path.dirname(__file__)
        return os.path.join(here, 'data', 'fr-democrat-dev-sample.conllu')

    def test_load(self):
        """Read the sample file split into documents and check the last document.

        The checks cover the number of documents, the single coreference
        entity of the last document, the coreference attributes of its first
        node, and the ``loaded_from`` entry of the document metadata.
        """
        sample = self._sample_path()
        documents = ConlluReader(files=sample, split_docs=True).read_documents()
        self.assertEqual(len(documents), 2)

        last_doc = documents[-1]
        # No assertion is attached to draw(); the test only needs it not to raise.
        last_doc.draw()

        entities = last_doc.coref_entities
        self.assertEqual(len(entities), 1)
        only_entity = entities[0]
        self.assertEqual(only_entity.eid, 'e36781')

        first = next(last_doc.nodes)
        self.assertEqual(len(first.coref_entities), 1)
        self.assertEqual(len(first.coref_mentions), 1)
        self.assertEqual(first.coref_entities[0], entities[0])
        self.assertEqual(last_doc.meta["loaded_from"], sample)

    def test_edits(self):
        """Create entities and mentions in a loaded document and modify them.

        The checks cover mention creation from words and from a span,
        normalization of an assigned word list, head re-assignment, ordering
        of mentions within an entity, and moving mentions to another entity.
        """
        document = udapi.Document(self._sample_path())
        word1 = next(document.nodes)
        word2 = word1.next_node

        person = document.create_coref_entity(etype='person')
        self.assertEqual(person.etype, 'person')
        self.assertEqual(len(person.mentions), 0)

        # head will be automatically set to words[0]
        short = person.create_mention(words=[word1])
        self.assertEqual(len(person.mentions), 1)
        self.assertEqual(short, person.mentions[0])
        self.assertEqual(short.entity, person)
        self.assertEqual(short.head, word1)
        self.assertEqual(short.words, [word1])
        self.assertEqual(short.span, '1')

        # intentional duplicates and wrong order
        short.words = [word2, word1, word1]
        self.assertEqual(short.words, [word1, word2])
        self.assertEqual(short.span, '1-2')

        short.head = word2
        self.assertEqual(short.head, word2)

        # mention.words will be filled according to the span
        long = person.create_mention(head=word2, span='1-3')
        self.assertEqual(len(person.mentions), 2)
        # 1-3 should go before 1-2
        self.assertEqual(person.mentions[0], long)
        self.assertEqual(person.mentions[1], short)
        self.assertTrue(long < short)
        word3 = word2.next_node
        self.assertEqual(long.words, [word1, word2, word3])

        # Move both mentions to a fresh entity, one after the other.
        other = document.create_coref_entity()
        for mention in (short, long):
            mention.entity = other
            self.assertEqual(mention.entity.eid, other.eid)
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()