Source code for udapi.block.write.vislcg

"""Vislcg class is a writer for the VISL-cg format."""
from udapi.core.basewriter import BaseWriter

# https://dev.w3.org/html5/html-author/charref
ESCAPE_TABLE = {
    '§': '§sect.',
    '"': '§quot.',
    '#': '§num.',
    ';': '§semi.',
    '=': '§equals.',
    '(': '§lpar.',
    ')': '§rpar.',
    '|': '§verbar.',
}


[docs] class Vislcg(BaseWriter): """A writer of files in the VISL-cg format, suitable for VISL Constraint Grammer Parser. See https://visl.sdu.dk/visl/vislcg-doc.html Usage: ``udapy write.Vislcg < in.conllu > out.vislcg`` Example output:: "<Қыз>" "қыз" n nom @nsubj #1->3 "<оның>" "ол" prn pers p3 sg gen @nmod:poss #2->3 "<қарындасы>" "қарындас" n px3sp nom @parataxis #3->8 "е" cop aor p3 sg @cop #4->3 "<,>" "," cm @punct #5->8 "<ол>" "ол" prn pers p3 sg nom @nsubj #6->8 "<бес>" "бес" num @nummod #7->8 "<жаста>" "жас" n loc @root #8->0 "е" cop aor p3 sg @cop #9->8 "<.>" "." sent @punct #10->8 Example input:: # text = Қыз оның қарындасы, ол бес жаста. 1 Қыз қыз _ n nom 3 nsubj _ _ 2 оның ол _ prn pers|p3|sg|gen 3 nmod:poss _ _ 3-4 қарындасы _ _ _ _ _ _ _ _ 3 қарындасы қарындас _ n px3sp|nom 8 parataxis _ _ 4 _ е _ cop aor|p3|sg 3 cop _ _ 5 , , _ cm _ 8 punct _ _ 6 ол ол _ prn pers|p3|sg|nom 8 nsubj _ _ 7 бес бес _ num _ 8 nummod _ _ 8-9 жаста _ _ _ _ _ _ _ _ 8 жаста жас _ n loc 0 root _ _ 9 _ е _ cop aor|p3|sg 8 cop _ _ 10 . . _ sent _ 8 punct _ _ """
[docs] def process_tree(self, tree): # Print the line with forms and optional upos tags and feats. for token in tree.token_descendants: print('"<%s>"' % self._escape(token.form)) words = token.words print('\t' + self._node(words[0])) for nonfirst_mwt_word in words[1:]: print('\t\t' + self._node(nonfirst_mwt_word)) print('')
@staticmethod def _escape(string): return ''.join(ESCAPE_TABLE.get(c, c) for c in string) def _node(self, node): attrs = ['"%s"' % self._escape(node.lemma)] attrs.append(self._escape(node.xpos)) feats = str(node.feats).replace('|', ' ') if feats != '_': attrs.append(self._escape(feats)) attrs.append('@' + node.deprel) attrs.append('#%d->%d' % (node.ord, node.parent.ord)) return ' '.join(attrs)