Source code for udapi.block.read.addsentences

"""AddSentences class is a reader for adding plain-text sentences."""
from udapi.core.basereader import BaseReader

# pylint: disable=abstract-method
# read_tree() does not need to be implemented here


class AddSentences(BaseReader):
    """A reader for adding plain-text sentences to already existing trees.

    The input files are expected to contain one sentence per line.
    No new trees are created; each line is attached to a tree
    of the document being processed.
    This is useful, e.g. if there are the original raw texts in a separate file:

    `cat in.conllu | udapy -s read.Conllu read.AddSentences files=in.txt > merged.conllu`
    """

    def __init__(self, zone='', into='text', **kwargs):
        """Args:
        zone: forwarded to the `BaseReader` constructor; `process_document`
            uses `self.zone` to select the tree within each bundle
            (presumably `BaseReader` stores it there -- confirm in the base class).
        into: name of the comment-attribute where the sentence should be stored.
            Default = text. That is the sentence is stored in `root.text` and
            in CoNLL-U it will look like e.g.
            `# text = John loves Mary.`
            Any other name than "text" is stored to `root.comment`,
            so e.g. `into=english_text` will result in a CoNLL-U with a comment line:
            `# english_text = John loves Mary.`
        """
        super().__init__(zone=zone, **kwargs)
        self.into = into

    @staticmethod
    def is_multizone_reader():
        """Can this reader read bundles which contain more zones?

        This implementation always returns False.
        """
        return False

    def process_document(self, document):
        """Attach one line of the current input file to each bundle of `document`.

        For every bundle, the tree in zone `self.zone` receives the next line
        (with trailing whitespace stripped), either as `root.text` or as
        an extra `<into> = <sentence>` line appended to `root.comment`.

        If no file handle is open and no further file is available,
        the reader is marked as finished and the document is left untouched.

        Raises:
            IOError: if the file ends before every bundle has got its sentence.
        """
        filehandle = self.filehandle
        if filehandle is None:
            filehandle = self.next_filehandle()
            if filehandle is None:
                self.finished = True
                return

        for bundle in document.bundles:
            # Read from the handle resolved above rather than looking up
            # self.filehandle again on every iteration.
            line = filehandle.readline()
            # readline() returns '' only at end of file; a blank line in the
            # input is '\n' and is stored as an empty sentence.
            if line == '':
                raise IOError('File does not have enough lines')
            root = bundle.get_tree(zone=self.zone)
            sentence = line.rstrip()
            if self.into == 'text':
                root.text = sentence
            else:
                root.comment += ' ' + self.into + " = " + sentence + "\n"
        self.finished = not self.files.has_next_file()