Source code for udapi.block.ud.setspaceafterfromtext

"""Block SetSpaceAfterFromText for setting of SpaceAfter=No according to the sentence text.

Usage:
udapy -s ud.SetSpaceAfterFromText < in.conllu > fixed.conllu

Author: Martin Popel
"""
import logging

from udapi.core.block import Block


class SetSpaceAfterFromText(Block):
    """Set or clear the ``SpaceAfter=No`` MISC attribute so it agrees with the sentence text."""

    def process_tree(self, root):
        """Align ``SpaceAfter`` on the tokens of ``root`` with ``root.text``.

        The stored sentence text is consumed from the left, one token form at
        a time.  A token whose form is directly followed by a non-whitespace
        character gets ``SpaceAfter=No``; otherwise the attribute is removed.

        If a token form does not match the start of the remaining text, a
        warning is logged and processing stops; tokens handled before that
        point keep their updated value.  Text left over after the last token
        is reported with a warning as well.

        Args:
            root: tree root providing ``text``, ``compute_text()``,
                ``empty_nodes`` and ``token_descendants``.

        Raises:
            ValueError: if ``root.text`` is ``None``.
        """
        # Empty nodes cannot have 'SpaceAfter=No', so make sure the file is valid.
        for empty in root.empty_nodes:
            del empty.misc['SpaceAfter']

        remaining = root.text
        if remaining is None:
            raise ValueError('Tree %s has no text, cannot use ud.SetSpaceAfterFromText' % root)

        # When the text derived from the tree already equals the stored text,
        # there is nothing to adjust.
        if remaining == root.compute_text():
            return

        for token in root.token_descendants:
            form = token.form
            if not remaining.startswith(form):
                # Text and tokens diverge here; give up on the rest of the tree.
                logging.warning('Node %s does not match text "%s"', token, remaining[:20])
                return

            remaining = remaining[len(form):]
            if remaining and not remaining[0].isspace():
                # Next character belongs to the following token: no space in between.
                token.misc['SpaceAfter'] = 'No'
            else:
                # End of text or whitespace follows: drop the attribute and skip the gap.
                del token.misc['SpaceAfter']
                remaining = remaining.lstrip()

        if remaining:
            logging.warning('Extra text "%s" in tree %s', remaining, root)