Source code for udapi.block.util.eval

"""Eval is a special block for evaluating code given by parameters."""
import collections
import pprint
import re

from udapi.core.block import Block

# Short alias for pprint.pprint; being a module-level name, it is visible
# to the code snippets executed by the Eval block below (e.g. end="pp(self.count)").
pp = pprint.pprint  # pylint: disable=invalid-name

# This block needs exec, and variables such as `this` are not unused:
# they are provided for the exec'd code.
# pylint: disable=exec-used,unused-variable


class Eval(Block):
    r"""Special block for evaluating code given by parameters.

    Each parameter (`doc`, `bundle`, `tree`, `node`, `start`, `end`, ...) holds
    a snippet of Python code which is executed with `exec` at the matching
    stage of processing.  The snippet can read the local variables of the
    method that runs it (e.g. `this`, `doc`, `bundle`, `tree`, `node`),
    `self`, and the module-level names such as `pp`.

    Tricks:
    `pp` is a shortcut for `pprint.pprint`.
    `$.` is a shortcut for `this.` which is a shortcut for `node.`, `tree.` etc.
    depending on context.
    `count_X` is a shortcut for `self.count[X]` where X is any string (\S+)
    and `self.count` is a `collections.Counter()` instance.
    Thus you can use code like
    `util.Eval node='count_$.upos +=1; count_"TOTAL" +=1' end="pp(self.count)"`
    """

    # So many arguments is the design of this block (consistent with Perl Udapi).
    # pylint: disable=too-many-arguments,too-many-instance-attributes
    def __init__(self, doc=None, bundle=None, tree=None, node=None, start=None, end=None,
                 before_doc=None, after_doc=None, before_bundle=None, after_bundle=None,
                 coref_mention=None, coref_entity=None, empty_nodes=False,
                 expand_code=True, **kwargs):
        """Store the code snippets and options.

        Args:
            doc, bundle, tree, node: code executed for each document, bundle,
                tree and node, respectively.
            start, end: code executed in `process_start` / `process_end`.
            before_doc, after_doc: code executed in
                `before_process_document` / `after_process_document`.
            before_bundle, after_bundle: code executed at the beginning / end
                of `process_bundle`.
            coref_mention, coref_entity: code executed for each item of
                `doc.coref_entities` and each of its `mentions`.
            empty_nodes: if true, the `node` code iterates over
                `tree.descendants_and_empty` instead of `tree.descendants`.
            expand_code: if false, the `$.` and `count_X` shortcuts are
                not expanded.
            **kwargs: passed on to the parent class constructor.
        """
        super().__init__(**kwargs)
        self.doc = doc
        self.bundle = bundle
        self.tree = tree
        self.node = node
        self.start = start
        self.end = end
        self.before_doc = before_doc
        self.after_doc = after_doc
        self.before_bundle = before_bundle
        self.after_bundle = after_bundle
        self.coref_mention = coref_mention
        self.coref_entity = coref_entity
        self.empty_nodes = empty_nodes
        self.expand_code = expand_code
        self.count = collections.Counter()
        # Cache of compiled snippets, see _compiled_code().
        self._eval_code_cache = {}

    def expand_eval_code(self, to_eval):
        """Expand '$.' to 'this.', useful for oneliners.

        Also rewrites `count_X` to `self.count[X]`.
        Returns `to_eval` unchanged when `self.expand_code` is false.
        """
        if not self.expand_code:
            return to_eval
        to_eval = re.sub(r'count_(\S+)', r'self.count[\1]', to_eval)
        return to_eval.replace('$.', 'this.')

    def _compiled_code(self, to_eval):
        """Return something `exec` can run for the snippet `to_eval`.

        String snippets are expanded and compiled only once per instance, so
        the per-node (per-mention, ...) loops do not re-run the regex and
        re-parse the same source text on every iteration.
        """
        if not isinstance(to_eval, str):
            # Not source text: hand the expanded value to exec as it is.
            return self.expand_eval_code(to_eval)
        # expand_code is part of the key because it changes the expansion.
        key = (to_eval, bool(self.expand_code))
        code = self._eval_code_cache.get(key)
        if code is None:
            # '<string>' is the file name exec itself reports for string input.
            code = compile(self.expand_eval_code(to_eval), '<string>', 'exec')
            self._eval_code_cache[key] = code
        return code

    def before_process_document(self, document):
        """Execute the `before_doc` code (if any); `this` and `doc` are the document."""
        if self.before_doc:
            this = doc = document
            exec(self._compiled_code(self.before_doc))

    def after_process_document(self, document):
        """Execute the `after_doc` code (if any); `this` and `doc` are the document."""
        if self.after_doc:
            this = doc = document
            exec(self._compiled_code(self.after_doc))

    def process_document(self, document):
        """Execute the `doc` code, then process bundles and coreference items.

        Bundles are visited only if some bundle-, tree- or node-level code was
        given.  `doc.coref_entities` is iterated only if `coref_entity` or
        `coref_mention` code was given; `this` is then the entity or the
        mention, respectively.
        """
        this = doc = document
        if self.doc:
            exec(self._compiled_code(self.doc))

        if self.bundle or self.before_bundle or self.after_bundle or self.tree or self.node:
            for bundle in doc.bundles:
                # TODO if self._should_process_bundle(bundle):
                self.process_bundle(bundle)

        if self.coref_entity or self.coref_mention:
            for entity in doc.coref_entities:
                if self.coref_entity:
                    this = entity
                    exec(self._compiled_code(self.coref_entity))
                if self.coref_mention:
                    for mention in entity.mentions:
                        this = mention
                        exec(self._compiled_code(self.coref_mention))

    def process_bundle(self, bundle):
        """Execute `before_bundle` and `bundle` code, process trees, then `after_bundle`.

        Trees are visited (subject to `self._should_process_tree`) only if
        `tree` or `node` code was given.
        """
        # Extract variables, so they can be used in eval code
        document = doc = bundle.document
        this = bundle

        if self.before_bundle:
            exec(self._compiled_code(self.before_bundle))

        if self.bundle:
            exec(self._compiled_code(self.bundle))

        if self.tree or self.node:
            trees = bundle.trees
            for tree in trees:
                if self._should_process_tree(tree):
                    self.process_tree(tree)

        if self.after_bundle:
            exec(self._compiled_code(self.after_bundle))

    def process_tree(self, tree):
        """Execute the `tree` code for `tree` and the `node` code for each of its nodes."""
        # Extract variables so they can be used in eval code
        bundle = tree.bundle
        doc = document = bundle.document
        this = tree
        root = tree

        if self.tree:
            exec(self._compiled_code(self.tree))

        if self.node:
            nodes = tree.descendants_and_empty if self.empty_nodes else tree.descendants
            for node in nodes:
                this = node
                exec(self._compiled_code(self.node))

    def process_start(self):
        """Execute the `start` code (if any)."""
        if self.start:
            exec(self._compiled_code(self.start))

    def process_end(self):
        """Execute the `end` code (if any)."""
        if self.end:
            exec(self._compiled_code(self.end))