Source code for udapi.block.util.eval

"""Eval is a special block for evaluating code given by parameters."""
import collections
import pprint
import re

from udapi.core.block import Block

pp = pprint.pprint  # pylint: disable=invalid-name

# This block needs exec; variables such as `this` are not unused, they are provided for the exec.
# pylint: disable=exec-used,unused-variable



[docs]
class Eval(Block):
    r"""Special block for evaluating code given by parameters.

    Tricks:
    `pp` is a shortcut for `pprint.pprint`.
    `$.` is a shortcut for `this.` which is a shortcut for `node.`, `tree.` etc.
    depending on context.
    `count_X` is a shortcut for `self.count[X]` where X is any string (\S+)
    and `self.count` is a `collections.Counter()` instance.
    Thus you can use code like

    `util.Eval node='count_$.upos +=1; count_"TOTAL" +=1' end="pp(self.count)"`
    """

    # So many arguments is the design of this block (consistent with Perl Udapi).
    # pylint: disable=too-many-arguments,too-many-instance-attributes
    def __init__(self, doc=None, bundle=None, tree=None, node=None, start=None, end=None,
                 before_doc=None, after_doc=None, before_bundle=None, after_bundle=None,
                 coref_mention=None, coref_entity=None, empty_nodes=False,
                 expand_code=True, **kwargs):
        """Remember the code snippets to be executed by the individual hooks.

        Args:
            start, end: code stored for `process_start` and `process_end`.
            before_doc, doc, after_doc: code stored for the document-level hooks.
            before_bundle, bundle, after_bundle: code stored for `process_bundle`.
            tree, node: code stored for `process_tree`.
            coref_entity, coref_mention: code stored for the coreference loop
                in `process_document`.
            empty_nodes: if true, `process_tree` iterates over
                `tree.descendants_and_empty` instead of `tree.descendants`.
            expand_code: if false, `expand_eval_code` returns the code unchanged.
            **kwargs: passed on to the parent class constructor.
        """
        super().__init__(**kwargs)

        # Code for the very beginning and the very end.
        self.start, self.end = start, end

        # Document-level code.
        self.before_doc, self.doc, self.after_doc = before_doc, doc, after_doc

        # Bundle-level code.
        self.before_bundle, self.bundle, self.after_bundle = before_bundle, bundle, after_bundle

        # Tree-level and node-level code.
        self.tree, self.node = tree, node

        # Coreference-level code.
        self.coref_entity, self.coref_mention = coref_entity, coref_mention

        # Switches influencing how the code is expanded and which nodes are visited.
        self.empty_nodes, self.expand_code = empty_nodes, expand_code

        # The counter behind the `count_X` shortcut.
        self.count = collections.Counter()


[docs]
    def expand_eval_code(self, to_eval):
        """Expand '$.' to 'this.', useful for oneliners."""
        if not self.expand_code:
            return to_eval
        to_eval = re.sub(r'count_(\S+)', r'self.count[\1]', to_eval)
        return to_eval.replace('$.', 'this.')



[docs]
    def before_process_document(self, document):
        """Execute the `before_doc` code, if any was given.

        The code can refer to `document` also as `this` or `doc`.
        """
        if self.before_doc:
            # `this` and `doc` look unused, but they are here for the exec'd code.
            this = doc = document
            exec(self.expand_eval_code(self.before_doc))



[docs]
    def after_process_document(self, document):
        """Execute the `after_doc` code, if any was given.

        The code can refer to `document` also as `this` or `doc`.
        """
        if self.after_doc:
            # `this` and `doc` look unused, but they are here for the exec'd code.
            this = doc = document
            exec(self.expand_eval_code(self.after_doc))



[docs]
    def process_document(self, document):
        """Execute the document-level code, visit the bundles and execute the coreference code.

        First, the `doc` code is executed with `this` and `doc` set to `document`.
        Then `process_bundle` is called on each bundle, but only if some bundle-level,
        tree-level or node-level code was given.
        Finally, the `coref_entity` code is executed for each item of `doc.coref_entities`
        (available as `this` and `entity`) and the `coref_mention` code for each item of
        `entity.mentions` (available as `this` and `mention`).
        """
        # The local variables look unused, but they are here for the exec'd code.
        this = doc = document
        if self.doc:
            exec(self.expand_eval_code(self.doc))

        # Skip the loop over bundles entirely if there is nothing to execute below document level.
        if self.bundle or self.before_bundle or self.after_bundle or self.tree or self.node:
            for bundle in doc.bundles:
                # TODO if self._should_process_bundle(bundle):
                self.process_bundle(bundle)

        if self.coref_entity or self.coref_mention:
            for entity in doc.coref_entities:
                if self.coref_entity:
                    this = entity
                    exec(self.expand_eval_code(self.coref_entity))
                if self.coref_mention:
                    for mention in entity.mentions:
                        # `entity` stays available to the mention-level code as well.
                        this = mention
                        exec(self.expand_eval_code(self.coref_mention))



[docs]
    def process_bundle(self, bundle):
        """Execute the bundle-level code and visit the trees of `bundle`.

        The `before_bundle` and `bundle` codes are executed first, then `process_tree`
        is called on each tree accepted by `self._should_process_tree` (only if some
        tree-level or node-level code was given) and finally `after_bundle` is executed.
        The code can use the variables `this` (the bundle), `bundle`, `doc` and `document`.
        """
        # Extract variables, so they can be used in eval code
        document = doc = bundle.document
        this = bundle

        if self.before_bundle:
            exec(self.expand_eval_code(self.before_bundle))

        if self.bundle:
            exec(self.expand_eval_code(self.bundle))

        if self.tree or self.node:
            trees = bundle.trees
            for tree in trees:
                if self._should_process_tree(tree):
                    self.process_tree(tree)

        # By now, `trees` and `tree` may be set as well (see the loop above).
        if self.after_bundle:
            exec(self.expand_eval_code(self.after_bundle))



[docs]
    def process_tree(self, tree):
        """Execute the `tree` code on `tree` and the `node` code on each of its nodes.

        The tree-level code can use the variables `this`, `tree` and `root` (all three
        refer to the tree), `bundle`, `doc` and `document`.
        The node-level code sees the same variables, except that `this` is the current
        node, which is available also as `node`.
        """
        # Extract variables so they can be used in eval code
        bundle = tree.bundle
        doc = document = bundle.document
        this = tree
        root = tree

        if self.tree:
            exec(self.expand_eval_code(self.tree))

        if self.node:
            # With `empty_nodes` set, iterate over `descendants_and_empty` (presumably
            # including also the empty nodes - confirm in the tree API).
            nodes = tree.descendants_and_empty if self.empty_nodes else tree.descendants
            for node in nodes:
                this = node
                exec(self.expand_eval_code(self.node))



[docs]
    def process_start(self):
        """Execute the `start` code, if any was given.

        No `this` variable is set up here, but the code can use `self`.
        """
        if self.start:
            exec(self.expand_eval_code(self.start))



[docs]
    def process_end(self):
        """Execute the `end` code, if any was given.

        No `this` variable is set up here, but the code can use `self`,
        e.g. `pp(self.count)` prints the counter filled via the `count_X` shortcut.
        """
        if self.end:
            exec(self.expand_eval_code(self.end))