Source code for udapi.cli

#!/usr/bin/env python3
import os
import gc
import sys
import atexit
import logging
import argparse

from udapi.core.run import Run

# Build the command-line interface of udapy.
argparser = argparse.ArgumentParser(
    usage="udapy [optional_arguments] scenario",
    description=(
        "udapy - Python interface to Udapi - API for Universal Dependencies\n\n"
        "Examples of usage:\n"
        "  udapy -s read.Sentences udpipe.En < in.txt > out.conllu\n"
        "  udapy -T < sample.conllu | less -R\n"
        "  udapy -HAM ud.MarkBugs < sample.conllu > bugs.html\n"
    ),
    epilog="See http://udapi.github.io",
    # Keep the hand-made line breaks of the description and the help texts.
    formatter_class=argparse.RawTextHelpFormatter,
)

# Plain on/off switches as (short flag, long flag, help text),
# registered in the order in which they should appear in --help.
for _short, _long, _help in (
    ("-q", "--quiet",
     "Warning, info and debug messages are suppressed. Only fatal errors are reported."),
    ("-v", "--verbose",
     "Warning, info and debug messages are printed to the STDERR."),
    ("-s", "--save",
     "Add write.Conllu to the end of the scenario"),
    ("-T", "--save_text_mode_trees",
     "Add write.TextModeTrees color=1 to the end of the scenario"),
    ("-H", "--save_html",
     "Add write.TextModeTreesHtml color=1 to the end of the scenario"),
    ("-A", "--save_all_attributes",
     "Add attributes=form,lemma,upos,xpos,feats,deprel,misc (to be used after -T and -H)"),
    ("-C", "--save_comments",
     "Add print_comments=1 (to be used after -T and -H)"),
    ("-M", "--marked_only",
     "Add marked_only=1 to the end of the scenario (to be used after -T and -H)"),
    ("-N", "--no_color",
     "Add color=0 to the end of the scenario, this overrides color=1 of -T and -H"),
):
    argparser.add_argument(_short, _long, action="store_true", help=_help)
# Do not leave the loop variables behind in the module namespace.
del _short, _long, _help

# Repeatable option: every occurrence appends one more item to a list.
argparser.add_argument(
    "-X", "--extra",
    action="append",
    help="Add a specified parameter (or a block name) to the end of the scenario\n"
         "For example 'udapy -TNX attributes=form,misc -X layout=align < my.conllu'",
)

# Switch for turning the garbage collector (and the regular interpreter exit) back on.
argparser.add_argument(
    "--gc",
    action="store_true",
    help="By default, udapy disables Python garbage collection and at-exit cleanup\n"
         "to speed up everything (especially reading CoNLL-U files). In edge cases,\n"
         "when processing many files and running out of memory, you can disable this\n"
         "optimization (i.e. enable garbage collection) with 'udapy --gc'.",
)

# Everything that follows the options is the scenario itself.
argparser.add_argument(
    "scenario",
    nargs=argparse.REMAINDER,
    help="A sequence of blocks and their parameters.",
)


# Process and provide the scenario.
def main(argv=None):
    """Run udapy: parse the command line, assemble the scenario and execute it.

    Besides running the scenario, this function configures logging, replaces
    ``sys.excepthook`` and, unless ``--gc`` was given, disables the garbage
    collector and registers at-exit hooks that terminate the process with
    ``os._exit``.

    Args:
        argv: List of command-line arguments (without the program name).
            When None, argparse falls back to ``sys.argv[1:]``.

    Returns:
        0 once the scenario has been executed. A BrokenPipeError raised
        during the execution is silently ignored; any other exception
        propagates to the caller.
    """
    args = argparser.parse_args(argv)

    # Set the level of logs according to parameters.
    if args.verbose:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.CRITICAL
    else:
        level = logging.INFO

    logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s',
                        level=level)

    # Flag to track if an unhandled exception occurred. It is a local variable
    # of main() shared (as a closure cell) by the excepthook below, which sets it,
    # and by the at-exit hook further down, which reads it.
    _unhandled_exception_occurred = False

    def _custom_excepthook(exc_type, exc_value, exc_traceback):
        # `nonlocal` (not `global`) is essential here: the flag lives in main(),
        # so a `global` declaration would set an unrelated module-level name
        # and the at-exit hook would always see False (i.e. exit code 0).
        nonlocal _unhandled_exception_occurred
        _unhandled_exception_occurred = True
        # Call the default excepthook to allow normal error reporting
        sys.__excepthook__(exc_type, exc_value, exc_traceback)

    # Override the default excepthook
    sys.excepthook = _custom_excepthook

    # Disabling garbage collections makes the whole processing much faster.
    # Similarly, we can save several seconds by partially disabling the at-exit Python cleanup
    # (atexit hooks are called in reversed order of their registration,
    # so sys.stderr.flush registered below runs before the os._exit call).
    # See https://instagram-engineering.com/dismissing-python-garbage-collection-at-instagram-4dca40b29172
    # Is it safe to disable GC?
    # OS will free the memory allocated by this process after it ends anyway.
    # The udapy wrapper is aimed for one-time tasks, not a long-running server,
    # so in a typical case a document is loaded and almost no memory is freed before the end.
    # Udapi documents have many cyclic references, so running GC is quite slow.
    if not args.gc:
        gc.disable()
        # When an exception/error has happened, udapy should exit with a non-zero exit code,
        # so that users can use `udapy ... || echo "Error detected"` (or Makefile reports errors).
        # However, we cannot use `atexit.register(lambda: os._exit(1 if sys.exc_info()[0] else 0))`
        # because the Python has already exited the exception-handling block
        # (the exception/error has been already reported and sys.exc_info()[0] is None).
        # We thus keep record whether _unhandled_exception_occurred.
        atexit.register(lambda: os._exit(1 if _unhandled_exception_occurred else 0))
        atexit.register(sys.stderr.flush)

    # Extend the scenario according to the command-line shortcuts.
    # The order matters: writers first, then their parameters, then overrides.
    scenario = list(args.scenario)
    if args.save:
        scenario.append('write.Conllu')
    if args.save_text_mode_trees:
        scenario += ['write.TextModeTrees', 'color=1']
    if args.save_html:
        scenario += ['write.TextModeTreesHtml', 'color=1']
    if args.save_all_attributes:
        scenario.append('attributes=form,lemma,upos,xpos,feats,deprel,misc')
    if args.save_comments:
        scenario.append('print_comments=1')
    if args.marked_only:
        scenario.append('marked_only=1')
    if args.no_color:
        scenario.append('color=0')
    if args.extra:
        scenario += args.extra
    args.scenario = scenario

    runner = Run(args)
    # udapy is often piped to head etc., e.g.
    # `seq 1000 | udapy -s read.Sentences | head`
    # Let's prevent Python from reporting (with distracting stacktrace)
    # "BrokenPipeError: [Errno 32] Broken pipe"
    try:
        runner.execute()
    except BrokenPipeError:
        pass
    return 0
# Run the command-line interface when this module is executed as a script;
# the value returned by main() becomes the exit status passed to sys.exit.
if __name__ == "__main__":
    sys.exit(main())