Source code for udapi.core.dualdict

"""DualDict is a dict with lazily synchronized string representation."""
import collections.abc
import copy


class DualDict(collections.abc.MutableMapping):
    """DualDict class serves as dict with lazily synchronized string representation.

    >>> ddict = DualDict('Number=Sing|Person=1')
    >>> ddict['Case'] = 'Nom'
    >>> str(ddict)
    'Case=Nom|Number=Sing|Person=1'
    >>> ddict['NonExistent']
    ''

    This class provides access to both
    * a structured (dict-based, deserialized) representation,
      e.g. {'Number': 'Sing', 'Person': '1'}, and
    * a string (serialized) representation of the mapping, e.g. `Number=Sing|Person=1`.

    There is a clever mechanism that makes sure that users can read and write
    both of the representations which are always kept synchronized.
    Moreover, the synchronization is lazy, so the serialization and deserialization
    is done only when needed. This speeds up scenarios where access to dict is not needed.

    A value can be deleted with any of the following three ways:
    >>> del ddict['Case']
    >>> ddict['Case'] = None
    >>> ddict['Case'] = ''

    and it works even if the value was already missing. In that case the
    mapping is left untouched, including its stored string representation.

    Internal state:
    * `_string is None` means the string cache is stale and will be rebuilt
      from `_dict` on the next `str()` call.
    * an empty `_dict` together with a `_string` other than `_` means the
      string has not been deserialized yet.
    """
    __slots__ = ['_string', '_dict']

    def __init__(self, value=None, **kwargs):
        """Create the mapping from `value` (str, Mapping or None) or from kwargs.

        Raises:
            ValueError: if both `value` and keyword arguments are given,
                or if `value` has an unsupported type.
        """
        if value is not None and kwargs:
            raise ValueError('If value is specified, no other kwarg is allowed ' + str(kwargs))
        self._dict = dict(**kwargs)
        self._string = None
        if value is not None:
            self.set_mapping(value)

    def __str__(self):
        """Return the serialized form, rebuilding it only if the cache is stale.

        Items are sorted case-insensitively by name and joined with `|`.
        A value which is exactly `True` is serialized as the bare name.
        An empty mapping is serialized as `_`.
        """
        if self._string is None:
            serialized = []
            for name, value in sorted(self._dict.items(), key=lambda s: s[0].lower()):
                if value is True:
                    serialized.append(name)
                else:
                    serialized.append(f"{name}={value}")
            self._string = '|'.join(serialized) if serialized else '_'
        return self._string

    def _deserialize_if_empty(self):
        """Fill `_dict` from `_string` if the dict has not been populated yet."""
        if not self._dict and self._string is not None and self._string != '_':
            for raw_feature in self._string.split('|'):
                namevalue = raw_feature.split('=', 1)
                if len(namevalue) == 2:
                    name, value = namevalue
                else:
                    # A bare name without `=` is stored with the value True.
                    name, value = namevalue[0], True
                self._dict[name] = value

    def __getitem__(self, key):
        """Return the value stored under `key`, or '' if the key is missing."""
        self._deserialize_if_empty()
        return self._dict.get(key, '')

    def __setitem__(self, key, value):
        """Store `value` under `key`; None or '' deletes the key instead."""
        if value is not None and value != '':
            # Deserialize first: it needs the string which is invalidated below.
            self._deserialize_if_empty()
            self._dict[key] = value
            self._string = None
        else:
            # __delitem__ invalidates the cached string only if the key was
            # really present, so deleting a missing key keeps the string as is
            # (the same behavior as `del ddict[key]`).
            self.__delitem__(key)

    def __delitem__(self, key):
        """Delete `key`; a missing key is silently ignored."""
        self._deserialize_if_empty()
        try:
            del self._dict[key]
            self._string = None
        except KeyError:
            # Deleting a missing key is a documented no-op.
            pass

    def __iter__(self):
        """Iterate over the keys of the mapping."""
        self._deserialize_if_empty()
        return self._dict.__iter__()

    def __len__(self):
        """Return the number of stored items."""
        self._deserialize_if_empty()
        return len(self._dict)

    def __contains__(self, key):
        """Return True if `key` is stored in the mapping."""
        self._deserialize_if_empty()
        return self._dict.__contains__(key)

    def clear(self):
        """Remove all items; the string representation becomes `_`."""
        self._string = '_'
        self._dict.clear()

    def copy(self):
        """Return a deep copy of this instance."""
        return copy.deepcopy(self)

    def set_mapping(self, value):
        """Set the mapping from a dict or string.

        If the `value` is None or an empty string, it is converted to storing string `_`
        (which is the CoNLL-U way of representing an empty value).
        If the `value` is a string, it is stored as is.
        If the `value` is a dict (or any instance of `collections.abc.Mapping`),
        its copy is stored.
        Other types of `value` raise a `ValueError` exception.
        """
        if value is None:
            self.clear()
        elif isinstance(value, str):
            # Lazy: keep only the string, the dict is filled on first access.
            self._dict.clear()
            self._string = value if value != '' else '_'
        elif isinstance(value, collections.abc.Mapping):
            self._string = None
            self._dict = dict(value)
        else:
            raise ValueError("Unsupported value type " + str(value))