# Source code for udapi.block.corefud.miscstats

from udapi.core.block import Block
from collections import Counter
import re

class MiscStats(Block):
    """Print frequency statistics of the attributes found in the MISC field.

    While nodes are processed, the block counts how often each MISC
    attribute occurs and how often each of its values occurs.  Numeric
    indices in square brackets (e.g. ``[1]``) are removed from attribute
    names before counting, so indexed variants of an attribute are pooled
    under one name.  When processing ends, the most frequent values of
    every attribute are printed.
    """

    def __init__(self, maxvalues=10, **kwargs):
        """Set up empty counters.

        Args:
            maxvalues: how many of the most frequent values are printed
                for each attribute (handed to ``Counter.most_common``).
            **kwargs: forwarded unchanged to the ``Block`` constructor.
        """
        super().__init__(**kwargs)
        self.maxvalues = maxvalues
        # attribute name (index stripped) -> Counter of its values
        self.valuecounter = {}
        # attribute name (index stripped) -> total number of occurrences
        self.totalcounter = Counter()

    def process_node(self, node):
        """Add every MISC attribute of ``node`` to the running counters."""
        for attrname in node.misc:
            # Drop bracketed numeric indices so that e.g. "X[1]" and "X[2]"
            # are counted together under "X".
            key = re.sub(r'\[\d+\]', r'', attrname)
            per_value = self.valuecounter.get(key)
            if per_value is None:
                per_value = self.valuecounter[key] = Counter()
            per_value[node.misc[attrname]] += 1
            self.totalcounter[key] += 1

    def process_end(self):
        """Print the collected statistics to standard output.

        For each attribute, an empty line is printed, then a line with the
        attribute name and its total count separated by a tab, then one
        tab-indented line per reported value with that value's frequency.
        """
        for attrname, per_value in self.valuecounter.items():
            print()
            print(attrname, self.totalcounter[attrname], sep="\t")
            for value, freq in per_value.most_common(self.maxvalues):
                # The leading empty field yields the initial tab.
                print("", value, freq, sep="\t")