import types
import cPickle
import mdp


class _Walk(object):
    """Recursively crawl an object and search for attributes that are
    references to numpy arrays (instances of mdp.numx.ndarray).

    Calling an instance returns a dictionary:
    {attribute_name: array_reference}
    where attribute_name is the dotted path from the crawled object to
    the array (e.g. 'y.z' for x.y.z).

    Usage:
        _Walk()(object)
    """

    def __init__(self):
        # dotted attribute path -> array found at that path
        self.arrays = {}
        # value of 'start' passed to the most recently finished call
        self.start = None
        # id() of every member examined so far; only the keys are used
        self.allobjs = {}

    def __call__(self, x, start=None):
        """Collect the arrays reachable from the attributes of x.

        x -- object whose members (as listed by dir) are examined
        start -- dotted path of x relative to the object the crawl
                 started from, or None when x is that object

        Return the dictionary of all arrays found so far. It is the
        same dictionary object (self.arrays) for every call on this
        instance, so nested calls fill it in place.
        """
        arrays = self.arrays
        # loop through the object dictionary
        for name in dir(x):
            # get the corresponding member
            obj = getattr(x, name)
            if id(obj) in self.allobjs:
                # if we already examined the member, skip to the next
                continue
            # add the id of this object to the list of known members.
            # NOTE(review): only the id is recorded, not a reference to
            # the object itself -- confirm before changing, the
            # termination of the recursion may depend on it.
            self.allobjs[id(obj)] = None
            if start is None:
                # initialize a string structure to keep track of
                # array names
                struct = name
            else:
                # struct is x.y.z (where x and y are objects and z an
                # array)
                struct = '.'.join((start, name))
            if isinstance(obj, mdp.numx.ndarray):
                # the present member is an array: add it to the
                # dictionary of all arrays (struct == name at top level)
                arrays[struct] = obj
            elif (name.startswith('__') or
                  type(obj) in (int, long, float, types.MethodType)):
                # the present member is a private member or a known
                # type that does not support arrays as attributes
                # Note: this is to avoid infinite
                # recursion in python2.6. Just remove the "or type in ..."
                # condition to see the error. There must be a better way.
                continue
            else:
                # we need to examine the present member in more detail.
                # The nested call writes into self.arrays directly, so
                # its return value does not need to be merged.
                self(obj, start=struct)
        self.start = start
        return arrays


def _format_dig(dict_):
    """Return an aligned, human readable listing of the given arrays.

    dict_ -- dictionary {attribute_name: (size_in_bytes, array_reference)}

    The result has one line per array, sorted by name, followed by a
    summary line with the number of arrays and their total size. For an
    empty dictionary only the summary line is returned.
    """
    msgs = []
    total_size = 0
    # max() of an empty sequence raises ValueError, so the column
    # widths are only computed when there is something to list
    if dict_:
        longest_name = max(len(name) for name in dict_)
        longest_size = max(len('%d' % val[0]) for val in dict_.values())
        for name in sorted(dict_.keys()):
            size = dict_[name][0]
            total_size += size
            pname = (name+':').ljust(longest_name+1)
            # 6 == len(' bytes')
            psize = ('%d bytes' % size).rjust(longest_size+6)
            msgs.append("%s %s" % (pname, psize))
    msgs.append("Total %d arrays (%d bytes)" % (len(dict_), total_size))
    return '\n'.join(msgs)


def dig_node(x):
    """Crawl recursively an MDP Node looking for arrays.

    Return (dictionary, string), where the dictionary is:
    { attribute_name: (size_in_bytes, array_reference)}
    and string is a nice string representation of it.

    Raise an Exception if x is not an instance of mdp.Node.
    """
    if not isinstance(x, mdp.Node):
        raise Exception('Cannot dig %s' % (str(type(x))))
    arrays = _Walk()(x)
    # iterate over a snapshot of the keys, since the values are
    # replaced in place
    for name in list(arrays.keys()):
        ar = arrays[name]
        if len(ar.shape) == 0:
            # an array with an empty shape holds a single item
            size = 1
        else:
            size = mdp.numx.prod(ar.shape)
        nbytes = ar.itemsize*size
        arrays[name] = (nbytes, ar)
    return arrays, _format_dig(arrays)


def get_node_size(x):
    """Return node total byte-size using cPickle with protocol=2.

    The byte-size is the length of the pickled node, which is related
    to the memory needed by the node.
    """
    # TODO: add check for problematic node types, like NoiseNode?
    # TODO: replace this with sys.getsizeof for Python >= 2.6
    size = len(cPickle.dumps(x, protocol=2))
    return size


def get_node_size_str(x, si_units=False):
    """Return node total byte-size as a well readable string.

    si_units -- If True si-units like kB are used instead of KiB.

    The get_node_size function is used to get the size.
    """
    return _memory_size_str(get_node_size(x), si_units=si_units)


_SI_MEMORY_PREFIXES = ("", "k", "M", "G", "T", "P", "E")
_IEC_MEMORY_PREFIXES = ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei")


def _memory_size_str(size, si_units=False):
    """Convert the given memory size into a nicely formatted string.

    size -- memory size in bytes
    si_units -- If True si-units like kB are used instead of KiB.

    Sizes smaller than one kB / KiB are printed as an integer number of
    bytes, larger ones with one decimal digit in the largest unit that
    is not bigger than the size. Sizes beyond the largest known prefix
    are expressed in that largest unit.
    """
    if si_units:
        base = 10**3
        prefixes = _SI_MEMORY_PREFIXES
    else:
        base = 2**10
        prefixes = _IEC_MEMORY_PREFIXES
    # base**scale is the actual scale
    scale = 0
    max_scale = len(prefixes) - 1
    # '>=' so that an exact power of the base is promoted to the next
    # unit (1024 -> '1.0 KiB'); the bound on scale keeps the prefix
    # lookup inside the table for huge sizes
    while scale < max_scale and size >= base**(scale+1):
        scale += 1
    unit = prefixes[scale] + "B"
    if scale:
        size_str = "%.1f" % (1.0 * size / (base**scale))
    else:
        size_str = "%d" % size
    return size_str + " " + unit