Source code for normalize.visitor

#
# This file is a part of the normalize python library
#
# normalize is free software: you can redistribute it and/or modify
# it under the terms of the MIT License.
#
# normalize is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# MIT License for more details.
#
# You should have received a copy of the MIT license along with
# normalize.  If not, refer to the upstream repository at
# http://github.com/hearsaycorp/normalize
#

from __future__ import absolute_import

import collections
import types

try:  # Python 3.3+
    from collections.abc import Mapping
    from collections.abc import Sequence
except ImportError:  # Python 2
    from collections import Mapping
    from collections import Sequence

from normalize.coll import Collection
import normalize.exc as exc
from normalize.record import Record
from normalize.selector import FieldSelector
from normalize.selector import MultiFieldSelector


class Visitor(object):
    """The Visitor object represents a single recursive visit in progress.
    You hopefully shouldn't have to sub-class this class for most use cases;
    just VisitorPattern.
    """

    def __init__(self, unpack_func, apply_func, collect_func, reduce_func,
                 apply_empty_slots=False, extraneous=False,
                 ignore_empty_string=False, ignore_none=True,
                 visit_filter=None, filter=None):
        r"""Create a new Visitor object.  Generally called by a front-end
        class method of :py:class:`VisitorPattern`

        There are four positional arguments, which specify the particular
        functions to be used during the visit.  The important options from a
        user of a visitor are the keyword arguments:

            ``apply_empty_slots=``\ *bool*
                If set, then your ``apply`` method (or ``reverse``, etc) will
                be called even if there is no corresponding value in the
                input.  Your method will receive the Exception as if it were
                the value.

            ``extraneous=``\ *bool*
                Also call the apply method on properties marked
                *extraneous*.  False by default.

            ``ignore_empty_string=``\ *bool*
                If the 'apply' function returns the empty string, treat it
                as if the slot or object did not exist.  ``False`` by
                default.

            ``ignore_none=``\ *bool*
                If the 'apply' function returns ``None``, treat it as if the
                slot or object did not exist.  ``True`` by default.

            ``visit_filter=``\ *MultiFieldSelector*
                This supplies an instance of
                :py:class:`normalize.selector.MultiFieldSelector`, and
                restricts the operation to the matched object fields.  Can
                also be specified as just ``filter=``.  Any other non-``None``
                value is expanded into ``MultiFieldSelector(*visit_filter)``.
        """
        self.unpack = unpack_func
        self.apply = apply_func
        self.collect = collect_func
        self.reduce = reduce_func

        self.apply_empty_slots = apply_empty_slots
        self.extraneous = extraneous
        self.ignore_empty_string = ignore_empty_string
        self.ignore_none = ignore_none

        # ``filter=`` is only consulted when ``visit_filter=`` was not given
        if visit_filter is None:
            visit_filter = filter

        # An explicit ``is None`` test instead of ``types.NoneType``, which
        # is not available on every Python version.
        if visit_filter is None or isinstance(
            visit_filter, MultiFieldSelector
        ):
            self.visit_filter = visit_filter
        else:
            self.visit_filter = MultiFieldSelector(*visit_filter)

        self.seen = set()  # TODO
        # path (property names / collection keys) from the starting point
        # down to the position currently being visited
        self.cue = list()

    def is_filtered(self, prop):
        """Tell whether ``prop`` should be skipped at the current position.

        The result is truthy when the property is marked extraneous and this
        visitor does not visit extraneous properties, or when a
        ``visit_filter`` is set and does not select the current path
        extended by ``prop.name``.  The value is not necessarily a ``bool``
        (it may be ``None`` when no filter is set).
        """
        return (not self.extraneous and prop.extraneous) or (
            self.visit_filter and not self.visit_filter[
                self.cue + [prop.name]
            ]
        )

    @property
    def field_selector(self):
        """A ``FieldSelector`` built from the current path."""
        return FieldSelector(self.cue)

    def push(self, what):
        """Descend: append ``what`` to the current path."""
        self.cue.append(what)

    def pop(self, what=None):
        """Ascend: remove and return the last path component.

        If ``what`` is passed, it is asserted to equal the component being
        removed.
        """
        if what is not None:
            assert self.cue[-1] == what
        return self.cue.pop()

    def copy(self):
        """Be sure to implement this method when sub-classing, otherwise you
        will lose any specialization context.

        The copy gets its own path list (with the same components), but
        shares the ``seen`` set with the original.
        """
        doppel = type(self)(
            self.unpack, self.apply, self.collect, self.reduce,
            apply_empty_slots=self.apply_empty_slots,
            extraneous=self.extraneous,
            ignore_empty_string=self.ignore_empty_string,
            ignore_none=self.ignore_none,
            visit_filter=self.visit_filter,
        )
        for x in self.cue:
            doppel.push(x)
        doppel.seen = self.seen
        return doppel
class VisitorPattern(object):
    """Base Class for writing Record visitor pattern classes.  These classes
    are not instantiated, and consist purely of class methods.

    There are three visitors supplied by default, which correspond to typical
    use for IO (:py:meth:`normalize.visitor.VisitorPattern.visit` for output,
    and :py:meth:`normalize.visitor.VisitorPattern.cast` for input), and for
    providing a centralized type catalogue
    (:py:meth:`normalize.visitor.VisitorPattern.reflect`).

    ============= =========== ============= ===================================
    ``visit``     ``cast``    ``reflect``   Description
    ============= =========== ============= ===================================
    ``unpack``    ``grok``    ``scantypes`` Defines how to get a property value
                                            from the thing being walked, and a
                                            generator for the collection.
    ``apply``     ``reverse`` ``propinfo``  Conversion for individual values
    ``aggregate`` ``collect`` ``itemtypes`` Combine collection results
    ``reduce``    ``produce`` ``typeinfo``  Combine apply results
    ============= =========== ============= ===================================

    To customize what is emitted, sub-class ``VisitorPattern`` and override
    the class methods of the conversion you are interested in.  For many
    simple IO use cases, you might only need to override ``apply`` and
    ``reverse``, if that.

    The versions for ``visit`` are documented the most thoroughly, as these
    are the easiest to understand and the ones most users will be
    customizing.  The documentation for the other methods describes the
    differences between them and their ``visit`` counterpart.
    """

    # the context/options class instantiated by visit(), cast() and reflect()
    Visitor = Visitor

    @classmethod
    def visit(cls, value, value_type=None, **kwargs):
        r"""A value visitor, which visits instances (typically), applies
        :py:meth:`normalize.visitor.VisitorPattern.apply` to every attribute
        slot, and returns the reduced result.

        Like :py:func:`normalize.diff.diff`, this function accepts a series
        of keyword arguments, which are passed through to
        :py:class:`normalize.visitor.Visitor`.

        This function also takes positional arguments:

            ``value=``\ *object*
                The value to visit.  Normally (but not always) a
                :py:class:`normalize.record.Record` instance.

            ``value_type=``\ *RecordType*
                This is the ``Record`` subclass to interpret ``value`` as.
                The default is ``type(value)``.  If you specify this, then
                the type information on ``value`` is essentially ignored
                (with the caveat mentioned below on
                :py:meth:`Visitor.map_prop`), and may be a ``dict``,
                ``list``, etc.

            ``**kwargs``
                Visitor options accepted by
                :py:meth:`normalize.visitor.Visitor.__init__`.

        Raises ``TypeError`` when no ``value_type`` is given and
        ``type(value)`` is not a ``Record`` subclass.
        """
        visitor = cls.Visitor(
            cls.unpack, cls.apply, cls.aggregate, cls.reduce,
            **kwargs)

        # NOTE(review): the source this was recovered from had lost its
        # indentation; the Record check is taken to apply to the inferred
        # type only -- confirm against the upstream file.
        if not value_type:
            value_type = type(value)
            if not issubclass(value_type, Record):
                raise TypeError(
                    "Cannot visit %s instance" % value_type.__name__
                )

        return cls.map(visitor, value, value_type)

    @classmethod
    def unpack(cls, value, value_type, visitor):
        r"""Unpack a value during a 'visit'

        args:

            ``value=``\ *object*
                The instance being visited

            ``value_type=``\ *RecordType*
                The expected type of the instance

            ``visitor=``\ *Visitor*
                The context/options

        returns a tuple with two items:

            ``get_prop=``\ *function*
                This function should take a
                :py:class:`normalize.property.Property` instance, and return
                the slot from the value, or raise ``AttributeError`` or
                ``KeyError`` if the slot is empty.

            ``get_item=``\ *generator*
                This generator should return the tuple protocol used by
                :py:class:`normalize.coll.Collection`: (K, V) where K can be
                an ascending integer (for sequences), V (for sets), or
                something hashable like a string (for dictionaries/maps).
                ``None`` when ``value_type`` is not a collection type.

        Raises ``VisitorUnpackError`` when ``value_type`` is a collection
        type but ``value`` neither provides ``itertuples()`` nor is an
        instance of ``value_type.colltype``.
        """
        if issubclass(value_type, Collection):
            try:
                generator = value.itertuples()
            except AttributeError:
                # not a Collection instance; accept the native collection
                # type that the Collection class wraps
                if isinstance(value, value_type.colltype):
                    generator = value_type.coll_to_tuples(value)
                else:
                    raise exc.VisitorUnpackError(
                        value=repr(value),
                        colltype=value_type.colltype.__name__,
                        fs=visitor.field_selector,
                    )
        else:
            generator = None

        return (lambda prop: prop.__get__(value)), generator

    @classmethod
    def apply(cls, value, prop, visitor):
        r"""'apply' is a general place to put a function which is called on
        every extant record slot.  This is usually the most important
        function to implement when sub-classing.

        The default implementation passes through the slot value as-is, but
        expected exceptions are converted to ``None``.

        args:

            ``value=``\ *value*\ \|\ *AttributeError*\ \|\ *KeyError*
                This is the value currently in the slot, or the Record
                itself with the ``apply_records`` visitor option.
                *AttributeError* will only be received if you passed
                ``apply_empty_slots``, and *KeyError* will be passed if
                ``parent_obj`` is a ``dict`` (see
                :py:meth:`Visitor.map_prop` for details about when this
                might happen)

            ``prop=``\ *Property*\ \|\ ``None``
                This is the :py:class:`normalize.Property` instance which
                represents the field being traversed.

            ``visitor=``\ *Visitor*
                This object can be used to inspect parameters of the current
                run, such as options which control which kinds of values are
                visited, which fields are being visited and where the
                function is in relation to the starting point.
        """
        return (
            None if isinstance(value, (AttributeError, KeyError)) else
            value
        )

    @classmethod
    def aggregate(cls, mapped_coll_generator, coll_type, visitor):
        r"""Hook called for each normalize.coll.Collection, after mapping
        over each of the items in the collection.

        The default implementation calls
        :py:meth:`normalize.coll.Collection.tuples_to_coll` with
        ``coerce=False``, which just re-assembles the collection into a
        native python collection type of the same type of the input
        collection.

        args:

            ``mapped_coll_generator=``\ *generator func*
                Generator which returns (key, value) pairs (like
                :py:meth:`normalize.coll.Collection.itertuples`)

            ``coll_type=``\ *CollectionType*
                This is the :py:class:`normalize.coll.Collection`-derived
                *class* which is currently being reduced.

            ``visitor=``\ *Visitor*
                Context/options object
        """
        return coll_type.tuples_to_coll(mapped_coll_generator, coerce=False)

    @classmethod
    def reduce(cls, mapped_props, aggregated, value_type, visitor):
        r"""This reduction is called to combine the mapped slot and
        collection item values into a single value for return.

        The default implementation tries to behave naturally; you'll almost
        always get a dict back when mapping over a record, and list or some
        other collection when mapping over collections.

        If the collection has additional properties which are not ignored
        (eg, not extraneous, not filtered), then the result will be a
        dictionary with the results of mapping the properties, and a
        'values' key will be added with the result of mapping the items in
        the collection.

        args:

            ``mapped_props=``\ *generator*
                Iterating over this generator will yield K, V pairs, where K
                is **the Property object** and V is the mapped value.

            ``aggregated=``\ *object*
                This contains whatever ``aggregate`` returned, normally a
                list.

            ``value_type=``\ *RecordType*
                This is the type which is currently being reduced.
                A :py:class:`normalize.record.Record` subclass

            ``visitor=``\ *Visitor*
                Context/options object.

        Raises ``VisitorTooSimple`` when a collection with visible
        properties already produced a truthy ``values`` entry, which would
        be clobbered by the collection items.
        """
        reduced = None
        if mapped_props:
            reduced = dict((k.name, v) for k, v in mapped_props)

        if issubclass(value_type, Collection) and aggregated is not None:
            if all(visitor.is_filtered(prop) for prop in
                   value_type.properties.values()):
                # no visible properties: the collection stands for itself
                reduced = aggregated
            else:
                if reduced is None:
                    # no property generator was supplied; still return the
                    # collection items rather than failing on ``None.get``
                    reduced = {}
                if reduced.get("values", False):
                    raise exc.VisitorTooSimple(
                        fs=visitor.field_selector,
                        value_type_name=value_type.__name__,
                        # this is a classmethod: ``cls`` is the visitor
                        # class itself, ``type(cls)`` would be its metaclass
                        visitor=cls.__name__,
                    )
                reduced['values'] = aggregated

        return reduced

    # CAST versions
    @classmethod
    def cast(cls, value_type, value, visitor=None, **kwargs):
        r"""Cast is for visitors where you are visiting some random data
        structure (perhaps returned by a previous ``VisitorPattern.visit()``
        operation), and you want to convert back to the value type.

        This function also takes positional arguments:

            ``value_type=``\ *RecordType*
                The type to cast to.

            ``value=``\ *object*
                The data structure to convert.

            ``visitor=``\ *Visitor*
                Specifies the visitor options, which customizes the descent
                and reduction.  When omitted, one is created from
                ``**kwargs`` using the ``grok``, ``reverse``, ``collect``
                and ``produce`` hooks.
        """
        if visitor is None:
            visitor = cls.Visitor(
                cls.grok, cls.reverse, cls.collect, cls.produce,
                **kwargs)

        return cls.map(visitor, value, value_type)

    # hooks for types which define what is considered acceptable input for
    # given contexts during 'cast'
    #
    # note: Collection.coll_to_tuples will generally allow you to pass
    # collections as a list or a dict with the *values* being the members of
    # the set, so this code allows this.
    #
    # ``Mapping``/``Sequence`` come from the module-level import, which
    # prefers ``collections.abc`` (the aliases on ``collections`` itself
    # were removed in Python 3.10).
    grok_mapping_types = Mapping
    grok_coll_types = (Sequence, Mapping)

    @classmethod
    def grok(cls, value, value_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.unpack` but called
        for ``cast`` operations.  Expects to work with dictionaries and lists
        instead of Record objects.

        Reverses the transform performed in
        :py:meth:`normalize.visitor.VisitorPattern.reduce` for collections
        with properties.

        If you pass tuples to ``isa`` of your Properties, then you might need
        to override this function and throw ``TypeError`` if the passed
        ``value_type`` is not appropriate for ``value``.

        Raises ``VisitorGrokRecordError`` when visible properties are
        expected but ``value`` is not one of ``grok_mapping_types``, and
        ``VisitorGrokCollectionError`` when a collection is expected but the
        items are not one of ``grok_coll_types``.
        """
        is_coll = issubclass(value_type, Collection)
        # "record" here means: has at least one property that is not
        # filtered out at the current position
        is_record = any(not visitor.is_filtered(prop) for prop in
                        value_type.properties.values())

        if is_record and not isinstance(value, cls.grok_mapping_types):
            raise exc.VisitorGrokRecordError(
                val=repr(value),
                record_type=value_type,
                record_type_name=value_type.__name__,
                field_selector=visitor.field_selector,
            )

        values = value
        if is_coll and is_record:
            # collection with properties: the items live under 'values'
            try:
                if "values" in value:
                    values = value['values']
            except TypeError:
                pass

        generator = None
        if is_coll:
            if not isinstance(values, cls.grok_coll_types):
                raise exc.VisitorGrokCollectionError(
                    val=repr(values),
                    record_type=value_type,
                    record_type_name=value_type.__name__,
                    field_selector=visitor.field_selector,
                )
            generator = value_type.coll_to_tuples(values)

        return (lambda prop: value[prop.name]), generator

    @classmethod
    def reverse(cls, value, prop, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.apply` but called
        for ``cast`` operations.  The default implementation passes through
        but squashes exceptions, just like apply.
        """
        return (
            None if isinstance(value, (AttributeError, KeyError)) else
            value
        )

    @classmethod
    def collect(cls, mapped_coll_generator, coll_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.aggregate`, but
        coerces the mapped values to the collection item type on the way
        through.
        """
        return coll_type.tuples_to_coll(mapped_coll_generator)

    @classmethod
    def produce(cls, mapped_props, aggregated, value_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.reduce`, but
        constructs instances rather than returning plain dicts.

        The mapped properties become keyword arguments to ``value_type``;
        for collection types, ``aggregated`` is passed as ``values=``.
        """
        kwargs = {} if not mapped_props else dict(
            (k.name, v) for k, v in mapped_props
        )
        if issubclass(value_type, Collection):
            kwargs['values'] = aggregated
        return value_type(**kwargs)

    # versions which walk type objects
    @classmethod
    def reflect(cls, X, **kwargs):
        """Reflect is for visitors where you are exposing some information
        about the types reachable from a starting type to an external system.
        For example, a front-end, a REST URL router and documentation
        framework, an avro schema definition, etc.

        X can be a type or an instance.

        This API should be considered **experimental**

        Raises ``TypeError`` when ``X`` (or its type) is not a ``Record``
        subclass.
        """
        if isinstance(X, type):
            value = None
            value_type = X
        else:
            value = X
            value_type = type(X)
        if not issubclass(value_type, Record):
            raise TypeError("Cannot reflect on %s" % value_type.__name__)

        visitor = cls.Visitor(
            cls.scantypes, cls.propinfo, cls.itemtypes, cls.typeinfo,
            **kwargs)

        return cls.map(visitor, value, value_type)

    @classmethod
    def scantypes(cls, value, value_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.unpack`, but
        returns a getter which just returns the property, and a collection
        getter which returns a set with a single item in it.

        Each item type is yielded as a ``(type, type)`` pair so that it fits
        the (K, V) protocol used for collection items.
        """

        item_type_generator = None

        if issubclass(value_type, Collection):

            def get_item_types():
                if isinstance(value_type.itemtype, tuple):
                    # not actually supported by Collection yet, but whatever
                    for vt in value_type.itemtype:
                        yield (vt, vt)
                else:
                    yield value_type.itemtype, value_type.itemtype

            item_type_generator = get_item_types()

        return (lambda prop: prop), item_type_generator

    @classmethod
    def propinfo(cls, value, prop, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.apply`, but takes
        a property and returns a dict with some basic info.  The default
        implementation returns just the name of the property and the type in
        here.

        ``type`` is a list of names when the property's ``valuetype`` is a
        tuple, a single name otherwise, and is omitted when ``valuetype`` is
        falsy.
        """
        rv = {"name": prop.name}
        if prop.valuetype:
            if isinstance(prop.valuetype, tuple):
                rv['type'] = [typ.__name__ for typ in prop.valuetype]
            else:
                rv['type'] = prop.valuetype.__name__
        return rv

    @classmethod
    def itemtypes(cls, mapped_types, coll_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.aggregate`, but
        returns the mapped item type information: the single mapped value
        when there is exactly one, otherwise a list of them.  This will
        normally only get called with a single type.
        """
        rv = [v for k, v in mapped_types]
        return rv[0] if len(rv) == 1 else rv

    @classmethod
    def typeinfo(cls, propinfo, type_parameters, value_type, visitor):
        """Like :py:meth:`normalize.visitor.VisitorPattern.reduce`, but
        returns the final dictionary to correspond to a type definition.  The
        default implementation returns just the type name, the list of
        properties, and the item type for collections.

        ``properties`` and ``itemtype`` are only present when non-empty.
        """
        propspec = dict((prop.name, info) for prop, info in propinfo)
        ts = {'name': value_type.__name__}
        if propspec:
            ts['properties'] = propspec
        if type_parameters:
            ts['itemtype'] = type_parameters
        return ts

    # sentinel iteration stopper
    class StopVisiting(object):
        """This sentinel value may be returned by a custom implementation of
        ``unpack`` (or ``grok``, or ``scantypes``) to indicate that the
        descent should be stopped immediately, instead of proceeding to
        descend into sub-properties.  It can be passed a literal value to use
        as the mapped value as a single constructor argument, or the class
        itself returned to indicate no mapped value."""

        # used when the class itself (not an instance) is returned
        return_value = None

        def __init__(self, return_value):
            self.return_value = return_value

    # methods-in-common
    @classmethod
    def map(cls, visitor, value, value_type):
        r"""The common visitor API used by all three visitor implementations.

        args:

            ``visitor=``\ *Visitor*
                Visitor options instance: contains the callbacks to use to
                implement the visiting, as well as traversal & filtering
                options.

            ``value=``\ *Object*
                Object being visited

            ``value_type=``\ *RecordType*
                The type object controlling the visiting.

        Returns whatever the visitor's ``reduce`` callback returns, or the
        ``return_value`` of ``StopVisiting`` when the ``unpack`` callback
        asked for the descent to stop.
        """
        unpacked = visitor.unpack(value, value_type, visitor)

        # both the StopVisiting class itself and instances of it stop here
        if unpacked == cls.StopVisiting or isinstance(
            unpacked, cls.StopVisiting
        ):
            return unpacked.return_value

        # a bare (non-tuple) result is taken as the property getter alone
        if isinstance(unpacked, tuple):
            props, coll = unpacked
        else:
            props, coll = unpacked, None

        # recurse into values for collections
        if coll:
            coll_map_generator = cls.map_collection(
                visitor, coll, value_type,
            )
            mapped_coll = visitor.collect(
                coll_map_generator, value_type, visitor,
            )
        else:
            mapped_coll = None

        # recurse into regular properties
        mapped_props = None
        if props:
            mapped_props = cls.map_record(visitor, props, value_type)

        return visitor.reduce(
            mapped_props, mapped_coll, value_type, visitor,
        )

    @classmethod
    def map_record(cls, visitor, get_value, record_type):
        """Generator over the visible properties of ``record_type``.

        For each property which is not filtered, fetches the slot through
        ``get_value(prop)`` and yields ``(prop, mapped)`` pairs.  Empty slots
        (``AttributeError``/``KeyError`` from the getter) are only mapped
        when ``apply_empty_slots`` is set, in which case the exception object
        stands in for the value.  Results which are ``None`` or ``""`` are
        dropped according to ``ignore_none``/``ignore_empty_string``.

        Any other exception from the getter is re-raised as
        ``VisitorPropError``.
        """
        rv = visitor.copy()  # expensive?
        # ``items()`` rather than the Python 2-only ``iteritems()``
        for name, prop in record_type.properties.items():
            if rv.is_filtered(prop):
                continue

            rv.push(name)
            try:
                value = get_value(prop)
            except AttributeError as ae:
                value = ae
            except KeyError as ke:
                value = ke
            except Exception as e:
                rv.pop(name)
                raise exc.VisitorPropError(
                    exception=e,
                    prop=prop,
                    prop_name=name,
                    record_type_name=record_type.__name__,
                    fs=rv.field_selector,
                )

            if visitor.apply_empty_slots or not isinstance(
                value, (KeyError, AttributeError),
            ):
                mapped = cls.map_prop(rv, value, prop)
                if mapped is None and rv.ignore_none:
                    pass
                elif mapped == "" and rv.ignore_empty_string:
                    pass
                else:
                    yield prop, mapped
            rv.pop(name)

    @classmethod
    def map_collection(cls, visitor, coll_generator, coll_type):
        """Generator which maps every ``(key, value)`` pair from
        ``coll_generator`` through :py:meth:`map` using
        ``coll_type.itemtype``, yielding ``(key, mapped)`` pairs.  Results
        which are ``None`` or ``""`` are dropped according to
        ``ignore_none``/``ignore_empty_string``.
        """
        rv = visitor.copy()
        for key, value in coll_generator:
            rv.push(key)
            mapped = cls.map(rv, value, coll_type.itemtype)
            rv.pop(key)
            if mapped is None and visitor.ignore_none:
                pass
            elif mapped == "" and visitor.ignore_empty_string:
                pass
            else:
                yield key, mapped

    @classmethod
    def map_prop(cls, visitor, value, prop):
        """Map a single slot value: type unions go through
        :py:meth:`map_type_union`, ``Record`` types recurse through
        :py:meth:`map`, and anything else is handed to the visitor's
        ``apply`` callback.
        """
        mapped = None

        # XXX - this fallback here is type-unsafe, and exists only for
        # those who don't declare their isa= for complex object types.
        value_type = prop.valuetype or type(value)

        if isinstance(value_type, tuple):
            mapped = cls.map_type_union(
                visitor, value, value_type, prop,
            )
        elif issubclass(value_type, Record):
            mapped = cls.map(visitor, value, value_type)
        else:
            mapped = visitor.apply(value, prop, visitor)

        return mapped

    @classmethod
    def map_type_union(cls, visitor, value, type_tuple, prop):
        """Map a value whose property declares a tuple of types."""
        # This corner-case method applies when visiting a value and
        # encountering a type union in the ``Property.valuetype`` field.
        #
        # this code has the same problem that record_id does; that is, it
        # doesn't know which of the type union the value is.
        #
        # the solution this function uses is to try all of them, until one of
        # them returns something logically true.  Handlers (ie, unpack/grok)
        # can also protest by raising TypeError, and the next one will be
        # tried.
        record_types = []
        matching_record_types = []

        for value_type in type_tuple:
            if issubclass(value_type, Record):
                record_types.append(value_type)
                # XXX - this test here should probably be a per-visitor
                # hook, as it only really applies to 'visit', not 'grok'
                #
                # NOTE(review): the source this was recovered from had lost
                # its indentation; the instance test is taken to apply to
                # Record types only -- confirm against the upstream file.
                if isinstance(value, value_type):
                    matching_record_types.append(value_type)

        # prefer the record types the value is actually an instance of;
        # otherwise every record type in the union is a candidate
        mapped = None
        for value_type in matching_record_types or record_types:
            try:
                mapped = cls.map(visitor, value, value_type)
            except TypeError:
                pass
            else:
                # this could also be the wrong thing when mapping
                # over types.
                if mapped:
                    break

        if not mapped:
            mapped = visitor.apply(value, prop, visitor)

        return mapped