Source code for recipe.utils

from __future__ import print_function

import importlib
import inspect
import re
import string
import unicodedata
from functools import wraps

import attr
import sqlalchemy.orm
import sqlparse
from faker import Faker
from faker.providers import BaseProvider
from sqlalchemy.engine.default import DefaultDialect
from sqlalchemy.sql.functions import FunctionElement
from sqlalchemy.sql.sqltypes import Date, DateTime, NullType, String

from recipe.compat import basestring, integer_types, str

# Public API of this module: the names exported by ``from recipe.utils import *``.
__all__ = ["prettyprintable_sql", "clean_unicode", "FakerAnonymizer", "FakerFormatter"]


def recipe_arg(*args):
    """Decorator for recipe builder arguments.

    Promotes the builder pattern: the decorated method is called for its
    side effects only, its own return value is discarded, and the owning
    recipe is returned instead so that calls can be chained.

    The decorator is applied with parentheses (``@recipe_arg()``); any
    positional arguments given to it are accepted but not used.

    :raises BadRecipe: raised by the wrapped method, at call time, when it is
        invoked on an object that is neither a ``Recipe`` nor a
        ``RecipeExtension``.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(self, *_args, **_kwargs):
            # Imported at call time rather than at module import time
            # (presumably to avoid a circular import with the recipe package).
            from recipe import Recipe, RecipeExtension, BadRecipe

            if isinstance(self, Recipe):
                recipe = self
            elif isinstance(self, RecipeExtension):
                # Extensions carry a reference to the recipe they extend.
                recipe = self.recipe
            else:
                raise BadRecipe(
                    "recipe_arg can only be applied to "
                    "methods of Recipe or RecipeExtension"
                )

            # A query is already set on the recipe: reset it before the
            # builder argument is changed.
            if recipe._query is not None:
                recipe.reset()

            func(self, *_args, **_kwargs)
            return recipe

        return wrapper

    return decorator


class TestProvider(BaseProvider):
    """Demo faker provider, used for testing providers given as strings."""

    def foo(self):
        """Return the constant string ``"foo"``."""
        return "foo"


class StringLiteral(String):
    """String type that teaches SA how to literalize non-string values."""

    def literal_processor(self, dialect):
        """Build the function that renders a bound value as an inline literal.

        :param dialect: the dialect the statement is being compiled for; it is
            handed to the parent ``String`` processor and its ``encoding`` is
            used to decode byte results.
        :return: a one-argument callable returning the literal as text.
        """
        render_string = super(StringLiteral, self).literal_processor(dialect)

        def process(value):
            # Integers bypass the string processor and are emitted as digits.
            if isinstance(value, integer_types):
                return str(value)

            # Anything that is not already a string is stringified first.
            text = value if isinstance(value, basestring) else str(value)

            rendered = render_string(text)
            if isinstance(rendered, bytes):
                return rendered.decode(dialect.encoding)
            return rendered

        return process


def prettyprintable_sql(statement, dialect=None, reindent=True):
    """Render a statement as SQL text with its bound parameters inlined.

    WARNING: Should only be used for debugging. Inlining parameters is not
             safe when handling user created data.

    :param statement: a SQLAlchemy statement, or a `sqlalchemy.orm.Query`
        (in which case its ``.statement`` is compiled).
    :param dialect: dialect to compile for. When omitted and a Query is
        given, the dialect of the query's session bind is used; otherwise
        ``DefaultDialect`` is used.
    :param reindent: passed through to ``sqlparse.format``.
    :return: the formatted SQL string.
    """
    if isinstance(statement, sqlalchemy.orm.Query):
        query = statement
        if dialect is None:
            dialect = query.session.get_bind().dialect
        statement = query.statement

    # Subclass the requested dialect (or the default one) so that only the
    # literal rendering of the types below is overridden.
    base_dialect = dialect.__class__ if dialect else DefaultDialect

    # String:   prevent various encoding explosions
    # DateTime: teach SA about how to literalize a datetime
    # Date:     same, for dates
    # NullType: don't format py2 long integers to NULL
    literal_colspecs = dict.fromkeys(
        (String, DateTime, Date, NullType), StringLiteral
    )

    class LiteralDialect(base_dialect):
        colspecs = literal_colspecs

    compiled = statement.compile(
        dialect=LiteralDialect(), compile_kwargs={"literal_binds": True}
    )
    sql_text = str(compiled)
    return sqlparse.format(sql_text, reindent=reindent)


# Matches any run of one or more whitespace characters (newlines included).
WHITESPACE_RE = re.compile(r"\s+", flags=re.DOTALL | re.MULTILINE)


def replace_whitespace_with_space(s):
    """Collapse every run of whitespace in ``s`` into a single space.

    Leading and trailing whitespace is collapsed too, not stripped.
    """
    collapsed = WHITESPACE_RE.sub(" ", s)
    return collapsed


def clean_unicode(value):
    """Convert ``value`` to a string, falling back to an ASCII-only form.

    ``str(value)`` is tried first. If that raises ``UnicodeEncodeError`` the
    value is NFKD-normalized and encoded to ASCII, dropping every character
    that has no ASCII representation.

    :raises ValueError: if nothing is left after the ASCII fallback.
    """
    try:
        return str(value)
    except UnicodeEncodeError:
        ascii_only = unicodedata.normalize("NFKD", value).encode("ascii", "ignore")

    if ascii_only:
        return ascii_only
    raise ValueError("Could not find useful chars in the string")


class AttrDict(dict):
    """A ``dict`` whose items can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        """Accept exactly the arguments that ``dict`` accepts."""
        super(AttrDict, self).__init__(*args, **kwargs)
        # Use the mapping itself as the instance namespace, so attribute
        # access and item access share the same storage.
        self.__dict__ = self


def disaggregate(expr):
    """Return ``expr.clause_expr`` for a SQL function element.

    Any expression that is not a ``FunctionElement`` is returned unchanged.
    """
    if not isinstance(expr, FunctionElement):
        return expr
    return expr.clause_expr


class FakerFormatter(string.Formatter):
    """A formatter that can get values from Faker generators.

    The format spec of each replacement field names the generator to call on
    the field's value, e.g. ``"{fake:name}"``. Keyword arguments may follow a
    single ``|`` as comma-separated ``key=value`` pairs, e.g.
    ``"{fake:random_int|min=1,max=5}"``.
    """

    def format_field(self, obj, format_spec):
        """Call the generator named by ``format_spec`` on ``obj``.

        :param obj: A faker Faker (any object exposing the generator as an
            attribute works)
        :param format_spec: a generator name, optionally followed by
            ``|key=value,key=value``. The values ``None``, ``True`` and
            ``False`` and all-digit values are converted to the matching
            Python objects; everything else is passed as a string.
        :return: The generated value as a string, or
            ``"Unknown fake generator"`` when no truthy value was produced
            (for example when the attribute is not callable).
        :raises AttributeError: if ``obj`` has no attribute with the
            generator's name.
        """
        generator = format_spec
        kwargs = {}
        if "|" in format_spec:
            try:
                newgenerator, potential_kwargs = format_spec.split("|")
                for part in potential_kwargs.split(","):
                    k, v = part.split("=")
                    if v == "None":
                        v = None
                    elif v == "True":
                        v = True
                    elif v == "False":
                        v = False
                    elif v.isdigit():
                        v = int(v)
                    kwargs[k] = v
                generator = newgenerator
            except ValueError:
                # If more than one "|"  don't try to parse
                # If the kwargs aren't of form x=y then don't try to parse
                # (generator then stays the full, unparsed format_spec)
                pass

        value = None
        c = getattr(obj, generator)
        if callable(c):
            # inspect.getargspec was removed in Python 3.11 and, on earlier
            # Python 3 versions, raises ValueError for callables that have
            # annotations or keyword-only parameters. Prefer getfullargspec
            # and only fall back where it does not exist (Python 2).
            getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
            argspec = getspec(c)
            if len(argspec.args) == 1:
                # Only the bound ``self``: call without arguments.
                value = c()
            elif kwargs:
                value = c(**kwargs)
            else:
                # The callable takes arguments but none were supplied: the
                # callable itself (not its result) becomes the value and is
                # stringified below.
                # NOTE(review): confirm this is intended.
                value = c

        if value is not None and not isinstance(value, basestring):
            value = str(value)
        return value or "Unknown fake generator"


@attr.s
class FakerAnonymizer(object):
    """Returns a deterministically generated fake value that depends
    on the input value.

    Attributes (all settable through the generated ``__init__``):

    * ``format_str`` -- format string rendered by ``FakerFormatter`` with the
      faker instance available under the field name ``fake``.
    * ``locale`` -- locale handed to ``Faker`` (default ``"en_US"``).
    * ``postprocessor`` -- optional callable applied to the formatted value.
    * ``providers`` -- optional provider, or list/tuple of providers, to add
      to the faker; each is a ``BaseProvider`` subclass or a dotted
      ``"package.module.ClassName"`` string.
    """

    format_str = attr.ib()
    locale = attr.ib(default="en_US")
    postprocessor = attr.ib(default=None)
    providers = attr.ib(default=None)

    def __attrs_post_init__(self):
        """Create the faker and formatter and register the extra providers."""
        self.fake = Faker(self.locale)
        self.formatter = FakerFormatter()
        for p in self._clean_providers(self.providers):
            self.fake.add_provider(p)

    def _clean_providers(self, providers):
        """Convert a list of anonymizer providers into classes suitable for
        adding with faker.add_provider

        Entries that cannot be resolved to a ``BaseProvider`` subclass are
        skipped silently.

        :param providers: ``None``, a single provider, or a list/tuple of
            providers (classes or dotted-path strings).
        :return: list of ``BaseProvider`` subclasses, in input order.
        """
        if not providers:
            return []

        if not isinstance(providers, (list, tuple)):
            providers = [providers]

        cleaned_providers = []
        for provider in providers:
            if isinstance(provider, basestring):
                # dynamically import the provider
                module_path, _, class_name = provider.rpartition(".")
                if not module_path:
                    # TODO: log an issue, provider is not a dotted path
                    continue

                try:
                    module = importlib.import_module(module_path)
                except ImportError:
                    # TODO: log an issue, can't import module
                    continue

                candidate = getattr(module, class_name, None)
                if candidate is None:
                    # TODO: log an issue, provider not found in module
                    continue
                # isclass() first: issubclass() raises TypeError when the
                # attribute found in the module is not a class.
                if not (inspect.isclass(candidate) and issubclass(candidate, BaseProvider)):
                    # TODO: log an issue, provider not generator
                    continue
                cleaned_providers.append(candidate)
            elif inspect.isclass(provider) and issubclass(provider, BaseProvider):
                cleaned_providers.append(provider)
            else:
                # TODO: log an issue, provider is not an importable string
                # or a ProviderBase
                continue

        return cleaned_providers

    def __call__(self, value):
        """Return the fake value generated for ``value``.

        The faker is re-seeded from ``hash(value)`` before formatting, so the
        same seed always produces the same output.
        """
        # NOTE(review): on Python 3, hash() of str/bytes is salted per
        # process unless PYTHONHASHSEED is set, so results would only repeat
        # within one process -- confirm whether cross-run stability is needed.
        self.fake.seed_instance(hash(value))
        value = self.formatter.format(self.format_str, fake=self.fake)
        if self.postprocessor is None:
            return value
        return self.postprocessor(value)