Data Quality Report

[1]:
%cd ../../../src
/Users/valery/Documents/_code/arche/src
[2]:
from arche import *
[3]:
# JSON Schema (draft-07) describing a single scraped item; passed to Arche
# below to validate the job's data.
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    # Reusable patterns. NOTE(review): nothing in "properties" references
    # these via "$ref" in this cell — kept as-is in case they are used later.
    "definitions": {
        "float": {
            "pattern": "^-?[0-9]+\\.[0-9]{2}$"
        },
        "url": {
            "pattern": "^https?://(www\\.)?[a-z0-9.-]*\\.[a-z]{2,}([^<>%\\x20\\x00-\\x1f\\x7F]|%[0-9a-fA-F]{2})*$"
        }
    },
    # Reject items carrying any field that is not declared in "properties".
    "additionalProperties": False,
    "type": "object",
    "properties": {
        # "tag" is not a standard JSON Schema keyword; presumably consumed by
        # arche to pick extra checks for the field — confirm in arche docs.
        "category": {"type": "string", "tag": ["category"]},
        # Pound sign, two digits, a literal dot, two digits (e.g. "£51.77").
        # Backslashes are doubled so Python does not see invalid "\d" escapes,
        # and the dot is escaped so it no longer matches any character.
        "price": {"type": "string", "pattern": "^£\\d{2}\\.\\d{2}$"},
        "_type": {"type": "string"},
        "description": {"type": "string"},
        "title": {"type": "string", "tag": ["unique"]},
        "_key": {"type": "string"}
    },
    # Every declared property is mandatory.
    "required": [
        "_key",
        "_type",
        "category",
        "description",
        "price",
        "title"
    ]
}
[4]:
# Build the Arche object for the data source "381798/1/2", validated against
# the schema defined above.
# NOTE(review): the source string looks like a Scrapinghub job key
# (project/spider/job) — confirm against the arche documentation.
a = Arche("381798/1/2", schema=schema)
[5]:
# Generate the data quality report for the source loaded above.
# NOTE(review): `bucket` appears to name the S3 bucket the HTML report is
# uploaded to (the link printed in the cell output lives under this bucket);
# confirm the upload behaviour and required credentials in the arche docs.
a.data_quality_report(bucket="files.scrapinghub.com")



https://files.scrapinghub.com.s3.amazonaws.com/reports/dqr/381798/Data%20Quality%20Report%20-%20books.html
[ ]: