phml.validate.validate

  1from re import match, split, sub
  2
  3from phml.nodes import All_Nodes, Comment, Element, Literal, Parent, Root, Text
  4
  5__all__ = [
  6    "validate",
  7    "parent",
  8    "literal",
  9    "generated",
 10    "has_property",
 11    "is_heading",
 12    "is_css_link",
 13    "is_css_style",
 14    "is_javascript",
 15    "is_element",
 16    "is_embedded",
 17    "is_interactive",
 18    "is_phrasing",
 19    "is_event_handler",
 20]
 21
 22
 23def validate(node: All_Nodes) -> bool:
 24    """Validate a node based on attributes and type."""
 25
 26    if hasattr(node, "children"):
 27        if not hasattr(node, "type"):
 28            raise AssertionError("Node should have a type")
 29
 30        if node.type not in ["root", "element"]:
 31            raise AssertionError(
 32                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
 33            )
 34
 35        if not all(isinstance(child, All_Nodes) for child in node.children):
 36            raise AssertionError("Children must be a node type")
 37
 38    if hasattr(node, "properties"):
 39        if hasattr(node, "type") and node.type != "element":
 40            raise AssertionError("Node must be of type 'element' to contain 'properties'")
 41
 42        if not all(isinstance(node[prop], (int, str)) for prop in node.properties):
 43            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")
 44
 45    if hasattr(node, "value") and not isinstance(node.value, str):
 46        raise AssertionError("Node 'value' must be of type 'str'")
 47
 48    return True
 49
 50
 51def parent(node: Root | Element) -> bool:
 52    """Validate a parent node based on attributes and type."""
 53    if not issubclass(type(node), Parent):
 54        raise AssertionError(
 55            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
 56        )
 57
 58    if not hasattr(node, "children") or node.children is None:
 59        raise AssertionError("Parent nodes should have the 'children' attribute")
 60
 61    if node.type == "element" and (not hasattr(node, "properties") or node.properties is None):
 62        raise AssertionError("Parent element node shoudl have the 'properties' element.")
 63
 64
 65def literal(node: Text | Comment) -> bool:
 66    """Validate a literal node based on attributes."""
 67
 68    if not issubclass(type(node), Literal):
 69        raise AssertionError(
 70            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
 71        )
 72
 73    if not hasattr(node, "value") or not isinstance(node.value, str):
 74        raise AssertionError("Literal nodes 'value' type should be 'str'")
 75
 76
 77def generated(node: All_Nodes) -> bool:
 78    """Checks if a node has been generated. A node is concidered
 79    generated if it does not have a position.
 80
 81    Args:
 82        node (All_Nodes): Node to check for position with.
 83
 84    Returns:
 85        bool: Whether a node has a position or not.
 86    """
 87    return not hasattr(node, "position") or node.position is None
 88
 89
 90def is_heading(node) -> bool:
 91    """Check if an element is a heading."""
 92
 93    if node.type == "element":
 94        if match(r"h[1-6]", node.tag) is not None:
 95            return True
 96        return False
 97    raise TypeError("Node must be an element.")
 98
 99
def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `true` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.
    """

    # Must be an element with a `link` tag that carries a `rel` attribute.
    if not is_element(node, "link") or not has_property(node, "rel"):
        return False

    # `rel` is a whitespace separated token list; `str.split()` with no
    # argument handles repeated spaces, tabs and newlines alike.
    if "stylesheet" not in str(node["rel"]).split():
        return False

    # Can have a `type` of `text/css` or empty or no `type`.
    return not has_property(node, "type") or node["type"] in ("", "text/css")
120
121
def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Returns `true` if `node` is a `<style>` element that
    has no `type`, an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False

    return not has_property(node, "type") or node["type"] in ("", "text/css")
133
134
def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.

    When a `type` is present it alone decides the result; `language` is only
    consulted when there is no `type`.
    """
    if not is_element(node, "script"):
        return False

    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    # Neither `type` nor `language`: a plain `<script>`.
    return True
154
155
def is_element(node, *conditions: str | list) -> bool:
    """Check if the given node is a certain element.

    Each condition is either a tag name or a group of tag names:

    * a ``str`` matches when it equals the element's tag;
    * a ``list`` (``tuple``, ``set`` and ``frozenset`` are accepted too)
      matches when any of its entries equals the element's tag.

    Args:
        node: Node to check. Its ``tag`` is only read when its ``type`` is
            ``'element'``.
        *conditions (str | list): Tag names or groups of tag names.

    Returns:
        bool: ``True`` if ``node`` is an element and at least one condition
        matches. With no conditions the result is always ``False``.
    """

    if node.type != "element":
        return False

    tag = node.tag
    for condition in conditions:
        if isinstance(condition, str):
            if tag == condition:
                return True
        elif isinstance(condition, (list, tuple, set, frozenset)):
            if any(tag == nested for nested in condition):
                return True

    return False
174
175
def is_event_handler(attribute: str) -> bool:
    """Check if an attribute name looks like an event handler.

    Args:
        attribute (str): Attribute name to check.

    Returns:
        bool: ``True`` if the name starts with `on` and its length is `5`
        or more.
    """
    return len(attribute) >= 5 and attribute.startswith("on")
181
182
def has_property(node, attribute: str) -> bool:
    """Check to see if an element has a certain property in properties.

    Args:
        node: Node to check; must have a ``type`` of ``'element'``.
        attribute (str): Name of the property to look for.

    Returns:
        bool: ``True`` if ``attribute`` is in the element's ``properties``.
        An element whose ``properties`` is ``None`` has no properties.

    Raises:
        TypeError: If ``node`` is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    properties = node.properties
    if properties is None:
        return False
    return attribute in properties
190
191
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """

    return is_element(
        node,
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
226
227
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce to bool so the attribute value itself is never returned.
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state, so only an
        # explicit `hidden` type makes it non-interactive.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(node, "button", "details", "embed", "iframe", "label", "select", "textarea")
259
260
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if its parent is a map element; only the direct parent is inspected)
    * link (if it has an itemprop attribute, or a non-empty rel whose tokens are all allowed in
     the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        body_ok = {
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        }

        # `str.split()` with no argument drops the empty tokens that repeated
        # or surrounding whitespace would otherwise produce; an empty `rel`
        # still does not qualify.
        rel_tokens = str(node["rel"]).split()
        return bool(rel_tokens) and all(token in body_ok for token in rel_tokens)

    return is_element(
        node,
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )
def validate( node: phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> bool:
24def validate(node: All_Nodes) -> bool:
25    """Validate a node based on attributes and type."""
26
27    if hasattr(node, "children"):
28        if not hasattr(node, "type"):
29            raise AssertionError("Node should have a type")
30
31        if node.type not in ["root", "element"]:
32            raise AssertionError(
33                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
34            )
35
36        if not all(isinstance(child, All_Nodes) for child in node.children):
37            raise AssertionError("Children must be a node type")
38
39    if hasattr(node, "properties"):
40        if hasattr(node, "type") and node.type != "element":
41            raise AssertionError("Node must be of type 'element' to contain 'properties'")
42
43        if not all(isinstance(node[prop], (int, str)) for prop in node.properties):
44            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")
45
46    if hasattr(node, "value") and not isinstance(node.value, str):
47        raise AssertionError("Node 'value' must be of type 'str'")
48
49    return True

Validate a node based on attributes and type.

def parent(node: phml.nodes.root.Root | phml.nodes.element.Element) -> bool:
52def parent(node: Root | Element) -> bool:
53    """Validate a parent node based on attributes and type."""
54    if not issubclass(type(node), Parent):
55        raise AssertionError(
56            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
57        )
58
59    if not hasattr(node, "children") or node.children is None:
60        raise AssertionError("Parent nodes should have the 'children' attribute")
61
62    if node.type == "element" and (not hasattr(node, "properties") or node.properties is None):
63        raise AssertionError("Parent element node shoudl have the 'properties' element.")

Validate a parent node based on attributes and type.

def literal(node: phml.nodes.text.Text | phml.nodes.comment.Comment) -> bool:
66def literal(node: Text | Comment) -> bool:
67    """Validate a literal node based on attributes."""
68
69    if not issubclass(type(node), Literal):
70        raise AssertionError(
71            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
72        )
73
74    if not hasattr(node, "value") or not isinstance(node.value, str):
75        raise AssertionError("Literal nodes 'value' type should be 'str'")

Validate a literal node based on attributes.

def generated( node: phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> bool:
78def generated(node: All_Nodes) -> bool:
79    """Checks if a node has been generated. A node is concidered
80    generated if it does not have a position.
81
82    Args:
83        node (All_Nodes): Node to check for position with.
84
85    Returns:
86        bool: Whether a node has a position or not.
87    """
88    return not hasattr(node, "position") or node.position is None

Checks if a node has been generated. A node is considered generated if it does not have a position.

Arguments:
  • node (All_Nodes): Node to check for position with.
Returns:

bool: Whether a node has a position or not.

def has_property(node, attribute: str) -> bool:
184def has_property(node, attribute: str) -> bool:
185    """Check to see if an element has a certain property in properties."""
186    if node.type == "element":
187        if attribute in node.properties:
188            return True
189        return False
190    raise TypeError("Node must be an element.")

Check to see if an element has a certain property in properties.

def is_heading(node) -> bool:
91def is_heading(node) -> bool:
92    """Check if an element is a heading."""
93
94    if node.type == "element":
95        if match(r"h[1-6]", node.tag) is not None:
96            return True
97        return False
98    raise TypeError("Node must be an element.")

Check if an element is a heading.

def is_css_style(node) -> bool:
123def is_css_style(node) -> bool:
124    """Check if an element is a css `style` element.
125
126    Returns `true` if `node` is a `<style>` element that
127    has no `type`, an empty `type`, or `'text/css'` as its `type`.
128    """
129
130    return is_element(node, "style") and (
131        not has_property(node, "type")
132        or (has_property(node, "type") and (node["type"] == "" or node["type"] == "text/css"))
133    )

Check if an element is a css style element.

Returns true if node is a <style> element that has no type, an empty type, or 'text/css' as its type.

def is_javascript(node) -> bool:
136def is_javascript(node) -> bool:
137    """Check if an element is a javascript `script` element.
138
139    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
140    `type` and a valid JavaScript `language`, or has neither.
141    """
142    return is_element(node, "script") and (
143        (
144            has_property(node, "type")
145            and node["type"] in ["text/ecmascript", "text/javascript"]
146            and not has_property(node, "language")
147        )
148        or (
149            has_property(node, "language")
150            and node["language"] in ["ecmascript", "javascript"]
151            and not has_property(node, "type")
152        )
153        or (not has_property(node, "type") and not has_property(node, "language"))
154    )

Check if an element is a javascript script element.

Returns true if node is a <script> element that has a valid JavaScript type, has no type and a valid JavaScript language, or has neither.

def is_element(node, *conditions: str | list) -> bool:
157def is_element(node, *conditions: str | list) -> bool:
158    """Checks if the given node is a certain element.
159
160    When providing a str it will check that the elements tag matches.
161    If a list is provided it checks that one of the conditions in the list
162    passes.
163    """
164
165    return bool(
166        node.type == "element"
167        and any(
168            bool(
169                (isinstance(condition, str) and node.tag == condition)
170                or (isinstance(condition, list) and any(node.tag == nested for nested in condition))
171            )
172            for condition in conditions
173        )
174    )

Checks if the given node is a certain element.

When providing a str it will check that the elements tag matches. If a list is provided it checks that one of the conditions in the list passes.

def is_embedded(node: phml.nodes.element.Element) -> bool:
193def is_embedded(node: Element) -> bool:
194    """Check to see if an element is an embedded element.
195
196    Embedded Elements:
197
198    * audio
199    * canvas
200    * embed
201    * iframe
202    * img
203    * MathML math
204    * object
205    * picture
206    * SVG svg
207    * video
208
209    Returns:
210        True if emedded
211    """
212    # audio,canvas,embed,iframe,img,MathML math,object,picture,SVG svg,video
213
214    return is_element(
215        node,
216        "audio",
217        "canvas",
218        "embed",
219        "iframe",
220        "img",
221        "math",
222        "object",
223        "picture",
224        "svg",
225        "video",
226    )

Check to see if an element is an embedded element.

Embedded Elements:

  • audio
  • canvas
  • embed
  • iframe
  • img
  • MathML math
  • object
  • picture
  • SVG svg
  • video
Returns:

True if embedded

def is_interactive(node: phml.nodes.element.Element) -> bool:
229def is_interactive(node: Element) -> bool:
230    """Check if the element is intended for user interaction.
231
232    Conditions:
233
234    * a (if the href attribute is present)
235    * audio (if the controls attribute is present)
236    * button, details, embed, iframe, img (if the usemap attribute is present)
237    * input (if the type attribute is not in the Hidden state)
238    * label, select, text, area, video (if the controls attribute is present)
239
240    Returns:
241        True if element is interactive
242    """
243
244    if is_element(node, "a"):
245        return has_property(node, "href")
246
247    if is_element(node, "input"):
248        return has_property(node, "type") and node["type"].lower() != "hidden"
249
250    if is_element(node, "img"):
251        return has_property(node, "usemap") and node["usemap"]
252
253    if is_element(node, "video"):
254        return has_property(node, "controls")
255
256    if is_element(node, "button", "details", "embed", "iframe", "label", "select", "textarea"):
257        return True
258
259    return False

Check if the element is intended for user interaction.

Conditions:

  • a (if the href attribute is present)
  • audio (if the controls attribute is present)
  • button, details, embed, iframe, img (if the usemap attribute is present)
  • input (if the type attribute is not in the Hidden state)
  • label, select, textarea, video (if the controls attribute is present)
Returns:

True if element is interactive

def is_phrasing(node: phml.nodes.element.Element) -> bool:
262def is_phrasing(node: Element) -> bool:
263    """Check if a node is phrasing text according to
264    https://html.spec.whatwg.org/#phrasing-content-2.
265
266    Phrasing content is the text of the document, as well as elements that mark up that text at the
267    intra-paragraph level. Runs of phrasing content form paragraphs.
268
269    * area (if it is a descendant of a map element)
270    * link (if it is allowed in the body)
271    * meta (if the itemprop attribute is present)
272    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
273     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
274     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
275     template, textarea, time, u, var, video, wbr, text (true)
276
277    Returns:
278        True if the element is phrasing text
279    """
280
281    if isinstance(node, Text):
282        return True
283
284    if is_element(node, "area"):
285        return node.parent is not None and is_element(node.parent, "map")
286
287    if is_element(node, "meta"):
288        return has_property(node, "itemprop")
289
290    if is_element(node, "link"):
291        body_ok = [
292            "dns-prefetch",
293            "modulepreload",
294            "pingback",
295            "preconnect",
296            "prefetch",
297            "preload",
298            "prerender",
299            "stylesheet",
300        ]
301
302        return bool(
303            has_property(node, "itemprop")
304            or (
305                has_property(node, "rel")
306                and all(token.strip() in body_ok for token in node["rel"].split(" "))
307            )
308        )
309
310    if is_element(
311        node,
312        "node",
313        "map",
314        "mark",
315        "math",
316        "audio",
317        "b",
318        "bdi",
319        "bdo",
320        "br",
321        "button",
322        "canvas",
323        "cite",
324        "code",
325        "data",
326        "datalist",
327        "del",
328        "dfn",
329        "em",
330        "embed",
331        "i",
332        "iframe",
333        "img",
334        "input",
335        "ins",
336        "kbd",
337        "label",
338        "a",
339        "abbr",
340        "meter",
341        "noscript",
342        "object",
343        "output",
344        "picture",
345        "progress",
346        "q",
347        "ruby",
348        "s",
349        "samp",
350        "script",
351        "select",
352        "slot",
353        "small",
354        "span",
355        "strong",
356        "sub",
357        "sup",
358        "svg",
359        "template",
360        "textarea",
361        "time",
362        "u",
363        "var",
364        "video",
365        "wbr",
366    ):
367        return True
368
369    return False

Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.

Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.

  • area (if it is a descendant of a map element)
  • link (if it is allowed in the body)
  • meta (if the itemprop attribute is present)
  • map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns:

True if the element is phrasing text

def is_event_handler(attribute: str) -> bool:
177def is_event_handler(attribute: str) -> bool:
178    """Takes a attribute name and returns true if
179    it starts with `on` and its length is `5` or more.
180    """
181    return attribute.startswith("on") and len(attribute) >= 5

Takes an attribute name and returns true if it starts with on and its length is 5 or more.