phml.validate.validate
"""Validation and classification helpers for phml nodes."""

from re import match, split, sub

from phml.nodes import All_Nodes, Comment, Element, Literal, Parent, Root, Text

__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
    "is_event_handler",
]


def validate(node: All_Nodes) -> bool:
    """Validate a node based on its attributes and type.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node has `children` but no `type`, has
            `children` with a type other than 'root' or 'element', has a
            child that is not a node, has `properties` with a type other
            than 'element', has a property value that is not an int or
            str, or has a `value` that is not a str.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ("root", "element"):
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, All_Nodes):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True


def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node does not inherit from `Parent`, has no
            `children`, or is an element without `properties`.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if not hasattr(node, "children") or node.children is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and (not hasattr(node, "properties") or node.properties is None):
        raise AssertionError("Parent element node shoudl have the 'properties' element.")

    # Return True explicitly so the result matches the `-> bool` annotation
    # and is truthy on success, like `validate`.
    return True


def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node does not inherit from `Literal` or its
            `value` is missing or not a str.
    """

    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not hasattr(node, "value") or not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # Return True explicitly so the result matches the `-> bool` annotation
    # and is truthy on success, like `validate`.
    return True


def generated(node: All_Nodes) -> bool:
    """Check whether a node has been generated.

    A node is considered generated if it does not have a position.

    Args:
        node (All_Nodes): Node to check for a position.

    Returns:
        bool: True if the node has no `position` attribute or its
        `position` is None, otherwise False.
    """
    return getattr(node, "position", None) is None


def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node: Node to check; it must have a `type` of 'element'.

    Returns:
        bool: True if the tag is exactly `h1`..`h6`.

    Raises:
        TypeError: If the node is not an element.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `\Z` anchors the end of the tag so names that merely start with a
    # heading tag (e.g. `h1x`) are not reported as headings.
    return match(r"h[1-6]\Z", node.tag) is not None


def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns True if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`. Both attributes are compared case-insensitively and `rel`
    tokens may be separated by any whitespace.
    """

    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    # `str.split()` with no argument splits on any run of whitespace.
    rel_tokens = str(node["rel"]).lower().split()
    if "stylesheet" not in rel_tokens:
        return False

    if not has_property(node, "type"):
        return True

    return str(node["type"]).strip().lower() in ("", "text/css")


def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Returns True if `node` is a `<style>` element that has no `type`, an
    empty `type`, or `'text/css'` as its `type`. The `type` is compared
    case-insensitively and ignoring surrounding whitespace.
    """

    if not is_element(node, "style"):
        return False

    if not has_property(node, "type"):
        return True

    return str(node["type"]).strip().lower() in ("", "text/css")


def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns True if `node` is a `<script>` element that has a valid JavaScript
    `type`, has no `type` and a valid JavaScript `language`, or has neither.
    When `type` is present it decides the result and `language` is ignored.
    """
    if not is_element(node, "script"):
        return False

    if has_property(node, "type"):
        return str(node["type"]).strip().lower() in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return str(node["language"]).strip().lower() in ("ecmascript", "javascript")

    return True


def is_element(node, *conditions: str | list) -> bool:
    """Check if the given node is a certain element.

    Each condition is either a str, which must equal the element's tag, or a
    list (tuple/set also accepted) of tags, one of which must equal the tag.
    The node matches when any condition passes. With no conditions the
    result is simply whether the node is an element.

    Args:
        node: Node to check; it must expose `type` (and `tag` for elements).
        *conditions (str | list): Tag names or collections of tag names.

    Returns:
        bool: True if the node is an element that satisfies the conditions.
    """

    if node.type != "element":
        return False

    if not conditions:
        return True

    tag = node.tag
    for condition in conditions:
        if isinstance(condition, str):
            if tag == condition:
                return True
        elif isinstance(condition, (list, tuple, set, frozenset)) and tag in condition:
            return True

    return False


def is_event_handler(attribute: str) -> bool:
    """Check whether an attribute name looks like an event handler.

    Args:
        attribute (str): Attribute name to check.

    Returns:
        bool: True if the name starts with `on` and its length is `5` or more.
    """
    return len(attribute) >= 5 and attribute[:2] == "on"


def has_property(node, attribute: str) -> bool:
    """Check to see if an element has a certain property in properties.

    Args:
        node: Node to inspect; it must have a `type` of 'element'.
        attribute (str): Property name to look for.

    Returns:
        bool: True if `attribute` is in the element's `properties`.

    Raises:
        TypeError: If the node is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    return attribute in node.properties


def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )

    return is_element(node, *embedded_tags)


def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and non-empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state, so it is
        # interactive.
        return not has_property(node, "type") or str(node["type"]).strip().lower() != "hidden"

    if is_element(node, "img"):
        # Coerce to bool so the attribute value itself is never returned.
        return has_property(node, "usemap") and bool(node["usemap"])

    return is_element(node, "button", "details", "embed", "iframe", "label", "select", "textarea")


def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Walk up the tree: any `map` ancestor qualifies, not only the
        # direct parent.
        ancestor = getattr(node, "parent", None)
        while ancestor is not None:
            if is_element(ancestor, "map"):
                return True
            ancestor = getattr(ancestor, "parent", None)
        return False

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        body_ok = (
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        )

        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        # Split on any whitespace so repeated or trailing separators do not
        # produce empty tokens; an empty `rel` is never body-ok.
        rel_tokens = str(node["rel"]).lower().split()
        return bool(rel_tokens) and all(token in body_ok for token in rel_tokens)

    # NOTE(review): "node" is not an HTML element name; kept to preserve
    # existing behavior - confirm whether it is intentional.
    return is_element(
        node,
        "node",
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )
def validate(node: All_Nodes) -> bool:
    """Validate a node based on its attributes and type.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node has `children` but no `type`, has
            `children` with a type other than 'root' or 'element', has a
            child that is not a node, has `properties` with a type other
            than 'element', has a property value that is not an int or
            str, or has a `value` that is not a str.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ("root", "element"):
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, All_Nodes):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True
Validate a node based on attributes and type.
def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node does not inherit from `Parent`, has no
            `children`, or is an element without `properties`.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if not hasattr(node, "children") or node.children is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and (not hasattr(node, "properties") or node.properties is None):
        raise AssertionError("Parent element node shoudl have the 'properties' element.")

    # Return True explicitly so the result matches the `-> bool` annotation
    # and is truthy on success instead of an implicit None.
    return True
Validate a parent node based on attributes and type.
def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Node to validate.

    Returns:
        bool: Always True when every check passes.

    Raises:
        AssertionError: If the node does not inherit from `Literal` or its
            `value` is missing or not a str.
    """

    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not hasattr(node, "value") or not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # Return True explicitly so the result matches the `-> bool` annotation
    # and is truthy on success instead of an implicit None.
    return True
Validate a literal node based on attributes.
def generated(node: All_Nodes) -> bool:
    """Check whether a node has been generated.

    A node is considered generated if it does not have a position.

    Args:
        node (All_Nodes): Node to check for a position.

    Returns:
        bool: True if the node has no `position` attribute or its
        `position` is None, otherwise False.
    """
    return getattr(node, "position", None) is None
Checks if a node has been generated. A node is considered generated if it does not have a position.
Arguments:
- node (All_Nodes): Node to check for position with.
Returns:
bool: True if the node has no position (i.e. it was generated), otherwise False.
def has_property(node, attribute: str) -> bool:
    """Check to see if an element has a certain property in properties.

    Args:
        node: Node to inspect; it must have a `type` of 'element'.
        attribute (str): Property name to look for.

    Returns:
        bool: True if `attribute` is in the element's `properties`.

    Raises:
        TypeError: If the node is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    return attribute in node.properties
Check to see if an element has a certain property in properties.
def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node: Node to check; it must have a `type` of 'element'.

    Returns:
        bool: True if the tag is exactly `h1`..`h6`.

    Raises:
        TypeError: If the node is not an element.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `\Z` anchors the end of the tag so names that merely start with a
    # heading tag (e.g. `h1x`) are not reported as headings.
    return match(r"h[1-6]\Z", node.tag) is not None
Check if an element is a heading.
def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns True if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`. Both attributes are compared case-insensitively and `rel`
    tokens may be separated by any whitespace.
    """

    # `is_element` is checked first so `has_property` (which raises for
    # non-elements) is only reached for `<link>` elements.
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    # `str.split()` with no argument splits on any run of whitespace, so
    # tabs and newlines between tokens are handled as well as spaces.
    rel_tokens = str(node["rel"]).lower().split()
    if "stylesheet" not in rel_tokens:
        return False

    if not has_property(node, "type"):
        return True

    return str(node["type"]).strip().lower() in ("", "text/css")
Check if an element is a link
to a css file.
Returns true
if node
is a <link>
element with a rel
list that
contains 'stylesheet'
and has no type
, an empty type
, or 'text/css'
as its type
def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Returns True if `node` is a `<style>` element that has no `type`, an
    empty `type`, or `'text/css'` as its `type`. The `type` is compared
    case-insensitively and ignoring surrounding whitespace.
    """

    if not is_element(node, "style"):
        return False

    if not has_property(node, "type"):
        return True

    return str(node["type"]).strip().lower() in ("", "text/css")
Check if an element is a css style
element.
Returns true
if node
is a <style>
element that
has no type
, an empty type
, or 'text/css'
as its type
.
def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns True if `node` is a `<script>` element that has a valid JavaScript
    `type`, has no `type` and a valid JavaScript `language`, or has neither.
    When `type` is present it decides the result and `language` is ignored.
    """
    if not is_element(node, "script"):
        return False

    # A present `type` takes precedence, so a script carrying both a valid
    # `type` and a `language` attribute is still recognised.
    if has_property(node, "type"):
        return str(node["type"]).strip().lower() in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return str(node["language"]).strip().lower() in ("ecmascript", "javascript")

    return True
Check if an element is a javascript script
element.
Returns true
if node
is a <script>
element that has a valid JavaScript type
, has no
type
and a valid JavaScript language
, or has neither.
157def is_element(node, *conditions: str | list) -> bool: 158 """Checks if the given node is a certain element. 159 160 When providing a str it will check that the elements tag matches. 161 If a list is provided it checks that one of the conditions in the list 162 passes. 163 """ 164 165 return bool( 166 node.type == "element" 167 and any( 168 bool( 169 (isinstance(condition, str) and node.tag == condition) 170 or (isinstance(condition, list) and any(node.tag == nested for nested in condition)) 171 ) 172 for condition in conditions 173 ) 174 )
Checks if the given node is a certain element.
When providing a str it will check that the element's tag matches. If a list is provided it checks that one of the conditions in the list passes.
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )

    # Unpack so every tag is passed as its own str condition.
    return is_element(node, *embedded_tags)
Check to see if an element is an embedded element.
Embedded Elements:
- audio
- canvas
- embed
- iframe
- img
- MathML math
- object
- picture
- SVG svg
- video
Returns:
True if embedded
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and non-empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state, so it is
        # interactive.
        return not has_property(node, "type") or str(node["type"]).strip().lower() != "hidden"

    if is_element(node, "img"):
        # Coerce to bool so the attribute value itself is never returned.
        return has_property(node, "usemap") and bool(node["usemap"])

    return is_element(node, "button", "details", "embed", "iframe", "label", "select", "textarea")
Check if the element is intended for user interaction.
Conditions:
- a (if the href attribute is present)
- audio (if the controls attribute is present)
- button, details, embed, iframe, img (if the usemap attribute is present)
- input (if the type attribute is not in the Hidden state)
- label, select, textarea, video (if the controls attribute is present)
Returns:
True if element is interactive
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Walk up the tree: any `map` ancestor qualifies, not only the
        # direct parent.
        ancestor = getattr(node, "parent", None)
        while ancestor is not None:
            if is_element(ancestor, "map"):
                return True
            ancestor = getattr(ancestor, "parent", None)
        return False

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        body_ok = (
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        )

        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        # Split on any whitespace so repeated or trailing separators do not
        # produce empty tokens; an empty `rel` is never body-ok.
        rel_tokens = str(node["rel"]).lower().split()
        return bool(rel_tokens) and all(token in body_ok for token in rel_tokens)

    # NOTE(review): "node" is not an HTML element name; kept to preserve
    # existing behavior - confirm whether it is intentional.
    return is_element(
        node,
        "node",
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )
Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.
Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.
- area (if it is a descendant of a map element)
- link (if it is allowed in the body)
- meta (if the itemprop attribute is present)
- map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns:
True if the element is phrasing text
def is_event_handler(attribute: str) -> bool:
    """Check whether an attribute name looks like an event handler.

    Args:
        attribute (str): Attribute name to check.

    Returns:
        bool: True if the name starts with `on` and its length is `5` or more.
    """
    return len(attribute) >= 5 and attribute[:2] == "on"
Takes an attribute name and returns true if
it starts with on
and its length is 5
or more.