Coverage for phml\utils\locate\select.py: 100%
209 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 12:51 -0600
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 12:51 -0600
1"""utils.select
3A collection of utilities around querying for specific
4types of data.
5"""
7import re
8from typing import Callable
10from phml.nodes import AST, Element, Root
11from phml.utils.travel import visit_children, walk
13__all__ = ["query", "query_all", "matches", "parse_specifiers"]
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class; used without a tag name
    they match any tag. A tag can be used by itself or together with `#`
    and/or `.`. Any number of class specifiers may be used, but only one id
    specifier per tag name. Complex specifiers are allowed: simple specifiers
    may be chained with whitespace (descendant), `>` (child), `+` (next
    sibling) or `~` (following siblings).

    Examples:
        * `.some-example` matches the first element with the class `some-example`
        * `#some-example` matches the first element with the id `some-example`
        * `li` matches the first `li` element
        * `li.red` matches the first `li` with the class `red`
        * `li#red` matches the first `li` with the id `red`
        * `input[type="checkbox"]` matches the first `input` with the attribute
          `type="checkbox"`
        * `div.form-control input[type="checkbox"]` matches the first `input` with
          the attribute `type="checkbox"` that is nested inside a `div` with the
          class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to
            its `tree` attribute before searching.
        specifier (str): The specifier; it is tokenized by `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no
        element was found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Return the first match among the element nodes yielded by `walk(current)`.

        When `include_self` is False, `current` itself is skipped.
        """
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def all_children(current: Element, rules: list):
        """Return the first match among the element nodes of `visit_children(current)`."""
        for node in visit_children(current):
            if node.type == "element":
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def first_sibling(node: Element, rules: list):
        """Match the node that immediately follows `node` in its parent."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        idx = siblings.index(node)
        # NOTE(review): only the immediately following node is considered; if
        # it is not an element (e.g. text) nothing matches — confirm intended.
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Return the first match among the element siblings after `current`."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                result = branch(siblings[idx], rules)
                if result is not None:
                    return result
        return None

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue the chain."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Two element rules in a row form a descendant relationship, so
            # the matched node itself must not be tested against the next
            # rule (otherwise `div div` would match a single `div`).
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` after an element rule: continue with any nested element.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If there are no rules left then return the current valid node.
        """
        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class; used without a tag name
    they match any tag. A tag can be used by itself or together with `#`
    and/or `.`. Any number of class specifiers may be used, but only one id
    specifier per tag name. Complex specifiers are allowed: simple specifiers
    may be chained with whitespace (descendant), `>` (child), `+` (next
    sibling) or `~` (following siblings).

    Examples:
        * `.some-example` matches all elements with the class `some-example`
        * `#some-example` matches all elements with the id `some-example`
        * `li` matches all `li` elements
        * `li.red` matches all `li` elements with the class `red`
        * `li#red` matches all `li` elements with the id `red`
        * `input[type="checkbox"]` matches all `input` elements with the attribute
          `type="checkbox"`
        * `div.form-control input[type="checkbox"]` matches all `input` elements
          with the attribute `type="checkbox"` that are nested inside a `div`
          with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to
            its `tree` attribute before searching.
        specifier (str): The specifier; it is tokenized by `parse_specifiers`.

    Returns:
        list[Element]: All elements matching the specifier, each node listed
        once in first-found order, or an empty list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches from the element nodes yielded by `walk(current)`.

        When `include_self` is False, `current` itself is skipped.
        """
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches from the element nodes of `visit_children(current)`."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Match the node that immediately follows `node` in its parent."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        idx = siblings.index(node)
        # NOTE(review): only the immediately following node is considered; if
        # it is not an element (e.g. text) nothing matches — confirm intended.
        if idx + 1 < len(siblings) and siblings[idx + 1].type == "element":
            return branch(siblings[idx + 1], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches from the element siblings after `current`."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                results.extend(branch(siblings[idx], rules))
        return results

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue the chain."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Two element rules in a row form a descendant relationship, so
            # the matched node itself must not be tested against the next
            # rule (otherwise `div div` would match a single `div`).
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # `*` after an element rule: continue with any nested element.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If there are no rules left then return the current valid node.
        """
        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Always hand back a list so callers can safely `extend` with it.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same node can be reached through several ancestors (for example
    # nested `div`s with `div p`). De-duplicate by object identity so each
    # node is reported once, while distinct nodes that merely compare equal
    # are all kept.
    seen: set[int] = set()
    unique = []
    for node in all_nodes(tree, rules):
        if id(node) not in seen:
            seen.add(id(node))
            unique.append(node)
    return unique
def matches(node: Element, specifier: str) -> bool:
    """Check whether `node` satisfies `specifier`, like the JavaScript `matches`.

    `#` indicates an id and `.` indicates a class; used without a tag name
    they match any tag. A tag can be used by itself or together with `#`
    and/or `.`. Any number of class specifiers may be used, but only one id
    specifier per tag name. Complex specifiers are not supported. Everything
    in the specifier must relate to one element/tag.

    Examples:
        * `.some-example` matches an element with the class `some-example`
        * `#some-example` matches an element with the id `some-example`
        * `li` matches an `li` element
        * `li.red` matches an `li` with the class `red`
        * `li#red` matches an `li` with the id `red`
        * `input[type="checkbox"]` matches an `input` with the attribute
          `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing a single element.

    Returns:
        bool: True if the node satisfies the specifier.

    Raises:
        Exception: If the specifier yields no rules, yields more than one
            rule, or its only rule is a combinator instead of an element rule.
    """

    rules = parse_specifiers(specifier)

    # An empty or unparseable specifier yields no rules; fail with a clear
    # message instead of an IndexError on `rules[0]`.
    if not rules:
        raise Exception(f"Specifier is empty or could not be parsed.\n{specifier}")

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is not `*`, the node's `tag` must match the rule's `tag`
    * If the `id` is provided, the node's `id` must match the rule's `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the
      node's class attribute
    * If the `attributes` list is not empty, each attribute in the list must exist on the
      node and is compared against the node's value using the attribute's comparator.
      Below is the list of possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter
           its value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
           containing `sample`
        4. Equal to or starts with value-: `[class|=sample]` yields elements that either have
           a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with
           `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with
           `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node.properties or rule["id"] != node["id"]):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node.properties:
            return False
        # Split on any whitespace run so tabs/newlines/double spaces between
        # class names do not hide a class.
        node_classes = node["class"].split()
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes. A comparator of None is the bare existence
    # check (`[checked]`): the attribute being present is enough. Otherwise
    # the value must satisfy the comparator. `all` of no attributes is True.
    return all(
        attr["name"] in node.properties
        and (attr["compare"] is None or __validate_attr(attr, node))
        for attr in rule["attributes"]
    )
def __validate_attr(attr: dict, node: Element):
    """Compare the node's value for `attr["name"]` using the rule's comparator.

    The comparator in `attr["compare"]` selects the check that is handed to
    `is_valid_attr` together with the node's attribute value and the rule's
    expected value:
    * `=`  the value equals the expected value
    * `|=` the value equals the expected value or starts with `<expected>-`
    * `^=` the value starts with the expected value
    * `$=` the value ends with the expected value
    * `*=` / `~=` the value contains the expected value

    Returns:
        The result of `is_valid_attr` for a known comparator, otherwise None.
    """
    checks = {
        "=": lambda x, y: x == y,
        "|=": lambda x, y: x == y or x.startswith(f"{y}-"),
        "^=": lambda x, y: x.startswith(y),
        "$=": lambda x, y: x.endswith(y),
        "*=": lambda x, y: y in x,
        "~=": lambda x, y: y in x,
    }

    check = checks.get(attr["compare"])
    if check is None:
        # Unknown comparator: nothing to validate against.
        return None

    attr_name = attr["name"]
    return is_valid_attr(
        attr=node[attr_name],
        sub=attr["value"],
        name=attr_name,
        validator=check,
    )
def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value against a given string with a validator callable.

    For list-like attributes (currently only `class`) the value is split on
    whitespace and every token is checked on its own; any other attribute is
    checked as one whole value.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        name (str): The attribute name; decides whether `attr` is split.
        validator (Callable): Called as `validator(item, sub)` for each
            candidate item; a truthy result counts as a match.

    Returns:
        bool: True if at least one candidate item satisfies the validator.
    """
    list_attributes = ("class",)

    if name in list_attributes:
        # `str.split()` without an argument splits on any run of whitespace,
        # so tabs, newlines and repeated spaces between tokens are handled
        # and no empty tokens are produced.
        compare_values = attr.split()
    else:
        compare_values = [attr]

    return any(validator(item, sub) for item in compare_values)
def __parse_el_with_attribute(token: str) -> dict:
    """Parse a token made of tag/class/id parts plus optional attribute selectors.

    Example: `li.red#sample[type="checkbox"][checked]`.

    Args:
        token (str): A single element specifier starting with a tag name,
            `.class` or `#id`, optionally followed by `[...]` selectors.

    Returns:
        dict: A rule with the keys `tag` (`"*"` when no tag is given),
        `classList` (unique class names in order), `id` (or None) and
        `attributes` (a list of `{"name", "compare", "value"}` dicts, where
        `compare` and `value` are None for a bare `[name]` selector).

    Raises:
        Exception: If the token does not start with a tag, class or id part,
            or if it contains more than one id.
    """
    el_classid_from_attr = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")
    # Unquoted values stop at `]` so that consecutive selectors such as
    # `[a=b][c=d]` are not merged into one attribute with value `b][c=d`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    res = el_classid_from_attr.match(token)
    if res is None:
        raise Exception(f"Invalid element specifier.\n{token}")

    el_class_id, attrs = res.group(1), res.group(2)

    for attr in attr_compare_val.finditer(attrs):
        name, compare, value = attr.groups()
        # Remove exactly one pair of matching surrounding quotes; quotes that
        # are part of the value itself (e.g. "'x'") are preserved.
        if (
            value is not None
            and len(value) >= 2
            and value[0] == value[-1]
            and value[0] in "'\""
        ):
            value = value[1:-1]
        element["attributes"].append(
            {
                "name": name,
                "compare": compare,
                "value": value,
            }
        )

    for item in el_from_class_from_id.finditer(el_class_id):
        prefix, ident = item.group(1), item.group(2)
        if prefix == ".":
            if ident not in element["classList"]:
                element["classList"].append(ident)
        elif prefix == "#":
            if element["id"] is not None:
                raise Exception(f"There may only be one id per element specifier.\n{token}")
            element["id"] = ident
        else:
            element["tag"] = ident

    return element
def __parse_attr_only_element(token: str) -> dict:
    """Parse a token that consists only of attribute selectors.

    Example: `[type="checkbox"][checked]`.

    Args:
        token (str): One or more `[...]` attribute selectors.

    Returns:
        dict: A rule with `tag` set to `"*"`, an empty `classList`, `id` set
        to None and `attributes` holding one `{"name", "compare", "value"}`
        dict per selector (`compare` and `value` are None for a bare
        `[name]` selector).
    """
    # Unquoted values stop at `]` so that consecutive selectors such as
    # `[a=b][c=d]` are not merged into one attribute with value `b][c=d`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    # An empty (or None) token simply carries no attribute selectors.
    if not token:
        return element

    for attr in attr_compare_val.finditer(token):
        name, compare, value = attr.groups()
        # Remove exactly one pair of matching surrounding quotes; quotes that
        # are part of the value itself (e.g. "'x'") are preserved.
        if (
            value is not None
            and len(value) >= 2
            and value[0] == value[-1]
            and value[0] in "'\""
        ):
            value = value[1:-1]
        element["attributes"].append(
            {
                "name": name,
                "compare": compare,
                "value": value,
            }
        )

    return element
465def parse_specifiers(specifier: str) -> dict:
466 """
467 Rules:
468 * `*` = any element
469 * `>` = Everything with certain parent child relationship
470 * `+` = first sibling
471 * `~` = All after
472 * `.` = class
473 * `#` = id
474 * `[attribute]` = all elements with attribute
475 * `[attribute=value]` = all elements with attribute=value
476 * `[attribute~=value]` = all elements with attribute containing value
477 * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
478 * `node[attribute^=value]` = all elements with attribute starting with value
479 * `node[attribute$=value]` = all elements with attribute ending with value
480 * `node[attribute*=value]` = all elements with attribute containing value
482 """
484 splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")
486 el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")
487 el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")
489 tokens = []
490 for token in splitter.finditer(specifier):
491 if token.group() in ["*", ">", "+", "~"]:
492 tokens.append(token.group())
493 elif el_with_attr.match(token.group()):
494 tokens.append(__parse_el_with_attribute(token.group()))
495 elif el_only_attr.match(token.group()):
496 tokens.append(__parse_attr_only_element(token.group()))
498 return tokens