Coverage for phml\locate\select.py: 99%
212 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 16:33 -0600
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 16:33 -0600
1"""utils.select
3A collection of utilities around querying for specific
4types of data.
5"""
7import re
8from typing import Callable
10from phml.nodes import AST, Element, Root
11from phml.travel.travel import visit_children, walk
13__all__ = ["query", "query_all", "matches", "parse_specifiers"]
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's `querySelector`.

    `#` indicates an id and `.` indicates a class. If they are used without
    a tag name they match any tag. Any tag can be used by itself or with `#`
    and/or `.`. You may use any number of class specifiers, but only one id
    specifier per tag name. Complex specifiers are allowed: space separated
    specifiers express an ancestor/descendant relationship, and the tokens
    `>`, `+`, `~` and `*` are handled as described on the nested `branch`
    helper.

    Examples:
        * `.some-example` matches the first element with the class `some-example`
        * `#some-example` matches the first element with the id `some-example`
        * `li` matches the first `li` element
        * `li.red` matches the first `li` with the class `red`
        * `li#red` matches the first `li` with the id `red`
        * `input[type="checkbox"]` matches the first `input` with the attribute
          `type="checkbox"`
        * `div.form-control input[type="checkbox"]` matches the first `input` with
          the attribute `type="checkbox"` that is nested inside a `div` with the
          class `form-control`.

    Args:
        tree: The tree or node to search. An `AST` is unwrapped to its `tree`
            attribute before searching.
        specifier: The specifier string, parsed with `parse_specifiers`.

    Returns:
        The first element (in `walk` order) matching the specifier, or None if
        no element was found.
    """

    def child_index(node: Element) -> int:
        """Return the position of `node` among its parent's children.

        The node is located by identity first so that a different sibling
        that merely compares equal is never picked by mistake. The equality
        based `list.index` lookup is kept as a fallback.
        """
        siblings = node.parent.children
        for position, child in enumerate(siblings):
            if child is node:
                return position
        return siblings.index(node)

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Search `current` and everything below it, stopping at the first match."""
        for node in walk(current):
            if node.type == "element" and (include_self or node is not current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Element, rules: list):
        """Search the direct children of `current`, stopping at the first match."""
        for node in visit_children(current):
            if node.type == "element":
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Check only the node that directly follows `node` in its parent."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        following = child_index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Search every sibling after `current`, stopping at the first match."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for sibling in siblings[child_index(current) + 1:]:
            if sibling.type == "element":
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Match `node` against the element rule at `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Two element rules in a row form a descendant relationship. An
            # element is never its own descendant, so `node` itself is
            # excluded from the nested search.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # Skip the `*` token and match the remaining rules against any
            # descendant of `node`.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.

        With no rules left the current node is the match. An element rule
        (a dict) is compared against the node itself; `*` searches the node
        and everything below it, `>` its direct children, `+` the directly
        following sibling and `~` all following siblings.
        """
        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's `querySelectorAll`.

    `#` indicates an id and `.` indicates a class. If they are used without
    a tag name they match any tag. Any tag can be used by itself or with `#`
    and/or `.`. You may use any number of class specifiers, but only one id
    specifier per tag name. Complex specifiers are allowed: space separated
    specifiers express an ancestor/descendant relationship, and the tokens
    `>`, `+`, `~` and `*` are handled as described on the nested `branch`
    helper.

    Examples:
        * `.some-example` matches every element with the class `some-example`
        * `#some-example` matches every element with the id `some-example`
        * `li` matches every `li` element
        * `li.red` matches every `li` with the class `red`
        * `li#red` matches every `li` with the id `red`
        * `input[type="checkbox"]` matches every `input` with the attribute
          `type="checkbox"`
        * `div.form-control input[type="checkbox"]` matches every `input` with
          the attribute `type="checkbox"` that is nested inside a `div` with the
          class `form-control`.

    Args:
        tree: The tree or node to search. An `AST` is unwrapped to its `tree`
            attribute before searching.
        specifier: The specifier string, parsed with `parse_specifiers`.

    Returns:
        All elements matching the specifier, each listed once in the order
        they were first found, or an empty list if no elements were found.
    """

    def child_index(node: Element) -> int:
        """Return the position of `node` among its parent's children.

        The node is located by identity first so that a different sibling
        that merely compares equal is never picked by mistake. The equality
        based `list.index` lookup is kept as a fallback.
        """
        siblings = node.parent.children
        for position, child in enumerate(siblings):
            if child is node:
                return position
        return siblings.index(node)

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches from `current` and everything below it."""
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node is not current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches from the direct children of `current`."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Check only the node that directly follows `node` in its parent."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        following = child_index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches from every sibling after `current`."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for sibling in siblings[child_index(current) + 1:]:
            if sibling.type == "element":
                results.extend(branch(sibling, rules))
        return results

    def process_dict(rules: list, node: Element):
        """Match `node` against the element rule at `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Two element rules in a row form a descendant relationship. An
            # element is never its own descendant, so `node` itself is
            # excluded from the nested search.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            # Skip the `*` token and match the remaining rules against any
            # descendant of `node`.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.

        With no rules left the current node is a match. An element rule
        (a dict) is compared against the node itself; `*` searches the node
        and everything below it, `>` its direct children, `+` the directly
        following sibling and `~` all following siblings. Always returns a
        list so callers can safely `extend` with the result.
        """
        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Unknown rule: nothing matches. An empty list (not None) keeps the
        # `results.extend(...)` callers working.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same element can be reached through several search paths. Keep the
    # first occurrence of each one, compared by identity so that distinct
    # elements which happen to compare equal are all retained.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
def matches(node: Element, specifier: str) -> bool:
    """Check whether `node` matches `specifier`, like JavaScript's `matches`.

    `#` indicates an id and `.` indicates a class. If they are used without
    a tag name they match any tag. Any tag can be used by itself or with `#`
    and/or `.`. You may use any number of class specifiers, but only one id
    specifier per tag name. Complex specifiers are not supported. Everything
    in the specifier must relate to one element/tag.

    Examples:
        * `.some-example` matches an element with the class `some-example`
        * `#some-example` matches an element with the id `some-example`
        * `li` matches an `li` element
        * `li.red` matches an `li` with the class `red`
        * `li#red` matches an `li` with the id `red`
        * `input[type="checkbox"]` matches an `input` with the attribute
          `type="checkbox"`

    Args:
        node: The element to test.
        specifier: A specifier describing a single element.

    Returns:
        True if the node satisfies the specifier, otherwise False.

    Raises:
        Exception: If the specifier yields no rules, yields more than one
            rule, or its only rule is not an element rule.
    """

    rules = parse_specifiers(specifier)

    # An empty or unparseable specifier produces no rules; report that
    # clearly instead of failing on `rules[0]` below.
    if not rules:
        raise Exception(f"No valid specifier was found.\n{specifier}")

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
def is_equal(rule: dict, node: Element) -> bool:
    """Check if a rule is valid on a node.

    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is not `*`, the node's `tag` must match the rule's `tag`
    * If the `id` is provided, the node's `id` must match the rule's `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the
      node's class attribute
    * If the `attributes` list is not empty, each attribute in it must exist on the node
      and is compared against the node's attribute using its comparator. Below is the
      list of possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no
           matter its value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
           containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
           a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with
           `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with
           `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node.properties or rule["id"] != node["id"]):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node.properties:
            return False
        # Split on any run of whitespace so repeated spaces, tabs or newlines
        # between class names do not create bogus empty class names.
        node_classes = node["class"].split()
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes. An attribute without a comparator is an
    # existence check: being present on the node is enough. `all` of an
    # empty list is True, so a rule without attributes passes here.
    return all(
        attr["name"] in node.properties
        and (attr["compare"] is None or __validate_attr(attr, node))
        for attr in rule["attributes"]
    )
def __validate_attr(attr: dict, node: Element):
    """Compare one parsed attribute selector against the node's attribute value.

    The comparator stored in `attr["compare"]` selects how the node's value
    is checked against `attr["value"]`; the check itself is delegated to
    `is_valid_attr`. A comparator that is not listed below is treated as
    valid.

    Args:
        attr (dict): Parsed attribute selector with `name`, `compare` and `value` keys.
        node (Element): The node whose attribute is read with `node[attr["name"]]`.

    Returns:
        The result of `is_valid_attr` for a known comparator, otherwise True.
    """
    comparators = {
        "=": lambda x, y: x == y,
        "|=": lambda x, y: x == y or x.startswith(f"{y}-"),
        "^=": lambda x, y: x.startswith(y),
        "$=": lambda x, y: x.endswith(y),
        "*=": lambda x, y: y in x,
        "~=": lambda x, y: y in x,
    }

    check = comparators.get(attr["compare"])
    if check is None:
        return True

    return is_valid_attr(
        attr=node[attr["name"]],
        sub=attr["value"],
        name=attr["name"],
        validator=check,
    )
def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value with a given string and a validator callable.

    For list-like attributes (currently only `class`) the attribute value is
    split on whitespace and every item is checked on its own. For any other
    attribute the whole value is checked as a single item.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        name (str): The attribute name; decides whether `attr` is split.
        validator (Callable): Called as `validator(item, sub)` for each item.

    Returns:
        True if the validator accepts at least one item.
    """
    list_attributes = ("class",)

    if name in list_attributes:
        # Split on any run of whitespace: repeated spaces must not produce
        # empty items, and tabs/newlines also separate class names.
        compare_values = attr.split()
    else:
        compare_values = [attr]

    return any(validator(item, sub) for item in compare_values)
def __parse_el_with_attribute(token: str) -> dict:
    """Parse a token made of a tag/class/id part with optional attribute selectors.

    Example: `li.red#sample[type="checkbox"][checked]`.

    Args:
        token (str): A single element specifier that starts with a tag name,
            class or id.

    Returns:
        dict: A rule with the keys `tag` (`*` when no tag is given),
        `classList`, `id` (None when no id is given) and `attributes` (a list
        of dicts with `name`, `compare` and `value`).

    Raises:
        Exception: If the token contains more than one id.
    """
    el_classid_from_attr = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")
    # An unquoted value may not contain brackets; otherwise it would swallow
    # the attribute selectors that follow it, e.g. `[a=b][c]`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\[\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    res = el_classid_from_attr.match(token)

    el_class_id, attrs = res.group(1), res.group(2)

    if attrs:
        for attr in attr_compare_val.finditer(attrs):
            name, compare, value = attr.groups()
            # Remove exactly one pair of surrounding quotes so quote
            # characters that are part of the value itself are kept.
            if (
                value is not None
                and len(value) >= 2
                and value[0] in "'\""
                and value[-1] == value[0]
            ):
                value = value[1:-1]
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value,
                }
            )

    if el_class_id:
        for item in el_from_class_from_id.finditer(el_class_id):
            prefix, text = item.group(1), item.group(2)
            if prefix == ".":
                # Repeated classes are only recorded once.
                if text not in element["classList"]:
                    element["classList"].append(text)
            elif prefix == "#":
                if element["id"] is not None:
                    raise Exception(f"There may only be one id per element specifier.\n{token}")
                element["id"] = text
            else:
                element["tag"] = text or "*"

    return element
def __parse_attr_only_element(token: str) -> dict:
    """Parse a token that consists only of attribute selectors.

    Example: `[type="checkbox"][checked]`.

    Args:
        token (str): One or more `[...]` attribute selectors.

    Returns:
        dict: A rule with `tag` set to `*`, an empty `classList`, `id` set to
        None and `attributes` holding one dict with `name`, `compare` and
        `value` per attribute selector.
    """
    # An unquoted value may not contain brackets; otherwise it would swallow
    # the attribute selectors that follow it, e.g. `[a=b][c]`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\[\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token:
        for attr in attr_compare_val.finditer(token):
            name, compare, value = attr.groups()
            # Remove exactly one pair of surrounding quotes so quote
            # characters that are part of the value itself are kept.
            if (
                value is not None
                and len(value) >= 2
                and value[0] in "'\""
                and value[-1] == value[0]
            ):
                value = value[1:-1]
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value,
                }
            )

    return element
def parse_specifiers(specifier: str) -> list:
    """Split a specifier string into an ordered list of rules.

    Each combinator (`*`, `>`, `+`, `~`) is returned as its own string and
    each element specifier is returned as a dict describing its tag, classes,
    id and attributes. Text that is not recognized is skipped.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The full specifier string.

    Returns:
        list: Combinator strings and element rule dicts in the order they
        appear in the specifier.
    """

    # Group 1 is a combinator, group 2 an element specifier that starts with a
    # tag/class/id (optionally followed by attribute selectors); anything else
    # that matches is a run of attribute selectors only.
    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    tokens = []
    for found in splitter.finditer(specifier):
        text = found.group()
        if found.group(1) is not None:
            tokens.append(text)
        elif found.group(2) is not None:
            tokens.append(__parse_el_with_attribute(text))
        else:
            tokens.append(__parse_attr_only_element(text))

    return tokens