phml.utils.locate.select
utils.select
A collection of utilities around querying for specific types of data.
"""utils.select

A collection of utilities around querying for specific
types of data.
"""

import re
from typing import Callable

from phml.nodes import AST, Element, Root
from phml.utils.travel import visit_children, walk

__all__ = ["query", "query_all", "matches", "parse_specifiers"]

# Patterns are compiled once at import time instead of on every call.
_SPLITTER = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")
_EL_CLASSID_FROM_ATTR = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
_EL_FROM_CLASS_FROM_ID = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")
# The unquoted alternative excludes `]` so that `[a=b][c=d]` is read as two
# attributes instead of one attribute whose value is `b][c=d`. Quoted values
# may be empty (`[title=""]`).
_ATTR_COMPARE_VAL = re.compile(
    r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]*\"|'[^']*'|[^'\"\]]+)?\]"
)

# Attributes whose value is a space separated list of tokens.
_LIST_ATTRIBUTES = ("class",)

# Comparison symbol (without the trailing `=`) -> validator(actual, expected).
_ATTR_VALIDATORS = {
    "|": lambda x, y: x == y or x.startswith(f"{y}-"),
    "^": lambda x, y: x.startswith(y),
    "$": lambda x, y: x.endswith(y),
    "*": lambda x, y: y in x,
    "~": lambda x, y: y in x,
}


def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` combinators are supported.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested in a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The selector to parse with `parse_specifiers`.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Check every element yielded by `walk(current)`; first hit wins."""
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def all_children(current: Element, rules: list):
        """Check the element children of the current node; first hit wins."""
        for node in visit_children(current):
            if node.type == "element":
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def first_sibling(node: Element, rules: list):
        """Check the first *element* sibling following the node.

        Non-element nodes (text, comments) between the two elements are
        skipped, as the CSS adjacent sibling combinator ignores them.
        """
        if node.parent is None:
            return None

        siblings = node.parent.children
        for idx in range(siblings.index(node) + 1, len(siblings)):
            if siblings[idx].type == "element":
                return branch(siblings[idx], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Check every element sibling after the current node; first hit wins."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                result = branch(siblings[idx], rules)
                if result is not None:
                    return result
        return None

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node` and continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        # Descendant searches exclude `node` itself; otherwise `div div`
        # would be satisfied by a single, un-nested `div`.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)


def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` combinators are supported.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested in a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The selector to parse with `parse_specifiers`.

    Return:
        list[Element]: All elements matching the specifier, each listed once, or an empty
        list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches from every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches from the element children of the current node."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Collect matches from the first *element* sibling following the node.

        Non-element nodes (text, comments) between the two elements are
        skipped, as the CSS adjacent sibling combinator ignores them.
        """
        if node.parent is None:
            return []

        siblings = node.parent.children
        for idx in range(siblings.index(node) + 1, len(siblings)):
            if siblings[idx].type == "element":
                return list(branch(siblings[idx], rules))
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches from every element sibling after the current node."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                results.extend(branch(siblings[idx], rules))
        return results

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node` and continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        # Descendant searches exclude `node` itself; otherwise `div div`
        # would be satisfied by a single, un-nested `div`.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        Always returns a list so callers can extend/unpack it safely.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same element can be reached through several nested ancestors
    # (`div span` with a span inside two divs); report it only once.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique


def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` is true for an element with the class `some-example`
    * `#some-example` is true for an element with the id `some-example`
    * `li` is true for an `li` element
    * `li.red` is true for an `li` with the class `red`
    * `li#red` is true for an `li` with the id `red`
    * `input[type="checkbox"]` is true for an `input` with the attribute `type="checkbox"`

    Raises:
        ValueError: If the specifier is empty, is complex (more than one rule), or is
        a lone combinator. `ValueError` is still caught by `except Exception`.
    """

    rules = parse_specifiers(specifier)

    if len(rules) == 0:
        # Previously surfaced as an opaque IndexError on `rules[0]`.
        raise ValueError(f"Specifier did not contain anything to match.\n{specifier!r}")
    if len(rules) > 1:
        raise ValueError(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise ValueError(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)


def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is provided, the nodes `tag` must match the rules `tag`
    * If the `id` is provided, the nodes `id` must match the rules `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the nodes
    class attribute
    * If the `attribute` list is not empty, each attribute in the attribute list will be compared
    against the nodes attributes given the `attribute` lists comparators. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id; an element without an id can never satisfy an id rule.
    if rule["id"] is not None:
        if "id" not in node.properties or rule["id"] != node["id"]:
            return False

    # Validate class list
    if len(rule["classList"]) > 0:
        if "class" not in node.properties:
            return False
        node_classes = node["class"].split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes. The attribute must exist, and when a comparator
    # was given its value must pass it. (The previous `(...) or True` made the
    # comparison result irrelevant.)
    return all(
        attr["name"] in node.properties
        and (attr["compare"] is None or __validate_attr(attr, node))
        for attr in rule["attributes"]
    )


def __validate_attr(attr: dict, node: Element) -> bool:
    """Compare one parsed attribute rule against the node's attribute value.

    `attr["compare"]` is what the parser captured, e.g. `=`, `^=`, `$=`,
    `*=`, `~=`, `|=`. Bare symbols without the trailing `=` are accepted too.
    Unknown comparators are treated as passing.
    """
    compare = attr["compare"]
    if compare is None:
        return True

    actual = node[attr["name"]]
    expected = attr["value"]

    if compare == "=":
        return expected == actual

    symbol = compare[:-1] if compare.endswith("=") else compare
    validator = _ATTR_VALIDATORS.get(symbol)
    if validator is None:
        return True

    # `[name^=]` carries no value to compare against; nothing can match it.
    if expected is None:
        return False

    return is_valid_attr(attr=actual, sub=expected, validator=validator, name=attr["name"])


def is_valid_attr(attr: str, sub: str, validator: Callable, name: str | None = None) -> bool:
    """Validate an attribute value with a given string and a validator callable.
    If `name` is a list attribute (`class`), the value is split on spaces and each
    token is checked. Otherwise the whole value is checked as one item.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare with.
        validator (Callable): Called as `validator(item, sub)` for each item.
        name (str | None): The attribute's name; decides whether `attr` is split.

    Returns:
        True if at least one item passes the validator.
    """
    # Non-string values (for example a boolean attribute) have no text to compare.
    if not isinstance(attr, str):
        return False

    compare_values = attr.split(" ") if name in _LIST_ATTRIBUTES else [attr]
    return any(validator(item, sub) for item in compare_values)


def _unquote(value: str | None) -> str | None:
    """Remove one pair of matching surrounding quotes from an attribute value."""
    if value is not None and len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
        return value[1:-1]
    return value


def _parse_attributes(text: str | None) -> list[dict]:
    """Turn a run of `[name<op>value]` groups into attribute rule dictionaries."""
    if text in ["", None]:
        return []
    return [
        {"name": name, "compare": compare, "value": _unquote(value)}
        for name, compare, value in (found.groups() for found in _ATTR_COMPARE_VAL.finditer(text))
    ]


def __parse_el_with_attribute(token: str) -> dict:
    """Parse a token such as `li.red#sample[type=text]` into an element rule.

    Raises:
        ValueError: If the token contains more than one id.
    """
    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    res = _EL_CLASSID_FROM_ATTR.match(token)
    el_class_id, attrs = res.group(1), res.group(2)

    element["attributes"].extend(_parse_attributes(attrs))

    if el_class_id not in ["", None]:
        for item in _EL_FROM_CLASS_FROM_ID.finditer(el_class_id):
            if item.group(1) == ".":
                if item.group(2) not in element["classList"]:
                    element["classList"].append(item.group(2))
            elif item.group(1) == "#":
                if element["id"] is None:
                    element["id"] = item.group(2)
                else:
                    # `token` is a plain string here; calling `.group()` on it
                    # used to mask this error with an AttributeError.
                    raise ValueError(f"There may only be one id per element specifier.\n{token}")
            else:
                element["tag"] = item.group(2) or "*"

    return element


def __parse_attr_only_element(token: str) -> dict:
    """Parse a token made only of attribute groups, e.g. `[checked][type=text]`."""
    return {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": _parse_attributes(token),
    }


def parse_specifiers(specifier: str) -> list[dict | str]:
    """Split a specifier into an ordered list of rules.

    Combinators are returned as the strings `*`, `>`, `+` and `~`; everything
    else becomes a rule dictionary with `tag`, `classList`, `id` and
    `attributes` keys. Text that fits none of the patterns is skipped.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    """
    tokens = []
    for token in _SPLITTER.finditer(specifier):
        if token.group(1) is not None:
            tokens.append(token.group())
        elif token.group(2) is not None:
            # Tag/class/id, optionally followed by attribute groups.
            tokens.append(__parse_el_with_attribute(token.group()))
        else:
            # Only attribute groups.
            tokens.append(__parse_attr_only_element(token.group()))

    return tokens
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` combinators are supported.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested in a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The selector to parse with `parse_specifiers`.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Check every element yielded by `walk(current)`; first hit wins."""
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def all_children(current: Element, rules: list):
        """Check the element children of the current node; first hit wins."""
        for node in visit_children(current):
            if node.type == "element":
                result = branch(node, rules)
                if result is not None:
                    return result
        return None

    def first_sibling(node: Element, rules: list):
        """Check the first *element* sibling following the node.

        Non-element nodes (text, comments) between the two elements are
        skipped, as the CSS adjacent sibling combinator ignores them.
        """
        if node.parent is None:
            return None

        siblings = node.parent.children
        for idx in range(siblings.index(node) + 1, len(siblings)):
            if siblings[idx].type == "element":
                return branch(siblings[idx], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Check every element sibling after the current node; first hit wins."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                result = branch(siblings[idx], rules)
                if result is not None:
                    return result
        return None

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node` and continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        # Descendant searches exclude `node` itself; otherwise `div div`
        # would be satisfied by a single, un-nested `div`.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)
Same as JavaScript's `querySelector`. `#` indicates an id and `.` indicates a class. If they are used alone they match any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but may only use one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Examples:
`.some-example` matches the first element with the class `some-example`
`#some-example` matches the first element with the id `some-example`
`li` matches the first `li` element
`li.red` matches the first `li` with the class `red`
`li#red` matches the first `li` with the id `red`
`input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
`div.form-control input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"` that has a parent `div` with the class `form-control`.
Return
Element | None: The first element matching the specifier or None if no element was found.
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` combinators are supported.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested in a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The selector to parse with `parse_specifiers`.

    Return:
        list[Element]: All elements matching the specifier, each listed once, or an empty
        list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Collect matches from every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Collect matches from the element children of the current node."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Collect matches from the first *element* sibling following the node.

        Non-element nodes (text, comments) between the two elements are
        skipped, as the CSS adjacent sibling combinator ignores them.
        """
        if node.parent is None:
            return []

        siblings = node.parent.children
        for idx in range(siblings.index(node) + 1, len(siblings)):
            if siblings[idx].type == "element":
                # `branch` always returns a list here; the old `[*branch(...)]`
                # raised TypeError whenever the sibling did not match (None).
                return list(branch(siblings[idx], rules))
        return []

    def all_siblings(current: Element, rules: list):
        """Collect matches from every element sibling after the current node."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for idx in range(siblings.index(current) + 1, len(siblings)):
            if siblings[idx].type == "element":
                results.extend(branch(siblings[idx], rules))
        return results

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node` and continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        # Descendant searches exclude `node` itself; otherwise `div div`
        # would be satisfied by a single, un-nested `div`.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        Always returns a list so callers can extend/unpack it safely.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same element can be reached through several nested ancestors
    # (`div span` with a span inside two divs); report it only once.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
Same as JavaScript's `querySelectorAll`. `#` indicates an id and `.` indicates a class. If they are used alone they match any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but may only use one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Examples:
`.some-example` matches all elements with the class `some-example`
`#some-example` matches all elements with the id `some-example`
`li` matches all `li` elements
`li.red` matches all `li` elements with the class `red`
`li#red` matches all `li` elements with the id `red`
`input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
`div.form-control input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"` that have a parent `div` with the class `form-control`.
Return
list[Element]: All elements matching the specifier, or an empty list if no elements were found.
def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` is true for an element with the class `some-example`
    * `#some-example` is true for an element with the id `some-example`
    * `li` is true for an `li` element
    * `li.red` is true for an `li` with the class `red`
    * `li#red` is true for an `li` with the id `red`
    * `input[type="checkbox"]` is true for an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A single-element specifier, parsed with `parse_specifiers`.

    Returns:
        bool: The result of `is_equal` for the single parsed rule and `node`.

    Raises:
        ValueError: If the specifier yields no rules, is complex (more than one rule),
        or is a lone combinator. `ValueError` is still caught by `except Exception`.
    """

    rules = parse_specifiers(specifier)

    if len(rules) == 0:
        # Previously surfaced as an opaque IndexError on `rules[0]`.
        raise ValueError(f"Specifier did not contain anything to match.\n{specifier!r}")
    if len(rules) > 1:
        raise ValueError(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise ValueError(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
Works the same as the JavaScript `matches`. `#` indicates an id and `.` indicates a class. If they are used alone they match any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but may only use one id specifier per
tag name. Complex specifiers are not supported. Everything in the specifier
must relate to one element/tag.
Examples:
`.some-example` is true for an element with the class `some-example`
`#some-example` is true for an element with the id `some-example`
`li` is true for an `li` element
`li.red` is true for an `li` with the class `red`
`li#red` is true for an `li` with the id `red`
`input[type="checkbox"]` is true for an `input` with the attribute `type="checkbox"`
467def parse_specifiers(specifier: str) -> dict: 468 """ 469 Rules: 470 * `*` = any element 471 * `>` = Everything with certain parent child relationship 472 * `+` = first sibling 473 * `~` = All after 474 * `.` = class 475 * `#` = id 476 * `[attribute]` = all elements with attribute 477 * `[attribute=value]` = all elements with attribute=value 478 * `[attribute~=value]` = all elements with attribute containing value 479 * `[attribute|=value]` = all elements with attribute=value or attribute starting with value- 480 * `node[attribute^=value]` = all elements with attribute starting with value 481 * `node[attribute$=value]` = all elements with attribute ending with value 482 * `node[attribute*=value]` = all elements with attribute containing value 483 484 """ 485 486 splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+") 487 488 el_only_attr = re.compile(r"((\[[^\[\]]+\]))+") 489 el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*") 490 491 tokens = [] 492 for token in splitter.finditer(specifier): 493 if token.group() in ["*", ">", "+", "~"]: 494 tokens.append(token.group()) 495 elif el_with_attr.match(token.group()): 496 tokens.append(__parse_el_with_attribute(token.group())) 497 elif el_only_attr.match(token.group()): 498 tokens.append(__parse_attr_only_element(token.group())) 499 500 return tokens
Rules:
`*` = any element
`>` = everything with a certain parent-child relationship
`+` = first sibling
`~` = all siblings after
`.` = class
`#` = id
`[attribute]` = all elements with attribute
`[attribute=value]` = all elements with attribute=value
`[attribute~=value]` = all elements with attribute containing value
`[attribute|=value]` = all elements with attribute=value or attribute starting with value-
`node[attribute^=value]` = all elements with attribute starting with value
`node[attribute$=value]` = all elements with attribute ending with value
`node[attribute*=value]` = all elements with attribute containing value