phml.locate.select
utils.select
A collection of utilities around querying for specific types of data.
"""utils.select

A collection of utilities around querying for specific
types of data.
"""

import re
from typing import Callable

from phml.nodes import AST, Element, Root
from phml.travel.travel import visit_children, walk

__all__ = ["query", "query_all", "matches", "parse_specifiers"]

# One `[name]`, `[name=value]`, `[name^="value"]`, ... attribute specifier.
_ATTRIBUTE_RE = re.compile(
    r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"]+)?\]"
)

# Comparator token -> predicate(attribute_value, wanted_value).
# `~=` and `*=` are both treated as "contains", as documented on `is_equal`.
_COMPARATORS: dict[str, Callable[[str, str], bool]] = {
    "=": lambda found, wanted: found == wanted,
    "|=": lambda found, wanted: found == wanted or found.startswith(f"{wanted}-"),
    "^=": lambda found, wanted: found.startswith(wanted),
    "$=": lambda found, wanted: found.endswith(wanted),
    "*=": lambda found, wanted: wanted in found,
    "~=": lambda found, wanted: wanted in found,
}


def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching a CSS-like specifier.

    Works like JavaScript's `querySelector`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed: space separated specifiers
    indicate nesting, and `>`, `+`, `~` and `*` are handled as combinators.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree`
            attribute is searched.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element that `walk(current)` yields."""
        for node in walk(current):
            # `include_self=False` filters out the start node.
            # NOTE(review): assumes `walk` also yields `current` itself - confirm.
            if node.type == "element" and (include_self or node != current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Element, rules: list):
        """Try `rules` on every element that `visit_children(current)` yields."""
        for node in visit_children(current):
            if node.type == "element":
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the child right after `node`, if it is an element."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for position in range(siblings.index(current) + 1, len(siblings)):
            if siblings[position].type == "element":
                found = branch(siblings[position], rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Descendant relationship: only nodes below `node` may satisfy the
            # next rule, otherwise `div div` would match a single `div`.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)


def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching a CSS-like specifier.

    Works like JavaScript's `querySelectorAll`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed: space separated specifiers
    indicate nesting, and `>`, `+`, `~` and `*` are handled as combinators.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree`
            attribute is searched.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        list[Element]: All elements matching the specifier, each listed once in the
        order they were found, or an empty list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element that `walk(current)` yields."""
        results = []
        for node in walk(current):
            # `include_self=False` filters out the start node.
            # NOTE(review): assumes `walk` also yields `current` itself - confirm.
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Try `rules` on every element that `visit_children(current)` yields."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the child right after `node`, if it is an element."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for position in range(siblings.index(current) + 1, len(siblings)):
            if siblings[position].type == "element":
                results.extend(branch(siblings[position], rules))
        return results

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: only nodes below `node` may satisfy the
            # next rule, otherwise `div div` would match a single `div`.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node in a list.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Callers pass the result to `list.extend`, so "no match" must be a list.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same node can be reached through several ancestors (nested matches),
    # so keep only the first occurrence of each node object.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique


def matches(node: Element, specifier: str) -> bool:
    """Check whether a single node satisfies a simple specifier.

    Works like JavaScript's `matches`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The node to test.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        bool: The result of `is_equal` for the parsed rule and the node.

    Raises:
        Exception: If the specifier parses to no rule, to more than one rule,
            or to a combinator instead of an element rule.
    """

    rules = parse_specifiers(specifier)

    if not rules:
        # Without this guard `rules[0]` fails with an unexplained IndexError.
        raise Exception(f"Specifier does not contain an element specifier.\n{specifier}")
    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)


def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is not `*`, the nodes `tag` must match the rules `tag`
    * If the `id` is provided, the nodes `id` must match the rules `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the nodes
    class attribute
    * If the `attribute` list is not empty, each attribute in the attribute list will be compared
    against the nodes attributes given the `attribute` lists comparators. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node.properties or rule["id"] != node["id"]):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node.properties:
            return False
        node_classes = node["class"].split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes. An attribute without a comparator (`[checked]`)
    # only has to exist; previously such a rule could never match.
    return all(
        attr["name"] in node.properties
        and (attr["compare"] is None or __validate_attr(attr, node))
        for attr in rule["attributes"]
    )


def __validate_attr(attr: dict, node: Element) -> bool:
    """Apply the comparator of one parsed attribute rule to the node's attribute.

    Returns True when the comparator is not one of the known tokens.
    """
    validator = _COMPARATORS.get(attr["compare"])
    if validator is None:
        return True

    return is_valid_attr(
        attr=node[attr["name"]],
        sub=attr["value"],
        name=attr["name"],
        validator=validator,
    )


def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value with a given string and a validator callable.
    If the attribute is `class`, the value is split on spaces and each piece is
    checked on its own. Otherwise the whole value is checked once.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        name (str): The attribute name.
        validator (Callable): Called as `validator(piece, sub)`.

    Returns:
        True if the validator accepts at least one piece.
    """
    list_attributes = ["class"]

    compare_values = attr.split(" ") if name in list_attributes else [attr]
    return any(validator(item, sub) for item in compare_values)


def _parse_attributes(text: str) -> list:
    """Parse every `[...]` attribute specifier in `text` into a rule dict."""
    attributes = []
    for found in _ATTRIBUTE_RE.finditer(text):
        name, compare, value = found.groups()
        if value is not None:
            # Drop the quotes surrounding a quoted value.
            value = value.strip("'\"")
        attributes.append({"name": name, "compare": compare, "value": value})
    return attributes


def __parse_el_with_attribute(token: str) -> dict:
    """Parse a token such as `li.red#main[type=text]` into an element rule.

    Raises:
        Exception: If the token contains more than one `#id`.
    """
    el_classid_from_attr = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    res = el_classid_from_attr.match(token)
    el_class_id, attrs = res.group(1), res.group(2)

    if attrs:
        element["attributes"] = _parse_attributes(attrs)

    if el_class_id:
        for item in el_from_class_from_id.finditer(el_class_id):
            prefix, name = item.groups()
            if prefix == ".":
                if name not in element["classList"]:
                    element["classList"].append(name)
            elif prefix == "#":
                if element["id"] is not None:
                    raise Exception(f"There may only be one id per element specifier.\n{token}")
                element["id"] = name
            else:
                element["tag"] = name or "*"

    return element


def __parse_attr_only_element(token: str) -> dict:
    """Parse a token made only of `[...]` specifiers into an any-tag element rule."""
    return {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": _parse_attributes(token) if token else [],
    }


def parse_specifiers(specifier: str) -> list:
    """Split a specifier string into a flat list of rules.

    Each item of the result is either one of the strings `*`, `>`, `+`, `~`
    or a dictionary describing one element, built by
    `__parse_el_with_attribute` or `__parse_attr_only_element`.
    Text that fits none of the patterns (for example spaces) is skipped.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to parse.

    Returns:
        list: The rules in the order they appear in the specifier. Empty when
        nothing in the specifier is recognised.
    """

    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")
    el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")

    tokens = []
    for found in splitter.finditer(specifier):
        text = found.group()
        if text in ["*", ">", "+", "~"]:
            tokens.append(text)
        elif el_with_attr.match(text):
            tokens.append(__parse_el_with_attribute(text))
        elif el_only_attr.match(text):
            tokens.append(__parse_attr_only_element(text))

    return tokens
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching a CSS-like specifier.

    Works like JavaScript's `querySelector`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed: space separated specifiers
    indicate nesting, and `>`, `+`, `~` and `*` are handled as combinators.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree`
            attribute is searched.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element that `walk(current)` yields."""
        for node in walk(current):
            # `include_self=False` filters out the start node.
            # NOTE(review): assumes `walk` also yields `current` itself - confirm.
            if node.type == "element" and (include_self or node != current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Element, rules: list):
        """Try `rules` on every element that `visit_children(current)` yields."""
        for node in visit_children(current):
            if node.type == "element":
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the child right after `node`, if it is an element."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return None

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return None

        siblings = current.parent.children
        for position in range(siblings.index(current) + 1, len(siblings)):
            if siblings[position].type == "element":
                found = branch(siblings[position], rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            return node

        if isinstance(rules[1], dict):
            # Descendant relationship: only nodes below `node` may satisfy the
            # next rule, otherwise `div div` would match a single `div`.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """

        if len(rules) == 0:
            return node

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    return all_nodes(tree, rules)
Same as JavaScript's querySelector. `#` indicates an id and `.` indicates a class.
If they are used alone they match anything.
Any tag can be used by itself or with `#` and/or `.`.
You may use any number of class specifiers, but may only use one id specifier per tag name.
Complex specifiers are allowed, meaning you can have space-separated specifiers
indicating nesting or a parent-child relationship.
Examples:
* `.some-example` matches the first element with the class `some-example`
* `#some-example` matches the first element with the id `some-example`
* `li` matches the first `li` element
* `li.red` matches the first `li` with the class `red`
* `li#red` matches the first `li` with the id `red`
* `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.
Return:
Element | None: The first element matching the specifier or None if no element was found.
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching a CSS-like specifier.

    Works like JavaScript's `querySelectorAll`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed: space separated specifiers
    indicate nesting, and `>`, `+`, `~` and `*` are handled as combinators.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree`
            attribute is searched.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        list[Element]: All elements matching the specifier, each listed once in the
        order they were found, or an empty list if no elements were found.
    """

    def all_nodes(current: Element, rules: list, include_self: bool = True):
        """Try `rules` on every element that `walk(current)` yields."""
        results = []
        for node in walk(current):
            # `include_self=False` filters out the start node.
            # NOTE(review): assumes `walk` also yields `current` itself - confirm.
            if node.type == "element" and (include_self or node != current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Element, rules: list):
        """Try `rules` on every element that `visit_children(current)` yields."""
        results = []
        for node in visit_children(current):
            if node.type == "element":
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Element, rules: list):
        """Try `rules` on the child right after `node`, if it is an element."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return []

    def all_siblings(current: Element, rules: list):
        """Try `rules` on every element that follows `current` in its parent."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent.children
        for position in range(siblings.index(current) + 1, len(siblings)):
            if siblings[position].type == "element":
                results.extend(branch(siblings[position], rules))
        return results

    def process_dict(rules: list, node: Element):
        """Check `node` against the element rule `rules[0]`, then continue."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: only nodes below `node` may satisfy the
            # next rule, otherwise `div div` would match a single `div`.
            return all_nodes(node, rules[1:], False)

        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node in a list.
        """

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        # Callers pass the result to `list.extend`, so "no match" must be a list.
        return []

    if isinstance(tree, AST):
        tree = tree.tree

    rules = parse_specifiers(specifier)

    # The same node can be reached through several ancestors (nested matches),
    # so keep only the first occurrence of each node object.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
Same as JavaScript's querySelectorAll. `#` indicates an id and `.` indicates a class.
If they are used alone they match anything.
Any tag can be used by itself or with `#` and/or `.`.
You may use any number of class specifiers, but may only use one id specifier per tag name.
Complex specifiers are allowed, meaning you can have space-separated specifiers
indicating nesting or a parent-child relationship.
Examples:
* `.some-example` matches all elements with the class `some-example`
* `#some-example` matches all elements with the id `some-example`
* `li` matches all `li` elements
* `li.red` matches all `li` elements with the class `red`
* `li#red` matches all `li` elements with the id `red`
* `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.
Return:
list[Element]: All elements matching the specifier, or an empty list if no elements were found.
def matches(node: Element, specifier: str) -> bool:
    """Check whether a single node satisfies a simple specifier.

    Works like JavaScript's `matches`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The node to test.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Returns:
        bool: The result of `is_equal` for the parsed rule and the node.

    Raises:
        Exception: If the specifier parses to no rule, to more than one rule,
            or to a combinator instead of an element rule.
    """

    rules = parse_specifiers(specifier)

    if not rules:
        # Without this guard `rules[0]` fails with an unexplained IndexError.
        raise Exception(f"Specifier does not contain an element specifier.\n{specifier}")
    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
Works the same as the JavaScript matches. `#` indicates an id and `.` indicates a class.
If they are used alone they match anything.
Any tag can be used by itself or with `#` and/or `.`.
You may use any number of class specifiers, but may only use one id specifier per tag name.
Complex specifiers are not supported. Everything in the specifier must relate to one element/tag.
Examples:
* `.some-example` matches an element with the class `some-example`
* `#some-example` matches an element with the id `some-example`
* `li` matches an `li` element
* `li.red` matches an `li` with the class `red`
* `li#red` matches an `li` with the id `red`
* `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`
def parse_specifiers(specifier: str) -> list:
    """Split a specifier string into a flat list of rules.

    Each item of the result is either one of the strings `*`, `>`, `+`, `~`
    or a dictionary describing one element, built by
    `__parse_el_with_attribute` or `__parse_attr_only_element`.
    Text that fits none of the patterns (for example spaces) is skipped.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to parse.

    Returns:
        list: The rules in the order they appear in the specifier. Empty when
        nothing in the specifier is recognised.
    """

    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")
    el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")

    tokens = []
    for found in splitter.finditer(specifier):
        text = found.group()
        if text in ["*", ">", "+", "~"]:
            tokens.append(text)
        elif el_with_attr.match(text):
            tokens.append(__parse_el_with_attribute(text))
        elif el_only_attr.match(text):
            tokens.append(__parse_attr_only_element(text))

    return tokens
Rules:
* `*` = any element
* `>` = everything with a certain parent-child relationship
* `+` = first sibling
* `~` = all siblings after
* `.` = class
* `#` = id
* `[attribute]` = all elements with attribute
* `[attribute=value]` = all elements with attribute=value
* `[attribute~=value]` = all elements with attribute containing value
* `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
* `node[attribute^=value]` = all elements with attribute starting with value
* `node[attribute$=value]` = all elements with attribute ending with value
* `node[attribute*=value]` = all elements with attribute containing value