phml.utils.transform.extract
# pylint: disable=missing-module-docstring
from phml.nodes import AST, All_Nodes, Comment, Element, Root, Text

__all__ = ["to_string"]


def to_string(node: AST | All_Nodes) -> str | None:
    """Return the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that
    the collected text values are joined with a single space.

    Args:
        node: Node to read the text from. An ``AST`` is unwrapped to its
            ``tree`` first. A ``Text`` or ``Comment`` yields its own value.
            A ``Root`` or ``Element`` yields the values of every ``Text``
            descendant in document order.

    Returns:
        The unformatted text, or ``None`` when the node is none of the
        kinds listed above.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    # Guard clause: node kinds without a value or children have no text.
    if not isinstance(node, (Root, Element)):
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each Text descendant, depth first."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                # Only Text contributes; other child kinds are skipped.
                yield child.value

    return " ".join(iter_text(node))
def
to_string( node: phml.nodes.AST.AST | phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> str:
def to_string(node: AST | All_Nodes) -> str | None:
    """Return the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that
    the collected text values are joined with a single space.

    Args:
        node: Node to read the text from. An ``AST`` is unwrapped to its
            ``tree`` first. A ``Text`` or ``Comment`` yields its own value.
            A ``Root`` or ``Element`` yields the values of every ``Text``
            descendant in document order.

    Returns:
        The unformatted text, or ``None`` when the node is none of the
        kinds listed above.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    # Guard clause: node kinds without a value or children have no text.
    if not isinstance(node, (Root, Element)):
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each Text descendant, depth first."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                # Only Text contributes; other child kinds are skipped.
                yield child.value

    return " ".join(iter_text(node))
Get the raw text content of a node. Works similarly to the DOM's Node#textContent getter.
Args
- node (AST | Root | Element | Text | Comment): Node to get the text content from. An AST is unwrapped to its tree first.
Returns
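str: Raw inner text without formatting; the text values found under a Root or Element are joined with a single space. None is returned for any other kind of node.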