Coverage for phml\utils\validate\validate.py: 72%

80 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-08 11:07 -0600

1# pylint: disable=missing-module-docstring 

2from re import match, split, sub 

3 

4from phml.nodes import All_Nodes, Comment, Element, Literal, Parent, Root, Text 

5 

# Public API of this module: the names exposed by `from ... import *`.
# `is_embedded`, `is_interactive` and `is_phrasing` are public helpers defined
# in this module, so they are exported alongside the other `is_*` checks.
__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_event_handler",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
]

19 

20 

def validate(node: All_Nodes) -> bool:
    """Check that a node's attributes are consistent with its type.

    Args:
        node (All_Nodes): Node to inspect. Only the attributes that are
            present on the node (`children`, `properties`, `value`) are checked.

    Returns:
        bool: Always `True` when every check passes.

    Raises:
        AssertionError: If `children` is present without a `type`, on a node
            that is not `root`/`element`, or holding a non-node child; if
            `properties` is present on a non-element or holds a value that is
            not `int`/`str`; or if `value` is present and is not a `str`.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ("root", "element"):
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, All_Nodes):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # A node without a `type` is treated like an element for this check.
        if getattr(node, "type", "element") != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            # Property values are read by indexing the node with the property name.
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    # A missing `value` is fine; a present one must be a string.
    if not isinstance(getattr(node, "value", ""), str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True

47 

48 

def parent(node: Root | Element) -> bool:
    """Validate a parent node based on attributes and type.

    Args:
        node (Root | Element): Node expected to inherit from `Parent`.

    Returns:
        bool: `True` when the node passes every check. Previously the function
        fell off the end and returned `None` despite the `bool` annotation.

    Raises:
        AssertionError: If the node does not inherit from `Parent`, has no
            (or a `None`) `children` attribute, or is an element with no
            (or a `None`) `properties` attribute.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    # Missing attribute and explicit `None` are treated the same way.
    if getattr(node, "children", None) is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and getattr(node, "properties", None) is None:
        raise AssertionError("Parent element node shoudl have the 'properties' element.")

    return True

61 

62 

def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on attributes.

    Args:
        node (Text | Comment): Node expected to inherit from `Literal`.

    Returns:
        bool: `True` when the node passes every check. Previously the function
        fell off the end and returned `None` despite the `bool` annotation.

    Raises:
        AssertionError: If the node does not inherit from `Literal`, or its
            `value` attribute is missing or not a `str`.
    """

    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    # A missing `value` yields `None`, which fails the `str` check as well.
    if not isinstance(getattr(node, "value", None), str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    return True

73 

74 

def generated(node: All_Nodes) -> bool:
    """Check whether a node was generated.

    A node is considered generated when it carries no position information.

    Args:
        node (All_Nodes): Node to inspect for a `position` attribute.

    Returns:
        bool: `True` if the node has no `position` attribute or the attribute
        is `None`; `False` if a position is set.
    """
    # A missing attribute and an explicit `None` both mean "no position".
    return getattr(node, "position", None) is None

86 

87 

def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node: Node to check. Must have a `type` attribute, and a `tag`
            attribute when it is an element.

    Returns:
        bool: `True` only when the tag is exactly one of `h1`..`h6`.

    Raises:
        TypeError: If the node is not an element.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # Compare the whole tag: a prefix-only regex match would also accept
    # tags such as `h1x` or `h1-title`, which are not headings.
    return node.tag in ("h1", "h2", "h3", "h4", "h5", "h6")

96 

97 

def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `True` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.

    Args:
        node: Node to check.

    Returns:
        bool: Whether the node links a css stylesheet.
    """

    # Verify it is an element with a `link` tag that carries a `rel` list.
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    rel = node["rel"]
    # `rel` holds whitespace-separated tokens. `str.split()` with no argument
    # handles tabs, newlines and runs of spaces, which splitting on a single
    # space character does not. A non-string `rel` cannot contain the token.
    if not isinstance(rel, str) or "stylesheet" not in rel.split():
        return False

    # Can have a `type` of `text/css` or empty or no `type`.
    if not has_property(node, "type"):
        return True
    return node["type"] in ("", "text/css")

118 

119 

def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Args:
        node: Node to check.

    Returns:
        bool: `True` if `node` is a `<style>` element that has no `type`,
        an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False

    # No `type` at all counts as css.
    if not has_property(node, "type"):
        return True

    style_type = node["type"]
    return style_type == "" or style_type == "text/css"

131 

132 

def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `True` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.

    Args:
        node: Node to check.

    Returns:
        bool: Whether the node is a javascript `<script>` element.
    """
    if not is_element(node, "script"):
        return False

    # When `type` is present it alone decides the result. The previous
    # implementation also required `language` to be absent, which rejected
    # `<script type="text/javascript" language="javascript">` contrary to the
    # documented contract above.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    # `language` is only consulted when there is no `type`.
    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    # Neither attribute: a plain `<script>` is javascript.
    return True

152 

153 

def is_element(node, *conditions: str | list) -> bool:
    """Check whether the given node is one of the named elements.

    Each condition is either a tag name (`str`) or a `list` of tag names.
    A `str` passes when it equals the node's tag; a `list` passes when any
    of its entries equals the node's tag. Conditions of any other type are
    ignored.

    Args:
        node: Node to check. Must have a `type` attribute.
        *conditions (str | list): Tag names, or lists of tag names, to accept.

    Returns:
        bool: `True` if the node is an element and at least one condition
        passes. With no conditions the result is `False`.
    """

    if node.type != "element":
        return False

    for condition in conditions:
        if isinstance(condition, str):
            if node.tag == condition:
                return True
        elif isinstance(condition, list):
            if any(node.tag == nested for nested in condition):
                return True

    return False

172 

173 

def is_event_handler(attribute: str) -> bool:
    """Check whether an attribute name looks like an event handler.

    Args:
        attribute (str): Attribute name to check.

    Returns:
        bool: `True` if the name starts with `on` and is at least `5`
        characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

179 

180 

def has_property(node, attribute: str) -> bool:
    """Check whether an element has a given property in its properties.

    Args:
        node: Node to check. Must have a `type` attribute, and a `properties`
            container when it is an element.
        attribute (str): Property name to look for.

    Returns:
        bool: `True` if `attribute` is in the element's `properties`.

    Raises:
        TypeError: If the node is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    return attribute in node.properties

188 

189 

def is_embedded(node: Element) -> bool:
    """Check whether an element is an embedded element.

    Embedded elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Args:
        node (Element): Node to check.

    Returns:
        bool: `True` if the node is an element whose tag is one of the
        embedded tags listed above.
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )

    # Each tag is passed as its own condition; any match is enough.
    return is_element(node, *embedded_tags)

224 

225 

def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Follows the HTML "interactive content" category:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): Node to check.

    Returns:
        bool: True if element is interactive
    """

    # These elements are interactive regardless of their attributes. The
    # previous implementation wrongly required `usemap`/`controls` on them
    # and listed `textarea` as the two tags `text` and `area`.
    if is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    ):
        return True

    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        return has_property(node, "usemap")

    if is_element(node, "input"):
        # A missing `type` is not the Hidden state, so a bare `<input>` is
        # interactive. `str()` guards against non-string property values.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return False

254 

255 

# `rel` keywords that allow a `<link>` element to appear in the body.
_BODY_OK_LINK_RELS = (
    "dns-prefetch",
    "modulepreload",
    "pingback",
    "preconnect",
    "prefetch",
    "preload",
    "prerender",
    "stylesheet",
)

# Elements that are phrasing content without any further condition.
_PHRASING_TAGS = (
    "map",
    "mark",
    "math",
    "audio",
    "b",
    "bdi",
    "bdo",
    "br",
    "button",
    "canvas",
    "cite",
    "code",
    "data",
    "datalist",
    "del",
    "dfn",
    "em",
    "embed",
    "i",
    "iframe",
    "img",
    "input",
    "ins",
    "kbd",
    "label",
    "a",
    "abbr",
    "meter",
    "noscript",
    "object",
    "output",
    "picture",
    "progress",
    "q",
    "ruby",
    "s",
    "samp",
    "script",
    "select",
    "slot",
    "small",
    "span",
    "strong",
    "sub",
    "sup",
    "svg",
    "template",
    "textarea",
    "time",
    "u",
    "var",
    "video",
    "wbr",
)


def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Args:
        node (Element): Node to check. `Text` nodes are accepted as well.

    Returns:
        bool: True if the element is phrasing text
    """

    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Only the direct parent is inspected here.
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True
        if not has_property(node, "rel"):
            return False

        rel = node["rel"]
        # `split()` with no argument drops empty tokens, so runs of whitespace
        # between keywords no longer cause a false negative. An empty or
        # non-string `rel` has no tokens and is rejected.
        tokens = rel.split() if isinstance(rel, str) else []
        return bool(tokens) and all(token in _BODY_OK_LINK_RELS for token in tokens)

    # Pass the node itself. The previous code passed the string literal
    # "node", which raised AttributeError for every node reaching this point.
    return is_element(node, *_PHRASING_TAGS)