Coverage for phml\validate\validate.py: 100%

82 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-08 16:33 -0600

1from re import match, split, sub 

2 

3from phml.nodes import All_Nodes, Comment, Element, Literal, Parent, Root, Text 

4 

# Names exported by a star-import of this module.
__all__ = [
    "validate", "parent", "literal", "generated",
    "has_property", "is_heading",
    "is_css_link", "is_css_style", "is_javascript",
    "is_element", "is_embedded", "is_interactive", "is_phrasing",
    "is_event_handler",
]

21 

22 

def validate(node: All_Nodes) -> bool:
    """Check that the attributes present on a node agree with its type.

    Only attributes that exist on `node` are inspected:

    * `children`: the node must have a `type` of `'root'` or `'element'` and
      every child must be an instance of `All_Nodes`.
    * `properties`: the `type`, when present, must be `'element'` and every
      value read through `node[prop]` must be an `int` or a `str`.
    * `value`: must be a `str`.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: `True` when none of the checks fail.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ("root", "element"):
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        if any(not isinstance(child, All_Nodes) for child in node.children):
            raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # A node without a `type` is given the benefit of the doubt here.
        if getattr(node, "type", "element") != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for prop in node.properties:
            if not isinstance(node[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value"):
        if not isinstance(node.value, str):
            raise AssertionError("Node 'value' must be of type 'str'")

    return True

49 

50 

def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node to validate.

    Returns:
        bool: `True` when the node passes every check.

    Raises:
        AssertionError: If the node is not an instance of `Parent`, if its
            `children` attribute is missing or `None`, or if it has a `type`
            of `'element'` and its `properties` attribute is missing or `None`.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if getattr(node, "children", None) is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and getattr(node, "properties", None) is None:
        raise AssertionError("Parent element node should have the 'properties' element.")

    # The signature promises a bool; report success explicitly instead of
    # falling off the end and returning None.
    return True

63 

64 

def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Node to validate.

    Returns:
        bool: `True` when the node passes every check.

    Raises:
        AssertionError: If the node is not an instance of `Literal`, or if its
            `value` attribute is missing or is not a `str`.
    """
    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not hasattr(node, "value") or not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # The signature promises a bool; report success explicitly instead of
    # falling off the end and returning None.
    return True

75 

76 

def generated(node: All_Nodes) -> bool:
    """Tell whether a node is considered generated.

    A node is considered generated when it has no `position` attribute or
    when that attribute is `None`.

    Args:
        node (All_Nodes): Node whose position is inspected.

    Returns:
        bool: `True` if the node has no position, `False` otherwise.
    """
    return getattr(node, "position", None) is None

88 

89 

def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node: Node to check; it must have a `type` of `'element'`.

    Returns:
        bool: `True` if the element's tag is exactly `h1`, `h2`, `h3`, `h4`,
        `h5` or `h6`.

    Raises:
        TypeError: If the node is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` only anchors at the start of the string, so the pattern is closed
    # with `\Z`; otherwise a tag such as `h1-title` would count as a heading.
    return match(r"h[1-6]\Z", node.tag) is not None

98 

99 

def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `True` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.
    """
    # Verify it is an element with a `link` tag that carries a `rel`
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    rel = node["rel"]
    # `str.split()` separates on any run of whitespace (spaces, tabs, newlines).
    # A `rel` that is not a string cannot contain the `stylesheet` token.
    if not isinstance(rel, str) or "stylesheet" not in rel.split():
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    return not has_property(node, "type") or node["type"] in ("", "text/css")

120 

121 

def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Returns `True` if `node` is a `<style>` element that has no `type`,
    an empty `type`, or `'text/css'` as its `type`.
    """
    if not is_element(node, "style"):
        return False

    # A missing `type` is accepted as css
    if not has_property(node, "type"):
        return True

    return node["type"] == "" or node["type"] == "text/css"

133 

134 

def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `True` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    # A `type` decides the outcome on its own; `language` is only consulted
    # when there is no `type`.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    # Neither `type` nor `language`
    return True

154 

155 

156def is_element(node, *conditions: str | list) -> bool: 

157 """Checks if the given node is a certain element. 

158 

159 When providing a str it will check that the elements tag matches. 

160 If a list is provided it checks that one of the conditions in the list 

161 passes. 

162 """ 

163 

164 return bool( 

165 node.type == "element" 

166 and any( 

167 bool( 

168 (isinstance(condition, str) and node.tag == condition) 

169 or (isinstance(condition, list) and any(node.tag == nested for nested in condition)) 

170 ) 

171 for condition in conditions 

172 ) 

173 ) 

174 

175 

def is_event_handler(attribute: str) -> bool:
    """Check if an attribute name looks like an event handler.

    Returns `True` if the name starts with `on` and is at least `5`
    characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

181 

182 

def has_property(node, attribute: str) -> bool:
    """Check whether an element has `attribute` in its properties.

    Raises a `TypeError` when the node is not an element.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")
    return attribute in node.properties

190 

191 

def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = [
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    ]

    # A list condition passes when any one of its tags matches the element
    return is_element(node, embedded_tags)

226 

227 

def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio (if the controls attribute is present)
    * button, details, embed, iframe
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * label, select, textarea
    * video (if the controls attribute is present)

    Returns:
        True if element is interactive
    """
    if is_element(node, "a"):
        return has_property(node, "href")

    # Media elements are only interactive when they expose controls
    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce to bool so the attribute's value is never leaked to the caller
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without a `type` is not hidden, so it is interactive.
        # `str()` keeps a non-string `type` value from raising on `.lower()`.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(node, "button", "details", "embed", "iframe", "label", "select", "textarea")

259 

260 

def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
      em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
      picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
      template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """
    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Only the direct parent is inspected here
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        rel = node["rel"]
        if not isinstance(rel, str):
            return False

        body_ok = (
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        )

        # `str.split()` drops the empty tokens that repeated or leading spaces
        # would otherwise produce; an empty `rel` is still rejected.
        tokens = rel.split()
        return bool(tokens) and all(token in body_ok for token in tokens)

    return is_element(
        node,
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )