Coverage for src/validate.py: 33%

30 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2024-10-23 12:26 +0000

1import collections 

2import html.parser 

3import logging 

4import pathlib 

5 

# Module logger; validation warnings are emitted under the 'blog' name.
logger = logging.getLogger('blog')


# One local reference discovered in an HTML document.
#   attr  -- name of the attribute that carried the reference
#   path  -- the reference joined onto the document's parent directory
#   value -- the attribute value exactly as written in the HTML
Reference = collections.namedtuple('Reference', ('attr', 'path', 'value'))

14 

15 

class ReferenceParser(html.parser.HTMLParser):
    """Collect the local (``./``-prefixed) ``src``/``href`` references in HTML.

    Feed markup through ``feed()``; every matching attribute is appended to
    ``self.references`` as a ``Reference`` tuple.
    """

    def __init__(self, parent):
        """Create a parser that resolves references against *parent*.

        Args:
            parent: Directory (string or path-like) that each ``./``
                reference is joined onto to build ``Reference.path``.
        """
        self.parent = pathlib.Path(parent)
        self.references = []
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Record each ``src``/``href`` attribute whose value starts with ``./``.

        Args:
            tag: Name of the start tag (unused; any tag may carry a reference).
            attrs: ``(name, value)`` pairs supplied by ``HTMLParser``.
        """
        for key, val in attrs:
            if key not in ('src', 'href'):
                continue
            # A valueless attribute (e.g. ``<a href>``) arrives as None.
            val = val or ''
            if not val.startswith('./'):
                continue
            # The fragment and query string are not part of the file name on
            # disk, so drop them before building the path; otherwise
            # ``./post.html#top`` could never resolve to an existing file.
            # ``value`` keeps the reference exactly as written for reporting.
            target = val.partition('#')[0].partition('?')[0]
            self.references.append(Reference(
                attr=key,
                value=val,
                path=self.parent / target,
            ))

35 

36 

def validate_html_references(path: str | pathlib.Path) -> int:
    """Check that the local references in an HTML file point at real files.

    The file is parsed with ``ReferenceParser``, which collects the
    ``src``/``href`` attributes whose value starts with ``./``. A warning is
    logged for every collected reference whose resolved path is not an
    existing file.

    Args:
        path: Location of the HTML file to check.

    Returns:
        The number of ``./`` references found in the file, whether or not
        their targets exist.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = pathlib.Path(path)

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    content = path.read_text(encoding='utf-8')

    # Collect the references, resolved relative to the file's own directory.
    checker = ReferenceParser(parent=path.parent)
    checker.feed(content)

    for reference in checker.references:
        if not reference.path.is_file():
            # Logger.warn() is a deprecated alias of Logger.warning().
            logger.warning('%s: %s reference not found: %s',
                           path.name, reference.attr, reference.value)
    return len(checker.references)