Coverage for src/validate.py: 33%
30 statements
« prev ^ index » next coverage.py v7.3.0, created at 2024-11-20 12:27 +0000
« prev ^ index » next coverage.py v7.3.0, created at 2024-11-20 12:27 +0000
1import collections
2import html.parser
3import logging
4import pathlib
# Shared logger for this module's diagnostics.
logger = logging.getLogger('blog')


# One local reference found in an HTML document:
#   attr  -- name of the attribute that carried it
#   path  -- filesystem location the reference resolves to
#   value -- attribute value as written in the markup
Reference = collections.namedtuple('Reference', ('attr', 'path', 'value'))
class ReferenceParser(html.parser.HTMLParser):
    """Collect local ``src``/``href`` references from an HTML document.

    Only attribute values starting with ``./`` are recorded. Each match is
    appended to ``self.references`` as a ``Reference`` whose ``value`` is the
    attribute text as written and whose ``path`` is the referenced file
    resolved against ``parent``.
    """

    def __init__(self, parent):
        """Create a parser that resolves references against *parent*.

        Args:
            parent: Directory (string or path object) that ``./`` references
                are joined to.
        """
        self.parent = pathlib.Path(parent)
        self.references = []
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Record every local ``src``/``href`` attribute of a start tag.

        Args:
            tag: Name of the tag (unused).
            attrs: List of ``(name, value)`` pairs for the tag.
        """
        for key, val in attrs:
            if key not in ('src', 'href'):
                continue
            # Attributes written without a value are delivered as None.
            val = val or ''
            if not val.startswith('./'):
                continue
            # A fragment ("#...") or query ("?...") is not part of the file
            # name; leaving it in would make an existing target look missing.
            target = val.partition('#')[0].partition('?')[0]
            self.references.append(Reference(
                attr=key,
                value=val,
                path=self.parent / target,
            ))
def validate_html_references(path: str | pathlib.Path) -> int:
    """Check that local references in an HTML file point at existing files.

    The file is parsed with ``ReferenceParser``. For every collected
    reference whose resolved path is not a regular file, a warning is
    logged on the module logger.

    Args:
        path: Location of the HTML file to inspect.

    Returns:
        The number of references collected from the file, whether or not
        their targets exist.
    """
    path = pathlib.Path(path)

    # Decode explicitly so the result does not depend on the platform locale.
    content = path.read_text(encoding='utf-8')

    # check refs
    checker = ReferenceParser(parent=path.parent)
    checker.feed(content)
    for reference in checker.references:
        if not reference.path.is_file():
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            logger.warning('%s: %s reference not found: %s',
                           path.name, reference.attr, reference.value)
    return len(checker.references)