Coverage for src/models/page.py: 50%

111 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2024-12-21 12:23 +0000

1import collections 

2import datetime 

3import functools 

4import logging 

5import pathlib 

6import re 

7import xml.etree.ElementTree 

8 

9from src.template import template_env, render_template 

10from src import xml, validate 

11 

# Module-wide logger. The name 'blog' is fixed (rather than __name__) so
# every record from this module lands on the same named logger.
logger = logging.getLogger('blog')

13 

14 

def load_entries(entries_dir='./entries'):
    """
    Build a `Page` for every `*.html` file in `entries_dir` and return
    them newest first.

    Each entry is wired up with the filenames of its chronological
    neighbours, as computed by `paginate_entries`.

    ```python
    entries = src.load_entries()
    ```
    """
    # Sorting the paths first gives the pagination map a stable order.
    sources = sorted(pathlib.Path(entries_dir).glob('*.html'))
    neighbours = paginate_entries(sources)

    entries = [
        Page(
            source,
            next_page=neighbours[source.name].next,
            previous_page=neighbours[source.name].previous,
        )
        for source in sources
    ]

    # Latest entry first.
    entries.sort(key=lambda entry: entry.date, reverse=True)
    return entries

40 

41 

def load_pages(pages_dir='./pages'):
    """
    Build a `Page` for every file matching `*.*` in `pages_dir` and
    return them ordered by output filename.

    ```python
    pages = src.load_pages()
    ```
    """
    found = [Page(source) for source in pathlib.Path(pages_dir).glob('*.*')]
    found.sort(key=lambda page: page.filename)
    return found

53 

54 

class Page:
    """
    A website page. Can be either a normal page, or a journal entry.
    """

    def __init__(self, path: pathlib.Path, next_page=None, previous_page=None):
        """
        `path` is the source file; it is coerced with `pathlib.Path`.

        `next_page` and `previous_page` can be filenames, if
        pagination should be enabled.
        """
        self.path = pathlib.Path(path)

        self._next = next_page
        self._previous = previous_page

    @property
    def filename(self):
        """
        Page filename, e.g. `index.html`.

        If the source is a template, the trailing `.j2` suffix is
        removed, so `index.html.j2` becomes `index.html`.
        """
        if self.path.suffix == '.j2':
            return self.path.name[:-3]
        return self.path.name

    @property
    def is_entry(self) -> bool:
        """
        `True` if the page is a journal entry, False if it's just a
        normal Page.

        A page counts as an entry when the relative directory
        `./entries` is one of the parents of its path.
        """
        entry_dir = pathlib.Path('./entries')
        return entry_dir in self.path.parents

    @property
    def date(self) -> datetime.datetime:
        """
        Page date, as parsed from the filename.

        The filename stem must look like `YYYY-MM-DD`; `strptime`
        raises `ValueError` otherwise.
        """
        return datetime.datetime.strptime(self.path.stem, '%Y-%m-%d')

    @functools.cached_property
    def metadata(self) -> dict:
        """
        Metadata embedded in the page. This is read from special HTML
        comments.

        A page with this header:

        ```html
        <!-- meta:title a walk in the park -->
        <!-- meta:description I take a nice walk in the park -->
        ```

        Will yield this metadata:

        ```python
        {
            'title': 'a walk in the park',
            'description': 'I take a nice walk in the park',
        }
        ```

        For performance, this information is only read once, then
        cached on the instance.
        """
        with self.path.open('r') as f:
            return parse_metadata(f.read())

    @property
    def title(self):
        """
        Page title.

        Journal entries are titled with their date, e.g.
        `Sunday, January 5 2020`. Other pages use the `title` metadata
        value, or `None` when it is missing.
        """
        if self.is_entry:
            date = self.date
            # `%-d` (day without zero padding) is a platform-specific
            # strftime extension, so the day is formatted separately.
            return f'{date:%A, %B} {date.day} {date:%Y}'
        return self.metadata.get('title')

    @property
    def description(self):
        """
        Page description.

        Journal entries use their `title` metadata with apostrophes
        removed (raises `KeyError` if the entry has no title). Other
        pages use the `description` metadata value, or `None`.
        """
        if self.is_entry:
            return self.metadata['title'].replace("'", '')
        return self.metadata.get('description')

    @property
    def banner(self):
        """The `banner` metadata value, or `None` if not set."""
        return self.metadata.get('banner')

    @property
    def next(self):
        """The `next_page` value given at construction, if paginated."""
        return self._next

    @property
    def previous(self):
        """The `previous_page` value given at construction, if paginated."""
        return self._previous

    @property
    def href(self):
        """
        The `href` html value that points to this page.

        Can be used in templates like so:

        ```html
        <a href="{{ page.href }}">...</a>
        ```
        """
        return f'./{self.filename}'

    def render(self, context: dict) -> str:
        """
        Render the complete content for a page.

        Note that `context` is modified in place: the keys `page` and
        `content` are set on it before the base template is rendered.

        If the rendered markup cannot be prettified, the error is
        logged and the unprettified markup is returned instead.
        """
        # add current page to context
        context['page'] = self

        # build inner content
        with self.path.open('r') as f:
            source = f.read()

        if self.path.name.endswith('.j2'):
            # page is a template, so render it
            content = template_env.from_string(source).render(**context)
        else:
            # page isn't a template, so use it as is
            content = source

        # now, wrap that content in the base template
        context['content'] = content.strip()
        content = render_template('base.html.j2', context=context).strip()

        # prettify the markup
        try:
            return xml.prettify(content)
        except xml.ParseError as e:
            logger.error('cannot parse %s: %s', self.filename, e)
            return content

    def write(self, context: dict):
        """
        Render the page and write it to `./www/<filename>`.
        """
        target = pathlib.Path(f'./www/{self.filename}')
        content = self.render(context)
        with target.open('w') as f:
            f.write(content)

    def extract_links(self) -> list[pathlib.Path]:
        """
        Returns a list of href or src values.

        The page's source file (not its rendered output) is fed to a
        `validate.ReferenceParser`, and the `path` of every reference
        it collects is returned.
        """
        parser = validate.ReferenceParser(parent='./www/')

        with self.path.open('r') as f:
            parser.feed(f.read())

        return [reference.path for reference in parser.references]

227 

228 

# Neighbouring filenames for one entry; either field is None at the
# corresponding end of the sequence.
Pagination = collections.namedtuple('Pagination', ['next', 'previous'])


def paginate_entries(files=None) -> dict:
    """
    Map each file's name to a `Pagination` of its neighbours' names.

    `files` is any iterable of objects with a `name` attribute (e.g.
    `pathlib.Path`), taken in the order given. `previous` is the name
    of the item before and `next` the name of the item after; each is
    `None` where no such neighbour exists.

    Returns an empty dict when `files` is omitted or empty.
    """
    # Materialise once so iterators/generators can be indexed too.
    files = [] if files is None else list(files)
    last = len(files) - 1

    pagination = {}
    for i, this_file in enumerate(files):
        pagination[this_file.name] = Pagination(
            next=files[i + 1].name if i < last else None,
            previous=files[i - 1].name if i > 0 else None,
        )

    return pagination

251 

252 

# One `<!-- meta:key value -->` comment per line. The key is letters and
# underscores only; a bare `A-z` range would also admit the punctuation
# that sits between `Z` and `a` in ASCII.
_METADATA_PATTERN = re.compile(
    r'^\s?<!--\s?meta:(?P<key>[A-Za-z_]+)\s?(?P<value>.*)\s?-->$',
    re.MULTILINE)


def parse_metadata(content: str) -> dict:
    """
    Extract `<!-- meta:key value -->` comments from `content`.

    Returns a dict mapping each key to its value, both stripped of
    surrounding whitespace. When a key appears more than once, the
    last occurrence wins. Returns an empty dict if nothing matches.
    """
    return {
        key.strip(): value.strip()
        for key, value in _METADATA_PATTERN.findall(content)
    }

259 return metadata