Coverage for src/models/page.py: 50%

1import collections

2import datetime

3import functools

4import logging

5import pathlib

6import re

7import xml.etree.ElementTree

9from src.template import template_env, render_template

10from src import xml, validate

12logger = logging.getLogger('blog')

def load_entries(entries_dir='./entries'):
    """
    Build a `Page` for every `*.html` file found directly inside
    `entries_dir` and return them newest first.

    Each page is handed the `next`/`previous` filenames computed by
    `paginate_entries` over the name-sorted list of files.

    ```python
    entries = src.load_entries()
    ```
    """
    source_files = sorted(pathlib.Path(entries_dir).glob('*.html'))

    # filename -> Pagination(next, previous)
    neighbors = paginate_entries(source_files)

    loaded = [
        Page(
            source,
            next_page=neighbors[source.name].next,
            previous_page=neighbors[source.name].previous,
        )
        for source in source_files
    ]

    # latest entry first; `Page.date` is parsed from the filename
    loaded.sort(key=lambda e: e.date, reverse=True)
    return loaded

def load_pages(pages_dir='./pages'):
    """
    Build a `Page` for every file matching `*.*` directly inside
    `pages_dir` and return them ordered by `Page.filename`.

    ```python
    pages = src.load_pages()
    ```
    """
    found = [Page(source) for source in pathlib.Path(pages_dir).glob('*.*')]
    found.sort(key=lambda p: p.filename)
    return found

class Page:
    """
    A website page. Can be either a normal page, or a journal entry.
    """

    def __init__(self, path: pathlib.Path, next_page=None, previous_page=None):
        """
        `path` is the page's source file. It is passed through
        `pathlib.Path`, so a plain string is accepted as well.

        `next_page` and `previous_page` can be filenames, if
        pagination should be enabled.
        """
        self.path = pathlib.Path(path)
        self._next = next_page
        self._previous = previous_page

    @property
    def filename(self):
        """
        Page filename, e.g. `index.html`.

        If the source file is a template (its last suffix is `.j2`),
        that suffix is removed, so `index.html.j2` becomes
        `index.html`. Any other name is returned unchanged.
        """
        if self.path.suffix == '.j2':
            return self.path.name[:-3]
        return self.path.name

    @property
    def is_entry(self) -> bool:
        """
        `True` if the page is a journal entry, `False` if it's just a
        normal page.

        A page counts as an entry when the relative directory
        `./entries` is one of the parents of its path.
        """
        entry_dir = pathlib.Path('./entries')
        return entry_dir in self.path.parents

    @property
    def date(self) -> datetime.datetime:
        """
        Page date, as parsed from the filename.

        The filename without its suffix must look like `YYYY-MM-DD`;
        otherwise `strptime` raises `ValueError`.
        """
        return datetime.datetime.strptime(self.path.stem, '%Y-%m-%d')

    @functools.cached_property
    def metadata(self) -> dict:
        """
        Metadata embedded in the page. This is read from special HTML
        comments by `parse_metadata`.

        A page whose header declares a title and a description will
        yield metadata like this:

        ```python
        {
            'title': 'a walk in the park',
            'description': 'I take a nice walk in the park.',
        }
        ```

        For performance, this information is only read once, then
        cached in memory during website build.
        """
        with self.path.open('r') as f:
            return parse_metadata(f.read())

    @property
    def title(self):
        """
        Page title.

        For an entry this is the long form of its date, e.g.
        `Monday, January 4 2021`. For a normal page it is the `title`
        metadata value, or `None` when the page declares none.
        """
        if self.is_entry:
            date = self.date
            # `%-d` (day without zero padding) is a platform-specific
            # strftime flag, so the day number is formatted separately.
            return f'{date:%A, %B} {date.day} {date:%Y}'
        # `Page` has no `get` method; the title lives in the metadata.
        return self.metadata.get('title')

    @property
    def description(self):
        """
        Page description.

        For an entry this is its `title` metadata with single quotes
        removed (a missing title raises `KeyError`). For a normal page
        it is the `description` metadata value, or `None`.
        """
        if self.is_entry:
            return self.metadata['title'].replace("'", '')
        return self.metadata.get('description')

    @property
    def banner(self):
        """The `banner` metadata value, or `None` if not set."""
        return self.metadata.get('banner')

    @property
    def next(self):
        """The `next_page` value given to the constructor, if paginated."""
        return self._next

    @property
    def previous(self):
        """The `previous_page` value given to the constructor, if paginated."""
        return self._previous

    @property
    def href(self):
        """
        The `href` html value that points to the page.

        Can be used in templates like so:

        ```html
        <a href="{{ page.href }}">...</a>
        ```
        """
        return f'./{self.filename}'

    def render(self, context: dict) -> str:
        """
        Render the complete content for a page.

        Note that `context` is modified in place: the keys `page` and
        `content` are set on the dict that was passed in.

        If the final markup cannot be parsed for prettifying, the
        error is logged and the unprettified markup is returned.
        """
        # add current page to context
        context['page'] = self

        # build inner content
        with self.path.open('r') as f:
            content = f.read()

        if self.path.name.endswith('.j2'):
            # page is a template, so render it
            content = template_env.from_string(content).render(**context)

        # now, wrap that content in the base template
        context['content'] = content.strip()
        content = render_template('base.html.j2', context=context).strip()

        # prettify the markup
        try:
            return xml.prettify(content)
        except xml.ParseError as e:
            logger.error('cannot parse %s: %s', self.filename, e)
            return content

    def write(self, context: dict):
        """
        Render the page and write it to `./www/<filename>`.
        """
        target = pathlib.Path(f'./www/{self.filename}')
        content = self.render(context)
        with target.open('w') as f:
            f.write(content)

    def extract_links(self) -> list[pathlib.Path]:
        """
        Feed the page source to a `validate.ReferenceParser` and
        return the `path` of every reference it collected.
        """
        # make a ReferenceParser
        parser = validate.ReferenceParser(parent='./www/')

        # feed content to parser
        with self.path.open('r') as f:
            parser.feed(f.read())

        # collect all the links
        return [reference.path for reference in parser.references]

227

228

Pagination = collections.namedtuple('Pagination', ['next', 'previous'])


def paginate_entries(files=None) -> dict[str, Pagination]:
    """
    Build a pagination map for an ordered collection of files.

    `files` is an iterable of objects with a `name` attribute (such as
    `pathlib.Path`), in the order they should be paged through. When
    omitted, an empty map is returned.

    The result maps each file's name to a `Pagination` holding the
    names of the file after it (`next`) and before it (`previous`).
    Either is `None` at the corresponding end of the sequence.
    """
    # materialize once so any iterable (not just a list) can be indexed
    names = [f.name for f in files] if files is not None else []
    last = len(names) - 1

    pagination = {}
    for i, name in enumerate(names):
        pagination[name] = Pagination(
            next=names[i + 1] if i < last else None,
            previous=names[i - 1] if i > 0 else None,
        )

    return pagination

251

252

def parse_metadata(content: str) -> dict:
    """
    Extract key/value metadata from HTML comments in `content`.

    Every line of the form `<!-- meta:KEY VALUE -->` contributes one
    entry; keys and values are stripped of surrounding whitespace.
    Content without such comments yields an empty dict. If a key is
    repeated, the last occurrence wins.

    NOTE(review): the pattern previously visible here was `^\\s?$`,
    which has no capture groups, so unpacking each match into a
    key/value pair could never succeed. The comment syntax matched
    below is an assumed reconstruction - confirm it against the
    metadata comments actually used in the pages.
    """
    pattern = re.compile(
        r'^\s?<!--[ \t]*meta:([\w-]+)[ \t]+(.*?)[ \t]*-->\s*$',
        re.MULTILINE)
    return {
        key.strip(): value.strip()
        for key, value in pattern.findall(content)
    }

259 return metadata