Skip to content

Commit 5f00111

Browse files
author
Oliver Kandler
committed
new option to merge schema and necessary refactorings
1 parent 5a10100 commit 5f00111

3 files changed

Lines changed: 107 additions & 24 deletions

File tree

blurry/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ async def write_html_file(
105105
extra_context["sibling_pages"] = sibling_pages
106106
folder_in_build = convert_content_path_to_directory_in_build(file_data.path)
107107

108-
schema_type = file_data.front_matter.get("@type")
108+
schema_type = file_data.top_level_type
109109
if not schema_type:
110110
raise ValueError(
111111
f"Required @type value missing in file or TOML front matter invalid: "
@@ -193,9 +193,10 @@ async def build(release=True):
193193
file_data_by_directory[directory] = []
194194

195195
# Convert Markdown file to HTML
196-
body, front_matter = convert_markdown_file_to_html(filepath)
196+
body, front_matter, top_level_type = convert_markdown_file_to_html(filepath)
197197
file_data = MarkdownFileData(
198198
body=body,
199+
top_level_type=top_level_type,
199200
front_matter=front_matter,
200201
path=relative_filepath,
201202
)

blurry/markdown/__init__.py

Lines changed: 103 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
from typing import Any
33
from typing import TypeGuard
44

5+
import json
6+
from pyld import jsonld
7+
58
import mistune
69
from mistune import BlockState
710
from mistune.plugins.abbr import abbr
@@ -144,14 +147,109 @@ def is_blurry_renderer(
144147
+ [plugin.load() for plugin in discovered_markdown_plugins],
145148
)
146149

150+
# Default JSON-LD context applied to any fragment that does not declare one.
SCHEMA_ORG = {"@vocab": "https://schema.org/"}


def jsonld_document_loader(secure=False, fragments=None, **kwargs):
    """
    Create a pyld document loader that serves in-memory JSON-LD fragments.

    Nothing is fetched: whatever URL pyld asks for, the returned loader
    answers with a JSON array built from ``fragments``.

    :param secure: unused; accepted for signature compatibility.
    :param fragments: the schema fragments (dicts) to serve. A fragment
        without a truthy ``@context`` is served with ``SCHEMA_ORG`` as its
        context. The fragments themselves are never modified.
    :param **kwargs: unused; accepted for signature compatibility.

    :return: the RemoteDocument loader function.
    """

    def loader(ignored, options=None):
        """
        Build the RemoteDocument from the side-loaded fragments.

        :param ignored: the requested URL; ignored because the fragments are
            side-loaded rather than fetched.
        :param options: loader options passed by the caller; unused.

        :return: the RemoteDocument, whose ``document`` is a JSON array string.
        """
        # Read the fragments at call time so later additions are still seen.
        source = fragments if fragments is not None else ()
        serialized = [
            json.dumps(
                fragment
                if fragment.get("@context")
                # Build a new dict rather than writing into the caller's one.
                else {**fragment, "@context": SCHEMA_ORG}
            )
            for fragment in source
        ]
        return {
            "contentType": "application/ld+json",
            "contextUrl": None,
            "documentUrl": None,
            "document": "[" + ",".join(serialized) + "]",
        }

    return loader
147194

148-
def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
195+
def add_inferred_schema(local_front_matter: dict, filepath: Path) -> dict:
    """
    Add the values derived from the file's location to its front matter.

    Always sets ``url``. When the front matter has a truthy ``image``, it is
    rewritten to a URL and ``thumbnailUrl`` is set as well.

    :param local_front_matter: the front matter to extend; modified in place.
    :param filepath: path of the Markdown file inside the content directory.

    :return: the same ``local_front_matter`` dict that was passed in.
    """
    content_root = get_content_directory()
    thumbnail_width = SETTINGS.get("THUMBNAIL_WIDTH")
    build_root = get_build_directory()

    # Add inferred/computed/relative values
    local_front_matter["url"] = content_path_to_url(filepath.relative_to(content_root))

    image = local_front_matter.get("image")
    if not image:
        return local_front_matter

    # The image path is resolved relative to the Markdown file's directory.
    image_path = filepath.parent / Path(image)
    image_url = content_path_to_url(image_path)
    local_front_matter["image"] = image_url

    # Add thumbnail URL, using the full image if the thumbnail doesn't exist
    thumbnail_source = add_image_width_to_path(image_path, thumbnail_width)
    thumbnail_in_build = build_root / thumbnail_source.relative_to(content_root)
    local_front_matter["thumbnailUrl"] = (
        build_path_to_url(thumbnail_in_build)
        if thumbnail_in_build.exists()
        else image_url
    )
    return local_front_matter
215+
216+
def resolve_front_matter(
    state: "BlockState", filepath: Path
) -> tuple[dict[str, Any], str | None]:
    """
    Combine the global ``SCHEMA_DATA`` setting with a file's front matter.

    When the ``FRONT_MATTER_RESOLUTION`` setting is ``"merge"``, both schemas
    are handed to pyld as JSON-LD fragments and compacted against
    ``SCHEMA_ORG``. Otherwise the file's front matter is layered over
    ``SCHEMA_DATA`` with a plain ``dict.update``.

    :param state: the parser state; front matter is read from
        ``state.env["front_matter"]``.
    :param filepath: path of the Markdown file, used to infer the URL and
        image values.

    :return: ``(front_matter, top_level_type)``. ``top_level_type`` is the
        ``@type`` of the file's own front matter in merge mode, or of the
        combined front matter otherwise; ``None`` if absent.
    :raises Exception: whatever the merge raised, re-raised after reporting.
    """
    if SETTINGS.get("FRONT_MATTER_RESOLUTION") == "merge":
        try:
            global_schema = dict(SETTINGS.get("SCHEMA_DATA", {}))
            if not global_schema.get("@context"):
                global_schema["@context"] = SCHEMA_ORG

            # Work on a copy so the parser state's front matter is not
            # modified, matching the non-merge path below.
            local_schema = dict(state.env.get("front_matter", {}))
            # Captured before compaction, which restructures the document.
            top_level_type = local_schema.get("@type")
            if not local_schema.get("@context"):
                local_schema["@context"] = SCHEMA_ORG
            local_schema = add_inferred_schema(local_schema, filepath)

            # The loader serves the two fragments for any requested URL, so
            # the "ignore" input below is only a placeholder.
            jsonld.set_document_loader(
                jsonld_document_loader(fragments=[global_schema, local_schema])
            )
            front_matter: dict[str, Any] = jsonld.compact("ignore", SCHEMA_ORG)
        except Exception as e:
            print("merging front matter failed:", e)
            raise
        return front_matter, top_level_type

    # Seed front_matter with schema_data from config file
    front_matter = dict(SETTINGS.get("SCHEMA_DATA", {}))
    front_matter.update(state.env.get("front_matter", {}))
    front_matter = add_inferred_schema(front_matter, filepath)

    # Take the type from the combined front matter so a @type supplied only
    # by SCHEMA_DATA still counts; returning None here made the caller's
    # "@type missing" check fail for every file.
    return front_matter, front_matter.get("@type")
247+
248+
249+
def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any], str]:
151250
if not markdown.renderer:
152251
raise Exception("Blurry markdown renderer not set on Mistune Markdown instance")
153252

154-
BUILD_DIR = get_build_directory()
155253
# Add filepath to the renderer to resolve relative paths
156254
if not is_blurry_renderer(markdown.renderer):
157255
raise Exception(
@@ -164,27 +262,10 @@ def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
164262
html, state = markdown.parse(markdown_text, state=state)
165263

166264
if not is_str(html):
167-
raise Exception(f"Expected html to be a string but got: {type(html)}")
265+
raise Exception(f"Expected html to be a string but got: {top_level_type(html)}")
168266

169267
# Post-process HTML
170268
html = remove_lazy_loading_from_first_image(html)
171269

172-
# Seed front_matter with schema_data from config file
173-
front_matter: dict[str, Any] = dict(SETTINGS.get("SCHEMA_DATA", {}))
174-
front_matter.update(state.env.get("front_matter", {}))
175-
176-
# Add inferred/computed/relative values
177-
front_matter.update({"url": content_path_to_url(filepath.relative_to(CONTENT_DIR))})
178-
if image := front_matter.get("image"):
179-
image_path = filepath.parent / Path(image)
180-
front_matter["image"] = content_path_to_url(image_path)
181-
# Add thumbnail URL, using the full image if the thumbnail doesn't exist
182-
thumbnail_image_path = add_image_width_to_path(image_path, THUMBNAIL_WIDTH)
183-
thumbnail_image_build_path = BUILD_DIR / thumbnail_image_path.relative_to(
184-
CONTENT_DIR
185-
)
186-
if thumbnail_image_build_path.exists():
187-
front_matter["thumbnailUrl"] = build_path_to_url(thumbnail_image_build_path)
188-
else:
189-
front_matter["thumbnailUrl"] = front_matter["image"]
190-
return html, front_matter
270+
front_matter, top_level_type = resolve_front_matter(state, filepath)
271+
return html, front_matter, top_level_type

blurry/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
@dataclass
99
class MarkdownFileData:
1010
body: str
11+
top_level_type: str
1112
front_matter: dict[str, Any]
1213
path: Path
1314

0 commit comments

Comments
 (0)