22from typing import Any
33from typing import TypeGuard
44
5+ import json
6+ from pyld import jsonld
7+
58import mistune
69from mistune import BlockState
710from mistune .plugins .abbr import abbr
@@ -144,14 +147,109 @@ def is_blurry_renderer(
144147 + [plugin .load () for plugin in discovered_markdown_plugins ],
145148)
146149
# Default JSON-LD context, applied to any schema fragment that does not
# declare its own "@context".
SCHEMA_ORG = {"@vocab": "https://schema.org/"}


def jsonld_document_loader(secure=False, fragments=None, **kwargs):
    """
    Create a document loader that serves in-memory schema fragments.

    The returned loader never performs a request: whatever URL it is asked
    for, it answers with a JSON array holding every fragment.

    :param secure: accepted for signature compatibility; not used.
    :param fragments: the fragments of schema loaded as dicts
        (default: no fragments).
    :param **kwargs: accepted for signature compatibility; not used.

    :return: the RemoteDocument loader function.
    """
    # A literal ``[]`` default would be one list shared by every call.
    if fragments is None:
        fragments = []

    def loader(ignored, options=None):
        """
        Retrieves JSON-LD from the dicts provided as fragments.

        :param ignored: this positional parameter is ignored, because the
            fragments are side-loaded.
        :param options: accepted for signature compatibility; not used.

        :return: the RemoteDocument.
        """
        # Fragments lacking a context get the schema.org default. The default
        # is applied to a shallow copy so the caller's dicts are not modified.
        serialized = [
            json.dumps(
                fragment
                if fragment.get("@context")
                else {**fragment, "@context": SCHEMA_ORG}
            )
            for fragment in fragments
        ]
        return {
            "contentType": "application/ld+json",
            "contextUrl": None,
            "documentUrl": None,
            "document": "[" + ",".join(serialized) + "]",
        }

    return loader
147194
148- def convert_markdown_file_to_html ( filepath : Path ) -> tuple [ str , dict [ str , Any ]] :
def add_inferred_schema(local_front_matter: dict, filepath: Path) -> dict:
    """Add computed URL fields to a front matter dict and return it.

    Sets ``url`` from ``filepath`` relative to the content directory. When the
    front matter has a truthy ``image`` entry, that entry is resolved against
    the directory containing ``filepath`` and replaced by its URL, and
    ``thumbnailUrl`` is set as well.

    The dict is modified in place; the same object is returned.
    """
    content_root = get_content_directory()
    thumb_width = SETTINGS.get("THUMBNAIL_WIDTH")
    build_root = get_build_directory()

    # Add inferred/computed/relative values
    local_front_matter["url"] = content_path_to_url(
        filepath.relative_to(content_root)
    )

    image = local_front_matter.get("image")
    if not image:
        return local_front_matter

    source_image = filepath.parent / Path(image)
    local_front_matter["image"] = content_path_to_url(source_image)

    # Add thumbnail URL, using the full image if the thumbnail doesn't exist
    built_thumbnail = build_root / add_image_width_to_path(
        source_image, thumb_width
    ).relative_to(content_root)
    local_front_matter["thumbnailUrl"] = (
        build_path_to_url(built_thumbnail)
        if built_thumbnail.exists()
        else local_front_matter["image"]
    )
    return local_front_matter
215+
def resolve_front_matter(
    state: "BlockState", filepath: Path
) -> tuple[dict[str, Any], str | None]:
    """Combine the configured schema data with a file's own front matter.

    With ``FRONT_MATTER_RESOLUTION`` set to ``"merge"``, the configured
    ``SCHEMA_DATA`` and the file's front matter are handed to pyld as two
    JSON-LD fragments and compacted against the schema.org context. With any
    other setting, the front matter is layered over ``SCHEMA_DATA`` with a
    plain ``dict.update``.

    Both modes add the inferred values from ``add_inferred_schema``.

    :param state: parse state whose ``env["front_matter"]`` holds the file's
        front matter, if any.
    :param filepath: path of the Markdown file being converted.

    :return: ``(front_matter, top_level_type)``. ``top_level_type`` is the
        ``@type`` of the file's own front matter in merge mode (``None`` when
        it has none) and always ``None`` otherwise.
    :raises Exception: whatever the merge raised, re-raised after being
        reported on stdout.
    """
    if SETTINGS.get("FRONT_MATTER_RESOLUTION") != "merge":
        # Seed front_matter with schema_data from config file
        front_matter: dict[str, Any] = dict(SETTINGS.get("SCHEMA_DATA", {}))
        front_matter.update(state.env.get("front_matter", {}))
        return add_inferred_schema(front_matter, filepath), None

    try:
        global_schema = dict(SETTINGS.get("SCHEMA_DATA", {}))
        if not global_schema.get("@context"):
            global_schema["@context"] = SCHEMA_ORG

        # Work on a copy so the parse state's front matter is left untouched,
        # matching what the non-merge path does.
        local_schema = dict(state.env.get("front_matter", {}))
        top_level_type = local_schema.get("@type")
        if not local_schema.get("@context"):
            local_schema["@context"] = SCHEMA_ORG
        local_schema = add_inferred_schema(local_schema, filepath)

        # Pass the loader for this call only instead of replacing pyld's
        # process-wide document loader. The "ignore" URL is a placeholder:
        # the loader serves the fragments whatever it is asked for.
        loader = jsonld_document_loader(fragments=[global_schema, local_schema])
        merged: dict[str, Any] = jsonld.compact(
            "ignore", SCHEMA_ORG, options={"documentLoader": loader}
        )
    except Exception as e:
        print("merging front matter failed:", e)
        raise
    return merged, top_level_type
247+
248+
249+ def convert_markdown_file_to_html (filepath : Path ) -> tuple [str , dict [str , Any ], str ]:
151250 if not markdown .renderer :
152251 raise Exception ("Blurry markdown renderer not set on Mistune Markdown instance" )
153252
154- BUILD_DIR = get_build_directory ()
155253 # Add filepath to the renderer to resolve relative paths
156254 if not is_blurry_renderer (markdown .renderer ):
157255 raise Exception (
@@ -164,27 +262,10 @@ def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
164262 html , state = markdown .parse (markdown_text , state = state )
165263
166264 if not is_str (html ):
167- raise Exception (f"Expected html to be a string but got: { type (html )} " )
265+ raise Exception (f"Expected html to be a string but got: { top_level_type (html )} " )
168266
169267 # Post-process HTML
170268 html = remove_lazy_loading_from_first_image (html )
171269
172- # Seed front_matter with schema_data from config file
173- front_matter : dict [str , Any ] = dict (SETTINGS .get ("SCHEMA_DATA" , {}))
174- front_matter .update (state .env .get ("front_matter" , {}))
175-
176- # Add inferred/computed/relative values
177- front_matter .update ({"url" : content_path_to_url (filepath .relative_to (CONTENT_DIR ))})
178- if image := front_matter .get ("image" ):
179- image_path = filepath .parent / Path (image )
180- front_matter ["image" ] = content_path_to_url (image_path )
181- # Add thumbnail URL, using the full image if the thumbnail doesn't exist
182- thumbnail_image_path = add_image_width_to_path (image_path , THUMBNAIL_WIDTH )
183- thumbnail_image_build_path = BUILD_DIR / thumbnail_image_path .relative_to (
184- CONTENT_DIR
185- )
186- if thumbnail_image_build_path .exists ():
187- front_matter ["thumbnailUrl" ] = build_path_to_url (thumbnail_image_build_path )
188- else :
189- front_matter ["thumbnailUrl" ] = front_matter ["image" ]
190- return html , front_matter
270+ front_matter , top_level_type = resolve_front_matter (state , filepath )
271+ return html , front_matter , top_level_type
0 commit comments