File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -27,28 +27,40 @@ def define_link_data(usernames):
2727 logging .info (f"{ e } ---define_link_data" )
2828 return []
2929
30+
# Elements that never take a closing tag in HTML; no closer is synthesized
# for them.
_VOID_ELEMENTS = frozenset({
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr',
})

# A single stray closing tag (plus surrounding whitespace) at the very start.
_LEADING_CLOSER = re.compile(r'^\s*</[^>]+>\s*')

# Group 1: a balanced <name ...>...</name> run, paired on the tag NAME only
#          (group 2) so that attributes on the opener do not prevent pairing.
# Group 3: an opener with no closer later in the text, together with
#          everything up to the next opening tag or the end of the text.
_SEGMENT = re.compile(
    r'(<([^\s/>]+)(?:\s[^>]*)?>.*?</\2\s*>)'
    r'|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)',
    re.DOTALL | re.IGNORECASE,
)


def _closer_name(segment):
    """Return the tag name that must be closed for *segment*, or None.

    *segment* starts with an opening tag that has no closer.  No closer is
    wanted for self-closing tags (``<x/>``), void elements (``<br>``),
    declarations/comments (``<!...>``, ``<?...>``) or tags without a name.
    """
    opener = segment[:segment.index('>') + 1]
    found = re.match(r'<([^\s/>]+)', opener)
    if found is None or opener.endswith('/>'):
        return None
    name = found.group(1)
    if name[0] in '!?' or name.lower() in _VOID_ELEMENTS:
        return None
    return name


def remove_unmatched_tags(text):
    """Balance the tags of an HTML fragment on a best-effort basis.

    A stray closing tag at the start of *text* is removed, balanced
    ``<tag>...</tag>`` runs are kept unchanged, and every opening tag that is
    never closed gets a closing tag appended at the end of the result
    (innermost first).

    Args:
        text: HTML fragment to clean.

    Returns:
        The cleaned fragment.  If anything goes wrong (for example *text* is
        not a string) the error is logged and the input is returned; when the
        failure happens after the leading-closer strip, the stripped text is
        returned.

    NOTE(review): this is a regex scan, not an HTML parser.  Text that lies
    outside every tag segment (before the first tag, or between a balanced
    run and the next opening tag) and stray closing tags found there are
    dropped, and tags nested inside a balanced run are not inspected.
    Confirm that callers only pass tag-wrapped fragments.
    """
    try:
        text = _LEADING_CLOSER.sub('', text)

        pieces = []
        pending = []  # names of openers still waiting for a closing tag
        for balanced, _name, unclosed in _SEGMENT.findall(text):
            if balanced:
                pieces.append(balanced)
                continue
            pieces.append(unclosed)
            name = _closer_name(unclosed)
            if name is not None:
                pending.append(name)

        # Close in reverse order so the most recently opened tag closes first.
        pieces.extend(f'</{name}>' for name in reversed(pending))
        return ''.join(pieces)

    except Exception as e:
        # Best-effort cleanup: never let malformed input break the caller.
        logging.info(f"{e} ---remove_unmatched_tags")
        return text
5063
51-
5264
5365
5466def week_data_formatter (html_content , type ):
You can’t perform that action at this time.
0 commit comments