Skip to content

Commit 527dca8

Browse files
committed
Add function for removing unmatched tags
1 parent 3a8a049 commit 527dca8

1 file changed

Lines changed: 23 additions & 1 deletion

File tree

v2_utils.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,29 @@ def define_link_data(usernames):
2727
logging.info(f"{e}---define_link_data")
2828
return []
2929

30+
def remove_unmatched_tags(text):
    """Drop a stray leading closing tag and keep only the tag-delimited spans.

    The cleaning happens in two steps:

    1. A single closing tag at the very start of ``text`` (together with the
       whitespace around it) is removed.
    2. The remainder is scanned left to right for either
       * a ``<tag>...</tag>`` pair whose closing tag repeats the full
         contents of the opening tag, or
       * an opening tag followed by everything up to the end of the string.
       The matched spans are concatenated in the order they were found.

    Anything that is not covered by a match (for example plain text before
    the first tag, or text between two matched pairs) is not part of the
    result.

    Args:
        text: The HTML fragment to clean.

    Returns:
        The cleaned string. If anything goes wrong (e.g. ``text`` is not a
        string) the error is logged and ``text`` is returned unchanged, so
        this function never raises.
    """
    try:
        # Remove an unmatched closing tag at the beginning of the string.
        text = re.sub(r'^\s*</[^>]+>\s*', '', text)

        # First alternative: a matched <tag>...</tag> pair.
        # Second alternative: an opening tag plus the rest of the string.
        pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*)', re.DOTALL)

        # The whole match is always exactly one of the two alternatives, so
        # joining group 0 of every match yields the same text as picking
        # group 1 or group 3 by hand, without repeated string concatenation.
        return ''.join(found.group(0) for found in pattern.finditer(text))
    except Exception as e:
        # Best effort: report the problem the same way the rest of this
        # module does and hand the input back untouched.
        logging.info(f"{e}---remove_unmatched_tags")
        return text
50+
51+
52+
3153

3254
def week_data_formatter(html_content, type):
3355

@@ -46,7 +68,7 @@ def week_data_formatter(html_content, type):
4668
task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else ""
4769
weekly_updates.append({
4870
'week': i + 1,
49-
'content': task_list_html.strip()
71+
'content': remove_unmatched_tags(task_list_html)
5072

5173
})
5274
return weekly_updates

0 commit comments

Comments
 (0)