Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,25 +96,30 @@ def clean_llm_response(response: str) -> str:
|
|
| 96 |
|
| 97 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
|
| 98 |
"""
|
| 99 |
-
Given the full HTML and a
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
The traversal stops
|
| 103 |
Returns the outer HTML of the found container.
|
| 104 |
"""
|
| 105 |
allowed_tags = {"div", "table", "iframe"}
|
| 106 |
soup = BeautifulSoup(full_html, "html.parser")
|
|
|
|
|
|
|
| 107 |
candidate = soup.find(lambda tag: snippet in tag.get_text() if tag.get_text() else False)
|
|
|
|
| 108 |
if candidate:
|
| 109 |
current = candidate
|
|
|
|
| 110 |
while current.parent is not None and current.parent.name.lower() != "body" and current.parent.name.lower() != "html":
|
| 111 |
if current.parent.name.lower() in allowed_tags:
|
| 112 |
current = current.parent
|
|
|
|
| 113 |
else:
|
| 114 |
-
|
| 115 |
-
return str(current)
|
| 116 |
else:
|
| 117 |
-
return snippet
|
| 118 |
|
| 119 |
|
| 120 |
def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|
|
|
|
| 96 |
|
| 97 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
    """
    Return the outer HTML of the container surrounding a text snippet.

    The innermost element whose text contains ``snippet`` is located first.
    From there the DOM is walked upward until the first ancestor whose tag is
    in the allowed list (div, table, iframe) is reached. If no such ancestor
    exists, the walk stops at the highest ancestor below <body>/<html> (or
    below the document root when the input is an HTML fragment).

    Args:
        full_html: The HTML document or fragment to search.
        snippet: The text to look for inside the parsed document.

    Returns:
        The outer HTML of the selected container, or ``snippet`` unchanged
        when it is empty or cannot be found in the document text.
    """
    allowed_tags = {"div", "table", "iframe"}
    stop_tags = {"body", "html"}

    # An empty string is a substring of every text, so it cannot locate anything.
    if not snippet:
        return snippet

    soup = BeautifulSoup(full_html, "html.parser")

    def contains_snippet(tag) -> bool:
        return snippet in tag.get_text()

    # find() returns the first match in document order, i.e. the OUTERMOST
    # element containing the snippet (typically <html> or the top-level tag).
    candidate = soup.find(contains_snippet)
    if candidate is None:
        return snippet

    # Narrow down to the innermost element that still contains the snippet,
    # otherwise the upward walk below would start at the top of the tree.
    while True:
        narrower = next(
            (
                child
                for child in candidate.find_all(True, recursive=False)
                if contains_snippet(child)
            ),
            None,
        )
        if narrower is None:
            break
        candidate = narrower

    # Traverse upward to the first ancestor in the allowed list.
    current = candidate
    while True:
        parent = current.parent
        # Never climb onto the BeautifulSoup document object itself: its
        # string form is the whole document, which defeats the purpose.
        if parent is None or parent is soup or parent.name.lower() in stop_tags:
            break
        current = parent
        if current.name.lower() in allowed_tags:
            break  # Stop as soon as a match is found

    return str(current)  # Outer HTML of the found container
|
| 123 |
|
| 124 |
|
| 125 |
def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|