Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,30 +96,46 @@ def clean_llm_response(response: str) -> str:
|
|
| 96 |
|
| 97 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
|
| 98 |
"""
|
| 99 |
-
Given the full HTML and a snippet of text, this function finds the element
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
"""
|
| 105 |
-
|
|
|
|
| 106 |
soup = BeautifulSoup(full_html, "html.parser")
|
| 107 |
|
| 108 |
-
# Find the
|
| 109 |
-
candidate = soup.find(lambda tag:
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
else:
|
| 122 |
-
return
|
| 123 |
|
| 124 |
|
| 125 |
def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|
|
@@ -209,7 +225,7 @@ Only output valid JSON."""
|
|
| 209 |
You are a technical editor.
|
| 210 |
Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
|
| 211 |
produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
|
| 212 |
-
The updated version will put back precisely in the same location.
|
| 213 |
|
| 214 |
// Context
|
| 215 |
- Overall Report HTML:
|
|
|
|
| 96 |
|
| 97 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
    """Return the HTML of the container that encloses ``snippet`` in ``full_html``.

    The innermost element whose text contains the snippet is located first.
    Its ancestors are then walked, starting at the immediate parent and
    stopping at ``<body>`` (or at the top of the parsed tree):

    * if one or more ``<iframe>`` ancestors exist, the outermost one is returned;
    * otherwise the nearest ``<div>`` or ``<table>`` ancestor is returned;
    * otherwise the innermost matching element itself is returned.

    Args:
        full_html: The complete HTML document to search.
        snippet: The text to locate inside the document.

    Returns:
        The selected element serialized as an HTML string (outer tags
        included), or ``snippet`` unchanged when it is empty or when no
        single element's text contains it.
    """
    # An empty snippet is contained in every string, so there is nothing
    # meaningful to locate.
    if not snippet:
        return snippet

    # Allowed container tags for the non-iframe case.
    allowed_tags = {"div", "table"}
    soup = BeautifulSoup(full_html, "html.parser")

    # find() yields the first match in document order. Because get_text()
    # includes descendant text, every ancestor of a matching tag matches too,
    # so this first hit is the OUTERMOST element containing the snippet.
    candidate = soup.find(lambda tag: snippet in tag.get_text())
    if candidate is None:
        return snippet

    # Descend to the innermost element that still contains the snippet;
    # without this step the upward walk below would start at the top of the
    # tree and never see any enclosing iframe/div/table.
    while True:
        inner = next(
            (
                child
                for child in candidate.find_all(True, recursive=False)
                if snippet in child.get_text()
            ),
            None,
        )
        if inner is None:
            break
        candidate = inner

    iframe_candidate = None
    allowed_candidate = None

    # Walk upward, starting with the candidate's direct parent.
    current = candidate.parent
    while current is not None and current.name.lower() != "body":
        tag_name = current.name.lower()
        if tag_name == "iframe":
            # Keep overwriting so that the outermost iframe wins.
            iframe_candidate = current
        elif tag_name in allowed_tags and allowed_candidate is None:
            # Remember only the nearest div/table ancestor.
            allowed_candidate = current
        current = current.parent

    # Preference order: outermost iframe, nearest div/table, the element itself.
    if iframe_candidate is not None:
        return str(iframe_candidate)
    if allowed_candidate is not None:
        return str(allowed_candidate)
    return str(candidate)
|
| 139 |
|
| 140 |
|
| 141 |
def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|
|
|
|
| 225 |
You are a technical editor.
|
| 226 |
Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
|
| 227 |
produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
|
| 228 |
+
The updated version will put back precisely in the same location, the output should have the same outer tags.
|
| 229 |
|
| 230 |
// Context
|
| 231 |
- Overall Report HTML:
|