Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -88,10 +88,25 @@ Your Answer:"""
|
|
| 88 |
updated_history = chat_history + [[user_message, answer]]
|
| 89 |
return updated_history, ""
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
|
| 93 |
"""
|
| 94 |
-
Given the full HTML and a small snippet (e.g., containing a keyword),
|
| 95 |
find the element in which that snippet appears and traverse upward through the DOM
|
| 96 |
until a larger container is reached. Allowed container tags include div, table, iframe, and section.
|
| 97 |
The traversal stops when the parent is no longer in the allowed list or before reaching <body>.
|
|
@@ -162,51 +177,24 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 162 |
Fine-tunes an HTML report based on a user’s correction request by processing complete container elements.
|
| 163 |
|
| 164 |
Process Overview:
|
| 165 |
-
1.
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
and the user adjustment request, and outputs a corrected version.
|
| 172 |
-
4. The code then replaces the original container with the updated version in the BeautifulSoup object.
|
| 173 |
-
5. If new inline citations have been introduced (beyond those in the reference table), a final LLM call updates
|
| 174 |
-
the reference table.
|
| 175 |
-
6. A summary of all corrections is appended to the QA log.
|
| 176 |
-
|
| 177 |
-
Parameters:
|
| 178 |
-
adjustment_request: A string such as "the visual after 'xyz' is not displaying properly, please fix it" or
|
| 179 |
-
"the introduction should be more detailed, adjust it" etc.
|
| 180 |
-
openai_api_key: OpenAI API Key.
|
| 181 |
-
serpapi_api_key: SERPAPI API Key.
|
| 182 |
-
report_html: A string containing the full HTML report.
|
| 183 |
-
initial_request: The original research query or request.
|
| 184 |
-
qa: Existing clarification Q&A log.
|
| 185 |
-
target_style: The stylistic guidelines the report should follow.
|
| 186 |
-
knowledge_crumbs: Aggregated source or search result content.
|
| 187 |
-
complementary_guidance: Additional instructions.
|
| 188 |
-
|
| 189 |
-
Returns:
|
| 190 |
-
A tuple (updated_report_html, updated_qa) with the corrected report and updated QA log.
|
| 191 |
"""
|
| 192 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 193 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 194 |
|
| 195 |
logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
# Step 1: Identify unique string(s) that are representative of the targeted area.
|
| 199 |
-
#
|
| 200 |
-
# The prompt now asks the LLM to extract one or more unique plain text strings (without HTML)
|
| 201 |
-
# that appear in the targeted area(s) identified by the user adjustment request. These strings
|
| 202 |
-
# will be used to locate the corresponding container elements.
|
| 203 |
-
# ---------------------------------------------------------------
|
| 204 |
prompt_identify = (
|
| 205 |
f"You are a meticulous technical editor. Below is the full report HTML together with a "
|
| 206 |
-
f"user adjustment request.
|
| 207 |
-
f"that
|
| 208 |
-
f"
|
| 209 |
-
f"as they appear in the report so that they can be used to accurately locate the relevant section(s).\n\n"
|
| 210 |
f"Full Report HTML:\n{report_html}\n\n"
|
| 211 |
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
| 212 |
f"Only output valid JSON."
|
|
@@ -216,7 +204,7 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 216 |
logging.info(f"fine_tune_report: Raw unique string identification response: {response_identify}")
|
| 217 |
|
| 218 |
try:
|
| 219 |
-
response_identify =
|
| 220 |
id_data = json.loads(response_identify)
|
| 221 |
unique_strings = id_data.get("identified_unique_strings", [])
|
| 222 |
except Exception as e:
|
|
@@ -227,39 +215,30 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 227 |
logging.warning("fine_tune_report: No unique strings were identified for adjustment. Returning original report.")
|
| 228 |
return report_html, qa
|
| 229 |
|
| 230 |
-
#
|
| 231 |
-
# Step 2: For each unique string, locate its corresponding container.
|
| 232 |
-
# ---------------------------------------------------------------
|
| 233 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 234 |
corrections_summary = []
|
| 235 |
for uniq_str in unique_strings:
|
| 236 |
uniq_str = uniq_str.strip()
|
| 237 |
-
# Use expand_snippet_area to get the full container outer HTML that encloses the unique text.
|
| 238 |
container_html = expand_snippet_area(report_html, uniq_str)
|
| 239 |
if not container_html:
|
| 240 |
logging.warning(f"fine_tune_report: Could not locate a container for unique string: {uniq_str}")
|
| 241 |
continue
|
| 242 |
-
# Now, search the soup for a tag that includes this container HTML.
|
| 243 |
candidate = soup.find(lambda tag: container_html in str(tag))
|
| 244 |
if not candidate:
|
| 245 |
-
logging.warning(f"fine_tune_report: The container for
|
| 246 |
continue
|
| 247 |
|
| 248 |
original_container_html = str(candidate)
|
| 249 |
logging.info("fine_tune_report: Found container for unique string adjustment.")
|
| 250 |
|
| 251 |
-
# ---------------------------------------------------------------
|
| 252 |
# Step 3: Call the LLM to adjust this container.
|
| 253 |
-
#
|
| 254 |
-
# Pass the entire container HTML, the full report context, and the adjustment request.
|
| 255 |
-
# The LLM should output a JSON object with the keys "improved" and "summary".
|
| 256 |
-
# ---------------------------------------------------------------
|
| 257 |
prompt_adjust = (
|
| 258 |
f"You are a technical editor. Given the following HTML container (with its outer tags) extracted "
|
| 259 |
f"from a larger report and based on the user adjustment request, produce a corrected version by making "
|
| 260 |
-
f"only the necessary changes. Preserve
|
| 261 |
-
f"remains consistent with the overall report style. Output
|
| 262 |
-
f"\"improved\" (the corrected container's full HTML) and \"summary\" (a brief explanation of the changes
|
| 263 |
f"Overall Report HTML:\n{report_html}\n\n"
|
| 264 |
f"Original Container to Adjust:\n{original_container_html}\n\n"
|
| 265 |
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
|
@@ -269,9 +248,8 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 269 |
|
| 270 |
response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
|
| 271 |
logging.info(f"fine_tune_report: Raw container adjustment response: {response_adjust}")
|
| 272 |
-
|
| 273 |
try:
|
| 274 |
-
response_adjust =
|
| 275 |
adjust_data = json.loads(response_adjust)
|
| 276 |
corrected_container = adjust_data.get("improved", "").strip()
|
| 277 |
container_summary = adjust_data.get("summary", "").strip()
|
|
@@ -280,31 +258,25 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 280 |
continue
|
| 281 |
|
| 282 |
if not corrected_container:
|
| 283 |
-
logging.warning("fine_tune_report: No improved container was
|
| 284 |
continue
|
| 285 |
|
| 286 |
corrections_summary.append(f"Container corrected: {container_summary}")
|
| 287 |
-
|
| 288 |
-
# ---------------------------------------------------------------
|
| 289 |
-
# Step 4: Replace the original container with the corrected container in the BeautifulSoup object.
|
| 290 |
-
# ---------------------------------------------------------------
|
| 291 |
candidate.replace_with(BeautifulSoup(corrected_container, "html.parser"))
|
| 292 |
-
logging.info("fine_tune_report: Updated container re-injected
|
| 293 |
|
| 294 |
-
# Get the updated report HTML from the modified soup.
|
| 295 |
updated_report_html = str(soup)
|
| 296 |
|
| 297 |
-
#
|
| 298 |
-
# Step 5: (Optional) Update the reference table if new inline citations exist.
|
| 299 |
-
# ---------------------------------------------------------------
|
| 300 |
prompt_refs = (
|
| 301 |
f"You are a technical editor. Review the following updated report HTML. "
|
| 302 |
-
f"If
|
| 303 |
-
f"generate an updated Reference Summary Table as valid HTML
|
| 304 |
f"Updated Report HTML:\n{updated_report_html}"
|
| 305 |
)
|
| 306 |
updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 307 |
-
updated_refs =
|
| 308 |
|
| 309 |
if updated_refs:
|
| 310 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
|
@@ -317,22 +289,20 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 317 |
next_sibling.replace_with(new_ref_html)
|
| 318 |
logging.info("fine_tune_report: Reference table updated successfully.")
|
| 319 |
except Exception as e:
|
| 320 |
-
logging.error(f"fine_tune_report: Error
|
| 321 |
else:
|
| 322 |
-
logging.info("fine_tune_report: No sibling
|
| 323 |
updated_report_html = str(soup_updated)
|
| 324 |
else:
|
| 325 |
-
logging.info("fine_tune_report: No reference table heading found; reference update
|
| 326 |
else:
|
| 327 |
logging.info("fine_tune_report: No updated reference table returned; leaving references unchanged.")
|
| 328 |
|
| 329 |
-
#
|
| 330 |
-
# Step 6: Append a summary of corrections to the existing QA log.
|
| 331 |
-
# ---------------------------------------------------------------
|
| 332 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 333 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
| 334 |
|
| 335 |
-
logging.info("fine_tune_report: Fine-tuning
|
| 336 |
return updated_report_html, updated_qa
|
| 337 |
|
| 338 |
def suggest_improvements(report_html: str, openai_api_key: str, serpapi_api_key: str) -> str:
|
|
|
|
| 88 |
updated_history = chat_history + [[user_message, answer]]
|
| 89 |
return updated_history, ""
|
| 90 |
|
| 91 |
+
def clean_llm_response(response: str) -> str:
    """
    Normalize a raw LLM reply so it can be handed to a parser such as json.loads.

    The reply is stripped of surrounding whitespace, an enclosing Markdown code
    fence is removed (including an optional language tag such as ``json`` or
    ``html`` on the opening fence line), and every run of whitespace, newlines
    included, is collapsed to a single space.

    Parameters:
        response: The raw text returned by the LLM. ``None`` or an empty string
            is tolerated and yields an empty string.

    Returns:
        The cleaned, single-line string (possibly empty).
    """
    if not response:
        return ""

    cleaned = response.strip()

    if cleaned.startswith("```"):
        # Drop the whole opening fence line, i.e. the backticks plus an optional
        # language tag. Stripping only the backticks would leave the tag behind
        # ("json {...}") and make the payload unparseable.
        without_fence_line = re.sub(r"^`+[ \t]*[\w+-]*[ \t]*\r?\n", "", cleaned, count=1)
        if without_fence_line != cleaned:
            cleaned = without_fence_line
        else:
            # Fence and payload share a line (e.g. ```{"a": 1}```): only the
            # backticks can be removed safely.
            cleaned = cleaned.lstrip("`")

    if cleaned.endswith("```"):
        cleaned = cleaned.rstrip("`")

    # Collapsing all whitespace also replaces newlines, so no separate
    # newline replacement is needed.
    return re.sub(r"\s+", " ", cleaned).strip()
|
| 106 |
|
| 107 |
def expand_snippet_area(full_html: str, snippet: str) -> str:
|
| 108 |
"""
|
| 109 |
+
Given the full HTML and a small snippet (e.g., containing a keyword such as "abc"),
|
| 110 |
find the element in which that snippet appears and traverse upward through the DOM
|
| 111 |
until a larger container is reached. Allowed container tags include div, table, iframe, and section.
|
| 112 |
The traversal stops when the parent is no longer in the allowed list or before reaching <body>.
|
|
|
|
| 177 |
Fine-tunes an HTML report based on a user’s correction request by processing complete container elements.
|
| 178 |
|
| 179 |
Process Overview:
|
| 180 |
+
1. Submits full report HTML and the user adjustment request to the LLM and gets back one or more unique plain text strings.
|
| 181 |
+
2. For each unique string, uses BeautifulSoup (and expand_snippet_area) to retrieve the outer container (<div>, <table>, <iframe>, or <section>).
|
| 182 |
+
3. For each container, calls the LLM (with full report context, crumbs, and adjustment request) to output a corrected version.
|
| 183 |
+
4. Replaces the original container with the corrected version.
|
| 184 |
+
5. Optionally updates the reference table if new inline citations appear.
|
| 185 |
+
6. Appends a corrections summary to the QA log.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
"""
|
| 187 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 188 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 189 |
|
| 190 |
logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
|
| 191 |
|
| 192 |
+
# Step 1: Identify unique plain text string(s) that pinpoint the targeted area.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
prompt_identify = (
|
| 194 |
f"You are a meticulous technical editor. Below is the full report HTML together with a "
|
| 195 |
+
f"user adjustment request. Extract one or more unique plain-text string(s) (without any HTML tags or formatting) "
|
| 196 |
+
f"that uniquely appear in the area targeted by the adjustment request. Output them in a JSON object with the key "
|
| 197 |
+
f"\"identified_unique_strings\" mapped to a list of strings.\n\n"
|
|
|
|
| 198 |
f"Full Report HTML:\n{report_html}\n\n"
|
| 199 |
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
| 200 |
f"Only output valid JSON."
|
|
|
|
| 204 |
logging.info(f"fine_tune_report: Raw unique string identification response: {response_identify}")
|
| 205 |
|
| 206 |
try:
|
| 207 |
+
response_identify = clean_llm_response(response_identify)
|
| 208 |
id_data = json.loads(response_identify)
|
| 209 |
unique_strings = id_data.get("identified_unique_strings", [])
|
| 210 |
except Exception as e:
|
|
|
|
| 215 |
logging.warning("fine_tune_report: No unique strings were identified for adjustment. Returning original report.")
|
| 216 |
return report_html, qa
|
| 217 |
|
| 218 |
+
# Step 2: For each unique string, locate the corresponding outer container.
|
|
|
|
|
|
|
| 219 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 220 |
corrections_summary = []
|
| 221 |
for uniq_str in unique_strings:
|
| 222 |
uniq_str = uniq_str.strip()
|
|
|
|
| 223 |
container_html = expand_snippet_area(report_html, uniq_str)
|
| 224 |
if not container_html:
|
| 225 |
logging.warning(f"fine_tune_report: Could not locate a container for unique string: {uniq_str}")
|
| 226 |
continue
|
|
|
|
| 227 |
candidate = soup.find(lambda tag: container_html in str(tag))
|
| 228 |
if not candidate:
|
| 229 |
+
logging.warning(f"fine_tune_report: The container for unique string was not found: {uniq_str}")
|
| 230 |
continue
|
| 231 |
|
| 232 |
original_container_html = str(candidate)
|
| 233 |
logging.info("fine_tune_report: Found container for unique string adjustment.")
|
| 234 |
|
|
|
|
| 235 |
# Step 3: Call the LLM to adjust this container.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
prompt_adjust = (
|
| 237 |
f"You are a technical editor. Given the following HTML container (with its outer tags) extracted "
|
| 238 |
f"from a larger report and based on the user adjustment request, produce a corrected version by making "
|
| 239 |
+
f"only the necessary changes. Preserve inline citations, formatting, and context. Ensure the updated content "
|
| 240 |
+
f"remains consistent with the overall report style. Output a JSON object with two keys: "
|
| 241 |
+
f"\"improved\" (the corrected container's full HTML) and \"summary\" (a brief explanation of the changes).\n\n"
|
| 242 |
f"Overall Report HTML:\n{report_html}\n\n"
|
| 243 |
f"Original Container to Adjust:\n{original_container_html}\n\n"
|
| 244 |
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
|
|
|
| 248 |
|
| 249 |
response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
|
| 250 |
logging.info(f"fine_tune_report: Raw container adjustment response: {response_adjust}")
|
|
|
|
| 251 |
try:
|
| 252 |
+
response_adjust = clean_llm_response(response_adjust)
|
| 253 |
adjust_data = json.loads(response_adjust)
|
| 254 |
corrected_container = adjust_data.get("improved", "").strip()
|
| 255 |
container_summary = adjust_data.get("summary", "").strip()
|
|
|
|
| 258 |
continue
|
| 259 |
|
| 260 |
if not corrected_container:
|
| 261 |
+
logging.warning("fine_tune_report: No improved container was generated; skipping this container.")
|
| 262 |
continue
|
| 263 |
|
| 264 |
corrections_summary.append(f"Container corrected: {container_summary}")
|
| 265 |
+
# Step 4: Replace the original container with the updated container.
|
|
|
|
|
|
|
|
|
|
| 266 |
candidate.replace_with(BeautifulSoup(corrected_container, "html.parser"))
|
| 267 |
+
logging.info("fine_tune_report: Updated container re-injected.")
|
| 268 |
|
|
|
|
| 269 |
updated_report_html = str(soup)
|
| 270 |
|
| 271 |
+
# Step 5: (Optional) Update reference table if needed.
|
|
|
|
|
|
|
| 272 |
prompt_refs = (
|
| 273 |
f"You are a technical editor. Review the following updated report HTML. "
|
| 274 |
+
f"If any new inline citations (e.g., [x]) have been introduced that are not in the original reference table, "
|
| 275 |
+
f"generate an updated Reference Summary Table as valid HTML. Output only the updated reference table HTML with no commentary.\n\n"
|
| 276 |
f"Updated Report HTML:\n{updated_report_html}"
|
| 277 |
)
|
| 278 |
updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 279 |
+
updated_refs = clean_llm_response(updated_refs)
|
| 280 |
|
| 281 |
if updated_refs:
|
| 282 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
|
|
|
| 289 |
next_sibling.replace_with(new_ref_html)
|
| 290 |
logging.info("fine_tune_report: Reference table updated successfully.")
|
| 291 |
except Exception as e:
|
| 292 |
+
logging.error(f"fine_tune_report: Error updating reference table: {e}")
|
| 293 |
else:
|
| 294 |
+
logging.info("fine_tune_report: No sibling after reference heading; skipping update.")
|
| 295 |
updated_report_html = str(soup_updated)
|
| 296 |
else:
|
| 297 |
+
logging.info("fine_tune_report: No reference table heading found; skipping reference update.")
|
| 298 |
else:
|
| 299 |
logging.info("fine_tune_report: No updated reference table returned; leaving references unchanged.")
|
| 300 |
|
| 301 |
+
# Step 6: Append corrections summary to the QA log.
|
|
|
|
|
|
|
| 302 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 303 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
| 304 |
|
| 305 |
+
logging.info("fine_tune_report: Fine-tuning complete.")
|
| 306 |
return updated_report_html, updated_qa
|
| 307 |
|
| 308 |
def suggest_improvements(report_html: str, openai_api_key: str, serpapi_api_key: str) -> str:
|