Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -162,52 +162,69 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 162 |
initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
|
| 163 |
complementary_guidance: str) -> (str, str):
|
| 164 |
"""
|
| 165 |
-
Fine-tunes an HTML report based on a user’s correction request.
|
| 166 |
|
| 167 |
-
|
| 168 |
-
1.
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
Parameters:
|
| 177 |
-
adjustment_request:
|
|
|
|
| 178 |
openai_api_key: OpenAI API Key.
|
| 179 |
serpapi_api_key: SERPAPI API Key.
|
| 180 |
-
report_html:
|
| 181 |
-
initial_request: The original research query
|
| 182 |
-
qa: Existing clarification Q&A.
|
| 183 |
-
target_style: The
|
| 184 |
-
knowledge_crumbs: Aggregated source/
|
| 185 |
-
complementary_guidance:
|
| 186 |
-
|
| 187 |
Returns:
|
| 188 |
-
A tuple (updated_report_html, updated_qa)
|
| 189 |
"""
|
| 190 |
import os
|
| 191 |
import json
|
| 192 |
import logging
|
| 193 |
from bs4 import BeautifulSoup
|
| 194 |
|
| 195 |
-
# Set API keys
|
| 196 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 197 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 198 |
|
| 199 |
logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
|
| 210 |
logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
|
|
|
|
| 211 |
try:
|
| 212 |
response_identify = response_identify.strip().strip("```")
|
| 213 |
id_data = json.loads(response_identify)
|
|
@@ -216,66 +233,90 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 216 |
logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
|
| 217 |
identified_snippets = []
|
| 218 |
|
| 219 |
-
# If no snippets were identified, log an error and fall back (optional: you may choose to return without changes).
|
| 220 |
if not identified_snippets:
|
| 221 |
-
logging.warning("fine_tune_report: No specific snippets were identified for adjustment. Returning original report.")
|
| 222 |
return report_html, qa
|
| 223 |
|
| 224 |
-
#
|
|
|
|
|
|
|
| 225 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 226 |
updated_report_html = report_html
|
| 227 |
corrections_summary = []
|
| 228 |
|
| 229 |
for snippet in identified_snippets:
|
| 230 |
snippet = snippet.strip()
|
| 231 |
-
#
|
| 232 |
-
|
| 233 |
-
|
|
|
|
| 234 |
continue
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
|
| 248 |
logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
|
|
|
|
| 249 |
try:
|
| 250 |
response_adjust = response_adjust.strip().strip("```")
|
| 251 |
adjust_data = json.loads(response_adjust)
|
| 252 |
-
|
| 253 |
snippet_summary = adjust_data.get("summary", "").strip()
|
| 254 |
except Exception as e:
|
| 255 |
logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
|
| 256 |
continue
|
| 257 |
|
| 258 |
-
if not
|
| 259 |
-
logging.warning("fine_tune_report: No improved
|
| 260 |
continue
|
| 261 |
|
| 262 |
-
corrections_summary.append(f"
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 274 |
updated_refs = updated_refs.strip().strip("```")
|
| 275 |
|
| 276 |
if updated_refs:
|
| 277 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 278 |
-
# Look for a heading that includes "Reference Summary Table"
|
| 279 |
ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
|
| 280 |
if ref_heading:
|
| 281 |
next_sibling = ref_heading.find_next_sibling()
|
|
@@ -287,14 +328,16 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
|
|
| 287 |
except Exception as e:
|
| 288 |
logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
|
| 289 |
else:
|
| 290 |
-
logging.info("fine_tune_report: No sibling element found after
|
| 291 |
updated_report_html = str(soup_updated)
|
| 292 |
else:
|
| 293 |
-
logging.info("fine_tune_report: No
|
| 294 |
else:
|
| 295 |
-
logging.info("fine_tune_report:
|
| 296 |
|
| 297 |
-
#
|
|
|
|
|
|
|
| 298 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 299 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
| 300 |
|
|
|
|
| 162 |
initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
|
| 163 |
complementary_guidance: str) -> (str, str):
|
| 164 |
"""
|
| 165 |
+
Fine-tunes an HTML report based on a user’s correction request by processing complete container elements.
|
| 166 |
|
| 167 |
+
Process Overview:
|
| 168 |
+
1. The function submits the full report HTML along with the user’s adjustment request to the LLM.
|
| 169 |
+
The prompt instructs the model to output a JSON object containing the minimal unique container(s)
|
| 170 |
+
(including their outer HTML—e.g. <iframe>, <div>, or <table>) that correspond to the content that needs
|
| 171 |
+
fixing.
|
| 172 |
+
2. The identified container snippet(s) are then located in the report using BeautifulSoup.
|
| 173 |
+
3. For each container, a second LLM call is made to generate a corrected version that integrates the user
|
| 174 |
+
instructions while maintaining context, citations, and overall style.
|
| 175 |
+
4. The old container markup is replaced by the corrected version directly in the BeautifulSoup object.
|
| 176 |
+
5. Finally, if new inline citations have been introduced (beyond those in the reference table), a final LLM
|
| 177 |
+
call updates the reference table.
|
| 178 |
+
6. A summary of the corrections is appended to the QA log.
|
| 179 |
|
| 180 |
Parameters:
|
| 181 |
+
adjustment_request: A string such as "the visual after 'xyz' is not displaying properly, please fix it" or
|
| 182 |
+
"the introduction should be more detailed, adjust it" etc.
|
| 183 |
openai_api_key: OpenAI API Key.
|
| 184 |
serpapi_api_key: SERPAPI API Key.
|
| 185 |
+
report_html: A string containing the full HTML report.
|
| 186 |
+
initial_request: The original research query or request.
|
| 187 |
+
qa: Existing clarification Q&A log.
|
| 188 |
+
target_style: The stylistic guidelines the report should follow.
|
| 189 |
+
knowledge_crumbs: Aggregated source/search result content.
|
| 190 |
+
complementary_guidance: Additional instructions.
|
| 191 |
+
|
| 192 |
Returns:
|
| 193 |
+
A tuple (updated_report_html, updated_qa) with the corrected report and updated QA log.
|
| 194 |
"""
|
| 195 |
import os
|
| 196 |
import json
|
| 197 |
import logging
|
| 198 |
from bs4 import BeautifulSoup
|
| 199 |
|
| 200 |
+
# Set API keys as environment variables for downstream calls.
|
| 201 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 202 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 203 |
|
| 204 |
logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
|
| 205 |
|
| 206 |
+
# ---------------------------------------------------------------
|
| 207 |
+
# Step 1: Identify container snippet(s) needing adjustment.
|
| 208 |
+
#
|
| 209 |
+
# The prompt instructs the LLM to scan the full report and output a JSON object
|
| 210 |
+
# with a key "identified_snippets" that contains complete HTML container elements
|
| 211 |
+
# (including their outer tags) that uniquely correspond to the section(s) which
|
| 212 |
+
# should be adjusted per the user request.
|
| 213 |
+
# ---------------------------------------------------------------
|
| 214 |
+
prompt_identify = (
|
| 215 |
+
f"You are a meticulous technical editor. Below is the full report HTML together with a "
|
| 216 |
+
f"user adjustment request. Identify the minimal, unique container(s) that capture the key content "
|
| 217 |
+
f"relevant to the user instruction. The containers may be complete HTML elements such as a <div>, <iframe>, "
|
| 218 |
+
f"<table>, etc. Output a JSON object with the key \"identified_snippets\" that maps to a list of these container "
|
| 219 |
+
f"HTML snippets ONLY (include the outer tags). No commentary or additional text should be present.\n\n"
|
| 220 |
+
f"Full Report HTML:\n{report_html}\n\n"
|
| 221 |
+
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
| 222 |
+
f"Only output valid JSON."
|
| 223 |
+
)
|
| 224 |
|
| 225 |
response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
|
| 226 |
logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
|
| 227 |
+
|
| 228 |
try:
|
| 229 |
response_identify = response_identify.strip().strip("```")
|
| 230 |
id_data = json.loads(response_identify)
|
|
|
|
| 233 |
logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
|
| 234 |
identified_snippets = []
|
| 235 |
|
|
|
|
| 236 |
if not identified_snippets:
|
| 237 |
+
logging.warning("fine_tune_report: No specific container snippets were identified for adjustment. Returning original report.")
|
| 238 |
return report_html, qa
|
| 239 |
|
| 240 |
+
# ---------------------------------------------------------------
|
| 241 |
+
# Step 2: For each identified container snippet, find it in the report.
|
| 242 |
+
# ---------------------------------------------------------------
|
| 243 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 244 |
updated_report_html = report_html
|
| 245 |
corrections_summary = []
|
| 246 |
|
| 247 |
for snippet in identified_snippets:
|
| 248 |
snippet = snippet.strip()
|
| 249 |
+
# Use BeautifulSoup to search for a tag whose complete outer HTML contains the snippet.
|
| 250 |
+
candidate = soup.find(lambda tag: snippet in str(tag))
|
| 251 |
+
if not candidate:
|
| 252 |
+
logging.warning(f"fine_tune_report: The snippet could not be uniquely located in the report:\n{snippet}")
|
| 253 |
continue
|
| 254 |
|
| 255 |
+
original_container_html = str(candidate)
|
| 256 |
+
logging.info("fine_tune_report: Found container snippet for adjustment.")
|
| 257 |
+
|
| 258 |
+
# ---------------------------------------------------------------
|
| 259 |
+
# Step 3: Send a prompt to the LLM to correct this container.
|
| 260 |
+
#
|
| 261 |
+
# Here the LLM is given the entire current container (the extracted full HTML)
|
| 262 |
+
# and the full report context (and other guidance) and is asked to produce a corrected
|
| 263 |
+
# version that applies the adjustment request.
|
| 264 |
+
# ---------------------------------------------------------------
|
| 265 |
+
prompt_adjust = (
|
| 266 |
+
f"You are a technical editor. Given the following HTML container (with its outer tags) "
|
| 267 |
+
f"extracted from a larger report and based on the user adjustment request, produce a corrected "
|
| 268 |
+
f"version by making only the changes required. Preserve existing inline citations, formatting, and contextual details. "
|
| 269 |
+
f"Ensure the updated content remains consistent with the overall report style. Output your answer as a JSON object "
|
| 270 |
+
f"with exactly two keys: \"improved\" (the corrected container's full HTML) and \"summary\" (a brief explanation of the changes applied).\n\n"
|
| 271 |
+
f"Overall Report HTML:\n{report_html}\n\n"
|
| 272 |
+
f"Original Container to Adjust:\n{original_container_html}\n\n"
|
| 273 |
+
f"User Adjustment Request:\n{adjustment_request}\n\n"
|
| 274 |
+
f"Additional Guidance:\nTarget Style: {target_style}\nKnowledge Crumbs: {knowledge_crumbs}\nComplementary Guidance: {complementary_guidance}\n\n"
|
| 275 |
+
f"Only output valid JSON."
|
| 276 |
+
)
|
| 277 |
+
|
| 278 |
response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
|
| 279 |
logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
|
| 280 |
+
|
| 281 |
try:
|
| 282 |
response_adjust = response_adjust.strip().strip("```")
|
| 283 |
adjust_data = json.loads(response_adjust)
|
| 284 |
+
corrected_container = adjust_data.get("improved", "").strip()
|
| 285 |
snippet_summary = adjust_data.get("summary", "").strip()
|
| 286 |
except Exception as e:
|
| 287 |
logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
|
| 288 |
continue
|
| 289 |
|
| 290 |
+
if not corrected_container:
|
| 291 |
+
logging.warning("fine_tune_report: No improved container was returned by the LLM; skipping this snippet.")
|
| 292 |
continue
|
| 293 |
|
| 294 |
+
corrections_summary.append(f"Container corrected: {snippet_summary}")
|
| 295 |
+
|
| 296 |
+
# ---------------------------------------------------------------
|
| 297 |
+
# Step 4: Replace the original container in the BeautifulSoup object.
|
| 298 |
+
# ---------------------------------------------------------------
|
| 299 |
+
candidate.replace_with(BeautifulSoup(corrected_container, "html.parser"))
|
| 300 |
+
logging.info("fine_tune_report: Container snippet replaced.")
|
| 301 |
+
|
| 302 |
+
# Get the updated report HTML from the modified soup.
|
| 303 |
+
updated_report_html = str(soup)
|
| 304 |
+
|
| 305 |
+
# ---------------------------------------------------------------
|
| 306 |
+
# Step 5: Update the reference table if any new inline citations exist.
|
| 307 |
+
# ---------------------------------------------------------------
|
| 308 |
+
prompt_refs = (
|
| 309 |
+
f"You are a technical editor. Review the following updated report HTML. "
|
| 310 |
+
f"If there are any new inline citations (e.g., [x]) not present in the original reference table, "
|
| 311 |
+
f"generate an updated Reference Summary Table as valid HTML containing all references. Output only the HTML code for the updated reference table with no commentary.\n\n"
|
| 312 |
+
f"Updated Report HTML:\n{updated_report_html}"
|
| 313 |
+
)
|
| 314 |
updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 315 |
updated_refs = updated_refs.strip().strip("```")
|
| 316 |
|
| 317 |
if updated_refs:
|
| 318 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 319 |
+
# Look for a heading that includes something like "Reference Summary Table"
|
| 320 |
ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
|
| 321 |
if ref_heading:
|
| 322 |
next_sibling = ref_heading.find_next_sibling()
|
|
|
|
| 328 |
except Exception as e:
|
| 329 |
logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
|
| 330 |
else:
|
| 331 |
+
logging.info("fine_tune_report: No sibling element found after reference heading; skipping reference update.")
|
| 332 |
updated_report_html = str(soup_updated)
|
| 333 |
else:
|
| 334 |
+
logging.info("fine_tune_report: No reference table heading found; reference update skipped.")
|
| 335 |
else:
|
| 336 |
+
logging.info("fine_tune_report: No updated reference table returned; leaving references unchanged.")
|
| 337 |
|
| 338 |
+
# ---------------------------------------------------------------
|
| 339 |
+
# Step 6: Append a summary of corrections to the existing QA log.
|
| 340 |
+
# ---------------------------------------------------------------
|
| 341 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 342 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
| 343 |
|