Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -451,22 +451,41 @@ def remove_text_from_html(report_html: str, text_to_remove: str) -> str:
|
|
| 451 |
child.replace_with(new_text)
|
| 452 |
return str(soup)
|
| 453 |
|
| 454 |
-
def fix_visual_after_section(report_html: str, section_title: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 456 |
-
|
| 457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
if header:
|
| 459 |
-
#
|
| 460 |
-
iframe = header.
|
| 461 |
if iframe and iframe.has_attr("srcdoc"):
|
| 462 |
current_code = iframe["srcdoc"]
|
| 463 |
-
prompt =
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
return str(soup)
|
| 471 |
|
| 472 |
def snippet_in_tag(tag: Tag, snippet: str) -> bool:
|
|
@@ -674,38 +693,36 @@ Only output valid JSON with no comments or code fences."""
|
|
| 674 |
|
| 675 |
updated_report_html = str(soup)
|
| 676 |
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
logging.
|
|
|
|
| 702 |
else:
|
| 703 |
-
logging.info("fine_tune_report: No
|
| 704 |
-
updated_report_html = str(soup_updated)
|
| 705 |
else:
|
| 706 |
-
logging.info("fine_tune_report: No reference table
|
| 707 |
-
else:
|
| 708 |
-
logging.info("fine_tune_report: No updated reference table returned; leaving unchanged.")
|
| 709 |
|
| 710 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 711 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
|
@@ -754,18 +771,16 @@ def improve_report_from_chat(user_message: str, chat_history: list, report_text:
|
|
| 754 |
adjustment_request = user_message.replace("@improve", "").strip()
|
| 755 |
|
| 756 |
# --- CASE 1: Removal request ---
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
text_to_remove = text_removal_match.group(1).strip()
|
| 761 |
updated_report = remove_text_from_html(report_text, text_to_remove)
|
| 762 |
answer = f"Removed text: '{text_to_remove}' from the report."
|
| 763 |
chat_history.append([user_message, answer])
|
| 764 |
return chat_history, "", updated_report
|
| 765 |
|
| 766 |
-
# --- CASE 2:
|
| 767 |
-
|
| 768 |
-
visual_fix_match = re.search(r"fix visual after section\s+([^\:]+)(?::\s*(.*))?", adjustment_request, re.I)
|
| 769 |
if visual_fix_match:
|
| 770 |
section_name = visual_fix_match.group(1).strip()
|
| 771 |
extra_instructions = visual_fix_match.group(2).strip() if visual_fix_match.group(2) else ""
|
|
@@ -774,7 +789,7 @@ def improve_report_from_chat(user_message: str, chat_history: list, report_text:
|
|
| 774 |
chat_history.append([user_message, answer])
|
| 775 |
return chat_history, "", updated_report
|
| 776 |
|
| 777 |
-
# --- DEFAULT: Proceed with
|
| 778 |
updated_report, _ = fine_tune_report(
|
| 779 |
adjustment_request,
|
| 780 |
os.getenv("OPENAI_API_KEY"),
|
|
@@ -938,7 +953,7 @@ You are a technical editor.
|
|
| 938 |
|
| 939 |
Review the following expanded report HTML.
|
| 940 |
If any new inline citations (e.g., [x]) have been introduced that are not in the original reference table,
|
| 941 |
-
generate an updated
|
| 942 |
Updated Report HTML:\n{updated_report_html}"""
|
| 943 |
)
|
| 944 |
updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=1000)
|
|
@@ -946,7 +961,7 @@ Updated Report HTML:\n{updated_report_html}"""
|
|
| 946 |
|
| 947 |
if updated_refs:
|
| 948 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 949 |
-
ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "
|
| 950 |
if ref_heading:
|
| 951 |
next_sibling = ref_heading.find_next_sibling()
|
| 952 |
if next_sibling:
|
|
@@ -2240,9 +2255,9 @@ The report must follow this writing style: {reportstyle}.
|
|
| 2240 |
--------------- Citations -----------
|
| 2241 |
- The report must include inline citations (e.g., [1], [2], etc.) from real sources provided in the search results below - be selective, don't put it at every sentence or every paragraph.
|
| 2242 |
Note: citations sources in-line need to be in this format: blablabla - Source [x] / "pdf" is not a source, provide the title or author
|
| 2243 |
-
- The name of the reference table should be: "
|
| 2244 |
- The reference table at the end containing the citations details should have 4 columns: the ref number, the title of the document, the author(s, the URL - with hyperlink)
|
| 2245 |
-
- The report MUST include a
|
| 2246 |
- For the reference citations, add systematically the urls from the Learnings (no need to put them in numbered list format since we alredy have the [x] that serves as number list)
|
| 2247 |
- Do not add any inline citations reference in the visual and graph placeholders descriptions belo, you can add them in focus though.
|
| 2248 |
- Do not make false references / citations. It has to be grounded from the sources in the rsearch results / crumbs below (no example.com/... type references!)
|
|
@@ -2401,7 +2416,7 @@ Use the following report structure with consistency:
|
|
| 2401 |
- Introduction
|
| 2402 |
- [Sections and sub-sections, depending on the size and relevant topic - including visual, graph and focus placeholders]
|
| 2403 |
- Conclusion
|
| 2404 |
-
- References
|
| 2405 |
- Report ending formatting (as mentioned before)
|
| 2406 |
|
| 2407 |
{{Do not add anything after - no conclusive meta comment or content}}
|
|
|
|
| 451 |
child.replace_with(new_text)
|
| 452 |
return str(soup)
|
| 453 |
|
| 454 |
+
def fix_visual_after_section(report_html: str, section_title: str, extra_instructions: str) -> str:
    """
    Ask the LLM to improve the first visual (<iframe>) that follows a given section header.

    The report is parsed with BeautifulSoup. The first <h1>-<h4> tag, in document order,
    whose text contains ``section_title`` (case-insensitive) and does not contain
    "table of contents" is taken as the section header. The first <iframe> found after
    that header is used; if it has a ``srcdoc`` attribute, its content is sent to the LLM
    together with ``extra_instructions`` and the answer is written back into ``srcdoc``.

    Args:
        report_html: The full report HTML.
        section_title: Text to look for inside a header tag (the value taken from [[ ]]).
        extra_instructions: Free-form instructions appended to the LLM prompt (may be empty).

    Returns:
        The re-serialized report HTML. When the section, the iframe, or a usable LLM
        answer is missing, the report content is left unchanged and an error is logged.
    """
    soup = BeautifulSoup(report_html, "html.parser")

    # An empty title would match every header ("" is a substring of any text),
    # which would silently rewrite an unrelated visual.
    wanted = (section_title or "").strip().lower()
    header = None
    if wanted:
        # Look for header tags (h1-h4) that contain the target, excluding TOC headings.
        for tag in soup.find_all(["h1", "h2", "h3", "h4"]):
            text = tag.get_text(strip=True).lower()
            if wanted in text and "table of contents" not in text:
                header = tag
                break

    if header is None:
        logging.error("fix_visual_after_section: Section '%s' not found.", section_title)
        return str(soup)

    # Look for the first <iframe> after that header.
    iframe = header.find_next("iframe")
    if not (iframe and iframe.has_attr("srcdoc")):
        logging.error("fix_visual_after_section: No iframe found after section '%s'.", section_title)
        return str(soup)

    current_code = iframe["srcdoc"]
    prompt = (
        f"Improve the following visual code by simplifying its formatting (e.g., adjust parenthesis, remove extra styling) "
        f"based on these extra instructions: {extra_instructions}\n\n"
        f"Current code:\n{current_code}\n\n"
        "Return only the improved code (no extra commentary)."
    )
    # `or ""` guards against a None result so the .strip() below cannot raise.
    improved_code = (llm_call(prompt=prompt, model="o3-mini", temperature=0, max_tokens_param=1500) or "").strip()

    # Models often wrap code in Markdown fences (```html ... ```); the fences must not
    # end up inside srcdoc, so drop the opening fence (with optional language tag)
    # and the closing fence.
    if improved_code.startswith("```"):
        improved_code = re.sub(r"\A```[\w-]*[ \t]*\n?", "", improved_code)
        improved_code = re.sub(r"\n?```\s*\Z", "", improved_code).strip()

    # Check that the improvement is valid and not an error message.
    if improved_code and not improved_code.lower().startswith("error: empty response"):
        iframe["srcdoc"] = improved_code
    else:
        logging.error("fix_visual_after_section: LLM returned an empty or error response.")
    return str(soup)
|
| 490 |
|
| 491 |
def snippet_in_tag(tag: Tag, snippet: str) -> bool:
|
|
|
|
| 693 |
|
| 694 |
updated_report_html = str(soup)
|
| 695 |
|
| 696 |
+
# Step 5 (and 6): Update the reference table if needed.
|
| 697 |
+
prompt_refs = (
|
| 698 |
+
f"\nYou are a technical editor.\n\n"
|
| 699 |
+
"Review the following updated report HTML. If any new inline citations (e.g., [x]) have been added that are not in the original reference table,\n"
|
| 700 |
+
"generate an updated Reference Summary Table as valid HTML. Output only the updated table without any additional comments.\n\n"
|
| 701 |
+
f"Updated Report HTML:\n{updated_report_html}"
|
| 702 |
+
)
|
| 703 |
+
# Increase token limit to 1500 for this call
|
| 704 |
+
updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=1500)
|
| 705 |
+
updated_refs = updated_refs.strip().strip("```").strip()
|
| 706 |
+
|
| 707 |
+
if updated_refs and not updated_refs.lower().startswith("error: empty response"):
|
| 708 |
+
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 709 |
+
ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "reference summary table" in tag.get_text(strip=True).lower())
|
| 710 |
+
if ref_heading:
|
| 711 |
+
next_sibling = ref_heading.find_next_sibling()
|
| 712 |
+
if next_sibling:
|
| 713 |
+
try:
|
| 714 |
+
new_ref_html = BeautifulSoup(updated_refs, "html.parser")
|
| 715 |
+
next_sibling.replace_with(new_ref_html)
|
| 716 |
+
logging.info("fine_tune_report: Reference table updated successfully.")
|
| 717 |
+
except Exception as e:
|
| 718 |
+
logging.error("fine_tune_report: Error updating reference table: %s", e)
|
| 719 |
+
else:
|
| 720 |
+
logging.info("fine_tune_report: No sibling after reference heading; skipping reference update.")
|
| 721 |
+
updated_report_html = str(soup_updated)
|
| 722 |
else:
|
| 723 |
+
logging.info("fine_tune_report: No reference table heading found; reference update skipped.")
|
|
|
|
| 724 |
else:
|
| 725 |
+
logging.info("fine_tune_report: No valid updated reference table returned; leaving unchanged.")
|
|
|
|
|
|
|
| 726 |
|
| 727 |
global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
|
| 728 |
updated_qa = qa.strip() + "\n----------\n" + global_summary
|
|
|
|
| 771 |
adjustment_request = user_message.replace("@improve", "").strip()
|
| 772 |
|
| 773 |
# --- CASE 1: Removal request ---
|
| 774 |
+
removal_match = re.search(r"remove the following text:\s*\[\[([^\]]+)\]\]", adjustment_request, re.I)
|
| 775 |
+
if removal_match:
|
| 776 |
+
text_to_remove = removal_match.group(1).strip()
|
|
|
|
| 777 |
updated_report = remove_text_from_html(report_text, text_to_remove)
|
| 778 |
answer = f"Removed text: '{text_to_remove}' from the report."
|
| 779 |
chat_history.append([user_message, answer])
|
| 780 |
return chat_history, "", updated_report
|
| 781 |
|
| 782 |
+
# --- CASE 2: Visual fix request ---
|
| 783 |
+
visual_fix_match = re.search(r"fix visual after section\s+\[\[([^\]]+)\]\](?::\s*(.*))?", adjustment_request, re.I)
|
|
|
|
| 784 |
if visual_fix_match:
|
| 785 |
section_name = visual_fix_match.group(1).strip()
|
| 786 |
extra_instructions = visual_fix_match.group(2).strip() if visual_fix_match.group(2) else ""
|
|
|
|
| 789 |
chat_history.append([user_message, answer])
|
| 790 |
return chat_history, "", updated_report
|
| 791 |
|
| 792 |
+
# --- DEFAULT: Proceed with existing LLM-based improvement (fine_tune_report) ---
|
| 793 |
updated_report, _ = fine_tune_report(
|
| 794 |
adjustment_request,
|
| 795 |
os.getenv("OPENAI_API_KEY"),
|
|
|
|
| 953 |
|
| 954 |
Review the following expanded report HTML.
|
| 955 |
If any new inline citations (e.g., [x]) have been introduced that are not in the original reference table,
|
| 956 |
+
generate an updated References Summary Table as valid HTML. Output only the updated reference table HTML with no explanations.
|
| 957 |
Updated Report HTML:\n{updated_report_html}"""
|
| 958 |
)
|
| 959 |
updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=1000)
|
|
|
|
| 961 |
|
| 962 |
if updated_refs:
|
| 963 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 964 |
+
ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "References Summary Table" in tag.get_text())
|
| 965 |
if ref_heading:
|
| 966 |
next_sibling = ref_heading.find_next_sibling()
|
| 967 |
if next_sibling:
|
|
|
|
| 2255 |
--------------- Citations -----------
|
| 2256 |
- The report must include inline citations (e.g., [1], [2], etc.) from real sources provided in the search results below - be selective, don't put it at every sentence or every paragraph.
|
| 2257 |
Note: citations sources in-line need to be in this format: blablabla - Source [x] / "pdf" is not a source, provide the title or author
|
| 2258 |
+
- The name of the reference table should be: "References Summary Table"
|
| 2259 |
- The reference table at the end containing the citations details should have 4 columns: the ref number, the title of the document, the author(s, the URL - with hyperlink)
|
| 2260 |
+
- The report MUST include a References Summary Table with between 10 (for a 8 page report) and 30 references (for a 40 pages report). All inline citations (e.g., [1], [2], …) present in the report and in any focus placeholders MUST have a corresponding entry in this table with its full URL.
|
| 2261 |
- For the reference citations, add systematically the urls from the Learnings (no need to put them in numbered list format since we alredy have the [x] that serves as number list)
|
| 2262 |
- Do not add any inline citations reference in the visual and graph placeholders descriptions belo, you can add them in focus though.
|
| 2263 |
- Do not make false references / citations. It has to be grounded from the sources in the rsearch results / crumbs below (no example.com/... type references!)
|
|
|
|
| 2416 |
- Introduction
|
| 2417 |
- [Sections and sub-sections, depending on the size and relevant topic - including visual, graph and focus placeholders]
|
| 2418 |
- Conclusion
|
| 2419 |
+
- References Summary Table
|
| 2420 |
- Report ending formatting (as mentioned before)
|
| 2421 |
|
| 2422 |
{{Do not add anything after - no conclusive meta comment or content}}
|