Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -453,39 +453,82 @@ def remove_text_from_html(report_html: str, text_to_remove: str) -> str:
|
|
| 453 |
|
| 454 |
def fix_visual_after_section(report_html: str, section_title: str, extra_instructions: str) -> str:
|
| 455 |
"""
|
| 456 |
-
Given a report HTML
|
| 457 |
-
this function finds the first header (
|
| 458 |
-
then finds the first <iframe>
|
|
|
|
|
|
|
|
|
|
| 459 |
"""
|
| 460 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 461 |
header = None
|
| 462 |
-
# Look for header tags (h1–h4) that contain the
|
| 463 |
for tag in soup.find_all(["h1", "h2", "h3", "h4"]):
|
| 464 |
-
|
| 465 |
-
if section_title.lower() in
|
| 466 |
header = tag
|
| 467 |
break
|
| 468 |
-
if header:
|
| 469 |
-
# Look for the first <iframe> after that header
|
| 470 |
-
iframe = header.find_next("iframe")
|
| 471 |
-
if iframe and iframe.has_attr("srcdoc"):
|
| 472 |
-
current_code = iframe["srcdoc"]
|
| 473 |
-
prompt = (
|
| 474 |
-
f"Improve the following visual code by simplifying its formatting (e.g., adjust parenthesis, remove extra styling) "
|
| 475 |
-
f"based on these extra instructions: {extra_instructions}\n\n"
|
| 476 |
-
f"Current code:\n{current_code}\n\n"
|
| 477 |
-
"Return only the improved code (no extra commentary)."
|
| 478 |
-
)
|
| 479 |
-
improved_code = llm_call(prompt=prompt, model="o3-mini", temperature=0, max_tokens_param=1500).strip()
|
| 480 |
-
# Check that the improvement is valid and not an error message.
|
| 481 |
-
if improved_code and not improved_code.lower().startswith("error: empty response"):
|
| 482 |
-
iframe["srcdoc"] = improved_code
|
| 483 |
-
else:
|
| 484 |
-
logging.error("fix_visual_after_section: LLM returned an empty or error response.")
|
| 485 |
-
else:
|
| 486 |
-
logging.error(f"fix_visual_after_section: No iframe found after section '{section_title}'.")
|
| 487 |
-
else:
|
| 488 |
logging.error(f"fix_visual_after_section: Section '{section_title}' not found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
return str(soup)
|
| 490 |
|
| 491 |
def snippet_in_tag(tag: Tag, snippet: str) -> bool:
|
|
|
|
| 453 |
|
| 454 |
def fix_visual_after_section(report_html: str, section_title: str, extra_instructions: str) -> str:
    """
    Ask the LLM to improve the mermaid visual that follows a report section.

    The function looks for the first header (h1-h4) whose text contains
    ``section_title`` (case-insensitive, ignoring headers that contain
    "table of contents"), then takes the first <iframe> after that header.
    The iframe's ``srcdoc`` is sent to the LLM together with
    ``extra_instructions``; the reply is expected to be bare mermaid code
    starting with a known diagram keyword. On success the iframe's ``srcdoc``
    is replaced by a freshly built HTML page wrapping the improved code.

    Args:
        report_html: Full HTML of the report.
        section_title: Section name to look for (extracted from within [[...]]).
        extra_instructions: Free-form instructions forwarded to the LLM.

    Returns:
        The re-serialized document (``str(soup)``) when the iframe was updated;
        otherwise the original ``report_html`` string, unchanged. Every failure
        path is logged with ``logging.error`` and never raises on its own.
    """
    # Trim the requested title: header text is compared after get_text(strip=True),
    # so stray whitespace around the title would otherwise prevent a match.
    wanted = section_title.strip().lower()

    header = None
    soup = None
    # An empty title would be "contained" in every header; treat it as not found.
    if wanted:
        soup = BeautifulSoup(report_html, "html.parser")
        # Look for header tags (h1-h4) that contain the section title (ignoring "table of contents")
        for tag in soup.find_all(["h1", "h2", "h3", "h4"]):
            header_text = tag.get_text(strip=True).lower()
            if wanted in header_text and "table of contents" not in header_text:
                header = tag
                break

    if header is None:
        logging.error(f"fix_visual_after_section: Section '{section_title}' not found.")
        return report_html

    # Find the first <iframe> after the header; it must carry a srcdoc to be editable.
    iframe = header.find_next("iframe")
    if not iframe or not iframe.has_attr("srcdoc"):
        logging.error(f"fix_visual_after_section: No <iframe> found after section '{section_title}'.")
        return report_html

    current_code = iframe["srcdoc"]
    logging.info(f"fix_visual_after_section: Current iframe srcdoc length: {len(current_code)}")

    # Build prompt: ask the LLM to simplify/adjust the mermaid code.
    prompt = f"""Improve the following mermaid visualization code.
Extra instructions: {extra_instructions}
Current mermaid code:
{current_code}

Return only the improved mermaid code (the content that should go inside the <div class="mermaid">...</div>) with no extra wrapping HTML or commentary.
Ensure that the output starts with one of the following keywords: mindmap, flowchart, sequence, gantt, or pie."""

    raw_reply = llm_call(prompt=prompt, model="o3-mini", temperature=0, max_tokens_param=1500)
    # Guard against a missing reply so the cleanup below cannot raise on None.
    improved_code = (raw_reply or "").strip().strip("`").strip()
    # str.strip only removes the backticks of a Markdown fence; a fence opened as
    # "```mermaid" still leaves the language tag on the first line, which would
    # make the keyword check below reject an otherwise valid diagram.
    first_line, _, remainder = improved_code.partition("\n")
    if first_line.strip().lower() == "mermaid":
        improved_code = remainder.strip()
    logging.info(f"fix_visual_after_section: Improved code received from LLM:\n{improved_code}")

    # Verify that the result starts with a valid mermaid diagram keyword.
    # This also rejects empty replies and error strings returned in place of code.
    valid_keywords = ("mindmap", "flowchart", "sequence", "gantt", "pie")
    if not improved_code.lower().startswith(valid_keywords):
        logging.error("fix_visual_after_section: Improved code does not start with a valid mermaid keyword.")
        return report_html

    # Rebuild the srcdoc using the improved mermaid code.
    # The code placeholder is kept at column 0 so the template adds no indentation
    # to the diagram's first line.
    # NOTE(review): improved_code is inserted verbatim (no HTML escaping) and mermaid
    # is initialized with securityLevel "loose" -- confirm the LLM output is trusted
    # enough for this.
    new_srcdoc = f"""<!DOCTYPE html>
<html>
<head>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10.4.0/dist/mermaid.min.js"></script>
<script>
mermaid.initialize({{ startOnLoad: true, securityLevel: "loose", theme: "default" }});
</script>
<style>
.mermaid {{
background-color: #f7f2f2;
color: black;
padding: 10px;
border-radius: 5px;
border: 1px solid #ccc;
max-width: 1200px;
margin: 0 auto;
min-height: 400px;
}}
</style>
</head>
<body>
<div class="mermaid">
{improved_code}
</div>
</body>
</html>"""
    iframe["srcdoc"] = new_srcdoc
    logging.info("fix_visual_after_section: Successfully updated the iframe srcdoc with improved mermaid code.")
    return str(soup)
|
| 533 |
|
| 534 |
def snippet_in_tag(tag: Tag, snippet: str) -> bool:
|