Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,8 +17,6 @@ from datetime import datetime
|
|
| 17 |
from reportlab.lib.pagesizes import A4
|
| 18 |
from xhtml2pdf import pisa
|
| 19 |
import chromedriver_autoinstaller
|
| 20 |
-
from selenium import webdriver
|
| 21 |
-
from selenium.webdriver.chrome.options import Options
|
| 22 |
|
| 23 |
# Set up logging basic configuration
|
| 24 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
@@ -32,27 +30,20 @@ MAX_MESSAGE_LENGTH = 1048576
|
|
| 32 |
# Helper functions for external APIs and PDF Processing
|
| 33 |
# =============================================================================
|
| 34 |
|
| 35 |
-
def
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
# Load the HTML content directly using a data URL.
|
| 50 |
-
driver.get(f"data:text/html;charset=utf-8,{html_content}")
|
| 51 |
-
time.sleep(2) # Allow time for dynamic content to render
|
| 52 |
-
screenshot_png = driver.get_screenshot_as_png()
|
| 53 |
-
return screenshot_png
|
| 54 |
-
finally:
|
| 55 |
-
driver.quit()
|
| 56 |
|
| 57 |
def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 58 |
prompt = (f"""
|
|
@@ -74,6 +65,7 @@ Keep in mind the:
|
|
| 74 |
- White background (#ffffff)
|
| 75 |
- overall dimension capped at 500px x 500px
|
| 76 |
- no introduction, conclusions or code fences -> Output the result directly
|
|
|
|
| 77 |
|
| 78 |
// Important
|
| 79 |
- Make the visuals content rich, there's no point having a visual if its content has no real value.
|
|
@@ -822,25 +814,22 @@ def validate_visual_html(html: str) -> bool:
|
|
| 822 |
return all(checks)
|
| 823 |
|
| 824 |
class ReportGenerator:
|
| 825 |
-
def __init__(self):
|
| 826 |
-
|
|
|
|
| 827 |
|
| 828 |
def generate_report_html(self, solution_content: str, metadata: dict = None) -> str:
|
| 829 |
-
# Normalize text and
|
| 830 |
solution_content = unicodedata.normalize('NFKC', solution_content)
|
| 831 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
| 832 |
-
# Remove markdown hyperlink syntax: replace [text](link) with just text.
|
| 833 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
| 834 |
-
# Convert markdown to HTML using the "extra" and "tables" extensions to support numbering and table syntax.
|
| 835 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
| 836 |
-
# Insert explicit breaks for main report sections as needed.
|
| 837 |
html_content = html_content.replace("<h2>Table of Contents</h2>", "<div class='page-break'></div><h2>Table of Contents</h2>")
|
| 838 |
html_content = html_content.replace("<h2>Introduction</h2>", "<div class='page-break'></div><h2>Introduction</h2>")
|
| 839 |
html_content = html_content.replace("<h2>Conclusion</h2>", "<div class='page-break'></div><h2>Conclusion</h2>")
|
| 840 |
html_content = html_content.replace("<h2>References</h2>", "<div class='page-break'></div><h2>References</h2>")
|
| 841 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>", "<div class='page-break'></div><h2>Surprise-Me Extension Report</h2>")
|
| 842 |
|
| 843 |
-
# Build header using metadata if provided.
|
| 844 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 845 |
header = ""
|
| 846 |
if metadata:
|
|
@@ -848,13 +837,13 @@ class ReportGenerator:
|
|
| 848 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 849 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 850 |
<hr/>"""
|
| 851 |
-
#
|
| 852 |
full_html = f"""
|
| 853 |
<html>
|
| 854 |
<head>
|
| 855 |
<meta charset="utf-8" />
|
| 856 |
<style>
|
| 857 |
-
body {{ font-family: Helvetica, sans-serif; margin: 40px; }}
|
| 858 |
h1 {{ font-size: 24pt; margin-bottom: 12px; text-align: left; }}
|
| 859 |
h2 {{ font-size: 20pt; margin-bottom: 10px; text-align: left; }}
|
| 860 |
h3 {{ font-size: 18pt; margin-bottom: 8px; text-align: left; }}
|
|
@@ -867,7 +856,6 @@ class ReportGenerator:
|
|
| 867 |
.page-break {{ page-break-before: always; }}
|
| 868 |
</style>
|
| 869 |
<script>
|
| 870 |
-
// You may add your JavaScript here if needed.
|
| 871 |
console.log("Report loaded successfully.");
|
| 872 |
</script>
|
| 873 |
</head>
|
|
@@ -891,64 +879,69 @@ class ReportGenerator:
|
|
| 891 |
options.add_argument("--headless")
|
| 892 |
options.add_argument("--disable-gpu")
|
| 893 |
options.add_argument("--no-sandbox")
|
| 894 |
-
options.add_argument("--window-size=1920,1080")
|
| 895 |
|
| 896 |
-
# Ensure you have ChromeDriver installed and in your PATH.
|
| 897 |
driver = webdriver.Chrome(options=options)
|
| 898 |
try:
|
| 899 |
-
# Load the HTML content directly using a data URL.
|
| 900 |
driver.get(f"data:text/html;charset=utf-8,{html_content}")
|
| 901 |
-
time.sleep(2)
|
| 902 |
screenshot_png = driver.get_screenshot_as_png()
|
| 903 |
return screenshot_png
|
| 904 |
finally:
|
| 905 |
-
driver.quit()
|
|
|
|
| 906 |
|
| 907 |
def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
|
| 908 |
# Generate the full HTML report
|
| 909 |
html_report = self.generate_report_html(solution_content, metadata)
|
| 910 |
|
| 911 |
-
#
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 944 |
pdf_buffer = io.BytesIO()
|
| 945 |
pisa_status = pisa.CreatePDF(html_report, dest=pdf_buffer)
|
| 946 |
-
|
| 947 |
-
# If errors are found during PDF conversion, use the fallback method.
|
| 948 |
if pisa_status.err:
|
| 949 |
-
logging.warning("PDF conversion issues detected - attempting
|
| 950 |
return self.fallback_pdf_generation(html_report)
|
| 951 |
-
|
| 952 |
return pdf_buffer.getvalue()
|
| 953 |
|
| 954 |
def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
|
|
|
| 17 |
from reportlab.lib.pagesizes import A4
|
| 18 |
from xhtml2pdf import pisa
|
| 19 |
import chromedriver_autoinstaller
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Set up logging basic configuration
|
| 22 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
|
|
| 30 |
# Helper functions for external APIs and PDF Processing
|
| 31 |
# =============================================================================
|
| 32 |
|
| 33 |
+
def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
    """Swap every ``[[Focus Placeholder N: instructions]]`` marker for a focus box.

    For each marker, the instructions (whitespace-stripped) are passed with
    ``context``, ``initial_query`` and ``crumbs`` to ``generate_focus_snippet``;
    the HTML it returns is wrapped in a styled ``div`` delimited by
    ``<!-- Focus N Start -->`` / ``<!-- Focus N End -->`` comments.

    Args:
        report_html: Report text containing zero or more focus placeholders.
        context: Forwarded unchanged to ``generate_focus_snippet``.
        initial_query: Forwarded unchanged to ``generate_focus_snippet``.
        crumbs: Forwarded unchanged to ``generate_focus_snippet``.

    Returns:
        ``report_html`` with each placeholder replaced. A placeholder whose
        snippet generation raises is replaced by an
        ``<!-- ERROR GENERATING FOCUS N -->`` comment instead of aborting the
        whole report.
    """
    # DOTALL (passed to re.sub below) lets the instructions span several lines.
    pattern = r"\[\[Focus Placeholder (\d+):(.*?)\]\]"

    def placeholder_replacer(match):
        placeholder_num = match.group(1)
        instructions = match.group(2).strip()
        logging.info("Generating focus box %s", placeholder_num)
        try:
            focus_html = generate_focus_snippet(instructions, context, initial_query, crumbs)
        except Exception as e:
            # Deliberate best effort: one failed box must not lose the report.
            # logging.exception keeps the traceback that a plain error() dropped.
            logging.exception("Focus %s failed: %s", placeholder_num, e)
            return f'<!-- ERROR GENERATING FOCUS {placeholder_num} -->'
        # Wrap the entire focus placeholder in a single div to preserve block-level grouping.
        return f'<!-- Focus {placeholder_num} Start --><div class="focus-placeholder" style="background: #ede8e8; border: 1px solid black; padding: 10px; margin: 10px 0;">\n{focus_html}\n</div><!-- Focus {placeholder_num} End -->'

    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 49 |
prompt = (f"""
|
|
|
|
| 65 |
- White background (#ffffff)
|
| 66 |
- overall dimension capped at 500px x 500px
|
| 67 |
- no introduction, conclusions or code fences -> Output the result directly
|
| 68 |
+
- create only the content for the mermaid (do not add comments of #color coding and stuffs inside the mermaid code), it's supposed to be only focused on the mermaid code required to render it
|
| 69 |
|
| 70 |
// Important
|
| 71 |
- Make the visuals content rich, there's no point having a visual if its content has no real value.
|
|
|
|
| 814 |
return all(checks)
|
| 815 |
|
| 816 |
class ReportGenerator:
|
| 817 |
+
def __init__(self, render_with_selenium: bool = False):
|
| 818 |
+
# Flag to determine if we are rendering the final PDF with Selenium
|
| 819 |
+
self.render_with_selenium = render_with_selenium
|
| 820 |
|
| 821 |
def generate_report_html(self, solution_content: str, metadata: dict = None) -> str:
|
| 822 |
+
# Normalize text and fix dash characters.
|
| 823 |
solution_content = unicodedata.normalize('NFKC', solution_content)
|
| 824 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
|
|
|
| 825 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
|
|
|
| 826 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
|
|
|
| 827 |
html_content = html_content.replace("<h2>Table of Contents</h2>", "<div class='page-break'></div><h2>Table of Contents</h2>")
|
| 828 |
html_content = html_content.replace("<h2>Introduction</h2>", "<div class='page-break'></div><h2>Introduction</h2>")
|
| 829 |
html_content = html_content.replace("<h2>Conclusion</h2>", "<div class='page-break'></div><h2>Conclusion</h2>")
|
| 830 |
html_content = html_content.replace("<h2>References</h2>", "<div class='page-break'></div><h2>References</h2>")
|
| 831 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>", "<div class='page-break'></div><h2>Surprise-Me Extension Report</h2>")
|
| 832 |
|
|
|
|
| 833 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 834 |
header = ""
|
| 835 |
if metadata:
|
|
|
|
| 837 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 838 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 839 |
<hr/>"""
|
| 840 |
+
# Force a white background for the entire page (overriding any light grey)
|
| 841 |
full_html = f"""
|
| 842 |
<html>
|
| 843 |
<head>
|
| 844 |
<meta charset="utf-8" />
|
| 845 |
<style>
|
| 846 |
+
body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
|
| 847 |
h1 {{ font-size: 24pt; margin-bottom: 12px; text-align: left; }}
|
| 848 |
h2 {{ font-size: 20pt; margin-bottom: 10px; text-align: left; }}
|
| 849 |
h3 {{ font-size: 18pt; margin-bottom: 8px; text-align: left; }}
|
|
|
|
| 856 |
.page-break {{ page-break-before: always; }}
|
| 857 |
</style>
|
| 858 |
<script>
|
|
|
|
| 859 |
console.log("Report loaded successfully.");
|
| 860 |
</script>
|
| 861 |
</head>
|
|
|
|
| 879 |
options.add_argument("--headless")
|
| 880 |
options.add_argument("--disable-gpu")
|
| 881 |
options.add_argument("--no-sandbox")
|
| 882 |
+
options.add_argument("--window-size=1920,1080")
|
| 883 |
|
|
|
|
| 884 |
driver = webdriver.Chrome(options=options)
|
| 885 |
try:
|
|
|
|
| 886 |
driver.get(f"data:text/html;charset=utf-8,{html_content}")
|
| 887 |
+
time.sleep(2)
|
| 888 |
screenshot_png = driver.get_screenshot_as_png()
|
| 889 |
return screenshot_png
|
| 890 |
finally:
|
| 891 |
+
driver.quit()
|
| 892 |
+
|
| 893 |
|
| 894 |
def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
    """Build the HTML report and convert it to PDF bytes with xhtml2pdf.

    Unless ``self.render_with_selenium`` is set, the HTML is pre-processed
    first: iframes whose ``srcdoc`` mentions "mermaid" are replaced by a
    static placeholder ``div``, placeholder CSS is injected into the first
    ``<style>`` tag, and the "Report alignment assessment" paragraph (when
    present) is wrapped in a word-wrapping ``div``.

    Args:
        solution_content: Report text passed to ``generate_report_html``.
        metadata: Optional metadata passed to ``generate_report_html``.

    Returns:
        The PDF bytes produced by ``pisa.CreatePDF``, or the result of
        ``self.fallback_pdf_generation`` when pisa reports errors.
    """
    import io

    # Generate the full HTML report
    html_report = self.generate_report_html(solution_content, metadata)

    # Optionally pre-process the HTML only if we are converting via pisa.
    # NOTE(review): assumes the whole pre-processing section belongs to this
    # branch - confirm against the original indentation.
    if not self.render_with_selenium:
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html_report, "html.parser")
        # Replace mermaid iframes with placeholders only if not using Selenium (since Selenium can render JS)
        for iframe in soup.find_all("iframe"):
            srcdoc = iframe.get("srcdoc", "")
            if "mermaid" in srcdoc:
                placeholder = soup.new_tag("div", **{"class": "mermaid-placeholder"})
                placeholder.string = "Mermaid diagram placeholder (not rendered in PDF)"
                iframe.replace_with(placeholder)
        html_report = str(soup)

        # Inject CSS for mermaid placeholders. Only the first <style> (the
        # report's head stylesheet) is touched, so <style> tags embedded
        # further down in the report body are not rewritten.
        html_report = html_report.replace("<style>", """<style>
@media print {
    .mermaid-placeholder {
        width: 500px;
        height: 500px;
        border: 1px solid #ccc;
        display: flex;
        align-items: center;
        justify-content: center;
        font-size: 12pt;
        color: #666;
        margin: 10px auto;
    }
    .visual-container { page-break-inside: avoid; }
    svg { max-width: 100% !important; height: auto !important; }
}
""", 1)

        # Wrap the alignment assessment (if present) in a div to control overflow.
        # The closing tag is added only when the wrapper was actually opened;
        # closing unconditionally left a stray </div> in reports that have
        # no assessment section.
        marker = "<p><b>Report alignment assessment:</b>"
        if marker in html_report:
            html_report = html_report.replace(
                marker,
                "<div style='max-width:100%; word-wrap: break-word;'>" + marker,
                1,
            )
            # Close the single wrapper right before the final </body>.
            head, body_close, tail = html_report.rpartition("</body>")
            if body_close:
                html_report = head + "</div>" + body_close + tail
            else:
                html_report += "</div>"

    # Convert HTML to PDF using xhtml2pdf.
    pdf_buffer = io.BytesIO()
    pisa_status = pisa.CreatePDF(html_report, dest=pdf_buffer)

    if pisa_status.err:
        logging.warning("PDF conversion issues detected - attempting Selenium fallback")
        return self.fallback_pdf_generation(html_report)

    return pdf_buffer.getvalue()
|
| 946 |
|
| 947 |
def handle_generate_report(query_name: str, user_name: str, final_report: str):
|