Spaces:

10gen
/

deepsearchitv2

Running

App Files Files Community

Guiyom committed on Mar 11, 2025

Commit

fb148fd

verified ·

1 Parent(s): 47e5625

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -73

app.py CHANGED Viewed

@@ -2918,7 +2918,11 @@ class ReportGenerator:
         date_str = datetime.now().strftime("%Y-%m-%d")
         header = ""
         if metadata:
-            header = f"<p>Search Query: {metadata.get('Query name', 'N/A')}<br>Author: {metadata.get('User name', 'N/A')} | Date: {metadata.get('Date', date_str)}</p>"
         soup = BeautifulSoup(html_report, "html.parser")
         body_tag = soup.body
         if body_tag:
@@ -2928,79 +2932,46 @@ class ReportGenerator:
         # Parse the HTML
         logging.info(f"ReportGenerator: soup report generated:\n{soup}")
-        # Find all mermaid visual iframes (assumed to have class "visual-frame")
-        mermaid_iframes = soup.find_all("iframe", class_="visual-frame")
-        if self.render_with_selenium and mermaid_iframes:
             import base64, tempfile, time
             import chromedriver_autoinstaller
             chromedriver_autoinstaller.install()
             from selenium import webdriver
             from selenium.webdriver.chrome.options import Options
             from selenium.webdriver.chrome.service import Service
             options = Options()
-            # For debugging, you may temporarily disable headless mode by commenting the next line.
             options.add_argument("--headless")
             options.add_argument("--no-sandbox")
             options.add_argument("--disable-dev-shm-usage")
-            # Increase window size to capture more content
-            options.add_argument("--window-size=1600,1200")
             options.add_argument("--force-device-scale-factor=2")
             service = Service(log_path=os.devnull)
             driver = webdriver.Chrome(service=service, options=options)
-            logging.info(f"Found {len(mermaid_iframes)} visual iframes to process.")
             for iframe in mermaid_iframes:
                 srcdoc = iframe.get("srcdoc")
                 if srcdoc:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
                         tmp_file.write(srcdoc.encode("utf-8"))
                         tmp_file.flush()
                         file_url = "file://" + tmp_file.name
-                    driver.get(file_url)
-                    try:
-                        # Wait up to 20 seconds until either a .mermaid element or an <svg> element is present.
-                        wait = WebDriverWait(driver, 20)
-                        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".mermaid, svg")))
-                    except Exception as e:
-                        logging.error("No rendered element found in iframe: " + str(e))
-                    # Scroll down to ensure all content is in view.
-                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-                    # Wait an extra 3 seconds to allow heavy visuals to finish rendering.
-                    time.sleep(3)
                     screenshot_png = driver.get_screenshot_as_png()
-                    from PIL import Image
-                    from io import BytesIO
-                    img = Image.open(BytesIO(screenshot_png))
-                    # Log the image size for debugging (width, height)
-                    logging.info("Captured screenshot dimensions: " + str(img.size))
-                    # Crop the screenshot to remove extra spaces if possible.
-                    bbox = img.getbbox()
-                    if bbox:
-                        cropped_img = img.crop(bbox)
-                    else:
-                        cropped_img = img
-                    buffer = BytesIO()
-                    cropped_img.save(buffer, format='PNG')
-                    cropped_png = buffer.getvalue()
-                    b64_img = base64.b64encode(cropped_png).decode("utf-8")
                     new_tag = soup.new_tag("img")
-                    new_tag["style"] = "max-width: 100%; display: block; margin: auto; page-break-after: avoid;"
                     new_tag["src"] = "data:image/png;base64," + b64_img
                     iframe.replace_with(new_tag)
             driver.quit()
-        else:
-            logging.info("Skipping Selenium-based visual conversion since render_with_selenium is disabled or no iframes found.")
         # Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
         body_tag = soup.find("body")
         body_content = body_tag.decode_contents() if body_tag else ""
@@ -3013,12 +2984,10 @@ class ReportGenerator:
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <style>
         body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
-        h1 {{ font-size: 20pt; margin-bottom: 12px; text-align: left; font-weight: bold;}}
-        h2 {{ font-size: 16pt; margin-bottom: 10px; text-align: left; font-weight: bold;}}
-        h3 {{ font-size: 14pt; margin-bottom: 8px; text-align: left; font-weight: bold;}}
-        h4 {{ font-size: 12pt; text-align: left; font-weight: bold;}}
         p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
-        table {{ border: 1px solid black; }}
         pre, div {{ white-space: pre-wrap; }}
         ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
         hr {{ border: 1px solid #ccc; margin: 20px 0; }}
@@ -3044,11 +3013,12 @@ class ReportGenerator:
         logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
         # Crafting compliance
-        final_html = final_html.replace("<h1","<br><br><br><h1").replace("</h1>","</h1><br>")
-        final_html = final_html.replace("<h2","<br><br><b><h2").replace("</h2>","</b></h2><br>")
-        final_html = final_html.replace("<h3","<br><br><h3").replace("</h3>","</b></h3><br>")
-        final_html = final_html.replace("<h4","<br><h4")
-        final_html = final_html.replace("<div","<br><div")
         final_html = final_html.replace("<table>","<br><table>")
         # Generate the final PDF from final_html using xhtml2pdf (A4 layout)
@@ -3067,30 +3037,19 @@ body {
   padding: 0;
 }
 h1 {
-  font-size: 20pt;
   margin-bottom: 12px;
-  text-align: left;
-  font-weight: bold;
 }
 h2 {
-  font-size: 16pt;
   margin-bottom: 10px;
-  text-align: left;
-  font-weight: bold;
 }
 h3 {
-  font-size: 14pt;
   margin-bottom: 8px;
   text-align: left;
-  font-weight: bold;
-}
-h4 {
-  font-size: 12pt;
-  text-align: left;
-  font-weight: bold;
-}
-table {
-  border: 1px solid black;
 }
 p {
   font-size: 11pt;
@@ -3132,7 +3091,7 @@ th {
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
     try:
-        report_generator = ReportGenerator(render_with_selenium=False)  # Enable Selenium
         metadata = {
             "Query name": query_name,
             "User name": user_name,

         date_str = datetime.now().strftime("%Y-%m-%d")
         header = ""
         if metadata:
+            header = f"""
+<h1>Search Query: {metadata.get('Query name', 'N/A')}</h1>
+<p>Author: {metadata.get('User name', 'N/A')}</p>
+<p>Date: {metadata.get('Date', date_str)}</p>
+<hr/>"""
         soup = BeautifulSoup(html_report, "html.parser")
         body_tag = soup.body
         if body_tag:
         # Parse the HTML
         logging.info(f"ReportGenerator: soup report generated:\n{soup}")
+        # Find all mermaid visual iframes (assumed to have class "mermaid-frame")
+        mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
+        if mermaid_iframes:
+            # Set up Selenium with a window size and high DPI for better image resolution
             import base64, tempfile, time
             import chromedriver_autoinstaller
             chromedriver_autoinstaller.install()
+            # (Removed the explicit print statement to keep logs clean)
             from selenium import webdriver
             from selenium.webdriver.chrome.options import Options
             from selenium.webdriver.chrome.service import Service
             options = Options()
             options.add_argument("--headless")
             options.add_argument("--no-sandbox")
             options.add_argument("--disable-dev-shm-usage")
+            options.add_argument("--window-size=1200,1200")
             options.add_argument("--force-device-scale-factor=2")
             service = Service(log_path=os.devnull)
             driver = webdriver.Chrome(service=service, options=options)
             for iframe in mermaid_iframes:
+                # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
                 srcdoc = iframe.get("srcdoc")
                 if srcdoc:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
                         tmp_file.write(srcdoc.encode("utf-8"))
                         tmp_file.flush()
                         file_url = "file://" + tmp_file.name
+                    driver.get(file_url)
+                    time.sleep(3)  # Allow time for JavaScript (e.g., mermaid) to render
                     screenshot_png = driver.get_screenshot_as_png()
+                    b64_img = base64.b64encode(screenshot_png).decode("utf-8")
                     new_tag = soup.new_tag("img")
                     new_tag["src"] = "data:image/png;base64," + b64_img
+                    new_tag["style"] = "max-width: 500px; display: block; margin: auto;"
                     iframe.replace_with(new_tag)
             driver.quit()
         # Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
         body_tag = soup.find("body")
         body_content = body_tag.decode_contents() if body_tag else ""
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <style>
         body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
+        h1 {{ font-size: 24pt; margin-bottom: 12px; text-align: left; }}
+        h2 {{ font-size: 20pt; margin-bottom: 10px; text-align: left; }}
+        h3 {{ font-size: 18pt; margin-bottom: 8px; text-align: left; }}
         p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
         pre, div {{ white-space: pre-wrap; }}
         ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
         hr {{ border: 1px solid #ccc; margin: 20px 0; }}
         logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
         # Crafting compliance
+        final_html = final_html.replace("<h1>","<br><br><br><h1>").replace("</h1>","</h1><br>")
+        final_html = final_html.replace("<h2>","<br><br><b><h2>").replace("</h2>","</b></h2><br>")
+        final_html = final_html.replace("<h3>","<br><br><h3>").replace("</h3>","</b></h3><br>")
+        final_html = final_html.replace("<h4>","<br><h4>")
+        #final_html = final_html.replace("<p>","<br><p>")
+        final_html = final_html.replace("<div>","<br><div>")
         final_html = final_html.replace("<table>","<br><table>")
         # Generate the final PDF from final_html using xhtml2pdf (A4 layout)
   padding: 0;
 }
 h1 {
+  font-size: 24pt;
   margin-bottom: 12px;
+  text-align: left;
 }
 h2 {
+  font-size: 18pt;
   margin-bottom: 10px;
+  text-align: left;
 }
 h3 {
+  font-size: 16pt;
   margin-bottom: 8px;
   text-align: left;
 }
 p {
   font-size: 11pt;
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
     try:
+        report_generator = ReportGenerator(render_with_selenium=False)
         metadata = {
             "Query name": query_name,
             "User name": user_name,