Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Feb 17, 2025

Commit

aaf17eb

verified ·

1 Parent(s): f987582

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -45

app.py CHANGED Viewed

@@ -936,58 +936,89 @@ class ReportGenerator:
         return full_html
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
         # Pre-change implementation (lines marked "-" are removed by commit aaf17eb):
         # the full HTML report is opened in headless Chrome, one PNG screenshot is
         # taken, and that single image is saved as a PDF through PIL.
         # Returns the PDF bytes; re-raises WebDriverException if Chrome cannot start.
-        # Generate the full HTML report (including text, focus placeholders, and mermaid visuals)
         html_report = self.generate_report_html(solution_content, metadata)
-        # Ensure chromedriver is installed
-        import chromedriver_autoinstaller
-        chromedriver_autoinstaller.install()
-        from selenium import webdriver
-        from selenium.webdriver.chrome.options import Options
-        from selenium.common.exceptions import WebDriverException
         import tempfile
         import time
-        import io
-        from PIL import Image
-        # Set up Selenium Chrome options (similar to your working screenshot code)
-        options = Options()
-        options.add_argument('--headless')
-        options.add_argument('--no-sandbox')
-        options.add_argument('--disable-dev-shm-usage')
-        options.add_argument("--window-size=1920,1080")
-        # Do not force the binary location here; let the webdriver find it from PATH or via chromedriver_autoinstaller
-        try:
             driver = webdriver.Chrome(options=options)
-        except WebDriverException as e:
-            logging.error("Unable to obtain driver for chrome")
-            raise e
-        try:
             # NOTE(review): delete=False with no visible cleanup -- the temporary
             # HTML file appears to be left on disk after every call.
-            # Write the HTML report to a temporary file
-            tmp_html = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
-            tmp_html.write(html_report.encode('utf-8'))
-            tmp_html.flush()
-            file_url = "file://" + tmp_html.name
-            # Load the HTML file in headless Chrome
-            driver.get(file_url)
-            driver.implicitly_wait(10)
-            time.sleep(5)  # Allow extra time for JavaScript (e.g., Mermaid) to fully render
             # NOTE(review): get_screenshot_as_png presumably captures only the
             # 1920x1080 window rather than the whole page -- confirm against Selenium docs.
-            # Take a full-page screenshot
-            screenshot_png = driver.get_screenshot_as_png()
-            # Convert the PNG screenshot to PDF using PIL
-            image = Image.open(io.BytesIO(screenshot_png))
-            pdf_io = io.BytesIO()
-            image.save(pdf_io, format='PDF')
-            pdf_bytes = pdf_io.getvalue()
-            return pdf_bytes
-        finally:
             # Always shut the browser down, even if rendering or conversion failed.
             driver.quit()
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
     try:

         return full_html
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
         """Render the report as an A4 PDF and return the PDF bytes.

         The HTML from ``generate_report_html`` is parsed with BeautifulSoup.
         Every ``<iframe class="mermaid-frame">`` that carries a ``srcdoc`` is
         loaded in headless Chrome, screenshotted, and replaced by an inline
         base64 PNG ``<img>``. The resulting HTML is converted with xhtml2pdf.

         Args:
             solution_content: Report content forwarded to ``generate_report_html``.
             metadata: Optional metadata forwarded to ``generate_report_html``.

         Returns:
             The PDF document as bytes, or ``None`` when xhtml2pdf reports an
             error (existing behaviour, kept for callers despite the ``bytes``
             annotation).
         """
         import base64
         import contextlib
         import io
         import os
         import tempfile
         import time
         from pathlib import Path

         from bs4 import BeautifulSoup

         html_report = self.generate_report_html(solution_content, metadata)
         soup = BeautifulSoup(html_report, "html.parser")
         # Mermaid visuals are looked up as iframes with class "mermaid-frame".
         mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
         if mermaid_iframes:
             # Selenium is only needed (and only imported) when there is a visual to render.
             from selenium import webdriver
             from selenium.webdriver.chrome.options import Options

             options = Options()
             options.add_argument("--headless")
             options.add_argument("--no-sandbox")
             options.add_argument("--disable-dev-shm-usage")
             # The screenshot below captures the whole window, so the window size
             # determines the size of every embedded image.
             options.add_argument("--window-size=600,600")
             # NOTE(review): nothing in this method installs or locates chromedriver;
             # confirm it is provisioned elsewhere (e.g. chromedriver_autoinstaller.install()
             # at startup), otherwise webdriver.Chrome() may fail here.
             driver = webdriver.Chrome(options=options)
             try:
                 for iframe in mermaid_iframes:
                     srcdoc = iframe.get("srcdoc")
                     if not srcdoc:
                         # No inline document to render; the iframe is left untouched.
                         continue
                     # Chrome needs a real file to load, so persist the snippet briefly.
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
                         tmp_file.write(srcdoc.encode("utf-8"))
                         tmp_path = tmp_file.name
                     try:
                         # as_uri() yields a valid file:// URL on every platform.
                         driver.get(Path(tmp_path).as_uri())
                         # Fixed pause to let the page's JavaScript (mermaid) finish rendering.
                         time.sleep(3)
                         screenshot_png = driver.get_screenshot_as_png()
                     finally:
                         # Best-effort cleanup so temp files do not accumulate per report.
                         with contextlib.suppress(OSError):
                             os.unlink(tmp_path)
                     b64_img = base64.b64encode(screenshot_png).decode("utf-8")
                     new_tag = soup.new_tag("img")
                     new_tag["src"] = "data:image/png;base64," + b64_img
                     # Constrain the image size in the final PDF.
                     new_tag["style"] = "max-width: 500px; display: block; margin: auto;"
                     iframe.replace_with(new_tag)
             finally:
                 # Always release the browser process, even if a render step raised.
                 driver.quit()
         final_html = str(soup)

         from xhtml2pdf import pisa

         # Same stylesheet text as before, spelled line by line.
         default_css = (
             "\n"
             "            @page {\n"
             "              size: A4;\n"
             "              margin: 0.5in;\n"
             "            }\n"
             "            body {\n"
             "              font-family: Helvetica, sans-serif;\n"
             "              background: white;\n"
             "              margin: 0;\n"
             "              padding: 0;\n"
             "            }\n"
             "            "
         )
         pdf_buffer = io.BytesIO()
         pisa_status = pisa.CreatePDF(
             final_html,
             dest=pdf_buffer,
             # Identity callback: URIs are handed to xhtml2pdf unchanged.
             link_callback=lambda uri, rel: uri,
             default_css=default_css,
         )
         if pisa_status.err:
             logging.error("Error generating PDF with xhtml2pdf.")
             return None
         return pdf_buffer.getvalue()
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
     try: