Guiyom committed on
Commit
aaf17eb
·
verified ·
1 Parent(s): f987582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -45
app.py CHANGED
@@ -936,58 +936,89 @@ class ReportGenerator:
936
  return full_html
937
 
938
  def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
939
- # Generate the full HTML report (including text, focus placeholders, and mermaid visuals)
940
  html_report = self.generate_report_html(solution_content, metadata)
941
 
942
- # Ensure chromedriver is installed
943
- import chromedriver_autoinstaller
944
- chromedriver_autoinstaller.install()
945
-
946
- from selenium import webdriver
947
- from selenium.webdriver.chrome.options import Options
948
- from selenium.common.exceptions import WebDriverException
949
  import tempfile
950
  import time
951
- import io
952
- from PIL import Image
953
-
954
- # Set up Selenium Chrome options (similar to your working screenshot code)
955
- options = Options()
956
- options.add_argument('--headless')
957
- options.add_argument('--no-sandbox')
958
- options.add_argument('--disable-dev-shm-usage')
959
- options.add_argument("--window-size=1920,1080")
960
- # Do not force the binary location here; let the webdriver find it from PATH or via chromedriver_autoinstaller
961
 
962
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963
  driver = webdriver.Chrome(options=options)
964
- except WebDriverException as e:
965
- logging.error("Unable to obtain driver for chrome")
966
- raise e
967
-
968
- try:
969
- # Write the HTML report to a temporary file
970
- tmp_html = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
971
- tmp_html.write(html_report.encode('utf-8'))
972
- tmp_html.flush()
973
- file_url = "file://" + tmp_html.name
974
-
975
- # Load the HTML file in headless Chrome
976
- driver.get(file_url)
977
- driver.implicitly_wait(10)
978
- time.sleep(5) # Allow extra time for JavaScript (e.g., Mermaid) to fully render
979
-
980
- # Take a full-page screenshot
981
- screenshot_png = driver.get_screenshot_as_png()
982
-
983
- # Convert the PNG screenshot to PDF using PIL
984
- image = Image.open(io.BytesIO(screenshot_png))
985
- pdf_io = io.BytesIO()
986
- image.save(pdf_io, format='PDF')
987
- pdf_bytes = pdf_io.getvalue()
988
- return pdf_bytes
989
- finally:
 
990
  driver.quit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
 
992
  def handle_generate_report(query_name: str, user_name: str, final_report: str):
993
  try:
 
936
  return full_html
937
 
938
  def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
939
+ # Generate the full HTML report (includes text, focus placeholders, and mermaid visuals as iframes)
940
  html_report = self.generate_report_html(solution_content, metadata)
941
 
942
+ # --- NEW STEP: Replace mermaid iframes with static images captured via Selenium ---
943
+ from bs4 import BeautifulSoup
 
 
 
 
 
944
  import tempfile
945
  import time
946
+ import base64
 
 
 
 
 
 
 
 
 
947
 
948
+ # Parse the HTML
949
+ soup = BeautifulSoup(html_report, "html.parser")
950
+
951
+ # Find all mermaid visual iframes (assumed to have class "mermaid-frame")
952
+ mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
953
+
954
+ if mermaid_iframes:
955
+ # Set up Selenium with a window size for visuals (e.g., 600x600 to capture a 500x500 content area)
956
+ from selenium import webdriver
957
+ from selenium.webdriver.chrome.options import Options
958
+ options = Options()
959
+ options.add_argument("--headless")
960
+ options.add_argument("--no-sandbox")
961
+ options.add_argument("--disable-dev-shm-usage")
962
+ options.add_argument("--window-size=600,600")
963
+ # Do not override binary_location since chromedriver_autoinstaller handles it
964
  driver = webdriver.Chrome(options=options)
965
+
966
+ for iframe in mermaid_iframes:
967
+ # Assume the iframe has its content in srcdoc attribute (as generated in generate_visual_snippet)
968
+ srcdoc = iframe.get("srcdoc")
969
+ if srcdoc:
970
+ # Write the iframe content to a temporary HTML file
971
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
972
+ tmp_file.write(srcdoc.encode("utf-8"))
973
+ tmp_file.flush()
974
+ file_url = "file://" + tmp_file.name
975
+
976
+ # Load the visual-only HTML in Selenium
977
+ driver.get(file_url)
978
+ # Allow time for JavaScript (mermaid) to render properly
979
+ time.sleep(3)
980
+ # Capture screenshot (the screenshot will capture the entire window)
981
+ screenshot_png = driver.get_screenshot_as_png()
982
+ # Optionally, you can crop/resize the image using PIL if necessary.
983
+ # For simplicity, we assume the window was sized appropriately.
984
+ b64_img = base64.b64encode(screenshot_png).decode("utf-8")
985
+ # Create a new image tag with the captured screenshot
986
+ new_tag = soup.new_tag("img")
987
+ new_tag["src"] = "data:image/png;base64," + b64_img
988
+ # Set style attributes to control the size in the final PDF
989
+ new_tag["style"] = "max-width: 500px; display: block; margin: auto;"
990
+ # Replace the original iframe with the new image tag
991
+ iframe.replace_with(new_tag)
992
  driver.quit()
993
+
994
+ # Convert the (possibly) modified HTML back to a string
995
+ final_html = str(soup)
996
+
997
+ # --- Generate the final PDF from final_html using xhtml2pdf (A4 layout) ---
998
+ import io
999
+ from xhtml2pdf import pisa
1000
+
1001
+ pdf_buffer = io.BytesIO()
1002
+ pisa_status = pisa.CreatePDF(final_html, dest=pdf_buffer,
1003
+ link_callback=lambda uri, rel: uri, # Adjust if needed for local assets
1004
+ default_css="""
1005
+ @page {
1006
+ size: A4;
1007
+ margin: 0.5in;
1008
+ }
1009
+ body {
1010
+ font-family: Helvetica, sans-serif;
1011
+ background: white;
1012
+ margin: 0;
1013
+ padding: 0;
1014
+ }
1015
+ """)
1016
+ if pisa_status.err:
1017
+ # If there is an error, you can either log it or fallback to another method
1018
+ logging.error("Error generating PDF with xhtml2pdf.")
1019
+ return None # or your fallback_pdf_generation(html_report)
1020
+
1021
+ return pdf_buffer.getvalue()
1022
 
1023
  def handle_generate_report(query_name: str, user_name: str, final_report: str):
1024
  try: