Guiyom committed on
Commit
fb148fd
·
verified ·
1 Parent(s): 47e5625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -73
app.py CHANGED
@@ -2918,7 +2918,11 @@ class ReportGenerator:
2918
  date_str = datetime.now().strftime("%Y-%m-%d")
2919
  header = ""
2920
  if metadata:
2921
- header = f"<p>Search Query: {metadata.get('Query name', 'N/A')}<br>Author: {metadata.get('User name', 'N/A')} | Date: {metadata.get('Date', date_str)}</p>"
 
 
 
 
2922
  soup = BeautifulSoup(html_report, "html.parser")
2923
  body_tag = soup.body
2924
  if body_tag:
@@ -2928,79 +2932,46 @@ class ReportGenerator:
2928
  # Parse the HTML
2929
  logging.info(f"ReportGenerator: soup report generated:\n{soup}")
2930
 
2931
- # Find all mermaid visual iframes (assumed to have class "visual-frame")
2932
- mermaid_iframes = soup.find_all("iframe", class_="visual-frame")
2933
 
2934
- if self.render_with_selenium and mermaid_iframes:
 
2935
  import base64, tempfile, time
2936
  import chromedriver_autoinstaller
2937
  chromedriver_autoinstaller.install()
 
2938
  from selenium import webdriver
2939
  from selenium.webdriver.chrome.options import Options
2940
  from selenium.webdriver.chrome.service import Service
2941
-
2942
  options = Options()
2943
- # For debugging, you may temporarily disable headless mode by commenting the next line.
2944
  options.add_argument("--headless")
2945
  options.add_argument("--no-sandbox")
2946
  options.add_argument("--disable-dev-shm-usage")
2947
- # Increase window size to capture more content
2948
- options.add_argument("--window-size=1600,1200")
2949
  options.add_argument("--force-device-scale-factor=2")
2950
  service = Service(log_path=os.devnull)
2951
  driver = webdriver.Chrome(service=service, options=options)
2952
 
2953
- logging.info(f"Found {len(mermaid_iframes)} visual iframes to process.")
2954
-
2955
  for iframe in mermaid_iframes:
 
2956
  srcdoc = iframe.get("srcdoc")
2957
  if srcdoc:
2958
  with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
2959
  tmp_file.write(srcdoc.encode("utf-8"))
2960
  tmp_file.flush()
2961
  file_url = "file://" + tmp_file.name
2962
-
2963
- driver.get(file_url)
2964
- try:
2965
- # Wait up to 20 seconds until either a .mermaid element or an <svg> element is present.
2966
- wait = WebDriverWait(driver, 20)
2967
- wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".mermaid, svg")))
2968
- except Exception as e:
2969
- logging.error("No rendered element found in iframe: " + str(e))
2970
-
2971
- # Scroll down to ensure all content is in view.
2972
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
2973
- # Wait an extra 3 seconds to allow heavy visuals to finish rendering.
2974
- time.sleep(3)
2975
 
 
 
2976
  screenshot_png = driver.get_screenshot_as_png()
2977
-
2978
- from PIL import Image
2979
- from io import BytesIO
2980
- img = Image.open(BytesIO(screenshot_png))
2981
- # Log the image size for debugging (width, height)
2982
- logging.info("Captured screenshot dimensions: " + str(img.size))
2983
- # Crop the screenshot to remove extra spaces if possible.
2984
- bbox = img.getbbox()
2985
- if bbox:
2986
- cropped_img = img.crop(bbox)
2987
- else:
2988
- cropped_img = img
2989
- buffer = BytesIO()
2990
- cropped_img.save(buffer, format='PNG')
2991
- cropped_png = buffer.getvalue()
2992
-
2993
- b64_img = base64.b64encode(cropped_png).decode("utf-8")
2994
  new_tag = soup.new_tag("img")
2995
- new_tag["style"] = "max-width: 100%; display: block; margin: auto; page-break-after: avoid;"
2996
  new_tag["src"] = "data:image/png;base64," + b64_img
 
2997
  iframe.replace_with(new_tag)
2998
-
2999
  driver.quit()
3000
- else:
3001
- logging.info("Skipping Selenium-based visual conversion since render_with_selenium is disabled or no iframes found.")
3002
-
3003
-
3004
  # Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
3005
  body_tag = soup.find("body")
3006
  body_content = body_tag.decode_contents() if body_tag else ""
@@ -3013,12 +2984,10 @@ class ReportGenerator:
3013
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
3014
  <style>
3015
  body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
3016
- h1 {{ font-size: 20pt; margin-bottom: 12px; text-align: left; font-weight: bold;}}
3017
- h2 {{ font-size: 16pt; margin-bottom: 10px; text-align: left; font-weight: bold;}}
3018
- h3 {{ font-size: 14pt; margin-bottom: 8px; text-align: left; font-weight: bold;}}
3019
- h4 {{ font-size: 12pt; text-align: left; font-weight: bold;}}
3020
  p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
3021
- table {{ border: 1px solid black; }}
3022
  pre, div {{ white-space: pre-wrap; }}
3023
  ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
3024
  hr {{ border: 1px solid #ccc; margin: 20px 0; }}
@@ -3044,11 +3013,12 @@ class ReportGenerator:
3044
  logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
3045
 
3046
  # Crafting compliance
3047
- final_html = final_html.replace("<h1","<br><br><br><h1").replace("</h1>","</h1><br>")
3048
- final_html = final_html.replace("<h2","<br><br><b><h2").replace("</h2>","</b></h2><br>")
3049
- final_html = final_html.replace("<h3","<br><br><h3").replace("</h3>","</b></h3><br>")
3050
- final_html = final_html.replace("<h4","<br><h4")
3051
- final_html = final_html.replace("<div","<br><div")
 
3052
  final_html = final_html.replace("<table>","<br><table>")
3053
 
3054
  # Generate the final PDF from final_html using xhtml2pdf (A4 layout)
@@ -3067,30 +3037,19 @@ body {
3067
  padding: 0;
3068
  }
3069
  h1 {
3070
- font-size: 20pt;
3071
  margin-bottom: 12px;
3072
- text-align: left;
3073
- font-weight: bold;
3074
  }
3075
  h2 {
3076
- font-size: 16pt;
3077
  margin-bottom: 10px;
3078
- text-align: left;
3079
- font-weight: bold;
3080
  }
3081
  h3 {
3082
- font-size: 14pt;
3083
  margin-bottom: 8px;
3084
  text-align: left;
3085
- font-weight: bold;
3086
- }
3087
- h4 {
3088
- font-size: 12pt;
3089
- text-align: left;
3090
- font-weight: bold;
3091
- }
3092
- table {
3093
- border: 1px solid black;
3094
  }
3095
  p {
3096
  font-size: 11pt;
@@ -3132,7 +3091,7 @@ th {
3132
 
3133
  def handle_generate_report(query_name: str, user_name: str, final_report: str):
3134
  try:
3135
- report_generator = ReportGenerator(render_with_selenium=False) # Enable Selenium
3136
  metadata = {
3137
  "Query name": query_name,
3138
  "User name": user_name,
 
2918
  date_str = datetime.now().strftime("%Y-%m-%d")
2919
  header = ""
2920
  if metadata:
2921
+ header = f"""
2922
+ <h1>Search Query: {metadata.get('Query name', 'N/A')}</h1>
2923
+ <p>Author: {metadata.get('User name', 'N/A')}</p>
2924
+ <p>Date: {metadata.get('Date', date_str)}</p>
2925
+ <hr/>"""
2926
  soup = BeautifulSoup(html_report, "html.parser")
2927
  body_tag = soup.body
2928
  if body_tag:
 
2932
  # Parse the HTML
2933
  logging.info(f"ReportGenerator: soup report generated:\n{soup}")
2934
 
2935
+ # Find all mermaid visual iframes (assumed to have class "mermaid-frame")
2936
+ mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
2937
 
2938
+ if mermaid_iframes:
2939
+ # Set up Selenium with a window size and high DPI for better image resolution
2940
  import base64, tempfile, time
2941
  import chromedriver_autoinstaller
2942
  chromedriver_autoinstaller.install()
2943
+ # (Removed the explicit print statement to keep logs clean)
2944
  from selenium import webdriver
2945
  from selenium.webdriver.chrome.options import Options
2946
  from selenium.webdriver.chrome.service import Service
 
2947
  options = Options()
 
2948
  options.add_argument("--headless")
2949
  options.add_argument("--no-sandbox")
2950
  options.add_argument("--disable-dev-shm-usage")
2951
+ options.add_argument("--window-size=1200,1200")
 
2952
  options.add_argument("--force-device-scale-factor=2")
2953
  service = Service(log_path=os.devnull)
2954
  driver = webdriver.Chrome(service=service, options=options)
2955
 
 
 
2956
  for iframe in mermaid_iframes:
2957
+ # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
2958
  srcdoc = iframe.get("srcdoc")
2959
  if srcdoc:
2960
  with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
2961
  tmp_file.write(srcdoc.encode("utf-8"))
2962
  tmp_file.flush()
2963
  file_url = "file://" + tmp_file.name
 
 
 
 
 
 
 
 
 
 
 
 
 
2964
 
2965
+ driver.get(file_url)
2966
+ time.sleep(3) # Allow time for JavaScript (e.g., mermaid) to render
2967
  screenshot_png = driver.get_screenshot_as_png()
2968
+ b64_img = base64.b64encode(screenshot_png).decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2969
  new_tag = soup.new_tag("img")
 
2970
  new_tag["src"] = "data:image/png;base64," + b64_img
2971
+ new_tag["style"] = "max-width: 500px; display: block; margin: auto;"
2972
  iframe.replace_with(new_tag)
 
2973
  driver.quit()
2974
+
 
 
 
2975
  # Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
2976
  body_tag = soup.find("body")
2977
  body_content = body_tag.decode_contents() if body_tag else ""
 
2984
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
2985
  <style>
2986
  body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
2987
+ h1 {{ font-size: 24pt; margin-bottom: 12px; text-align: left; }}
2988
+ h2 {{ font-size: 20pt; margin-bottom: 10px; text-align: left; }}
2989
+ h3 {{ font-size: 18pt; margin-bottom: 8px; text-align: left; }}
 
2990
  p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
 
2991
  pre, div {{ white-space: pre-wrap; }}
2992
  ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
2993
  hr {{ border: 1px solid #ccc; margin: 20px 0; }}
 
3013
  logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
3014
 
3015
  # Crafting compliance
3016
+ final_html = final_html.replace("<h1>","<br><br><br><h1>").replace("</h1>","</h1><br>")
3017
+ final_html = final_html.replace("<h2>","<br><br><b><h2>").replace("</h2>","</b></h2><br>")
3018
+ final_html = final_html.replace("<h3>","<br><br><h3>").replace("</h3>","</b></h3><br>")
3019
+ final_html = final_html.replace("<h4>","<br><h4>")
3020
+ #final_html = final_html.replace("<p>","<br><p>")
3021
+ final_html = final_html.replace("<div>","<br><div>")
3022
  final_html = final_html.replace("<table>","<br><table>")
3023
 
3024
  # Generate the final PDF from final_html using xhtml2pdf (A4 layout)
 
3037
  padding: 0;
3038
  }
3039
  h1 {
3040
+ font-size: 24pt;
3041
  margin-bottom: 12px;
3042
+ text-align: left;
 
3043
  }
3044
  h2 {
3045
+ font-size: 18pt;
3046
  margin-bottom: 10px;
3047
+ text-align: left;
 
3048
  }
3049
  h3 {
3050
+ font-size: 16pt;
3051
  margin-bottom: 8px;
3052
  text-align: left;
 
 
 
 
 
 
 
 
 
3053
  }
3054
  p {
3055
  font-size: 11pt;
 
3091
 
3092
  def handle_generate_report(query_name: str, user_name: str, final_report: str):
3093
  try:
3094
+ report_generator = ReportGenerator(render_with_selenium=False)
3095
  metadata = {
3096
  "Query name": query_name,
3097
  "User name": user_name,