Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -2918,7 +2918,11 @@ class ReportGenerator:
|
|
| 2918 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 2919 |
header = ""
|
| 2920 |
if metadata:
|
| 2921 |
-
header = f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2922 |
soup = BeautifulSoup(html_report, "html.parser")
|
| 2923 |
body_tag = soup.body
|
| 2924 |
if body_tag:
|
|
@@ -2928,79 +2932,46 @@ class ReportGenerator:
|
|
| 2928 |
# Parse the HTML
|
| 2929 |
logging.info(f"ReportGenerator: soup report generated:\n{soup}")
|
| 2930 |
|
| 2931 |
-
# Find all mermaid visual iframes (assumed to have class "
|
| 2932 |
-
mermaid_iframes = soup.find_all("iframe", class_="
|
| 2933 |
|
| 2934 |
-
if
|
|
|
|
| 2935 |
import base64, tempfile, time
|
| 2936 |
import chromedriver_autoinstaller
|
| 2937 |
chromedriver_autoinstaller.install()
|
|
|
|
| 2938 |
from selenium import webdriver
|
| 2939 |
from selenium.webdriver.chrome.options import Options
|
| 2940 |
from selenium.webdriver.chrome.service import Service
|
| 2941 |
-
|
| 2942 |
options = Options()
|
| 2943 |
-
# For debugging, you may temporarily disable headless mode by commenting the next line.
|
| 2944 |
options.add_argument("--headless")
|
| 2945 |
options.add_argument("--no-sandbox")
|
| 2946 |
options.add_argument("--disable-dev-shm-usage")
|
| 2947 |
-
|
| 2948 |
-
options.add_argument("--window-size=1600,1200")
|
| 2949 |
options.add_argument("--force-device-scale-factor=2")
|
| 2950 |
service = Service(log_path=os.devnull)
|
| 2951 |
driver = webdriver.Chrome(service=service, options=options)
|
| 2952 |
|
| 2953 |
-
logging.info(f"Found {len(mermaid_iframes)} visual iframes to process.")
|
| 2954 |
-
|
| 2955 |
for iframe in mermaid_iframes:
|
|
|
|
| 2956 |
srcdoc = iframe.get("srcdoc")
|
| 2957 |
if srcdoc:
|
| 2958 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
|
| 2959 |
tmp_file.write(srcdoc.encode("utf-8"))
|
| 2960 |
tmp_file.flush()
|
| 2961 |
file_url = "file://" + tmp_file.name
|
| 2962 |
-
|
| 2963 |
-
driver.get(file_url)
|
| 2964 |
-
try:
|
| 2965 |
-
# Wait up to 20 seconds until either a .mermaid element or an <svg> element is present.
|
| 2966 |
-
wait = WebDriverWait(driver, 20)
|
| 2967 |
-
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".mermaid, svg")))
|
| 2968 |
-
except Exception as e:
|
| 2969 |
-
logging.error("No rendered element found in iframe: " + str(e))
|
| 2970 |
-
|
| 2971 |
-
# Scroll down to ensure all content is in view.
|
| 2972 |
-
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
| 2973 |
-
# Wait an extra 3 seconds to allow heavy visuals to finish rendering.
|
| 2974 |
-
time.sleep(3)
|
| 2975 |
|
|
|
|
|
|
|
| 2976 |
screenshot_png = driver.get_screenshot_as_png()
|
| 2977 |
-
|
| 2978 |
-
from PIL import Image
|
| 2979 |
-
from io import BytesIO
|
| 2980 |
-
img = Image.open(BytesIO(screenshot_png))
|
| 2981 |
-
# Log the image size for debugging (width, height)
|
| 2982 |
-
logging.info("Captured screenshot dimensions: " + str(img.size))
|
| 2983 |
-
# Crop the screenshot to remove extra spaces if possible.
|
| 2984 |
-
bbox = img.getbbox()
|
| 2985 |
-
if bbox:
|
| 2986 |
-
cropped_img = img.crop(bbox)
|
| 2987 |
-
else:
|
| 2988 |
-
cropped_img = img
|
| 2989 |
-
buffer = BytesIO()
|
| 2990 |
-
cropped_img.save(buffer, format='PNG')
|
| 2991 |
-
cropped_png = buffer.getvalue()
|
| 2992 |
-
|
| 2993 |
-
b64_img = base64.b64encode(cropped_png).decode("utf-8")
|
| 2994 |
new_tag = soup.new_tag("img")
|
| 2995 |
-
new_tag["style"] = "max-width: 100%; display: block; margin: auto; page-break-after: avoid;"
|
| 2996 |
new_tag["src"] = "data:image/png;base64," + b64_img
|
|
|
|
| 2997 |
iframe.replace_with(new_tag)
|
| 2998 |
-
|
| 2999 |
driver.quit()
|
| 3000 |
-
|
| 3001 |
-
logging.info("Skipping Selenium-based visual conversion since render_with_selenium is disabled or no iframes found.")
|
| 3002 |
-
|
| 3003 |
-
|
| 3004 |
# Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
|
| 3005 |
body_tag = soup.find("body")
|
| 3006 |
body_content = body_tag.decode_contents() if body_tag else ""
|
|
@@ -3013,12 +2984,10 @@ class ReportGenerator:
|
|
| 3013 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 3014 |
<style>
|
| 3015 |
body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
|
| 3016 |
-
h1 {{ font-size:
|
| 3017 |
-
h2 {{ font-size:
|
| 3018 |
-
h3 {{ font-size:
|
| 3019 |
-
h4 {{ font-size: 12pt; text-align: left; font-weight: bold;}}
|
| 3020 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
|
| 3021 |
-
table {{ border: 1px solid black; }}
|
| 3022 |
pre, div {{ white-space: pre-wrap; }}
|
| 3023 |
ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
|
| 3024 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
|
@@ -3044,11 +3013,12 @@ class ReportGenerator:
|
|
| 3044 |
logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
|
| 3045 |
|
| 3046 |
# Crafting compliance
|
| 3047 |
-
final_html = final_html.replace("<h1","<br><br><br><h1").replace("</h1>","</h1><br>")
|
| 3048 |
-
final_html = final_html.replace("<h2","<br><br><b><h2").replace("</h2>","</b></h2><br>")
|
| 3049 |
-
final_html = final_html.replace("<h3","<br><br><h3").replace("</h3>","</b></h3><br>")
|
| 3050 |
-
final_html = final_html.replace("<h4","<br><h4")
|
| 3051 |
-
final_html = final_html.replace("<
|
|
|
|
| 3052 |
final_html = final_html.replace("<table>","<br><table>")
|
| 3053 |
|
| 3054 |
# Generate the final PDF from final_html using xhtml2pdf (A4 layout)
|
|
@@ -3067,30 +3037,19 @@ body {
|
|
| 3067 |
padding: 0;
|
| 3068 |
}
|
| 3069 |
h1 {
|
| 3070 |
-
font-size:
|
| 3071 |
margin-bottom: 12px;
|
| 3072 |
-
text-align: left;
|
| 3073 |
-
font-weight: bold;
|
| 3074 |
}
|
| 3075 |
h2 {
|
| 3076 |
-
font-size:
|
| 3077 |
margin-bottom: 10px;
|
| 3078 |
-
text-align: left;
|
| 3079 |
-
font-weight: bold;
|
| 3080 |
}
|
| 3081 |
h3 {
|
| 3082 |
-
font-size:
|
| 3083 |
margin-bottom: 8px;
|
| 3084 |
text-align: left;
|
| 3085 |
-
font-weight: bold;
|
| 3086 |
-
}
|
| 3087 |
-
h4 {
|
| 3088 |
-
font-size: 12pt;
|
| 3089 |
-
text-align: left;
|
| 3090 |
-
font-weight: bold;
|
| 3091 |
-
}
|
| 3092 |
-
table {
|
| 3093 |
-
border: 1px solid black;
|
| 3094 |
}
|
| 3095 |
p {
|
| 3096 |
font-size: 11pt;
|
|
@@ -3132,7 +3091,7 @@ th {
|
|
| 3132 |
|
| 3133 |
def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
| 3134 |
try:
|
| 3135 |
-
report_generator = ReportGenerator(render_with_selenium=False)
|
| 3136 |
metadata = {
|
| 3137 |
"Query name": query_name,
|
| 3138 |
"User name": user_name,
|
|
|
|
| 2918 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 2919 |
header = ""
|
| 2920 |
if metadata:
|
| 2921 |
+
header = f"""
|
| 2922 |
+
<h1>Search Query: {metadata.get('Query name', 'N/A')}</h1>
|
| 2923 |
+
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 2924 |
+
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 2925 |
+
<hr/>"""
|
| 2926 |
soup = BeautifulSoup(html_report, "html.parser")
|
| 2927 |
body_tag = soup.body
|
| 2928 |
if body_tag:
|
|
|
|
| 2932 |
# Parse the HTML
|
| 2933 |
logging.info(f"ReportGenerator: soup report generated:\n{soup}")
|
| 2934 |
|
| 2935 |
+
# Find all mermaid visual iframes (assumed to have class "mermaid-frame")
|
| 2936 |
+
mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
|
| 2937 |
|
| 2938 |
+
if mermaid_iframes:
|
| 2939 |
+
# Set up Selenium with a window size and high DPI for better image resolution
|
| 2940 |
import base64, tempfile, time
|
| 2941 |
import chromedriver_autoinstaller
|
| 2942 |
chromedriver_autoinstaller.install()
|
| 2943 |
+
# (Removed the explicit print statement to keep logs clean)
|
| 2944 |
from selenium import webdriver
|
| 2945 |
from selenium.webdriver.chrome.options import Options
|
| 2946 |
from selenium.webdriver.chrome.service import Service
|
|
|
|
| 2947 |
options = Options()
|
|
|
|
| 2948 |
options.add_argument("--headless")
|
| 2949 |
options.add_argument("--no-sandbox")
|
| 2950 |
options.add_argument("--disable-dev-shm-usage")
|
| 2951 |
+
options.add_argument("--window-size=1200,1200")
|
|
|
|
| 2952 |
options.add_argument("--force-device-scale-factor=2")
|
| 2953 |
service = Service(log_path=os.devnull)
|
| 2954 |
driver = webdriver.Chrome(service=service, options=options)
|
| 2955 |
|
|
|
|
|
|
|
| 2956 |
for iframe in mermaid_iframes:
|
| 2957 |
+
# Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
|
| 2958 |
srcdoc = iframe.get("srcdoc")
|
| 2959 |
if srcdoc:
|
| 2960 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
|
| 2961 |
tmp_file.write(srcdoc.encode("utf-8"))
|
| 2962 |
tmp_file.flush()
|
| 2963 |
file_url = "file://" + tmp_file.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2964 |
|
| 2965 |
+
driver.get(file_url)
|
| 2966 |
+
time.sleep(3) # Allow time for JavaScript (e.g., mermaid) to render
|
| 2967 |
screenshot_png = driver.get_screenshot_as_png()
|
| 2968 |
+
b64_img = base64.b64encode(screenshot_png).decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2969 |
new_tag = soup.new_tag("img")
|
|
|
|
| 2970 |
new_tag["src"] = "data:image/png;base64," + b64_img
|
| 2971 |
+
new_tag["style"] = "max-width: 500px; display: block; margin: auto;"
|
| 2972 |
iframe.replace_with(new_tag)
|
|
|
|
| 2973 |
driver.quit()
|
| 2974 |
+
|
|
|
|
|
|
|
|
|
|
| 2975 |
# Instead of converting the entire soup (which may include nested <html> tags), extract only the content within <body>
|
| 2976 |
body_tag = soup.find("body")
|
| 2977 |
body_content = body_tag.decode_contents() if body_tag else ""
|
|
|
|
| 2984 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 2985 |
<style>
|
| 2986 |
body {{ font-family: Helvetica, sans-serif; margin: 40px; background: white; }}
|
| 2987 |
+
h1 {{ font-size: 24pt; margin-bottom: 12px; text-align: left; }}
|
| 2988 |
+
h2 {{ font-size: 20pt; margin-bottom: 10px; text-align: left; }}
|
| 2989 |
+
h3 {{ font-size: 18pt; margin-bottom: 8px; text-align: left; }}
|
|
|
|
| 2990 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; white-space: pre-wrap; }}
|
|
|
|
| 2991 |
pre, div {{ white-space: pre-wrap; }}
|
| 2992 |
ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
|
| 2993 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
|
|
|
| 3013 |
logging.info(f"ReportGenerator: Final HTML for PDF conversion:\n{cleaned_string}")
|
| 3014 |
|
| 3015 |
# Crafting compliance
|
| 3016 |
+
final_html = final_html.replace("<h1>","<br><br><br><h1>").replace("</h1>","</h1><br>")
|
| 3017 |
+
final_html = final_html.replace("<h2>","<br><br><b><h2>").replace("</h2>","</b></h2><br>")
|
| 3018 |
+
final_html = final_html.replace("<h3>","<br><br><h3>").replace("</h3>","</b></h3><br>")
|
| 3019 |
+
final_html = final_html.replace("<h4>","<br><h4>")
|
| 3020 |
+
#final_html = final_html.replace("<p>","<br><p>")
|
| 3021 |
+
final_html = final_html.replace("<div>","<br><div>")
|
| 3022 |
final_html = final_html.replace("<table>","<br><table>")
|
| 3023 |
|
| 3024 |
# Generate the final PDF from final_html using xhtml2pdf (A4 layout)
|
|
|
|
| 3037 |
padding: 0;
|
| 3038 |
}
|
| 3039 |
h1 {
|
| 3040 |
+
font-size: 24pt;
|
| 3041 |
margin-bottom: 12px;
|
| 3042 |
+
text-align: left;
|
|
|
|
| 3043 |
}
|
| 3044 |
h2 {
|
| 3045 |
+
font-size: 18pt;
|
| 3046 |
margin-bottom: 10px;
|
| 3047 |
+
text-align: left;
|
|
|
|
| 3048 |
}
|
| 3049 |
h3 {
|
| 3050 |
+
font-size: 16pt;
|
| 3051 |
margin-bottom: 8px;
|
| 3052 |
text-align: left;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3053 |
}
|
| 3054 |
p {
|
| 3055 |
font-size: 11pt;
|
|
|
|
| 3091 |
|
| 3092 |
def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
| 3093 |
try:
|
| 3094 |
+
report_generator = ReportGenerator(render_with_selenium=False)
|
| 3095 |
metadata = {
|
| 3096 |
"Query name": query_name,
|
| 3097 |
"User name": user_name,
|