Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Feb 15, 2025

Commit

074c65c

verified ·

1 Parent(s): 734557d

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -46

app.py CHANGED Viewed

@@ -33,59 +33,66 @@ def call_visual_llm(prompt: str) -> str:
     # Replace 'openai_call' with your actual API call function.
     response = openai_call(prompt, model="o3-mini", max_tokens_param=10000)
     # Remove code fences if the model returns them.
-    response = response.strip().strip("```").strip("html").strip()
     return response
 def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     prompt = (f"""
-Generate a complete, self-contained HTML code snippet that includes inline CSS and JavaScript (only to call relevant libraries).
 The code should display a simple but effective and elegant visualization based on the following requirements:
 {placeholder_text}
-// Reference
-The visual is expected to be integrated within a report generated for the user, it should make use of any relevant information from:
-- the initial user query:
-{initial_query}
-- the overall context
-{context}
-- some knowledge material gathered from search engines
-{crumbs}
-// Requirements
-- the dimensions should be less than 500px height and 500px width (it should be printable once the report is converted to pdf)
-- use a font no larger than 10, with bold and italic if needed
-- if for a specific shape the background is dark, the text should be white (and vice versa if the background is clear)
-- Use HTML5 elements if necessary
-- Display either:
-o chart (histogram, curve) with the proper call to a js library (ex: d3.js or plotly)
-o a diagram (in the style of a mindmap, or five forces, or a flow chart)
-- keep it simple but effective to convey the message
-// IMPORTTANT
-- output only the code
-- no extra explanation
-- no code fences
-- do not add <html> </html> or  <!DOCTYPE html>, the snippet will be integrated in a html code body part at a pre-defined location
-"""
     )
     result = call_visual_llm(prompt)
-    logging.info(f"The code produced for this visual placeholder:\n{placeholder_text}\n\n\n {result}\n\n")
     return result
 def replace_visual_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
-    pattern = r"\[\[Visual Placeholder \d+:(.*?)\]\]"  # Regex to match placeholders
     def placeholder_replacer(match):
-        placeholder_instructions = match.group(1).strip()  # Extract and strip placeholder instructions
-        logging.info(f"Generating visual for placeholder: {placeholder_instructions}")
-        # Call the visual generation function:
-        visual_html = generate_visual_snippet(placeholder_instructions, context, initial_query, crumbs)
-        return visual_html
-    # Replace all matches in the HTML
-    new_report_html = re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
-    return new_report_html
 def get_random_header():
     headers = [
@@ -617,6 +624,15 @@ def refine_query(query: str, openai_api_key: str) -> str:
     logging.info(f"refine_query: Refined query: {refined}")
     return refined
 class ReportGenerator:
     def __init__(self):
         pass
@@ -676,18 +692,46 @@ class ReportGenerator:
         logging.info("ReportGenerator: HTML report generated successfully.")
         return full_html
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
-        """
-        Generate a PDF report by first creating the HTML report and converting it with xhtml2pdf.
-        """
-        # Create the HTML content
         html_report = self.generate_report_html(solution_content, metadata)
-        # Convert the HTML to PDF using xhtml2pdf (pisa)
         pdf_buffer = io.BytesIO()
         pisa_status = pisa.CreatePDF(html_report, dest=pdf_buffer)
         if pisa_status.err:
-            raise Exception("Error converting HTML to PDF")
-        logging.info("ReportGenerator: PDF report generated successfully from HTML.")
         return pdf_buffer.getvalue()
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
@@ -882,8 +926,6 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
     alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
     final_report += f"""
     <p><b>Report alignment assessment:</b>
     {alignment_assessment}</p>"""

     # Replace 'openai_call' with your actual API call function.
     response = openai_call(prompt, model="o3-mini", max_tokens_param=10000)
     # Remove code fences if the model returns them.
+    response = response.strip().strip("```").strip()
     return response
 def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     prompt = (f"""
+Generate a complete, self-contained HTML code snippet that includes inline CSS and JavaScript.
 The code should display a simple but effective and elegant visualization based on the following requirements:
 {placeholder_text}
+// Critical Requirements
+- Use ONLY SVG elements or Plotly.js for compatibility with PDF rendering
+- White background for all elements (#ffffff)
+- No external dependencies except CDN-hosted Plotly/D3
+- Include explicit width/height attributes in SVG tags
+- Font size minimum 12px for readability in PDF
+- Include all required <script> tags for libraries
+- Add 'xmlns="http://www.w3.org/2000/svg"' attribute to SVG tags
+- Use high-contrast colors (no dark backgrounds)
+- Include descriptive <title> elements for accessibility
+// Important
+- Make the visuals content rich, there's no point having a visual if its content is pointless.
+- It has to convey some relevant insights.
+- Take a deep breath, think step by step and think it well.
+// Preferred Patterns
+<svg width="500" height="500" xmlns="http://www.w3.org/2000/svg">
+  <!-- SVG elements here -->
+</svg>
+OR
+<div id="chart">
+  <!-- Plotly chart -->
+</div>
+<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+""")
     )
     result = call_visual_llm(prompt)
+    logging.info(f"The code produced for this visual placeholder:\n{placeholder_text}\n\n {result}\n\n")
     return result
 def replace_visual_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
+    pattern = r"\[\[Visual Placeholder (\d+):(.*?)\]\]"  # Capture placeholder number
+    replacements = []
     def placeholder_replacer(match):
+        placeholder_num = match.group(1)
+        instructions = match.group(2).strip()
+        logging.info(f"Generating visual {placeholder_num}")
+        try:
+            visual_html = generate_visual_snippet(instructions, context, initial_query, crumbs)
+            # Add error boundary and logging
+            return f'<!-- Visual {placeholder_num} Start -->\n{visual_html}\n<!-- Visual {placeholder_num} End -->'
+        except Exception as e:
+            logging.error(f"Visual {placeholder_num} failed: {str(e)}")
+            return f'<!-- ERROR GENERATING VISUAL {placeholder_num} -->'
+    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
 def get_random_header():
     headers = [
     logging.info(f"refine_query: Refined query: {refined}")
     return refined
def validate_visual_html(html: str) -> bool:
    """Run basic sanity checks on a generated visual snippet.

    The snippet passes when all of the following hold (case-insensitively):

    * it contains an ``<svg`` tag or mentions ``plotly``;
    * it either never mentions ``background`` or contains ``#fff`` somewhere
      (which also covers ``#ffffff``);
    * it contains no ``color: #000000`` declaration. The pattern is not
      anchored, so ``background-color: #000000`` is rejected as well.

    Args:
        html: The generated snippet. Empty or ``None`` input is rejected.

    Returns:
        ``True`` when every check passes, ``False`` otherwise.
    """
    if not html:
        return False

    # Lowercase once so every substring test is case-insensitive, including
    # the SVG tag check (HTML tag names are not case-sensitive).
    lowered = html.lower()
    has_renderer = "<svg" in lowered or "plotly" in lowered
    background_ok = "background" not in lowered or "#fff" in lowered
    no_black = re.search(r"color\s*:\s*#000000", html, re.I) is None
    return has_renderer and background_ok and no_black
 class ReportGenerator:
     def __init__(self):
         pass
         logging.info("ReportGenerator: HTML report generated successfully.")
         return full_html
+    def fallback_pdf_generation(self, html_content: str) -> bytes:
+        """Convert HTML to PDF using screenshot fallback"""
+        from selenium import webdriver
+        from selenium.webdriver.chrome.options import Options
+        options = Options()
+        options.add_argument("--headless")
+        options.add_argument("--disable-gpu")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--window-size=1920,1080")
+        driver = webdriver.Chrome(options=options)
+        try:
+            driver.get(f"data:text/html;charset=utf-8,{html_content}")
+            time.sleep(2)  # Allow charts to render
+            return driver.get_screenshot_as_png()
+        finally:
+            driver.quit()
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
+        # Convert dynamic JS charts to static images
         html_report = self.generate_report_html(solution_content, metadata)
+        # Add PDF-specific CSS
+        html_report = html_report.replace("<style>", """<style>
+            @media print {
+                .visual-container { page-break-inside: avoid; }
+                svg { max-width: 100% !important; height: auto !important; }
+            }
+        """)
+        # Convert to PDF
         pdf_buffer = io.BytesIO()
         pisa_status = pisa.CreatePDF(html_report, dest=pdf_buffer)
+        # Fallback for JS charts
         if pisa_status.err:
+            logging.warning("PDF conversion issues detected - attempting image fallback")
+            return self.fallback_pdf_generation(html_report)
         return pdf_buffer.getvalue()
 def handle_generate_report(query_name: str, user_name: str, final_report: str):
     alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
     final_report += f"""
     <p><b>Report alignment assessment:</b>
     {alignment_assessment}</p>"""