Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Feb 22, 2025

Commit

36989f3

verified ·

1 Parent(s): 77ddce7

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -23

app.py CHANGED Viewed

@@ -33,29 +33,64 @@ TOTAL_SUMMARIZED_WORDS = 0
 # Helper functions for external APIs and PDF Processing
 # =============================================================================
-def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
-    pattern = r"\[\[Focus Placeholder (\d+):(.*?)\]\]"  # Capture placeholder number
     def placeholder_replacer(match):
         placeholder_num = match.group(1)
         instructions = match.group(2).strip()
-        logging.info(f"Generating focus box {placeholder_num}")
         try:
-            focus_html = generate_focus_snippet(instructions, context, initial_query, crumbs)
-            # Remove any outer HTML or body tags from the generated snippet:
-            focus_html = re.sub(r"<\/?(html|head|body)[^>]*>", "", focus_html, flags=re.DOTALL|re.IGNORECASE).strip()
-            # Now wrap in a single container with reduced font size:
-            return (
-                f'<!-- Focus {placeholder_num} Start -->'
-                f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
-                f'{focus_html}'
-                f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
-                f'<!-- Focus {placeholder_num} End -->'
-            )
         except Exception as e:
-            logging.error(f"Focus {placeholder_num} failed: {str(e)}")
-            return f'<!-- ERROR GENERATING FOCUS {placeholder_num} -->'
     return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
 def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     # remove special lines
     def remove_special_lines(input_string):
@@ -215,7 +250,7 @@ mindmap
     #result = remove_special_lines(result)
     #result = process_multiline_string(result)
-    htmloutput = f"""<iframe class="mermaid-frame" srcdoc='
         <!DOCTYPE html>
         <html>
             <head>
@@ -266,6 +301,29 @@ def replace_visual_placeholders(report_html: str, context: str, initial_query: s
     return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
 def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     prompt = (f"""
 Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
@@ -668,6 +726,28 @@ Important: after [[ put "Visual Placeholder n:" explicitly (with n as the ref nu
 - 2 visual placeholders cannot be in the same or in 2 consecutive sections
 Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
 // Focus placeholders
 - To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
 Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
@@ -717,7 +797,7 @@ Output the report directly without any introductory meta comments.
 // Report ending
 - End the report with the following sequence:
-    <iframe class="mermaid-frame" srcdoc='
     <!DOCTYPE html>
     <html>
     </head>
@@ -1026,7 +1106,7 @@ class ReportGenerator:
         return html_content
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
-        # Generate the full HTML report (including text, focus placeholders, and mermaid visuals as iframes)
         html_report = self.generate_report_html(solution_content)
         # Add header
@@ -1043,10 +1123,10 @@ class ReportGenerator:
         # Parse the HTML
         logging.info(f"ReportGenerator: soup report generated:\n{soup}")
-        # Find all mermaid visual iframes (assumed to have class "mermaid-frame")
-        mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
-        if mermaid_iframes:
             # Set up Selenium with a window size and high DPI for better image resolution
             import base64, tempfile, time
             import chromedriver_autoinstaller
@@ -1064,7 +1144,7 @@ class ReportGenerator:
             service = Service(log_path=os.devnull)
             driver = webdriver.Chrome(service=service, options=options)
-            for iframe in mermaid_iframes:
                 # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
                 srcdoc = iframe.get("srcdoc")
                 if srcdoc:
@@ -1475,6 +1555,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
     # --- NEW STEP: Post-process final_report to replace visual and focus placeholders ---
     final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
     final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
     alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
     final_report = final_report.replace(

 # Helper functions for external APIs and PDF Processing
 # =============================================================================
+def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
+    """Ask the LLM for a standalone D3.js page and wrap it in a ``visual-frame`` iframe.
+
+    Args:
+        placeholder_text: Instructions captured from a ``[[Graph Placeholder n:...]]`` marker.
+        context: Research context interpolated into the prompt.
+        initial_query: The query the report is about, interpolated into the prompt.
+        crumbs: Knowledge inputs interpolated into the prompt.
+
+    Returns:
+        An ``<iframe class="visual-frame">`` element whose ``srcdoc`` attribute holds the
+        generated HTML document.
+    """
+    prompt = f"""
+Generate a full htmal code (including css and javascript) code displaying a simple but effective and elegant graph based on the following requirements:
+{placeholder_text}
+It will be integrated in a broader report (focus on the D3.js graph itself though) about:
+{initial_query}
+// Sources:
+Keep in mind the:
+- context:
+{context}
+- the knowledge inputs
+{crumbs}
+// Requirements
+- use elaborate but effective visual through call to the D3.js library.
+- no introduction, conclusions or code fences -> Output the result directly
+// Important
+- Make the visuals content rich, there's no point having a visual if its content has no real value.
+- It has to convey some relevant insights.
+- Take a deep breath, think step by step and think it well.
+- Use your judgement to decide between box plots, bubble charts, calendar view, chord diagrams, histograms, ...
+- Your response should start with <html> and end with </html> - no intro before, no comments after.
+"""
+    result = openai_call(prompt, model="o3-mini", max_tokens_param=10000)
+    # Remove a surrounding markdown fence. A bare strip("```") drops the backticks but
+    # leaves a language tag such as "html" behind as the first line of the document.
+    result = re.sub(r"^`+(?:[\w-]+[ \t]*\n)?|`+$", "", result.strip()).strip()
+    # srcdoc is a single-quoted attribute: escape "&" and "'" so quotes inside the generated
+    # HTML/JS cannot end the attribute early. HTML parsers decode the entities again.
+    srcdoc_body = result.replace("&", "&amp;").replace("'", "&#39;")
+    # Close the attribute and the opening tag before </iframe>; without "'>" the element
+    # is malformed and its srcdoc cannot be read back reliably.
+    htmloutput = f"""<iframe class="visual-frame" srcdoc='
+        <!DOCTYPE html>
+{srcdoc_body}
+'></iframe>
+        """
+    logging.info(f"The code produced for this graph placeholder:\n{placeholder_text}\n\n {htmloutput}\n\n")
+    return htmloutput
+def replace_graph_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
+    """Replace every ``[[Graph Placeholder n:...]]`` marker in the report with generated graph HTML.
+
+    Args:
+        report_html: Report text that may contain graph placeholder markers.
+        context: Research context forwarded to the graph generator.
+        initial_query: The query the report is about, forwarded to the graph generator.
+        crumbs: Knowledge inputs forwarded to the graph generator.
+
+    Returns:
+        The report with each marker replaced by the generated snippet wrapped in
+        ``<!-- Graph n Start/End -->`` comments. A marker whose generation raises is
+        replaced by an ``<!-- ERROR GENERATING GRAPH n -->`` comment instead.
+    """
+    pattern = r"\[\[Graph Placeholder (\d+):(.*?)\]\]"  # Capture placeholder number
     def placeholder_replacer(match):
         placeholder_num = match.group(1)
         instructions = match.group(2).strip()
+        logging.info(f"Generating graph {placeholder_num}")
         try:
+            # Graph placeholders must go through the D3.js graph generator, not the
+            # generic visual generator used for visual placeholders.
+            visual_html = generate_graph_snippet(instructions, context, initial_query, crumbs)
+            return f'<!-- Graph {placeholder_num} Start -->\n{visual_html}\n<!-- Graph {placeholder_num} End -->'
         except Exception as e:
+            # Best effort: one failed graph must not abort the whole report.
+            logging.error(f"Graph {placeholder_num} failed: {str(e)}")
+            return f'<!-- ERROR GENERATING GRAPH {placeholder_num} -->'
     return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
 def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     # remove special lines
     def remove_special_lines(input_string):
     #result = remove_special_lines(result)
     #result = process_multiline_string(result)
+    htmloutput = f"""<iframe class="visual-frame" srcdoc='
         <!DOCTYPE html>
         <html>
             <head>
     return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
+def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
+    """Expand each ``[[Focus Placeholder n:...]]`` marker into a generated focus box.
+
+    Every marker is replaced by the output of ``generate_focus_snippet`` with any
+    ``<html>``/``<head>``/``<body>`` tags removed, framed by two centred dashed rules
+    and ``<!-- Focus n Start/End -->`` comments. If generation raises, the marker is
+    replaced by an ``<!-- ERROR GENERATING FOCUS n -->`` comment.
+    """
+    divider = '<p style="text-align: center;">-----------------------------------------------------------------------</p>'
+    wrapper_tags = re.compile(r"<\/?(html|head|body)[^>]*>", flags=re.DOTALL|re.IGNORECASE)
+    marker = re.compile(r"\[\[Focus Placeholder (\d+):(.*?)\]\]", flags=re.DOTALL)
+    def _render_focus(found):
+        box_id, brief = found.group(1), found.group(2).strip()
+        logging.info(f"Generating focus box {box_id}")
+        try:
+            snippet = generate_focus_snippet(brief, context, initial_query, crumbs)
+            # Keep only the inner markup: drop document-level wrapper tags.
+            snippet = wrapper_tags.sub("", snippet).strip()
+            # Frame the snippet between two dashed rules and start/end comments.
+            pieces = (
+                f'<!-- Focus {box_id} Start -->',
+                divider,
+                snippet,
+                divider,
+                f'<!-- Focus {box_id} End -->',
+            )
+            return "".join(pieces)
+        except Exception as e:
+            logging.error(f"Focus {box_id} failed: {str(e)}")
+            return f'<!-- ERROR GENERATING FOCUS {box_id} -->'
+    return marker.sub(_render_focus, report_html)
 def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     prompt = (f"""
 Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
 - 2 visual placeholders cannot be in the same or in 2 consecutive sections
 Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
+// Graph placeholders
+- Create special graphe placeholders that will be rendered in d3.js afterwards based on your guidance:
+[[Graph Placeholder n:
+- Purpose of this graph is:...
+- Relevant data to generate it:...
+- Visual guidance:...
+]]
+with n as the reference number
+Important: after [[ put "Graph Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
+- All types of graphs from d3.js library can be generated // Take this into consideration when providing the instructions for the graph data
+- do not make reference in the report to "graph placeholders" just mention graph.
+- in the placeholder, no need to add the references to the source, but make sure ALL of the data points required has a source from the learning and reference material hereafter
+- these placeholders text should contain:
+    o the purpose of the future graph
+    o the relevant data to generate it
+    o the guidance in terms of look&feel (ex: red colors, bar chart style)
+    note: Be specific if you want some particular color used, keep it consistent across the report.
+- there should be at least {round(pages/4,0)} of these graphs placeholders within the report
+- 2 graph placeholders cannot be in the same or in 2 consecutive sections
+Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
 // Focus placeholders
 - To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
 Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
 // Report ending
 - End the report with the following sequence:
+    <iframe class="visual-frame" srcdoc='
     <!DOCTYPE html>
     <html>
     </head>
         return html_content
     def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
+        # Generate the full HTML report (including text, focus placeholders, and visuals as iframes)
         html_report = self.generate_report_html(solution_content)
         # Add header
         # Parse the HTML
         logging.info(f"ReportGenerator: soup report generated:\n{soup}")
+        # Find all visual iframes, mermaid visuals and graphs alike (assumed to have class "visual-frame")
+        visual_iframes = soup.find_all("iframe", class_="visual-frame")
+        if visual_iframes:
             # Set up Selenium with a window size and high DPI for better image resolution
             import base64, tempfile, time
             import chromedriver_autoinstaller
             service = Service(log_path=os.devnull)
             driver = webdriver.Chrome(service=service, options=options)
+            for iframe in visual_iframes:
                 # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
                 srcdoc = iframe.get("srcdoc")
                 if srcdoc:
     # --- NEW STEP: Post-process final_report to replace visual, focus and graph placeholders ---
     final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
     final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
+    final_report = replace_graph_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
     alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
     final_report = final_report.replace(