Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Mar 7, 2025

Commit

65fa006

verified ·

1 Parent(s): 508a2e2

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -18

app.py CHANGED Viewed

@@ -1719,16 +1719,19 @@ def compress_text(text: str, target_length: int) -> str:
 def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
                           aggregated_crumbs: str, references: list, pages: int = 8) -> str:
     """
-    Revised generate_final_report with explicit type conversions for max_tokens values
-    and a fallback for incomplete JSON parsing.
-    This function:
       1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
-      2. For each core section, generates HTML content using the assigned token
-         (target_wc * 5) ensuring target_wc is an integer.
       3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
       4. Assembles the Table of Contents and the final HTML.
-      5. Passes the raw HTML through the placeholder replacement functions before returning.
     """
     import json, logging, re
@@ -1754,7 +1757,7 @@ Divide the report into two groups:
        • "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
        • "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
          **Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌊{pages}/10⌋ and ⌈{pages}/5⌉,
-         graph placeholders should be in the same range, and focus placeholders between ⌊{pages}/20⌋ and ⌈{pages}/10⌉. Decide per section which to activate.
 2. "final_sections": These should be generated after core sections and include:
        - "Introduction"
@@ -1763,29 +1766,36 @@ Divide the report into two groups:
        - "Reference Summary Table"
    Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
    distributed evenly among them.
 Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
 """
     skeleton_response = openai_call(
         prompt=prompt_skeleton,
         model="o3-mini",
-        max_tokens_param=int(1500),
         temperature=0
     )
     try:
         skeleton = json.loads(skeleton_response)
     except Exception as e:
         logging.error(f"Error parsing skeleton JSON: {e}")
-        # Fallback: attempt to extract JSON from a markdown code fence.
         match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
         if match:
-            try:
-                skeleton = json.loads(match.group(1).strip())
-            except Exception as e2:
-                logging.error(f"Fallback JSON parsing failed: {e2}")
-                skeleton = {"core_sections": [], "final_sections": []}
         else:
             skeleton = {"core_sections": [], "final_sections": []}
     # --- Step 2: Generate content for each core section sequentially.
@@ -1802,7 +1812,7 @@ Return only valid JSON with two keys: "core_sections" and "final_sections", with
                 target_wc = 500
             key_elements = section.get("key_content_elements", [])
             placeholders = section.get("placeholders", {})
-            # Build placeholder directive based on allocated booleans.
             placeholder_directive = ""
             if placeholders.get("visual", False):
                 placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
@@ -1890,7 +1900,7 @@ Return only the HTML snippet without additional commentary.
     toc_html = openai_call(
         prompt=prompt_toc,
         model="o3-mini",
-        max_tokens_param=500,
         temperature=0
     ).strip()
@@ -1973,7 +1983,7 @@ Return only the HTML snippet without additional commentary.
     logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
     return final_report_html
 def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
     # Filter out already seen results
     new_results = []

 def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
                           aggregated_crumbs: str, references: list, pages: int = 8) -> str:
     """
+    Revised generate_final_report with improved JSON extraction for the skeleton output.
+    The function:
       1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
+      2. For each core section, generates HTML content using the assigned token budget (target_wc * 5),
+         ensuring that target_wc is treated as an integer.
       3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
       4. Assembles the Table of Contents and the final HTML.
+      5. Passes the raw HTML through placeholder replacement functions before returning.
+    Improvements:
+      - Adds a second fallback extraction attempt (a brace-delimited substring) when the skeleton response cannot be parsed as JSON.
+      - Ensures that max_tokens parameters are integers.
     """
     import json, logging, re
        • "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
        • "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
          **Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌊{pages}/10⌋ and ⌈{pages}/5⌉,
+         graph placeholders should be in the same range, and focus placeholders should be between ⌊{pages}/20⌋ and ⌈{pages}/10⌉. Decide per section which to activate.
 2. "final_sections": These should be generated after core sections and include:
        - "Introduction"
        - "Reference Summary Table"
    Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
    distributed evenly among them.
 Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
 """
+    # Increase the token allocation if needed (e.g., 2000 tokens)
     skeleton_response = openai_call(
         prompt=prompt_skeleton,
         model="o3-mini",
+        max_tokens_param=int(2000),
         temperature=0
     )
+    # --- Fallback extraction for JSON skeleton ---
     try:
         skeleton = json.loads(skeleton_response)
     except Exception as e:
         logging.error(f"Error parsing skeleton JSON: {e}")
+        # First attempt: extract JSON from a markdown code fence.
         match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
+        json_str = ""
         if match:
+            json_str = match.group(1).strip()
         else:
+            # Second attempt: greedily extract the span from the first '{' to the last '}' in the response.
+            json_match = re.search(r'({.*})', skeleton_response, re.DOTALL)
+            if json_match:
+                json_str = json_match.group(1).strip()
+        try:
+            skeleton = json.loads(json_str) if json_str else {"core_sections": [], "final_sections": []}
+        except Exception as e2:
+            logging.error(f"Fallback JSON parsing failed: {e2}")
             skeleton = {"core_sections": [], "final_sections": []}
     # --- Step 2: Generate content for each core section sequentially.
                 target_wc = 500
             key_elements = section.get("key_content_elements", [])
             placeholders = section.get("placeholders", {})
+            # Build a placeholder directive based on allocated booleans.
             placeholder_directive = ""
             if placeholders.get("visual", False):
                 placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
     toc_html = openai_call(
         prompt=prompt_toc,
         model="o3-mini",
+        max_tokens_param=int(500),
         temperature=0
     ).strip()
     logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
     return final_report_html
 def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
     # Filter out already seen results
     new_results = []