Guiyom committed on
Commit
65fa006
·
verified ·
1 Parent(s): 508a2e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -18
app.py CHANGED
@@ -1719,16 +1719,19 @@ def compress_text(text: str, target_length: int) -> str:
1719
  def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
1720
  aggregated_crumbs: str, references: list, pages: int = 8) -> str:
1721
  """
1722
- Revised generate_final_report with explicit type conversions for max_tokens values
1723
- and a fallback for incomplete JSON parsing.
1724
 
1725
- This function:
1726
  1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
1727
- 2. For each core section, generates HTML content using the assigned token
1728
- (target_wc * 5) ensuring target_wc is an integer.
1729
  3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
1730
  4. Assembles the Table of Contents and the final HTML.
1731
- 5. Passes the raw HTML through the placeholder replacement functions before returning.
 
 
 
 
1732
  """
1733
  import json, logging, re
1734
 
@@ -1754,7 +1757,7 @@ Divide the report into two groups:
1754
  • "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
1755
  • "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
1756
  **Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌊{pages}/10⌋ and ⌈{pages}/5⌉,
1757
- graph placeholders should be in the same range, and focus placeholders between ⌊{pages}/20⌋ and ⌈{pages}/10⌉. Decide per section which to activate.
1758
 
1759
  2. "final_sections": These should be generated after core sections and include:
1760
  - "Introduction"
@@ -1763,29 +1766,36 @@ Divide the report into two groups:
1763
  - "Reference Summary Table"
1764
  Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
1765
  distributed evenly among them.
1766
-
1767
  Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
1768
  """
 
1769
  skeleton_response = openai_call(
1770
  prompt=prompt_skeleton,
1771
  model="o3-mini",
1772
- max_tokens_param=int(1500),
1773
  temperature=0
1774
  )
1775
 
 
1776
  try:
1777
  skeleton = json.loads(skeleton_response)
1778
  except Exception as e:
1779
  logging.error(f"Error parsing skeleton JSON: {e}")
1780
- # Fallback: attempt to extract JSON from a markdown code fence.
1781
  match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
 
1782
  if match:
1783
- try:
1784
- skeleton = json.loads(match.group(1).strip())
1785
- except Exception as e2:
1786
- logging.error(f"Fallback JSON parsing failed: {e2}")
1787
- skeleton = {"core_sections": [], "final_sections": []}
1788
  else:
 
 
 
 
 
 
 
 
 
1789
  skeleton = {"core_sections": [], "final_sections": []}
1790
 
1791
  # --- Step 2: Generate content for each core section sequentially.
@@ -1802,7 +1812,7 @@ Return only valid JSON with two keys: "core_sections" and "final_sections", with
1802
  target_wc = 500
1803
  key_elements = section.get("key_content_elements", [])
1804
  placeholders = section.get("placeholders", {})
1805
- # Build placeholder directive based on allocated booleans.
1806
  placeholder_directive = ""
1807
  if placeholders.get("visual", False):
1808
  placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
@@ -1890,7 +1900,7 @@ Return only the HTML snippet without additional commentary.
1890
  toc_html = openai_call(
1891
  prompt=prompt_toc,
1892
  model="o3-mini",
1893
- max_tokens_param=500,
1894
  temperature=0
1895
  ).strip()
1896
 
@@ -1973,7 +1983,7 @@ Return only the HTML snippet without additional commentary.
1973
 
1974
  logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
1975
  return final_report_html
1976
-
1977
  def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
1978
  # Filter out already seen results
1979
  new_results = []
 
1719
  def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
1720
  aggregated_crumbs: str, references: list, pages: int = 8) -> str:
1721
  """
1722
+ Revised generate_final_report with improved JSON extraction for the skeleton output.
 
1723
 
1724
+ The function:
1725
  1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
1726
+ 2. For each core section, generates HTML content using the assigned token (target_wc * 5),
1727
+ ensuring that target_wc is treated as an integer.
1728
  3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
1729
  4. Assembles the Table of Contents and the final HTML.
1730
+ 5. Passes the raw HTML through placeholder replacement functions before returning.
1731
+
1732
+ Improvements:
1733
+ - Increased fallback extraction attempts if the JSON skeleton is incomplete.
1734
+ - Ensures that max_tokens parameters are integers.
1735
  """
1736
  import json, logging, re
1737
 
 
1757
  • "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
1758
  • "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
1759
  **Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌊{pages}/10⌋ and ⌈{pages}/5⌉,
1760
+ graph placeholders should be in the same range, and focus placeholders should be between ⌊{pages}/20⌋ and ⌈{pages}/10⌉. Decide per section which to activate.
1761
 
1762
  2. "final_sections": These should be generated after core sections and include:
1763
  - "Introduction"
 
1766
  - "Reference Summary Table"
1767
  Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
1768
  distributed evenly among them.
 
1769
  Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
1770
  """
1771
+ # Increase the token allocation if needed (e.g., 2000 tokens)
1772
  skeleton_response = openai_call(
1773
  prompt=prompt_skeleton,
1774
  model="o3-mini",
1775
+ max_tokens_param=int(2000),
1776
  temperature=0
1777
  )
1778
 
1779
+ # --- Fallback extraction for JSON skeleton ---
1780
  try:
1781
  skeleton = json.loads(skeleton_response)
1782
  except Exception as e:
1783
  logging.error(f"Error parsing skeleton JSON: {e}")
1784
+ # First attempt: extract JSON from a markdown code fence.
1785
  match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
1786
+ json_str = ""
1787
  if match:
1788
+ json_str = match.group(1).strip()
 
 
 
 
1789
  else:
1790
+ # Second attempt: extract any substring that starts with '{' and ends with '}'.
1791
+ json_match = re.search(r'({.*})', skeleton_response, re.DOTALL)
1792
+ if json_match:
1793
+ json_str = json_match.group(1).strip()
1794
+
1795
+ try:
1796
+ skeleton = json.loads(json_str) if json_str else {"core_sections": [], "final_sections": []}
1797
+ except Exception as e2:
1798
+ logging.error(f"Fallback JSON parsing failed: {e2}")
1799
  skeleton = {"core_sections": [], "final_sections": []}
1800
 
1801
  # --- Step 2: Generate content for each core section sequentially.
 
1812
  target_wc = 500
1813
  key_elements = section.get("key_content_elements", [])
1814
  placeholders = section.get("placeholders", {})
1815
+ # Build a placeholder directive based on allocated booleans.
1816
  placeholder_directive = ""
1817
  if placeholders.get("visual", False):
1818
  placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
 
1900
  toc_html = openai_call(
1901
  prompt=prompt_toc,
1902
  model="o3-mini",
1903
+ max_tokens_param=int(500),
1904
  temperature=0
1905
  ).strip()
1906
 
 
1983
 
1984
  logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
1985
  return final_report_html
1986
+
1987
  def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
1988
  # Filter out already seen results
1989
  new_results = []