Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1719,16 +1719,19 @@ def compress_text(text: str, target_length: int) -> str:
|
|
| 1719 |
def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
|
| 1720 |
aggregated_crumbs: str, references: list, pages: int = 8) -> str:
|
| 1721 |
"""
|
| 1722 |
-
Revised generate_final_report with
|
| 1723 |
-
and a fallback for incomplete JSON parsing.
|
| 1724 |
|
| 1725 |
-
|
| 1726 |
1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
|
| 1727 |
-
2. For each core section, generates HTML content using the assigned token
|
| 1728 |
-
|
| 1729 |
3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
|
| 1730 |
4. Assembles the Table of Contents and the final HTML.
|
| 1731 |
-
5. Passes the raw HTML through
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1732 |
"""
|
| 1733 |
import json, logging, re
|
| 1734 |
|
|
@@ -1754,7 +1757,7 @@ Divide the report into two groups:
|
|
| 1754 |
• "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
|
| 1755 |
• "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
|
| 1756 |
**Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌈{pages}/10⌉ and ⌈{pages}/5⌉,
|
| 1757 |
-
graph placeholders should be in the same range, and focus placeholders between ⌈{pages}/20⌉ and ⌈{pages}/10⌉. Decide per section which to activate.
|
| 1758 |
|
| 1759 |
2. "final_sections": These should be generated after core sections and include:
|
| 1760 |
- "Introduction"
|
|
@@ -1763,29 +1766,36 @@ Divide the report into two groups:
|
|
| 1763 |
- "Reference Summary Table"
|
| 1764 |
Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
|
| 1765 |
distributed evenly among them.
|
| 1766 |
-
|
| 1767 |
Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
|
| 1768 |
"""
|
|
|
|
| 1769 |
skeleton_response = openai_call(
|
| 1770 |
prompt=prompt_skeleton,
|
| 1771 |
model="o3-mini",
|
| 1772 |
-
max_tokens_param=int(
|
| 1773 |
temperature=0
|
| 1774 |
)
|
| 1775 |
|
|
|
|
| 1776 |
try:
|
| 1777 |
skeleton = json.loads(skeleton_response)
|
| 1778 |
except Exception as e:
|
| 1779 |
logging.error(f"Error parsing skeleton JSON: {e}")
|
| 1780 |
-
#
|
| 1781 |
match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
|
|
|
|
| 1782 |
if match:
|
| 1783 |
-
|
| 1784 |
-
skeleton = json.loads(match.group(1).strip())
|
| 1785 |
-
except Exception as e2:
|
| 1786 |
-
logging.error(f"Fallback JSON parsing failed: {e2}")
|
| 1787 |
-
skeleton = {"core_sections": [], "final_sections": []}
|
| 1788 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1789 |
skeleton = {"core_sections": [], "final_sections": []}
|
| 1790 |
|
| 1791 |
# --- Step 2: Generate content for each core section sequentially.
|
|
@@ -1802,7 +1812,7 @@ Return only valid JSON with two keys: "core_sections" and "final_sections", with
|
|
| 1802 |
target_wc = 500
|
| 1803 |
key_elements = section.get("key_content_elements", [])
|
| 1804 |
placeholders = section.get("placeholders", {})
|
| 1805 |
-
# Build placeholder directive based on allocated booleans.
|
| 1806 |
placeholder_directive = ""
|
| 1807 |
if placeholders.get("visual", False):
|
| 1808 |
placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
|
|
@@ -1890,7 +1900,7 @@ Return only the HTML snippet without additional commentary.
|
|
| 1890 |
toc_html = openai_call(
|
| 1891 |
prompt=prompt_toc,
|
| 1892 |
model="o3-mini",
|
| 1893 |
-
max_tokens_param=500,
|
| 1894 |
temperature=0
|
| 1895 |
).strip()
|
| 1896 |
|
|
@@ -1973,7 +1983,7 @@ Return only the HTML snippet without additional commentary.
|
|
| 1973 |
|
| 1974 |
logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
|
| 1975 |
return final_report_html
|
| 1976 |
-
|
| 1977 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 1978 |
# Filter out already seen results
|
| 1979 |
new_results = []
|
|
|
|
| 1719 |
def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
|
| 1720 |
aggregated_crumbs: str, references: list, pages: int = 8) -> str:
|
| 1721 |
"""
|
| 1722 |
+
Revised generate_final_report with improved JSON extraction for the skeleton output.
|
|
|
|
| 1723 |
|
| 1724 |
+
The function:
|
| 1725 |
1. Generates a JSON skeleton outlining the report sections and placeholder allocations.
|
| 1726 |
+
2. For each core section, generates HTML content using the assigned token (target_wc * 5),
|
| 1727 |
+
ensuring that target_wc is treated as an integer.
|
| 1728 |
3. Generates final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
|
| 1729 |
4. Assembles the Table of Contents and the final HTML.
|
| 1730 |
+
5. Passes the raw HTML through placeholder replacement functions before returning.
|
| 1731 |
+
|
| 1732 |
+
Improvements:
|
| 1733 |
+
- Increased fallback extraction attempts if the JSON skeleton is incomplete.
|
| 1734 |
+
- Ensures that max_tokens parameters are integers.
|
| 1735 |
"""
|
| 1736 |
import json, logging, re
|
| 1737 |
|
|
|
|
| 1757 |
• "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
|
| 1758 |
• "placeholders": An object with boolean keys "visual", "graph", and "focus" indicating which placeholders to include.
|
| 1759 |
**Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌈{pages}/10⌉ and ⌈{pages}/5⌉,
|
| 1760 |
+
graph placeholders should be in the same range, and focus placeholders should be between ⌈{pages}/20⌉ and ⌈{pages}/10⌉. Decide per section which to activate.
|
| 1761 |
|
| 1762 |
2. "final_sections": These should be generated after core sections and include:
|
| 1763 |
- "Introduction"
|
|
|
|
| 1766 |
- "Reference Summary Table"
|
| 1767 |
Their combined target word count should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
|
| 1768 |
distributed evenly among them.
|
|
|
|
| 1769 |
Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
|
| 1770 |
"""
|
| 1771 |
+
# Increase the token allocation if needed (e.g., 2000 tokens)
|
| 1772 |
skeleton_response = openai_call(
|
| 1773 |
prompt=prompt_skeleton,
|
| 1774 |
model="o3-mini",
|
| 1775 |
+
max_tokens_param=int(2000),
|
| 1776 |
temperature=0
|
| 1777 |
)
|
| 1778 |
|
| 1779 |
+
# --- Fallback extraction for JSON skeleton ---
|
| 1780 |
try:
|
| 1781 |
skeleton = json.loads(skeleton_response)
|
| 1782 |
except Exception as e:
|
| 1783 |
logging.error(f"Error parsing skeleton JSON: {e}")
|
| 1784 |
+
# First attempt: extract JSON from a markdown code fence.
|
| 1785 |
match = re.search(r"```json(.*?)```", skeleton_response, re.DOTALL)
|
| 1786 |
+
json_str = ""
|
| 1787 |
if match:
|
| 1788 |
+
json_str = match.group(1).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1789 |
else:
|
| 1790 |
+
# Second attempt: extract any substring that starts with '{' and ends with '}'.
|
| 1791 |
+
json_match = re.search(r'({.*})', skeleton_response, re.DOTALL)
|
| 1792 |
+
if json_match:
|
| 1793 |
+
json_str = json_match.group(1).strip()
|
| 1794 |
+
|
| 1795 |
+
try:
|
| 1796 |
+
skeleton = json.loads(json_str) if json_str else {"core_sections": [], "final_sections": []}
|
| 1797 |
+
except Exception as e2:
|
| 1798 |
+
logging.error(f"Fallback JSON parsing failed: {e2}")
|
| 1799 |
skeleton = {"core_sections": [], "final_sections": []}
|
| 1800 |
|
| 1801 |
# --- Step 2: Generate content for each core section sequentially.
|
|
|
|
| 1812 |
target_wc = 500
|
| 1813 |
key_elements = section.get("key_content_elements", [])
|
| 1814 |
placeholders = section.get("placeholders", {})
|
| 1815 |
+
# Build a placeholder directive based on allocated booleans.
|
| 1816 |
placeholder_directive = ""
|
| 1817 |
if placeholders.get("visual", False):
|
| 1818 |
placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
|
|
|
|
| 1900 |
toc_html = openai_call(
|
| 1901 |
prompt=prompt_toc,
|
| 1902 |
model="o3-mini",
|
| 1903 |
+
max_tokens_param=int(500),
|
| 1904 |
temperature=0
|
| 1905 |
).strip()
|
| 1906 |
|
|
|
|
| 1983 |
|
| 1984 |
logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
|
| 1985 |
return final_report_html
|
| 1986 |
+
|
| 1987 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 1988 |
# Filter out already seen results
|
| 1989 |
new_results = []
|