Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,29 +33,64 @@ TOTAL_SUMMARIZED_WORDS = 0
|
|
| 33 |
# Helper functions for external APIs and PDF Processing
|
| 34 |
# =============================================================================
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def placeholder_replacer(match):
|
| 39 |
placeholder_num = match.group(1)
|
| 40 |
instructions = match.group(2).strip()
|
| 41 |
-
logging.info(f"Generating
|
|
|
|
| 42 |
try:
|
| 43 |
-
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
# Now wrap in a single container with reduced font size:
|
| 47 |
-
return (
|
| 48 |
-
f'<!-- Focus {placeholder_num} Start -->'
|
| 49 |
-
f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
|
| 50 |
-
f'{focus_html}'
|
| 51 |
-
f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
|
| 52 |
-
f'<!-- Focus {placeholder_num} End -->'
|
| 53 |
-
)
|
| 54 |
except Exception as e:
|
| 55 |
-
logging.error(f"
|
| 56 |
-
return f'<!-- ERROR GENERATING
|
|
|
|
| 57 |
return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
| 58 |
|
|
|
|
| 59 |
def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 60 |
# remove special lines
|
| 61 |
def remove_special_lines(input_string):
|
|
@@ -215,7 +250,7 @@ mindmap
|
|
| 215 |
#result = remove_special_lines(result)
|
| 216 |
#result = process_multiline_string(result)
|
| 217 |
|
| 218 |
-
htmloutput = f"""<iframe class="
|
| 219 |
<!DOCTYPE html>
|
| 220 |
<html>
|
| 221 |
<head>
|
|
@@ -266,6 +301,29 @@ def replace_visual_placeholders(report_html: str, context: str, initial_query: s
|
|
| 266 |
|
| 267 |
return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 270 |
prompt = (f"""
|
| 271 |
Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
|
|
@@ -668,6 +726,28 @@ Important: after [[ put "Visual Placeholder n:" explicitly (with n as the ref nu
|
|
| 668 |
- 2 visual placeholders cannot be in the same or in 2 consecutive sections
|
| 669 |
Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
|
| 670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
// Focus placeholders
|
| 672 |
- To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
|
| 673 |
Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
|
|
@@ -717,7 +797,7 @@ Output the report directly without any introductory meta comments.
|
|
| 717 |
// Report ending
|
| 718 |
- End the report with the following sequence:
|
| 719 |
|
| 720 |
-
<iframe class="
|
| 721 |
<!DOCTYPE html>
|
| 722 |
<html>
|
| 723 |
</head>
|
|
@@ -1026,7 +1106,7 @@ class ReportGenerator:
|
|
| 1026 |
return html_content
|
| 1027 |
|
| 1028 |
def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
|
| 1029 |
-
# Generate the full HTML report (including text, focus placeholders, and
|
| 1030 |
html_report = self.generate_report_html(solution_content)
|
| 1031 |
|
| 1032 |
# Add header
|
|
@@ -1043,10 +1123,10 @@ class ReportGenerator:
|
|
| 1043 |
# Parse the HTML
|
| 1044 |
logging.info(f"ReportGenerator: soup report generated:\n{soup}")
|
| 1045 |
|
| 1046 |
-
# Find all mermaid visual iframes (assumed to have class "
|
| 1047 |
-
|
| 1048 |
|
| 1049 |
-
if
|
| 1050 |
# Set up Selenium with a window size and high DPI for better image resolution
|
| 1051 |
import base64, tempfile, time
|
| 1052 |
import chromedriver_autoinstaller
|
|
@@ -1064,7 +1144,7 @@ class ReportGenerator:
|
|
| 1064 |
service = Service(log_path=os.devnull)
|
| 1065 |
driver = webdriver.Chrome(service=service, options=options)
|
| 1066 |
|
| 1067 |
-
for iframe in
|
| 1068 |
# Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
|
| 1069 |
srcdoc = iframe.get("srcdoc")
|
| 1070 |
if srcdoc:
|
|
@@ -1475,6 +1555,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
|
|
| 1475 |
# --- NEW STEP: Post-process final_report to replace visual and focus placeholders ---
|
| 1476 |
final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
|
| 1477 |
final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
|
|
|
|
| 1478 |
|
| 1479 |
alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
|
| 1480 |
final_report = final_report.replace(
|
|
|
|
| 33 |
# Helper functions for external APIs and PDF Processing
|
| 34 |
# =============================================================================
|
| 35 |
|
| 36 |
+
def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
    """Generate a D3.js graph page with the LLM and wrap it in a visual iframe.

    Args:
        placeholder_text: Instructions captured from a graph placeholder
            (purpose, data and visual guidance for the graph).
        context: Context text interpolated into the prompt.
        initial_query: The query the broader report is about.
        crumbs: Knowledge inputs interpolated into the prompt.

    Returns:
        An ``<iframe class="visual-frame">`` element whose ``srcdoc``
        attribute carries the generated HTML document.
    """
    # Local import so this block does not depend on the file-level imports.
    import html

    prompt = f"""
Generate a full htmal code (including css and javascript) code displaying a simple but effective and elegant graph based on the following requirements:
{placeholder_text}

It will be integrated in a broader report (focus on the D3.js graph though) about:
{initial_query}

// Sources:
Keep in mind the:
- context:
{context}
- the knowledge inputs
{crumbs}

// Requirements
- use elaborate but effective visual through call to the D3.js library.
- no introduction, conclusions or code fences -> Output the result directly

// Important
- Make the visuals content rich, there's no point having a visual if its content has no real value.
- It has to convey some relevant insights.
- Take a deep breath, think step by step and think it well.
- Use your judgement to decide between box plots, bubble charts, calendar view, chord diagrams, histograms, ...
- Your response should start with <html> and end with </html> - no intro before, no comments after.
"""

    result = openai_call(prompt, model="o3-mini", max_tokens_param=10000)

    # Drop a surrounding markdown fence if the model added one anyway,
    # including an optional language tag such as ```html.
    result = re.sub(r"^```[\w-]*\s*|\s*```$", "", result.strip()).strip()

    # The document lives inside a single-quoted attribute: escape it so that
    # quotes in the generated HTML/JS cannot terminate the attribute early.
    # HTML parsers decode the character references when reading srcdoc.
    document = html.escape(f"<!DOCTYPE html>\n{result}", quote=True)
    htmloutput = f"""<iframe class="visual-frame" srcdoc='
{document}
'></iframe>
"""

    logging.info(f"The code produced for this graph placeholder:\n{placeholder_text}\n\n {htmloutput}\n\n")
    return htmloutput
|
| 73 |
+
|
| 74 |
+
def replace_graph_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
    """Replace every ``[[Graph Placeholder n: ...]]`` marker with a generated graph.

    Args:
        report_html: Report text that may contain graph placeholders.
        context: Context text forwarded to the graph generator.
        initial_query: The query the report is about, forwarded to the generator.
        crumbs: Knowledge inputs forwarded to the generator.

    Returns:
        The report with each placeholder replaced by the generated snippet
        wrapped in ``Graph n Start`` / ``Graph n End`` comments, or by an
        error comment when generation of that graph raised.
    """
    # Group 1 is the placeholder number, group 2 the instructions.
    pattern = r"\[\[Graph Placeholder (\d+):(.*?)\]\]"

    def placeholder_replacer(match):
        placeholder_num = match.group(1)
        instructions = match.group(2).strip()
        logging.info(f"Generating graph {placeholder_num}")

        try:
            # Graph placeholders go through the graph generator, not the
            # mermaid one used for visual placeholders.
            graph_html = generate_graph_snippet(instructions, context, initial_query, crumbs)
            return f'<!-- Graph {placeholder_num} Start -->\n{graph_html}\n<!-- Graph {placeholder_num} End -->'
        except Exception as e:
            # Best-effort boundary: one failing graph must not abort the report.
            logging.error(f"Graph {placeholder_num} failed: {str(e)}")
            return f'<!-- ERROR GENERATING GRAPH {placeholder_num} -->'

    # DOTALL lets the instructions span several lines.
    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
| 92 |
|
| 93 |
+
|
| 94 |
def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 95 |
# remove special lines
|
| 96 |
def remove_special_lines(input_string):
|
|
|
|
| 250 |
#result = remove_special_lines(result)
|
| 251 |
#result = process_multiline_string(result)
|
| 252 |
|
| 253 |
+
htmloutput = f"""<iframe class="visual-frame" srcdoc='
|
| 254 |
<!DOCTYPE html>
|
| 255 |
<html>
|
| 256 |
<head>
|
|
|
|
| 301 |
|
| 302 |
return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
| 303 |
|
| 304 |
+
def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
    """Replace every ``[[Focus Placeholder n: ...]]`` marker with a generated focus box.

    Args:
        report_html: Report text that may contain focus placeholders.
        context: Context text forwarded to the focus generator.
        initial_query: The query the report is about, forwarded to the generator.
        crumbs: Knowledge inputs forwarded to the generator.

    Returns:
        The report with each placeholder replaced by the generated snippet
        framed by two dashed separator paragraphs and ``Focus n Start`` /
        ``Focus n End`` comments, or by an error comment when generation of
        that focus box raised.
    """
    # Group 1 is the placeholder number, group 2 the instructions.
    pattern = r"\[\[Focus Placeholder (\d+):(.*?)\]\]"
    separator = '<p style="text-align: center;">-----------------------------------------------------------------------</p>'

    def placeholder_replacer(match):
        placeholder_num = match.group(1)
        instructions = match.group(2).strip()
        logging.info(f"Generating focus box {placeholder_num}")
        try:
            focus_html = generate_focus_snippet(instructions, context, initial_query, crumbs)
            # Remove any outer html/head/body tags from the generated snippet.
            # The \b keeps tags that merely start with those names (<header>).
            focus_html = re.sub(
                r"<\/?(html|head|body)\b[^>]*>", "", focus_html, flags=re.DOTALL | re.IGNORECASE
            ).strip()
            return (
                f'<!-- Focus {placeholder_num} Start -->'
                f'{separator}'
                f'{focus_html}'
                f'{separator}'
                f'<!-- Focus {placeholder_num} End -->'
            )
        except Exception as e:
            # Best-effort boundary: one failing focus box must not abort the report.
            logging.error(f"Focus {placeholder_num} failed: {str(e)}")
            return f'<!-- ERROR GENERATING FOCUS {placeholder_num} -->'

    # DOTALL lets the instructions span several lines.
    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
|
| 326 |
+
|
| 327 |
def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
|
| 328 |
prompt = (f"""
|
| 329 |
Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
|
|
|
|
| 726 |
- 2 visual placeholders cannot be in the same or in 2 consecutive sections
|
| 727 |
Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
|
| 728 |
|
| 729 |
+
// Graph placeholders
|
| 730 |
+
- Create special graphe placeholders that will be rendered in d3.js afterwards based on your guidance:
|
| 731 |
+
[[Graph Placeholder n:
|
| 732 |
+
- Purpose of this graph is:...
|
| 733 |
+
- Relevant data to generate it:...
|
| 734 |
+
- Visual guidance:...
|
| 735 |
+
]]
|
| 736 |
+
with n as the reference number
|
| 737 |
+
Important: after [[ put "Graph Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
|
| 738 |
+
|
| 739 |
+
- All types of graphs from d3.js library can be generated // Take this into consideration when providing the instructions for the graph data
|
| 740 |
+
- do not make reference in the report to "graph placeholders" just mention graph.
|
| 741 |
+
- in the placeholder, no need to add the references to the source, but make sure ALL of the data points required has a source from the learning and reference material hereafter
|
| 742 |
+
- these placeholders text should contain:
|
| 743 |
+
o the purpose of the future graph
|
| 744 |
+
o the relevant data to generate it
|
| 745 |
+
o the guidance in terms of look&feel (ex: red colors, bar chart style)
|
| 746 |
+
note: Be specific if you want some particular color used, keep it consistent across the report.
|
| 747 |
+
- there should be at least {round(pages/4,0)} of these graphs placeholders within the report
|
| 748 |
+
- 2 graph placeholders cannot be in the same or in 2 consecutive sections
|
| 749 |
+
Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
|
| 750 |
+
|
| 751 |
// Focus placeholders
|
| 752 |
- To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
|
| 753 |
Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
|
|
|
|
| 797 |
// Report ending
|
| 798 |
- End the report with the following sequence:
|
| 799 |
|
| 800 |
+
<iframe class="visual-frame" srcdoc='
|
| 801 |
<!DOCTYPE html>
|
| 802 |
<html>
|
| 803 |
</head>
|
|
|
|
| 1106 |
return html_content
|
| 1107 |
|
| 1108 |
def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
|
| 1109 |
+
# Generate the full HTML report (including text, focus placeholders, and visuals as iframes)
|
| 1110 |
html_report = self.generate_report_html(solution_content)
|
| 1111 |
|
| 1112 |
# Add header
|
|
|
|
| 1123 |
# Parse the HTML
|
| 1124 |
logging.info(f"ReportGenerator: soup report generated:\n{soup}")
|
| 1125 |
|
| 1126 |
+
# Find all visual iframes, i.e. mermaid visuals and graphs (assumed to have class "visual-frame")
|
| 1127 |
+
visual_iframes = soup.find_all("iframe", class_="visual-frame")
|
| 1128 |
|
| 1129 |
+
if visual_iframes:
|
| 1130 |
# Set up Selenium with a window size and high DPI for better image resolution
|
| 1131 |
import base64, tempfile, time
|
| 1132 |
import chromedriver_autoinstaller
|
|
|
|
| 1144 |
service = Service(log_path=os.devnull)
|
| 1145 |
driver = webdriver.Chrome(service=service, options=options)
|
| 1146 |
|
| 1147 |
+
for iframe in visual_iframes:
|
| 1148 |
# Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
|
| 1149 |
srcdoc = iframe.get("srcdoc")
|
| 1150 |
if srcdoc:
|
|
|
|
| 1555 |
# --- NEW STEP: Post-process final_report to replace visual, focus and graph placeholders ---
|
| 1556 |
final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
|
| 1557 |
final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
|
| 1558 |
+
final_report = replace_graph_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
|
| 1559 |
|
| 1560 |
alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
|
| 1561 |
final_report = final_report.replace(
|