Guiyom commited on
Commit
36989f3
·
verified ·
1 Parent(s): 77ddce7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -23
app.py CHANGED
@@ -33,29 +33,64 @@ TOTAL_SUMMARIZED_WORDS = 0
33
  # Helper functions for external APIs and PDF Processing
34
  # =============================================================================
35
 
36
- def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
37
- pattern = r"\[\[Focus Placeholder (\d+):(.*?)\]\]" # Capture placeholder number
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def placeholder_replacer(match):
39
  placeholder_num = match.group(1)
40
  instructions = match.group(2).strip()
41
- logging.info(f"Generating focus box {placeholder_num}")
 
42
  try:
43
- focus_html = generate_focus_snippet(instructions, context, initial_query, crumbs)
44
- # Remove any outer HTML or body tags from the generated snippet:
45
- focus_html = re.sub(r"<\/?(html|head|body)[^>]*>", "", focus_html, flags=re.DOTALL|re.IGNORECASE).strip()
46
- # Now wrap in a single container with reduced font size:
47
- return (
48
- f'<!-- Focus {placeholder_num} Start -->'
49
- f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
50
- f'{focus_html}'
51
- f'<p style="text-align: center;">-----------------------------------------------------------------------</p>'
52
- f'<!-- Focus {placeholder_num} End -->'
53
- )
54
  except Exception as e:
55
- logging.error(f"Focus {placeholder_num} failed: {str(e)}")
56
- return f'<!-- ERROR GENERATING FOCUS {placeholder_num} -->'
 
57
  return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
58
 
 
59
  def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
60
  # remove special lines
61
  def remove_special_lines(input_string):
@@ -215,7 +250,7 @@ mindmap
215
  #result = remove_special_lines(result)
216
  #result = process_multiline_string(result)
217
 
218
- htmloutput = f"""<iframe class="mermaid-frame" srcdoc='
219
  <!DOCTYPE html>
220
  <html>
221
  <head>
@@ -266,6 +301,29 @@ def replace_visual_placeholders(report_html: str, context: str, initial_query: s
266
 
267
  return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
270
  prompt = (f"""
271
  Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
@@ -668,6 +726,28 @@ Important: after [[ put "Visual Placeholder n:" explicitly (with n as the ref nu
668
  - 2 visual placeholders cannot be in the same or in 2 consecutive sections
669
  Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
  // Focus placeholders
672
  - To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
673
  Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
@@ -717,7 +797,7 @@ Output the report directly without any introductory meta comments.
717
  // Report ending
718
  - End the report with the following sequence:
719
 
720
- <iframe class="mermaid-frame" srcdoc='
721
  <!DOCTYPE html>
722
  <html>
723
  </head>
@@ -1026,7 +1106,7 @@ class ReportGenerator:
1026
  return html_content
1027
 
1028
  def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
1029
- # Generate the full HTML report (including text, focus placeholders, and mermaid visuals as iframes)
1030
  html_report = self.generate_report_html(solution_content)
1031
 
1032
  # Add header
@@ -1043,10 +1123,10 @@ class ReportGenerator:
1043
  # Parse the HTML
1044
  logging.info(f"ReportGenerator: soup report generated:\n{soup}")
1045
 
1046
- # Find all mermaid visual iframes (assumed to have class "mermaid-frame")
1047
- mermaid_iframes = soup.find_all("iframe", class_="mermaid-frame")
1048
 
1049
- if mermaid_iframes:
1050
  # Set up Selenium with a window size and high DPI for better image resolution
1051
  import base64, tempfile, time
1052
  import chromedriver_autoinstaller
@@ -1064,7 +1144,7 @@ class ReportGenerator:
1064
  service = Service(log_path=os.devnull)
1065
  driver = webdriver.Chrome(service=service, options=options)
1066
 
1067
- for iframe in mermaid_iframes:
1068
  # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
1069
  srcdoc = iframe.get("srcdoc")
1070
  if srcdoc:
@@ -1475,6 +1555,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1475
  # --- NEW STEP: Post-process final_report to replace visual and focus placeholders ---
1476
  final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
1477
  final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
 
1478
 
1479
  alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
1480
  final_report = final_report.replace(
 
33
  # Helper functions for external APIs and PDF Processing
34
  # =============================================================================
35
 
36
def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
    """Generate a D3.js graph page via the LLM and wrap it in a ``visual-frame`` iframe.

    Args:
        placeholder_text: Instructions captured from a ``[[Graph Placeholder n: ...]]`` block.
        context: Research context injected into the prompt.
        initial_query: The query the surrounding report is about.
        crumbs: Knowledge inputs injected into the prompt.

    Returns:
        An ``<iframe class="visual-frame" srcdoc='...'>`` snippet whose ``srcdoc``
        holds the generated HTML document, HTML-escaped so the attribute stays
        well-formed.

    Raises:
        Whatever ``openai_call`` raises; callers are expected to handle it.
    """
    # Local import so this block does not depend on the file's import header.
    import html

    prompt = f"""
Generate a full HTML code (including css and javascript) displaying a simple but effective and elegant graph based on the following requirements:
{placeholder_text}

It will be integrated in a broader report (focus on the D3.js graph though) about:
{initial_query}

// Sources:
Keep in mind the:
- context:
{context}
- the knowledge inputs
{crumbs}

// Requirements
- use elaborate but effective visual through call to the D3.js library.
- no introduction, conclusions or code fences -> Output the result directly

// Important
- Make the visuals content rich, there's no point having a visual if its content has no real value.
- It has to convey some relevant insights.
- Take a deep breath, think step by step and think it well.
- Use your judgement to decide between box plots, bubble charts, calendar view, chord diagrams, histograms, ...
- Your response should start with <html> and end with </html> - no intro before, no comments after.
"""

    result = openai_call(prompt, model="o3-mini", max_tokens_param=10000)

    # Drop a surrounding Markdown code fence if the model added one anyway.
    # Stripping bare backtick characters would leave the language tag
    # (e.g. "html") glued to the front of the document.
    result = result.strip()
    result = re.sub(r"^```[\w-]*\s*", "", result)
    result = re.sub(r"\s*```$", "", result).strip()

    # The document lives inside a single-quoted attribute, so quotes, angle
    # brackets and ampersands must be escaped; HTML parsers decode the entities
    # again when the srcdoc value is read. The attribute and the opening tag are
    # closed explicitly before </iframe>.
    htmloutput = (
        "<iframe class=\"visual-frame\" srcdoc='\n"
        "&lt;!DOCTYPE html&gt;\n"
        f"{html.escape(result, quote=True)}\n"
        "'></iframe>\n"
    )

    logging.info(f"The code produced for this graph placeholder:\n{placeholder_text}\n\n {htmloutput}\n\n")
    return htmloutput
73
+
74
def replace_graph_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
    """Replace every ``[[Graph Placeholder n: ...]]`` block with a generated graph.

    Args:
        report_html: Report text that may contain graph placeholders.
        context: Research context forwarded to the graph generator.
        initial_query: The query the report is about, forwarded to the generator.
        crumbs: Knowledge inputs forwarded to the generator.

    Returns:
        ``report_html`` with each placeholder replaced by the generated snippet
        wrapped in ``<!-- Graph n Start -->`` / ``<!-- Graph n End -->`` comments,
        or by an ``<!-- ERROR GENERATING GRAPH n -->`` comment when generation
        fails. Text without placeholders is returned unchanged.
    """
    # Group 1 is the placeholder number, group 2 the free-form instructions.
    pattern = r"\[\[Graph Placeholder (\d+):(.*?)\]\]"

    def placeholder_replacer(match):
        placeholder_num = match.group(1)
        instructions = match.group(2).strip()
        logging.info(f"Generating graph {placeholder_num}")

        try:
            # Graph placeholders must go through the D3 graph generator,
            # not the mermaid one used for visual placeholders.
            graph_html = generate_graph_snippet(instructions, context, initial_query, crumbs)
        except Exception as e:
            # Best effort: one failed graph must not abort the whole report.
            logging.error(f"Graph {placeholder_num} failed: {str(e)}")
            return f'<!-- ERROR GENERATING GRAPH {placeholder_num} -->'

        return f'<!-- Graph {placeholder_num} Start -->\n{graph_html}\n<!-- Graph {placeholder_num} End -->'

    # DOTALL lets the instructions span several lines.
    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
92
 
93
+
94
  def generate_visual_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
95
  # remove special lines
96
  def remove_special_lines(input_string):
 
250
  #result = remove_special_lines(result)
251
  #result = process_multiline_string(result)
252
 
253
+ htmloutput = f"""<iframe class="visual-frame" srcdoc='
254
  <!DOCTYPE html>
255
  <html>
256
  <head>
 
301
 
302
  return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
303
 
304
def replace_focus_placeholders(report_html: str, context: str, initial_query: str, crumbs: str) -> str:
    """Replace every ``[[Focus Placeholder n: ...]]`` block with a generated focus box.

    Args:
        report_html: Report text that may contain focus placeholders.
        context: Research context forwarded to the focus generator.
        initial_query: The query the report is about, forwarded to the generator.
        crumbs: Knowledge inputs forwarded to the generator.

    Returns:
        ``report_html`` with each placeholder replaced by the generated snippet,
        framed by dashed separator paragraphs and ``<!-- Focus n Start/End -->``
        comments, or by an ``<!-- ERROR GENERATING FOCUS n -->`` comment when
        generation fails. Text without placeholders is returned unchanged.
    """
    # Group 1 is the placeholder number, group 2 the free-form instructions.
    pattern = r"\[\[Focus Placeholder (\d+):(.*?)\]\]"
    separator = (
        '<p style="text-align: center;">'
        '-----------------------------------------------------------------------'
        '</p>'
    )

    def placeholder_replacer(match):
        placeholder_num = match.group(1)
        instructions = match.group(2).strip()
        logging.info(f"Generating focus box {placeholder_num}")
        try:
            focus_html = generate_focus_snippet(instructions, context, initial_query, crumbs)
            # Remove any outer html/head/body tags from the generated snippet.
            # The \b keeps longer tag names such as <header> from being matched
            # by the "head" alternative and silently deleted.
            focus_html = re.sub(
                r"</?(?:html|head|body)\b[^>]*>",
                "",
                focus_html,
                flags=re.DOTALL | re.IGNORECASE,
            ).strip()
            return (
                f'<!-- Focus {placeholder_num} Start -->'
                f'{separator}'
                f'{focus_html}'
                f'{separator}'
                f'<!-- Focus {placeholder_num} End -->'
            )
        except Exception as e:
            # Best effort: one failed focus box must not abort the whole report.
            logging.error(f"Focus {placeholder_num} failed: {str(e)}")
            return f'<!-- ERROR GENERATING FOCUS {placeholder_num} -->'

    # DOTALL lets the instructions span several lines.
    return re.sub(pattern, placeholder_replacer, report_html, flags=re.DOTALL)
326
+
327
  def generate_focus_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
328
  prompt = (f"""
329
  Generate a complete, self-contained inner-HTML code within the core tags - discard the <html><head><body> opening and closing tags
 
726
  - 2 visual placeholders cannot be in the same or in 2 consecutive sections
727
  Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
728
 
729
+ // Graph placeholders
730
+ - Create special graphe placeholders that will be rendered in d3.js afterwards based on your guidance:
731
+ [[Graph Placeholder n:
732
+ - Purpose of this graph is:...
733
+ - Relevant data to generate it:...
734
+ - Visual guidance:...
735
+ ]]
736
+ with n as the reference number
737
+ Important: after [[ put "Graph Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
738
+
739
+ - All types of graphs from d3.js library can be generated // Take this into consideration when providing the instructions for the graph data
740
+ - do not make reference in the report to "graph placeholders" just mention graph.
741
+ - in the placeholder, no need to add the references to the source, but make sure ALL of the data points required has a source from the learning and reference material hereafter
742
+ - these placeholders text should contain:
743
+ o the purpose of the future graph
744
+ o the relevant data to generate it
745
+ o the guidance in terms of look&feel (ex: red colors, bar chart style)
746
+ note: Be specific if you want some particular color used, keep it consistent across the report.
747
+ - there should be at least {round(pages/4,0)} of these graphs placeholders within the report
748
+ - 2 graph placeholders cannot be in the same or in 2 consecutive sections
749
+ Note: the placeholders will then be processed separtely by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
750
+
751
  // Focus placeholders
752
  - To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
753
  Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
 
797
  // Report ending
798
  - End the report with the following sequence:
799
 
800
+ <iframe class="visual-frame" srcdoc='
801
  <!DOCTYPE html>
802
  <html>
803
  </head>
 
1106
  return html_content
1107
 
1108
  def generate_report_pdf(self, solution_content: str, metadata: dict = None) -> bytes:
1109
+ # Generate the full HTML report (including text, focus placeholders, and visuals as iframes)
1110
  html_report = self.generate_report_html(solution_content)
1111
 
1112
  # Add header
 
1123
  # Parse the HTML
1124
  logging.info(f"ReportGenerator: soup report generated:\n{soup}")
1125
 
1126
+ # Find all visual iframes - mermaid diagrams and D3 graphs (assumed to have class "visual-frame")
1127
+ visual_iframes = soup.find_all("iframe", class_="visual-frame")
1128
 
1129
+ if visual_iframes:
1130
  # Set up Selenium with a window size and high DPI for better image resolution
1131
  import base64, tempfile, time
1132
  import chromedriver_autoinstaller
 
1144
  service = Service(log_path=os.devnull)
1145
  driver = webdriver.Chrome(service=service, options=options)
1146
 
1147
+ for iframe in visual_iframes:
1148
  # Assume the iframe has its content in srcdoc (as generated in generate_visual_snippet)
1149
  srcdoc = iframe.get("srcdoc")
1150
  if srcdoc:
 
1555
  # --- NEW STEP: Post-process final_report to replace visual, focus and graph placeholders ---
1556
  final_report = replace_visual_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
1557
  final_report = replace_focus_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
1558
+ final_report = replace_graph_placeholders(final_report, combined_context, initial_query, aggregated_crumbs)
1559
 
1560
  alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
1561
  final_report = final_report.replace(