Guiyom committed on
Commit
3e33744
·
verified ·
1 Parent(s): 26abdea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -227
app.py CHANGED
@@ -1718,247 +1718,267 @@ def compress_text(text: str, target_length: int) -> str:
1718
 
1719
  def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
1720
  aggregated_crumbs: str, references: list, pages: int = 8) -> str:
1721
- fallback_text = ""
1722
- if not learnings:
1723
- fallback_text = "No external summaries were directly extracted. It is not possible to analyze relevance."
1724
- combined_learnings = "\n".join(learnings) if learnings else fallback_text
1725
- word_count = pages * 500
1726
- prompt = (f"""
1727
- Produce a comprehensive report in html format.
1728
- The report should be very detailed and lengthy.
1729
-
1730
- // Requirements
1731
- - All text alignment has to be on the left
1732
- - The report should be {pages} long or {word_count} words (excluding html formatting)
1733
- - It must include inline citations (e.g., [1], [2], etc.) from real sources provided in the search results below
1734
- Note: citations sources in-line need to be in this format: blablabla - Source [x] / "pdf" is not a source, provide the title or author
1735
- - No more than 10 sentences per div blocks, skip lines and add line breaks when changing topic.
1736
- - The report must include between {round(pages/10,0)} and {round(pages/5,0)} tables from the sources used (add citations if necessary) and use facts and figures extensively to ground the analysis.
1737
- - For the numbering of titles or numbered lists, use numbers (ex: 1.) and sub-units (1.1, 1.2... 1.1.1...,1.1.2...).
1738
- Note: Exclude the use of html numbered lists format, they don't get correctly implemented. Use plain text format for numbering of sections and sub-sections
1739
- - Do not put a numbered list (ex: 1.1, ...) for every sentences! It should be used parcimoniously for real sub-sections.
1740
- - Put paragraphs, sentences that are part of the same section in a div tag, this will be used for formatting.
1741
- - Add on top of the report the report title (with the <h1> tag) - this is the only part that should be centered (in-line style)
1742
- - Titles for sections and sub-sections should systematically use the tags:
1743
- <h1> for sections (ex: <h1>3. Examination of State-of-the-Art of AI</h1>)
1744
- <h2> for sub-sections (ex: <h2>3.2 AI Performance in Mathematics</h2>)
1745
- <h3> for sub-sub-sections (ex: <h3>3.2.1 Illustration with math conjecture demonstration</h3>)
1746
- <h4> for bulletpoint title (ex: <h4>item to detail:</h4> description of the item to detail ...)
1747
- - Use inline formatting for the tables with homogeneous border and colors
1748
- - Avoid Chinese characters in the output (use the Pinyin version) since they won't display correcly in the pdf (black boxes)
1749
- - For the Table of contents: do not mention the pages, but make each item on separate line
1750
- - Put "Table of contents" and "Abstract" title in h1 format.
1751
- - The Table of contents should skip the abstract and table of contents, the numbering should start from the introduction and end with References Summary Table
1752
- - Exceptionally - for sections requiring specific improvements - put it between <div class="improvable-chunk">...</div> (but don't mention it in the report, this will be managed through post-processing)
1753
-
1754
- // Reference citations
1755
- - The name of the reference table should be: "Reference Summary Table"
1756
- - The reference table at the end containing the citations details should have 4 columns: the ref number, the title of the document, the author(s, the URL - with hyperlink)
1757
- - The report MUST include a reference summary table with between 10 (for a 8 page report) and 30 references (for a 40 pages report). All inline citations (e.g., [1], [2], …) present in the report and in any focus placeholders MUST have a corresponding entry in this table with its full URL.
1758
- - For the reference citations, add systematically the urls from the Learnings (no need to put them in numbered list format since we alredy have the [x] that serves as number list)
1759
- - Do not add any inline citations reference in the visual and graph placeholders descriptions belo, you can add them in focus though.
1760
- - Do not make false references / citations. It has to be grounded from the sources in the rsearch results / crumbs below (no example.com/... type references!)
1761
- - The references / citations should be only coming from the most reputable sources amongst all the Learnings and Results from searches below
1762
- - The table generated should have in-line styling to have word-wrap and 100% width
1763
-
1764
- // Instructions:
1765
- 1. Integrate numbers from the sources but always mention the source
1766
- 2. Whenever you mention a figure or quote, add an inline reference [x] matching its source from the references.
1767
- 3. Again, Specifically name relevant organizations, tools, project names, and people encountered in the crumbs or learnings.
1768
- Note: This is for academic purposes, so thorough citations and referencing are essential.
1769
- 4. Focus on reputable sources that will not be disputed (generally social media posts cannot be an opposable sources, but some of them may mention reputable sources)
1770
- Note: put the full reference url (no generic domain address), down to the html page or the pdf
1771
-
1772
-
1773
- // Style
1774
- The report must follow this writing style {reportstyle}.
1775
-
1776
- // Format when mentioning sources, organisations and individuals
1777
- - We will perform a post-processing on the output
1778
- - For this reasons use this format for any specific name, organisation or project: {{[{{name}}]}}
1779
- example 1: {{[{{Organisation}}]}}'s CEO, {{[{{CEO name}}]}} ...
1780
- example 2: in a report from the {{[{{University name}}]}} titled "{{[{{report title}}]}}"...
1781
- example 3: the CEO of {{[{{Company name}}]}} , {{[{{Name}}]}}, said that "the best way to..."
1782
- eexample 4: the project {{[{{project name}}]}}, anounced by {{[{{...}}]}} in collaboration with {{[{{...}}]}}
1783
- example 5: Mr. {{[{{person}}]}}, Marketing director in {{[{{company}}]}}, mentioned that ...
1784
- Note: the output will be processed through regex and the identifiers removed, but this way we can keep track of all sources and citations without disclosing them.
1785
- - This should apply to names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
1786
- - You should have approximately {2 * pages} mention of organisations, people, projects or people, use the prescribed format
1787
- - The same item cannot be mentioned more than 3 times, don't over do it
1788
- - Do not mix sources that are not directly related in the search results, don't put together organisations or people that have nothing to do with each other
1789
- - DO NOT MENTION this formmatting requirement, just apply it. The user doesn't have to know about this technicality.
1790
- Note: LinkedIn is not a relevant source - if you want to use a source related to LinkedIn, you should check the author of the page visited, this is the real source, mention the name of the author as "'authorName' from LinkedIn Pulse"
1791
-
1792
- // Sources
1793
- Use the following learnings and merged reference details from a deep research process on:
1794
- '{initial_query}'
1795
-
1796
- Taking also into consideration the context:
1797
- {context}
1798
-
1799
- --------------- Placeholders -----------
1800
- In order to enrich the content, within the core sections (between introduction and conclusion), you can inject some placeholders that will be developped later on.
1801
- There are 3 types: visual, graphs, focus - each with their own purpose
1802
-
1803
- // Visual placeholders
1804
- - Create special visual placeholders that will be rendered in mermaid afterwards.
1805
- - The Visual placeholders should follow this format:
1806
-
1807
- Source:source_name [y]
1808
- [[Visual Placeholder n:
1809
- - Purpose of this visual is:...
1810
- - Relevant content to generate it:
1811
- o ex: arguments
1812
- o ex: lists of elements
1813
- o ex: data points
1814
- o ...
1815
- - Message to convey: ...
1816
- ]]
1817
-
1818
- with:
1819
- - n as the reference number,
1820
- - source_name as the full name of the main source used and
1821
- - y as the number ref of the source reference in the reference table.
1822
- Important note for visual placeholders:
1823
- - on the line before [[...]] mention the source with the reference number [x] in the form: ""Source: abc [n]" - only one source should be mentioned
1824
- - after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the placeholder box created). This will be used in a regex
1825
- - the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
1826
- - do not make mention in the report to "visual placeholders" just mention the visual and the number..
1827
- - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
1828
- - these placeholders text should contain:
1829
- o the purpose of the future visual
1830
- o the relevant data to generate it
1831
- - there should be between {round(pages/10,0)} and {round(pages/5,0)} of these visuals placeholders within the report (all between introduction and conclusion)
1832
- - 2 visual placeholders cannot be in the same section
1833
- Note: the placeholders will then be processed separately by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
1834
-
1835
- // Graph placeholders
1836
- - Create special graph placeholders that will be rendered in d3.js afterwards based on your guidance:
1837
-
1838
- Source:source_name [y]
1839
- [[Graph Placeholder n:
1840
- - Purpose of this graph is:...
1841
- - Relevant numbers to generate it:
1842
- table format
1843
- - Message to convey: ...
1844
- ]]
1845
-
1846
- with:
1847
- - n as the reference number,
1848
- - source_name as the full name of the main source used and
1849
- - y as the source reference in the reference table.
1850
- - the table containing all the required data has to include data points FROM the learnings / results from the search below
1851
- Important note for graph placeholders:
1852
- - on the line before [[...]] mention the source with the reference number [x] in the form: ""Source: abc [n]" - only one source should be mentioned
1853
- - use p tag for the source and source reference number
1854
- - after [[ put "Graph Placeholder n:" explicitly (with n as the ref number of the graph created). This will be used in a regex
1855
- - Do not make things up - every data points have to be from a real source
1856
- - All types of graphs (using d3.js library) can be generated // Take this into consideration when providing the instructions for the graph data
1857
- - do not make mention in the report to "graph placeholders" just mention graph.
1858
- - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
1859
- - these placeholders text should contain:
1860
- o the purpose of the future graph
1861
- o the relevant data to generate it
1862
- - there should be between {round(pages/10,0)} and {round(pages/5,0)} of these graphs placeholders within the report (all between introduction and conclusion)
1863
- - 2 graph placeholders cannot be in the same section
1864
- Note: the placeholders will then be processed separately by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
1865
-
1866
- // Focus placeholders
1867
- - To drill down on specific topic that would be deserve to be developped extensively separately, create special focus placeholders in [[...]] double backets
1868
- Note: outside of the placeholder, do not make reference in the report to "focus placeholders" just mention the "Focus box n".
1869
- - in the Focus placeholder, make a mention to the prescribed sources used (no need to add the source before or after the placeholder)
1870
- - do not make the placeholder on the exact same topic as the section or the sub-section where it is positioned, it has to be either:
1871
- o a special case that deserves attention
1872
- o a recent development / innovation
1873
- o a theoretical drill-down
1874
- o a contrarian point of view / objection
1875
- - these placeholders text should contain:
1876
- o the purpose of the focus box
1877
- o the relevant data to generate it
1878
- o the guidance in terms of style and message to convey
1879
- Note: Be specific if you want some particular point developped, keep it consistent across the report.
1880
- - there should be between {round(pages/20,0)} and {round(pages/10,0)} of these focus placeholders within the report (all between introduction and conclusion)
1881
- - 2 focus placeholders cannot be in the same section and should be a few pages apart in the report
1882
- - Mention all the sources that should be used to generate this focus placeholder and list also the references that will be mentioned in the References section later (ex: [1], [2])
1883
- Note: the Focus placeholders will then be processed separately by a llm to generate the specific code to display each of them so the instruction need to be clear enough.
1884
-
1885
- // Format:
1886
- [[Focus Placeholder n:
1887
- - Topic of this focus:...
1888
- - Relevant info to generate it:...
1889
- - Specific angle of the focus placeholder:...
1890
- - Key elements to mention:
1891
- o ...
1892
- o ...
1893
- ...
1894
- ]]
1895
-
1896
- with:
1897
- - n as the reference number,
1898
-
1899
- Important note for focus placeholders:
1900
- - after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
1901
- - Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
1902
 
1903
- // Report ending required
1904
- End the report with the following sequence:
 
 
 
1905
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1906
  <iframe class="visual-frame" srcdoc='
1907
  <!DOCTYPE html>
1908
  <html>
1909
- </head>
1910
  <body>
1911
  <div>
1912
  -end-
1913
  </div>
1914
  </body>
1915
  </html>' width="100px" height="15px" style="border:none;"></iframe>
1916
-
1917
- Then close the html code from the broader report
1918
- </body>
1919
  </html>
 
1920
 
1921
- // Structure the overall report as follows:
1922
-
1923
- {{Do not add anything before - no introductory meta comment or content}}
1924
-
1925
- - Abstract
1926
- - Table of contents
1927
- - Introduction
1928
- - [Sections and sub-sections, depending on the size and relevant topic - including visual, graph and focus placeholders]
1929
- - Conclusion
1930
- - References summary table
1931
- - Report ending formatting (as mentioned before)
1932
-
1933
- {{Do not add anything after - no conclusive meta comment or content}}
1934
-
1935
- Important note: placeholders (visual, graph or focus) can only appear in the sections or sub-sections not in introduction, the conclusion, the references or after the references
1936
-
1937
- // Material to use to ground your report:
1938
- - Learnings:
1939
- {json.dumps(learnings, indent=2)}
1940
-
1941
- - Results from searches:
1942
- {aggregated_crumbs}
1943
 
1944
- Take a deep breath, do your best.
1945
- Now, produce the report please.
1946
- """
1947
- )
1948
- tokentarget = word_count * 5 # rough multiplier for token target
1949
- report = openai_call(prompt, model="o3-mini", max_tokens_param=tokentarget)
1950
- # Post-processing
1951
- report = re.sub(r'\{\[\{(.*?)\}\]\}', r'\1', report)
1952
- report = re.sub(r'\[\{(.*?)\}\]', r'\1', report)
1953
-
1954
- # If the report is too long, compress it.
1955
- if len(report) > MAX_MESSAGE_LENGTH:
1956
- report = compress_text(report, MAX_MESSAGE_LENGTH)
1957
- if report.startswith("Error calling OpenAI API"):
1958
- logging.error(f"generate_final_report error: {report}")
1959
- return f"Error generating report: {report}"
1960
- logging.info("generate_final_report: Report generated successfully.")
1961
- return report
1962
 
1963
  def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
1964
  # Filter out already seen results
 
1718
 
1719
  def generate_final_report(initial_query: str, context: str, reportstyle: str, learnings: list, visited_urls: list,
1720
  aggregated_crumbs: str, references: list, pages: int = 8) -> str:
1721
+ """
1722
+ Revised generate_final_report with placeholder allocation decisions in the initial JSON skeleton.
1723
+
1724
+ The function proceeds as follows:
1725
+
1726
+ 1. **Skeleton Generation:**
1727
+ It first builds a JSON skeleton outline for the report. For core sections,
1728
+ in addition to "section_name", "instructions", "target_word_count", and "key_content_elements",
1729
+ an extra field "placeholders" is generated. This field is an object with boolean values
1730
+ indicating whether to include a visual, graph, and/or focus placeholder.
1731
+
1732
+ The prompt instructs the LLM that, overall, the report should have:
1733
+ - Visual placeholders between ⌊pages/10⌋ and ⌈pages/5⌉ in total.
1734
+ - Graph placeholders in the same range as visual.
1735
+ - Focus placeholders between ⌊pages/20⌋ and ⌈pages/10⌉ in total.
1736
+ Not every section need have every placeholder.
1737
+
1738
+ 2. **Section Generation:**
1739
+ Each core section is generated using a detailed prompt that incorporates the respective
1740
+ placeholder decisions along with context, initial query, report style, learnings, aggregated crumbs,
1741
+ and previously generated sections.
1742
+
1743
+ 3. **Final Sections Generation and Assembly:**
1744
+ The final sections (Introduction, Abstract, Conclusion, Reference Summary Table) are generated afterward
1745
+ and a Table of Contents is created from all section titles. Finally, all parts are assembled into a complete
1746
+ HTML document.
1747
+
1748
+ 4. **Placeholder Replacement:**
1749
+ Once the HTML report is assembled, the placeholder markers (e.g. [[Visual Placeholder: …]]) are replaced
1750
+ via the dedicated replacement functions.
1751
+ """
1752
+ import json, logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1753
 
1754
+ # Calculate overall target word count (approximate)
1755
+ total_word_count = pages * 500
1756
+ combined_learnings = "\n".join(learnings) if learnings else (
1757
+ "No external summaries were directly extracted. It is not possible to analyze relevance."
1758
+ )
1759
 
1760
+ # Step 1: Generate the JSON skeleton outline with explicit placeholder allocation decisions.
1761
+ prompt_skeleton = f"""
1762
+ You are a master technical editor.
1763
+ Produce a detailed JSON skeleton outline for a comprehensive academic research report titled "{initial_query}".
1764
+ The overall report should be approximately {total_word_count} words long.
1765
+ Divide the report into two groups:
1766
+
1767
+ 1. "core_sections": These are the main content sections that address key sub-topics drawn from the context, research learnings, and search results.
1768
+ - There should be between 4 and 6 core sections. Their combined target word count should be about 70% of the total (approximately {int(0.7 * total_word_count)} words).
1769
+ - For each core section, provide:
1770
+ • "section_name": A concise title.
1771
+ • "instructions": Detailed guidelines on which sub-topics, facts, and arguments to cover.
1772
+ • "target_word_count": An approximate desired word count for that section.
1773
+ • "key_content_elements": An array of 3 to 5 bullet points that must be mentioned.
1774
+ • "placeholders": An object indicating which placeholder types to include.
1775
+ - Include "visual": true or false.
1776
+ - Include "graph": true or false.
1777
+ - Include "focus": true or false.
1778
+ **Overall guidance**: Across all core sections, the total number of visual placeholders should be between ⌊{pages}/10⌋ and ⌈{pages}/5⌉, graph placeholders should follow the same rule, and focus placeholders should appear between ⌊{pages}/20⌋ and ⌈{pages}/10⌉. Decide per section which placeholder(s) to activate, ensuring that not every section receives all three.
1779
+
1780
+ 2. "final_sections": These sections frame the report and include:
1781
+ - "Introduction"
1782
+ - "Abstract"
1783
+ - "Conclusion"
1784
+ - "Reference Summary Table"
1785
+ The combined target word count for final sections should be about 30% of the total (approximately {int(0.3 * total_word_count)} words),
1786
+ distributed evenly among them.
1787
+
1788
+ Return only valid JSON with two keys: "core_sections" and "final_sections", with no additional commentary.
1789
+ """
1790
+ skeleton_response = openai_call(
1791
+ prompt=prompt_skeleton,
1792
+ model="o3-mini",
1793
+ max_tokens_param=1500,
1794
+ temperature=0
1795
+ )
1796
+ try:
1797
+ skeleton = json.loads(skeleton_response)
1798
+ except Exception as e:
1799
+ logging.error(f"Error parsing skeleton JSON: {e}")
1800
+ skeleton = {"core_sections": [], "final_sections": []}
1801
+
1802
+ # Step 2: Generate content for each core section sequentially.
1803
+ generated_core_sections = {}
1804
+ previous_sections_content = ""
1805
+ if "core_sections" in skeleton:
1806
+ for section in skeleton["core_sections"]:
1807
+ section_name = section.get("section_name", "Untitled Section")
1808
+ instructions = section.get("instructions", "")
1809
+ target_wc = section.get("target_word_count", 500)
1810
+ key_elements = section.get("key_content_elements", [])
1811
+ placeholders = section.get("placeholders", {})
1812
+ # Build a placeholder directive based on the allocated booleans.
1813
+ placeholder_directive = ""
1814
+ if placeholders.get("visual", False):
1815
+ placeholder_directive += "[[Visual Placeholder: Insert one visual here.]]\n"
1816
+ if placeholders.get("graph", False):
1817
+ placeholder_directive += "[[Graph Placeholder: Insert one graph here.]]\n"
1818
+ if placeholders.get("focus", False):
1819
+ placeholder_directive += "[[Focus Placeholder: Insert one focus box here if deeper analysis is needed.]]\n"
1820
+
1821
+ prompt_section = f"""
1822
+ You are an expert technical editor.
1823
+ Generate detailed HTML content for the research report section titled "{section_name}".
1824
+ Instructions: {instructions}
1825
+ Target word count: Approximately {target_wc} words.
1826
+ Key content elements to include: {", ".join(key_elements)}.
1827
+ Additionally, please embed the following placeholder directives exactly where appropriate in the content:
1828
+ {placeholder_directive if placeholder_directive else "No placeholders required for this section."}
1829
+ Context: {context}
1830
+ Initial Query: {initial_query}
1831
+ Report Style: {reportstyle}
1832
+ Learnings: {combined_learnings}
1833
+ Aggregated Search Results: {aggregated_crumbs}
1834
+ Previously generated sections (if any): {previous_sections_content}
1835
+
1836
+ Return only the HTML content for this section (do not include outer <html> or <body> tags).
1837
+ """
1838
+ section_content = openai_call(
1839
+ prompt=prompt_section,
1840
+ model="o3-mini",
1841
+ max_tokens_param=target_wc * 5,
1842
+ temperature=0
1843
+ )
1844
+ section_content = section_content.strip()
1845
+ generated_core_sections[section_name] = section_content
1846
+ # Accumulate current section's content into a shared context for continuity.
1847
+ previous_sections_content += f"\n<!-- {section_name} -->\n" + section_content
1848
+
1849
+ # Step 3: Generate final sections (Introduction, Abstract, Conclusion, Reference Summary Table).
1850
+ generated_final_sections = {}
1851
+ if "final_sections" in skeleton:
1852
+ for section in skeleton["final_sections"]:
1853
+ section_name = section.get("section_name", "Untitled Final Section")
1854
+ instructions = section.get("instructions", "")
1855
+ target_wc = section.get("target_word_count", 500)
1856
+ prompt_final = f"""
1857
+ You are a master technical editor.
1858
+ Generate detailed HTML content for the final section titled "{section_name}".
1859
+ Instructions: {instructions}
1860
+ Target word count: Approximately {target_wc} words.
1861
+ Context: {context}
1862
+ Initial Query: {initial_query}
1863
+ Report Style: {reportstyle}
1864
+ Learnings: {combined_learnings}
1865
+ Aggregated Search Results: {aggregated_crumbs}
1866
+ Previously generated core sections: {previous_sections_content}
1867
+
1868
+ Return only the HTML content for this section (do not include outer <html> or <body> tags).
1869
+ """
1870
+ final_section_content = openai_call(
1871
+ prompt=prompt_final,
1872
+ model="o3-mini",
1873
+ max_tokens_param=target_wc * 5,
1874
+ temperature=0
1875
+ )
1876
+ final_section_content = final_section_content.strip()
1877
+ generated_final_sections[section_name] = final_section_content
1878
+ previous_sections_content += f"\n<!-- {section_name} -->\n" + final_section_content
1879
+
1880
+ # Step 4: Generate a Table of Contents from the section titles.
1881
+ toc_titles = []
1882
+ for section in skeleton.get("core_sections", []):
1883
+ if "section_name" in section:
1884
+ toc_titles.append(section["section_name"])
1885
+ for section in skeleton.get("final_sections", []):
1886
+ if "section_name" in section:
1887
+ toc_titles.append(section["section_name"])
1888
+ prompt_toc = f"""
1889
+ You are a technical editor.
1890
+ Based on the following list of section titles: {', '.join(toc_titles)},
1891
+ generate a concise HTML snippet for a Table of Contents,
1892
+ with each item appearing on a separate line and numbered (e.g., "1. Section Title", "2. Section Title", etc.).
1893
+ Return only the HTML snippet for the Table of Contents without additional commentary.
1894
+ """
1895
+ toc_html = openai_call(
1896
+ prompt=prompt_toc,
1897
+ model="o3-mini",
1898
+ max_tokens_param=500,
1899
+ temperature=0
1900
+ ).strip()
1901
+
1902
+ # Step 5: Assemble the final HTML document.
1903
+ final_report_html = f"""<html>
1904
+ <head>
1905
+ <meta charset="utf-8" />
1906
+ <meta name="viewport" content="width=device-width, initial-scale=1">
1907
+ <style>
1908
+ body {{
1909
+ font-family: Arial, sans-serif;
1910
+ margin: 20px;
1911
+ padding: 0;
1912
+ background-color: #ffffff;
1913
+ }}
1914
+ h1 {{
1915
+ text-align: center;
1916
+ margin-bottom: 20px;
1917
+ }}
1918
+ h2 {{
1919
+ text-align: left;
1920
+ margin-top: 20px;
1921
+ margin-bottom: 10px;
1922
+ }}
1923
+ .section {{
1924
+ margin-bottom: 30px;
1925
+ }}
1926
+ .toc {{
1927
+ margin: 20px 0;
1928
+ border: 1px solid #ccc;
1929
+ padding: 10px;
1930
+ }}
1931
+ </style>
1932
+ </head>
1933
+ <body>
1934
+ <!-- Report Title -->
1935
+ <h1>{initial_query}</h1>
1936
+ <!-- Table of Contents -->
1937
+ <div class="toc">
1938
+ {toc_html}
1939
+ </div>
1940
+ """
1941
+ # Append core sections.
1942
+ for section in skeleton.get("core_sections", []):
1943
+ section_name = section.get("section_name", "Untitled Section")
1944
+ content = generated_core_sections.get(section_name, "")
1945
+ final_report_html += f"""<div class="section">
1946
+ <h2>{section_name}</h2>
1947
+ {content}
1948
+ </div>
1949
+ """
1950
+ # Append final sections.
1951
+ for section in skeleton.get("final_sections", []):
1952
+ section_name = section.get("section_name", "Untitled Final Section")
1953
+ content = generated_final_sections.get(section_name, "")
1954
+ final_report_html += f"""<div class="section">
1955
+ <h2>{section_name}</h2>
1956
+ {content}
1957
+ </div>
1958
+ """
1959
+ # Append a designated report ending marker.
1960
+ final_report_html += """
1961
  <iframe class="visual-frame" srcdoc='
1962
  <!DOCTYPE html>
1963
  <html>
1964
+ <head></head>
1965
  <body>
1966
  <div>
1967
  -end-
1968
  </div>
1969
  </body>
1970
  </html>' width="100px" height="15px" style="border:none;"></iframe>
1971
+ </body>
 
 
1972
  </html>
1973
+ """
1974
 
1975
+ # Step 6: Process the raw HTML to replace placeholder markers with actual placeholder code.
1976
+ final_report_html = replace_visual_placeholders(final_report_html, context, initial_query, aggregated_crumbs)
1977
+ final_report_html = replace_graph_placeholders(final_report_html, context, initial_query, aggregated_crumbs)
1978
+ final_report_html = replace_focus_placeholders(final_report_html, context, initial_query, aggregated_crumbs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1979
 
1980
+ logging.info("generate_final_report: Report generated successfully with integrated placeholder allocation decisions.")
1981
+ return final_report_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1982
 
1983
  def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
1984
  # Filter out already seen results