Spaces:

10gen
/

deepsearchitv2

Running

App Files Files Community

Guiyom committed on Mar 3, 2025

Commit

cf000a0

verified ·

1 Parent(s): a76f63d

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -117

app.py CHANGED Viewed

@@ -84,15 +84,6 @@ def clean_llm_response(response: str) -> str:
     # Collapse multiple spaces into one.
     cleaned = re.sub(r'\s+', ' ', cleaned)
-    # Optionally, if you suspect unescaped quotes in the content,
-    # you might try to protect them. For example, if the improved field contains inner double quotes,
-    # ensure they are properly escaped. This can be a bit tricky because you want to preserve valid escapes.
-    #
-    # Example (if needed):
-    # cleaned = cleaned.replace('\"', '\\\"')
-    #
-    # But be cautious: too many replacements may ruin valid escapes.
     return cleaned.strip()
 def snippet_in_tag(tag: Tag, snippet: str) -> bool:
@@ -113,15 +104,6 @@ def snippet_in_tag(tag: Tag, snippet: str) -> bool:
     return False
 def expand_snippet_area(soup: BeautifulSoup, snippet: str) -> Tag:
-    """
-    Given a BeautifulSoup object and a snippet of text, this function finds the element that contains the snippet.
-    It then uses an iterative while loop to traverse upward (from the immediate parent to the top)
-    until the highest level <iframe> is reached or (if no <iframe> is present) until a <div> or <table> is
-    encountered—the first allowed container (<div> or <table>) found is used. If neither is found,
-    it returns the candidate element itself.
-    Logging is provided at each key step.
-    """
     allowed_tags = {"div", "table"}
     logging.info("Searching for all elements containing the snippet: '%s'", snippet)
@@ -173,8 +155,8 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
     # Step 1: (LLM call to get unique strings) ...
     # [Assume this part remains unchanged and unique_strings is obtained]
-    prompt_identify = (
-        f"""You are a meticulous technical editor.
 Below is the full report HTML and a user adjustment request.
 Extract one or more unique plain-text string(s) (without any HTML tags or formatting) that uniquely appear in the area(s) targeted by the adjustment request.
@@ -189,9 +171,9 @@ Extract one or more unique plain-text string(s) (without any HTML tags or format
 Output them in a JSON object with the key "identified_unique_strings" mapped to a list of strings.
 Ensure these strings exactly match the content in the report.
-Note: if the unique string is from within a code snippet (ex: javascript graph or a mermaid code), don't use the code as snippet,
 For example instead of "A[Fundamental AI Research - Emerging Theories and Paradigms] --&gt; B[Algorithm Innovation - Novel ML and NLP Models]"
-Rather use "Fundamental AI Research - Emerging Theories and Paradigms"
 This would make it easier to find it
 Full Report HTML:
@@ -234,8 +216,8 @@ Only output valid JSON."""
         logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
         # Step 3: Call the LLM to adjust this container.
-        prompt_adjust = (
-            f"""You are a technical editor.
 Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
 produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
 The updated version will be put back in the exact same location and must have the same outer tags.
@@ -261,7 +243,7 @@ Output a JSON object with exactly two keys:
 - "improved" (the corrected container's full HTML) and
 - "summary" (a brief explanation of the changes)
-Only output valid JSON."""
         )
         response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
@@ -290,11 +272,13 @@ Only output valid JSON."""
     # (Step 5 and Step 6 remain as before to update the reference table and the QA log)
-    prompt_refs = (
-        f"You are a technical editor. Review the following updated report HTML. "
-        f"If any new inline citations (e.g., [x]) have been introduced that are not in the original reference table, "
-        f"generate an updated Reference Summary Table as valid HTML. Output only the updated reference table HTML with no explanations.\n\n"
-        f"Updated Report HTML:\n{updated_report_html}"
     )
     updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_refs = updated_refs.strip().strip("```")
@@ -329,17 +313,24 @@ def suggest_improvements(report_html: str, openai_api_key: str, serpapi_api_key:
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
-    prompt = (
-        "You are a technical editor. Based on the following full HTML report, generate exactly 10 proposed improvement suggestions. "
-        "Format each proposal as a numbered list item in the following style:\n"
-        "1) in the section xyz, adjust ...\n"
-        "2) after the paragraph abc, detail the graph further ...\n"
-        "3) in the focus placeholder xxx, add a mention about ...\n"
-        "4) make a reference to ... in the section 3.2\n"
-        "...\n"
-        "10) final improvement suggestion...\n"
-        "Only output the suggestions exactly as a numbered list.\n\n"
-        f"Full Report HTML:\n{report_html}"
     )
     suggestions = openai_call(prompt=prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     return suggestions.strip().strip("```").strip()
@@ -366,7 +357,7 @@ def improve_report_from_chat(user_message: str, chat_history: list, report_text:
     chat_history.append([user_message, answer])
     return chat_history, "", updated_report
-def send_chat_message(user_message, openai_api_key, serpapi_api_key, chat_history, report_text, crumbs_text):
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
@@ -376,15 +367,21 @@ def send_chat_message(user_message, openai_api_key, serpapi_api_key, chat_histor
     if "http://" in user_message or "https://" in user_message:
         answer = handle_link_request(user_message)
     else:
-        system_prompt = f"""You are a knowledgeable research assistant. Based on the following report:
 {report_text}
-Source Crumbs:
 {crumbs_text}
-User Question:
 {user_message}
 Your Answer:"""
         answer = openai_call(prompt=system_prompt, model="o3-mini", max_tokens_param=10000)
     updated_history = chat_history + [[user_message, answer]]
@@ -1420,8 +1417,10 @@ def summarize_large_text(text: str, target_length: int, chunk_size: int = 1000,
     words = text.split()
     if len(words) <= chunk_size:
         # If the text is short, simply return it (or you could call a simple summarization)
-        return text
     chunks = []
     i = 0
     while i < len(words):
@@ -1436,20 +1435,8 @@ def summarize_large_text(text: str, target_length: int, chunk_size: int = 1000,
         chunk_prompt = (f"""
 Summarize the following text, preserving all key details and ensuring that any tables or structured data are also summarized:
 {chunk}
-// Mentioning sources, organisations and individuals
-- We will perform a post-processing on the output
-- For this reasons use this format for any specific name, organisation or project: {{[{{name}}]}}
-ex1: {{[{{Google}}]}} CEO, {{[{{Sundar Pichai}}]}} ...
-ex2: in a report from the {{[{{university of Berkeley}}]}} titled "{{[{{The great acceleration}}]}}"...
-ex3: the CEO of {{[{{Softbank}}]}} , {{[{{Masayoshi Son}}]}}, said that "the best way to..."
-ex4: the project {{[{{Stargate}}]}}, anounced by {{[{{OpenAI}}]}} in collaboration with {{[{{Salesforce}}]}}
-ex5: Mr. {{[{{Michael Parrot}}]}}, Marketing director in {{[{{Panasonic}}]}}, mentioned that ...
-Note: the output will be processed through regex and the identifiers removed, but this way we can keep track of all sources and citations without disclosing them.
-- This should apply to names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
-- You should have approximately 10 mention of organisations, people, projects or people, use the prescribed format
-- DO NOT MENTION this formmatting requirement, just apply it. The user doesn't have to know about this technicality.
-Note: LinkedIn is not a source - if you want to use a source related to LinkedIn, you should check the author of the page visited, this is the real source, mention the name of the author as "'authorName' from LinkedIn Pulse"
 """
         )
         summary_chunk = openai_call(prompt=chunk_prompt, model="gpt-4o-mini", max_tokens_param=500, temperature=0.7)
@@ -1465,20 +1452,8 @@ Note: LinkedIn is not a source - if you want to use a source related to LinkedIn
     final_prompt = (f"""
 Combine the following summaries into one concise summary that preserves all critical details, including any relevant table or structured data:
 {combined_summary}
-// Mentioning sources, organisations and individuals
-- We will perform a post-processing on the output
-- For this reasons use this format for any specific name, organisation or project: {{[{{name}}]}}
-ex1: {{[{{Google}}]}} CEO, {{[{{Sundar Pichai}}]}} ...
-ex2: in a report from the {{[{{university of Berkeley}}]}} titled "{{[{{The great acceleration}}]}}"...
-ex3: the CEO of {{[{{Softbank}}]}} , {{[{{Masayoshi Son}}]}}, said that "the best way to..."
-ex4: the project {{[{{Stargate}}]}}, anounced by {{[{{OpenAI}}]}} in collaboration with {{[{{Salesforce}}]}}
-ex5: Mr. {{[{{Michael Parrot}}]}}, Marketing director in {{[{{Panasonic}}]}}, mentioned that ...
-Note: the output will be processed through regex and the identifiers removed, but this way we can keep track of all sources and citations without disclosing them.
-- This should apply to names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
-- You should have approximately 10 mention of organisations, people, projects or people, use the prescribed format
-- DO NOT MENTION this formmatting requirement, just apply it. The user doesn't have to know about this technicality.
-Note: LinkedIn is not a source - if you want to use a source related to LinkedIn, you should check the author of the page visited, this is the real source, mention the name of the author as "'authorName' from LinkedIn Pulse"
 """
     )
     final_summary = openai_call(prompt=final_prompt, model="gpt-4o-mini", max_tokens_param=target_length, temperature=0.7)
@@ -1502,7 +1477,8 @@ def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: floa
     client = os.getenv('OPENAI_API_KEY')  # alternatively, pass your API key here if needed.
-    prompt = (f"""Analyze the following content from a query result:
 {snippet}
@@ -1713,49 +1689,19 @@ def generate_final_report(initial_query: str, context: str, reportstyle: str, le
     combined_learnings = "\n".join(learnings) if learnings else fallback_text
     word_count = pages * 500
     prompt = (f"""
-// Instructions:
-1. Integrate numbers from the sources but always mention the source
-2. Whenever you mention a figure or quote, add an inline reference [x] matching its source from the references.
-3. Again, Specifically name relevant organizations, tools, project names, and people encountered in the crumbs or learnings.
-Note: This is for academic purposes, so thorough citations and referencing are essential.
-4. Focus on reputable sources that will not be disputed (generally social media posts cannot be an opposable sources, but some of them may mention reputable sources)
-Note: put the full reference url (no generic domain address), down to the html page or the pdf
-5. It must follow this writing style {reportstyle}.
-// Mentioning sources, organisations and individuals
-- We will perform a post-processing on the output
-- For this reasons use this format for any specific name, organisation or project: {{[{{name}}]}}
-ex1: {{[{{Google}}]}} CEO, {{[{{Sundar Pichai}}]}} ...
-ex2: in a report from the {{[{{university of Berkeley}}]}} titled "{{[{{The great acceleration}}]}}"...
-ex3: the CEO of {{[{{Softbank}}]}} , {{[{{Masayoshi Son}}]}}, said that "the best way to..."
-ex4: the project {{[{{Stargate}}]}}, anounced by {{[{{OpenAI}}]}} in collaboration with {{[{{Salesforce}}]}}
-ex5: Mr. {{[{{Michael Parrot}}]}}, Marketing director in {{[{{Panasonic}}]}}, mentioned that ...
-Note: the output will be processed through regex and the identifiers removed, but this way we can keep track of all sources and citations without disclosing them.
-- This should apply to names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
-- You should have approximately {10 * pages} mention of organisations, people, projects or people, use the prescribed format
-- DO NOT MENTION this formmatting requirement, just apply it. The user doesn't have to know about this technicality.
-Note: LinkedIn is not a source - if you want to use a source related to LinkedIn, you should check the author of the page visited, this is the real source, mention the name of the author as "'authorName' from LinkedIn Pulse"
-// Sources
-Use the following learnings and merged reference details from a deep research process on:
-'{initial_query}'
-Taking also into consideration the context:
-{context}
-Produce a comprehensive research report in html format.
-The report should be very detailed and lengthy — approximately the equivalent of {pages} pages (or {word_count} words) when printed.
-For sections requiring specific improvements, put it in <div class="improvable-chunk">...</div> (but don't mention it in the report, this will be managed through post-processing)
 // Requirements
 - All text alignment has to be on the left
 - It must include inline citations (e.g., [1], [2], etc.) from real sources provided in the search results below
 Note: citations sources in-line need to be in this format: blablabla - Source [x] / "pdf" is not a source, provide the title or author
-- No more than 7 sentences per div blocks, skip lines and add line breaks when changing topic.
 - The report must include between {round(pages/10,0)} and {round(pages/5,0)} tables from the sources used (add citations if necessary) and use facts and figures extensively to ground the analysis.
 - For the numbering of titles or numbered lists, use numbers (ex: 1.) and sub-units (1.1, 1.2... 1.1.1...,1.1.2...).
 Note: Exclude the use of html numbered lists format, they don't get correctly implemented. Use plain text format for numbering of sections and sub-sections
 - Put paragraphs, sentences that are part of the same section in a div tag, this will be used for formatting.
-- Text Alignment has to be to the left, including for the titles
 - Add on top of the report the report title (with the <h1> tag) - this is the only part that should be centered (in-line style)
 - Titles for sections and sub-sections should systematically use the tags:
   <h1> for sections (ex: 3. Examination of State-of-the-Art of AI)
@@ -1766,18 +1712,52 @@ Note: Exclude the use of html numbered lists format, they don't get correctly im
 - Avoid Chinese characters in the output (use the Pinyin version) since they won't display correcly in the pdf (black boxes)
 - For the Table of contents: do not mention the pages, but make each item on separate line
 - Put "Table of contents" and "Abstract" title in h1 format.
-- The Table of contents should not mention the abstract and table of contents, the numbering should start from the introduction and end with References Summary Table
 // Reference citations
 - The name of the reference table should be: "Reference Summary Table"
 - The reference table at the end containing the citations details should have 4 columns: the ref number, the title of the document, the author(s, the URL - with hyperlink)
 - The report MUST include a reference summary table with between 10 (for a 8 page report) and 30 references (for a 40 pages report). All inline citations (e.g., [1], [2], …) present in the report and in any focus placeholders MUST have a corresponding entry in this table with its full URL.
 - For the reference citations, add systematically the urls from the Learnings (no need to put them in numbered list format since we alredy have the [x] that serves as number list)
-- Do not add any inline citations reference in the visual and graph placeholders descriptions below, you can add them in focus though.
 - Do not make false references / citations. It has to be grounded from the sources in the rsearch results / crumbs below (no example.com/... type references!)
 - The references / citations should be only coming from the most reputable sources amongst all the Learnings and Results from searches below
 - The table generated should have in-line styling to have word-wrap and 100% width
 --------------- Placeholders -----------
 In order to enrich the content, within the core sections (between introduction and conclusion), you can inject some placeholders that will be developped later on.
 There are 3 types: visual, graphs, focus - each with their own purpose
@@ -1888,7 +1868,7 @@ Then close the html code from the broader report
 </body>
 </html>
-// Structure of the overall report as follows:
 {{Do not add anything before - no introductory meta comment or content}}
@@ -1897,7 +1877,7 @@ Then close the html code from the broader report
 - Introduction
 - [Sections and sub-sections, depending on the size and relevant topic - including visual, graph and focus placeholders]
 - Conclusion
-- References of the documents used in the inline citations
 - Report ending formatting (as mentioned before)
 {{Do not add anything after - no conclusive meta comment or content}}
@@ -1911,8 +1891,8 @@ Important note: placeholders (visual, graph or focus) can only appear in the sec
 - Results from searches:
 {aggregated_crumbs}
 """
     )
     tokentarget = word_count * 5  # rough multiplier for token target
@@ -3055,7 +3035,7 @@ def main():
         send_button.click(
             fn=send_chat_message,
-            inputs=[chat_input, openai_api_key_input, serpapi_api_key_input, chatbot, final_report, crumbs_box],
             outputs=[chatbot, chat_input, final_report]
         )

     # Collapse multiple spaces into one.
     cleaned = re.sub(r'\s+', ' ', cleaned)
     return cleaned.strip()
 def snippet_in_tag(tag: Tag, snippet: str) -> bool:
     return False
 def expand_snippet_area(soup: BeautifulSoup, snippet: str) -> Tag:
     allowed_tags = {"div", "table"}
     logging.info("Searching for all elements containing the snippet: '%s'", snippet)
     # Step 1: (LLM call to get unique strings) ...
     # [Assume this part remains unchanged and unique_strings is obtained]
+    prompt_identify = (f"""
+You are a meticulous technical editor.
 Below is the full report HTML and a user adjustment request.
 Extract one or more unique plain-text string(s) (without any HTML tags or formatting) that uniquely appear in the area(s) targeted by the adjustment request.
 Output them in a JSON object with the key "identified_unique_strings" mapped to a list of strings.
 Ensure these strings exactly match the content in the report.
+Note: if the unique string is from within a code snippet (ex: javascript graph or a mermaid code), don't use the code as part of the snippet,
 For example instead of "A[Fundamental AI Research - Emerging Theories and Paradigms] --&gt; B[Algorithm Innovation - Novel ML and NLP Models]"
+Simply use "Fundamental AI Research - Emerging Theories and Paradigms"
 This would make it easier to find it
 Full Report HTML:
         logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
         # Step 3: Call the LLM to adjust this container.
+        prompt_adjust = (f"""
+You are a technical editor.
 Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
 produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
 The updated version will be put back in the exact same location and must have the same outer tags.
 - "improved" (the corrected container's full HTML) and
 - "summary" (a brief explanation of the changes)
+Only output valid JSON with no comments or code fences."""
         )
         response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
     # (Step 5 and Step 6 remain as before to update the reference table and the QA log)
+    prompt_refs = (f"""
+You are a technical editor.
+Review the following updated report HTML.
+If any new inline citations (e.g., [x]) have been introduced that are not in the original reference table,
+generate an updated Reference Summary Table as valid HTML. Output only the updated reference table HTML with no explanations.
+Updated Report HTML:\n{updated_report_html}"""
     )
     updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_refs = updated_refs.strip().strip("```")
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
+    prompt = (f"""
+You are a technical editor.
+Based on the following full HTML report, generate improvement suggestions - at least 3.
+Format each proposal as a numbered list item in the following style:
+Examples:
+1) in the section xyz, adjust ...
+2) after the paragraph abc, detail the graph further ...
+3) in the focus placeholder xxx, add a mention about ...
+4) make a reference to ... in the section 3.2
+...
+n) final improvement suggestion...
+Only output the suggestions exactly as a numbered list (text)
+Full Report HTML:
+{report_html}
+Now provide your suggestions."""
     )
     suggestions = openai_call(prompt=prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     return suggestions.strip().strip("```").strip()
     chat_history.append([user_message, answer])
     return chat_history, "", updated_report
+def send_chat_message(user_message, openai_api_key, serpapi_api_key, chat_history, report_text, crumbs_text, style):
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
     if "http://" in user_message or "https://" in user_message:
         answer = handle_link_request(user_message)
     else:
+        system_prompt = f"""
+You are a knowledgeable research assistant.
+Based on the following
+- Report:
 {report_text}
+- Source Crumbs:
 {crumbs_text}
+- User Question:
 {user_message}
+Provide a response in the desired style: {style}
 Your Answer:"""
         answer = openai_call(prompt=system_prompt, model="o3-mini", max_tokens_param=10000)
     updated_history = chat_history + [[user_message, answer]]
     words = text.split()
     if len(words) <= chunk_size:
         # If the text is short, simply return it (or you could call a simple summarization)
+        if len(words) < 500:
+            return "Not a coherent text or not worth processing - discard."
+        else:
+            return text
     chunks = []
     i = 0
     while i < len(words):
         chunk_prompt = (f"""
 Summarize the following text, preserving all key details and ensuring that any tables or structured data are also summarized:
 {chunk}
+Maintain the original sources.
+Keep all mentions of names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
 """
         )
         summary_chunk = openai_call(prompt=chunk_prompt, model="gpt-4o-mini", max_tokens_param=500, temperature=0.7)
     final_prompt = (f"""
 Combine the following summaries into one concise summary that preserves all critical details, including any relevant table or structured data:
 {combined_summary}
+Maintain the original sources.
+Keep all mentions of names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
 """
     )
     final_summary = openai_call(prompt=final_prompt, model="gpt-4o-mini", max_tokens_param=target_length, temperature=0.7)
     client = os.getenv('OPENAI_API_KEY')  # alternatively, pass your API key here if needed.
+    prompt = (f"""
+Analyze the following content from a query result:
 {snippet}
     combined_learnings = "\n".join(learnings) if learnings else fallback_text
     word_count = pages * 500
     prompt = (f"""
+Produce a comprehensive report in html format.
+The report should be very detailed and lengthy.
 // Requirements
 - All text alignment has to be on the left
+- The report should be {pages} pages long or {word_count} words (excluding html formatting)
 - It must include inline citations (e.g., [1], [2], etc.) from real sources provided in the search results below
 Note: citations sources in-line need to be in this format: blablabla - Source [x] / "pdf" is not a source, provide the title or author
+- No more than 10 sentences per div blocks, skip lines and add line breaks when changing topic.
 - The report must include between {round(pages/10,0)} and {round(pages/5,0)} tables from the sources used (add citations if necessary) and use facts and figures extensively to ground the analysis.
 - For the numbering of titles or numbered lists, use numbers (ex: 1.) and sub-units (1.1, 1.2... 1.1.1...,1.1.2...).
 Note: Exclude the use of html numbered lists format, they don't get correctly implemented. Use plain text format for numbering of sections and sub-sections
 - Put paragraphs, sentences that are part of the same section in a div tag, this will be used for formatting.
 - Add on top of the report the report title (with the <h1> tag) - this is the only part that should be centered (in-line style)
 - Titles for sections and sub-sections should systematically use the tags:
   <h1> for sections (ex: 3. Examination of State-of-the-Art of AI)
 - Avoid Chinese characters in the output (use the Pinyin version) since they won't display correcly in the pdf (black boxes)
 - For the Table of contents: do not mention the pages, but make each item on separate line
 - Put "Table of contents" and "Abstract" title in h1 format.
+- The Table of contents should skip the abstract and table of contents, the numbering should start from the introduction and end with Reference Summary Table
+- For sections requiring specific improvements, put it in <div class="improvable-chunk">...</div> (but don't mention it in the report, this will be managed through post-processing)
 // Reference citations
 - The name of the reference table should be: "Reference Summary Table"
 - The reference table at the end containing the citations details should have 4 columns: the ref number, the title of the document, the author(s, the URL - with hyperlink)
 - The report MUST include a reference summary table with between 10 (for a 8 page report) and 30 references (for a 40 pages report). All inline citations (e.g., [1], [2], …) present in the report and in any focus placeholders MUST have a corresponding entry in this table with its full URL.
 - For the reference citations, add systematically the urls from the Learnings (no need to put them in numbered list format since we alredy have the [x] that serves as number list)
+- Do not add any inline citations reference in the visual and graph placeholders descriptions below, you can add them in focus though.
 - Do not make false references / citations. It has to be grounded from the sources in the rsearch results / crumbs below (no example.com/... type references!)
 - The references / citations should be only coming from the most reputable sources amongst all the Learnings and Results from searches below
 - The table generated should have in-line styling to have word-wrap and 100% width
+// Instructions:
+1. Integrate numbers from the sources but always mention the source
+2. Whenever you mention a figure or quote, add an inline reference [x] matching its source from the references.
+3. Again, Specifically name relevant organizations, tools, project names, and people encountered in the crumbs or learnings.
+Note: This is for academic purposes, so thorough citations and referencing are essential.
+4. Focus on reputable sources that will not be disputed (generally social media posts cannot be an opposable sources, but some of them may mention reputable sources)
+Note: put the full reference url (no generic domain address), down to the html page or the pdf
+// Style
+The report must follow this writing style {reportstyle}.
+// Format when mentioning sources, organisations and individuals
+- We will perform a post-processing on the output
+- For this reasons use this format for any specific name, organisation or project: {{[{{name}}]}}
+ex1: {{[{{Google}}]}} CEO, {{[{{Sundar Pichai}}]}} ...
+ex2: in a report from the {{[{{university of Berkeley}}]}} titled "{{[{{The great acceleration}}]}}"...
+ex3: the CEO of {{[{{Softbank}}]}} , {{[{{Masayoshi Son}}]}}, said that "the best way to..."
+ex4: the project {{[{{Stargate}}]}}, anounced by {{[{{OpenAI}}]}} in collaboration with {{[{{Salesforce}}]}}
+ex5: Mr. {{[{{Michael Parrot}}]}}, Marketing director in {{[{{Panasonic}}]}}, mentioned that ...
+Note: the output will be processed through regex and the identifiers removed, but this way we can keep track of all sources and citations without disclosing them.
+- This should apply to names, people/titles, dates, papers, reports, organisation/institute/NGO/government bodies quotes, products, project names, ...
+- You should have approximately {10 * pages} mention of organisations, people, projects or people, use the prescribed format
+- DO NOT MENTION this formmatting requirement, just apply it. The user doesn't have to know about this technicality.
+Note: LinkedIn is not a source - if you want to use a source related to LinkedIn, you should check the author of the page visited, this is the real source, mention the name of the author as "'authorName' from LinkedIn Pulse"
+// Sources
+Use the following learnings and merged reference details from a deep research process on:
+'{initial_query}'
+Taking also into consideration the context:
+{context}
 --------------- Placeholders -----------
 In order to enrich the content, within the core sections (between introduction and conclusion), you can inject some placeholders that will be developped later on.
 There are 3 types: visual, graphs, focus - each with their own purpose
 </body>
 </html>
+// Structure the overall report as follows:
 {{Do not add anything before - no introductory meta comment or content}}
 - Introduction
 - [Sections and sub-sections, depending on the size and relevant topic - including visual, graph and focus placeholders]
 - Conclusion
+- Reference Summary Table
 - Report ending formatting (as mentioned before)
 {{Do not add anything after - no conclusive meta comment or content}}
 - Results from searches:
 {aggregated_crumbs}
+Take a deep breath, do your best.
+Now, produce the report please.
 """
     )
     tokentarget = word_count * 5  # rough multiplier for token target
         send_button.click(
             fn=send_chat_message,
+            inputs=[chat_input, openai_api_key_input, serpapi_api_key_input, chatbot, final_report, crumbs_box, reportstyle],
             outputs=[chatbot, chat_input, final_report]
         )