Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,8 @@ import tempfile
|
|
| 12 |
import logging
|
| 13 |
import markdown
|
| 14 |
import unicodedata
|
|
|
|
|
|
|
| 15 |
from datetime import datetime
|
| 16 |
from reportlab.lib.pagesizes import A4
|
| 17 |
from xhtml2pdf import pisa
|
|
@@ -27,7 +29,7 @@ HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/
|
|
| 27 |
"(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"}
|
| 28 |
|
| 29 |
# =============================================================================
|
| 30 |
-
# Helper functions for external APIs
|
| 31 |
# =============================================================================
|
| 32 |
|
| 33 |
def display_image():
|
|
@@ -176,32 +178,24 @@ def generate_final_report(initial_query: str, reportstyle: str, learnings: list,
|
|
| 176 |
prompt = (f"""
|
| 177 |
Using the following learnings and merged reference details from a deep research process on '{initial_query}', produce a comprehensive research report in Markdown format.
|
| 178 |
The report should be very detailed and lengthy — approximately the equivalent of {pages} pages (or {word_count} words) when printed.
|
| 179 |
-
It must include inline citations (e.g., [1], [2], etc.).
|
| 180 |
-
|
| 181 |
-
The
|
| 182 |
-
|
| 183 |
-
The structure of the report should be:
|
| 184 |
- Abstract
|
| 185 |
- Table of contents
|
| 186 |
- Introduction
|
| 187 |
-
- [Sections and sub-sections
|
| 188 |
- Conclusion
|
| 189 |
-
- References
|
| 190 |
-
|
| 191 |
-
Important: For the numbering of titles or numbered lists, use numbers (ex: 1.) and sub-units (1.1, 1.2... 1.1.1...,1.1.2...). This is to avoid issues when converting markdown to html.
|
| 192 |
-
You should still use markdown for the stryling (titles levels, bold, italic), tables...
|
| 193 |
-
|
| 194 |
-
Output the report directly without any introductory meta comments.
|
| 195 |
|
|
|
|
| 196 |
Learnings:
|
| 197 |
{json.dumps(learnings, indent=2)}
|
| 198 |
-
|
| 199 |
Merged Reference Details:
|
| 200 |
{aggregated_crumbs}"""
|
| 201 |
)
|
| 202 |
tokentarget = word_count * 3 # rough multiplier for token target
|
| 203 |
report = openai_call(prompt, model="o3-mini", max_tokens_param=tokentarget)
|
| 204 |
-
# If the report is too long, compress it.
|
| 205 |
if len(report) > MAX_MESSAGE_LENGTH:
|
| 206 |
report = compress_text(report, MAX_MESSAGE_LENGTH)
|
| 207 |
if report.startswith("Error calling OpenAI API"):
|
|
@@ -211,17 +205,21 @@ Merged Reference Details:
|
|
| 211 |
return report
|
| 212 |
|
| 213 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 214 |
-
# Filter out already seen results
|
| 215 |
new_results = []
|
| 216 |
candidate_indexes = []
|
|
|
|
| 217 |
for idx, res in enumerate(results):
|
| 218 |
url = res.get("link", "")
|
| 219 |
if url and url not in visited_urls:
|
|
|
|
|
|
|
|
|
|
| 220 |
new_results.append(res)
|
| 221 |
candidate_indexes.append(idx)
|
|
|
|
| 222 |
if not new_results:
|
| 223 |
return []
|
| 224 |
-
# Build the prompt with relaxed criteria.
|
| 225 |
results_text = ""
|
| 226 |
for idx, res in enumerate(new_results):
|
| 227 |
title = res.get("title", "No Title")
|
|
@@ -231,25 +229,18 @@ def filter_search_results(results: list, visited_urls: set, query: str, clarific
|
|
| 231 |
prompt = (
|
| 232 |
f"The following search results were obtained for the query '{query}' with clarifications:\n"
|
| 233 |
f"{clarifications}\n\n"
|
| 234 |
-
"For each result, decide
|
| 235 |
-
"
|
| 236 |
-
"Return your decision as a JSON object where each key is the result index (as an integer) and the value is either 'yes' or 'no'. "
|
| 237 |
-
"For example: {\"0\": \"yes\", \"1\": \"no\", \"2\": \"yes\"}.\n"
|
| 238 |
-
"Consider the title, snippet, and URL in your decision."
|
| 239 |
-
f"\nResults:{results_text}\n"
|
| 240 |
-
"Output only the JSON object."
|
| 241 |
)
|
| 242 |
llm_response = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=200)
|
| 243 |
try:
|
| 244 |
decision_map = json.loads(llm_response)
|
| 245 |
except Exception as e:
|
| 246 |
logging.error(f"filter_search_results: JSON decode error: {e}; Full response: {llm_response}")
|
| 247 |
-
# In case of error, default to no results selected.
|
| 248 |
decision_map = {}
|
| 249 |
filtered = []
|
| 250 |
for idx, res in enumerate(new_results):
|
| 251 |
url = res.get("link", "")
|
| 252 |
-
# Add each URL to visited regardless of decision.
|
| 253 |
visited_urls.add(url)
|
| 254 |
decision = decision_map.get(str(idx), "no").strip().lower()
|
| 255 |
if decision == "yes":
|
|
@@ -258,12 +249,11 @@ def filter_search_results(results: list, visited_urls: set, query: str, clarific
|
|
| 258 |
return filtered
|
| 259 |
|
| 260 |
def make_multilingual_query(query: str, context: str, languagesdetected: str) -> str:
|
| 261 |
-
finalquery = f"({query})" # original query
|
| 262 |
languages_detected_list = languagesdetected.split(",")
|
| 263 |
for lang in languages_detected_list:
|
| 264 |
prompt2 = f"""The research query is: "{query}".
|
| 265 |
-
Based on this query and context: "{context}", and
|
| 266 |
-
The translation must be less than 20 words and preserve search operators like AND, OR, parenthesis, quotation marks, and exclusion hyphens.
|
| 267 |
Output only the translated query."""
|
| 268 |
translatedquery = openai_call(prompt2, model="gpt-4o-mini", max_tokens_param=50)
|
| 269 |
finalquery += f" OR ({translatedquery})"
|
|
@@ -271,11 +261,14 @@ Output only the translated query."""
|
|
| 271 |
return finalquery
|
| 272 |
|
| 273 |
def generate_query_tree(initial_query: str, breadth: int, depth: int) -> list:
|
|
|
|
| 274 |
base_terms = initial_query.strip()
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
| 279 |
logging.info(f"generate_query_tree: Generated queries: {final_queries}")
|
| 280 |
return final_queries
|
| 281 |
|
|
@@ -283,13 +276,11 @@ def generate_serp_queries(context: str, breadth: int, depth: int, initial_query:
|
|
| 283 |
selected_engines=None, results_per_query: int = 10) -> list:
|
| 284 |
queries = generate_query_tree(initial_query, breadth, depth)
|
| 285 |
prompt = f"""The research query is: "{initial_query}".
|
| 286 |
-
Based on
|
| 287 |
Output either:
|
| 288 |
- "No local attributes detected"
|
| 289 |
-
-
|
| 290 |
-
|
| 291 |
-
Output only the result.
|
| 292 |
-
"""
|
| 293 |
languages_detected = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=20)
|
| 294 |
if languages_detected != "No local attributes detected":
|
| 295 |
queries = [make_multilingual_query(q, context, languages_detected) for q in queries]
|
|
@@ -297,17 +288,12 @@ Output only the result.
|
|
| 297 |
prompt_engines = f"""
|
| 298 |
Examine these queries:
|
| 299 |
{queries}
|
| 300 |
-
|
| 301 |
{context}
|
| 302 |
-
Identify among these search engines:
|
| 303 |
-
google
|
| 304 |
-
Which are most relevant? Output a comma separated list (e.g., "google,baidu").
|
| 305 |
-
If none are found, output "google".
|
| 306 |
-
"""
|
| 307 |
identified_engines = openai_call(prompt_engines, model="gpt-4o-mini", max_tokens_param=20)
|
| 308 |
selected_engines = identified_engines.split(",")
|
| 309 |
-
else:
|
| 310 |
-
selected_engines = selected_engines
|
| 311 |
final_queries = []
|
| 312 |
for q in queries:
|
| 313 |
for engine in selected_engines:
|
|
@@ -393,6 +379,23 @@ def refine_query(query: str, openai_api_key: str) -> str:
|
|
| 393 |
logging.info(f"refine_query: Refined query: {refined}")
|
| 394 |
return refined
|
| 395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
class ReportGenerator:
|
| 397 |
def __init__(self):
|
| 398 |
pass
|
|
@@ -403,9 +406,9 @@ class ReportGenerator:
|
|
| 403 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
| 404 |
# Remove markdown hyperlink syntax: replace [text](link) with just text.
|
| 405 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
| 406 |
-
# Convert markdown to HTML using the "extra" and "tables" extensions
|
| 407 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
| 408 |
-
# Insert explicit page breaks before
|
| 409 |
html_content = html_content.replace("<h2>Table of Contents</h2>",
|
| 410 |
"<div style='page-break-before: always;'></div><h2>Table of Contents</h2>")
|
| 411 |
html_content = html_content.replace("<h2>Introduction</h2>",
|
|
@@ -414,10 +417,8 @@ class ReportGenerator:
|
|
| 414 |
"<div style='page-break-before: always;'></div><h2>Conclusion</h2>")
|
| 415 |
html_content = html_content.replace("<h2>References</h2>",
|
| 416 |
"<div style='page-break-before: always;'></div><h2>References</h2>")
|
| 417 |
-
# For the Surprise-Me section, ensure it starts on a new page.
|
| 418 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>",
|
| 419 |
"<div style='page-break-before: always;'></div><h2>Surprise-Me Extension Report</h2>")
|
| 420 |
-
# Build header using metadata if provided.
|
| 421 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 422 |
header = ""
|
| 423 |
if metadata:
|
|
@@ -425,33 +426,21 @@ class ReportGenerator:
|
|
| 425 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 426 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 427 |
<hr/>"""
|
| 428 |
-
# Build a complete HTML document with CSS.
|
| 429 |
full_html = f"""
|
| 430 |
<html>
|
| 431 |
<head>
|
| 432 |
<meta charset="utf-8" />
|
| 433 |
<style>
|
| 434 |
-
body {{ font-family: Helvetica, sans-serif; margin: 40px; }}
|
| 435 |
h1 {{ font-size: 24pt; margin-bottom: 12px; }}
|
| 436 |
h2 {{ font-size: 20pt; margin-bottom: 10px; }}
|
| 437 |
h3 {{ font-size: 18pt; margin-bottom: 8px; }}
|
| 438 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; }}
|
| 439 |
-
ol {{ font-size: 11pt; margin-left: 20px;
|
| 440 |
-
ul {{ font-size: 11pt; margin-left: 20px; margin-top: 0; margin-bottom: 10px; line-height: 1.5; }}
|
| 441 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
| 442 |
-
table {{
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
margin-bottom: 10px;
|
| 446 |
-
}}
|
| 447 |
-
th, td {{
|
| 448 |
-
border: 1px solid #ccc;
|
| 449 |
-
padding: 8px;
|
| 450 |
-
text-align: left;
|
| 451 |
-
}}
|
| 452 |
-
th {{
|
| 453 |
-
background-color: #f2f2f2;
|
| 454 |
-
}}
|
| 455 |
</style>
|
| 456 |
</head>
|
| 457 |
<body>
|
|
@@ -460,7 +449,6 @@ class ReportGenerator:
|
|
| 460 |
</body>
|
| 461 |
</html>
|
| 462 |
"""
|
| 463 |
-
# Generate PDF from HTML using xhtml2pdf (pisa)
|
| 464 |
pdf_buffer = io.BytesIO()
|
| 465 |
pisa_status = pisa.CreatePDF(full_html, dest=pdf_buffer)
|
| 466 |
if pisa_status.err:
|
|
@@ -481,7 +469,6 @@ def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
|
| 481 |
final_report = compress_text(final_report, MAX_MESSAGE_LENGTH)
|
| 482 |
pdf_bytes = report_generator.generate_report_pdf_html(solution_content=final_report,
|
| 483 |
metadata=metadata)
|
| 484 |
-
# Create a temporary file for PDF download
|
| 485 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 486 |
tmp_file.write(pdf_bytes)
|
| 487 |
tmp_path = tmp_file.name
|
|
@@ -492,10 +479,6 @@ def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
|
| 492 |
return f"Error generating report: {str(e)}", None
|
| 493 |
|
| 494 |
def extract_summary_from_crumbs(crumbs_list: list) -> str:
    """Aggregate only the URL and summary parts of a list of crumb records.

    Each crumb record is a dict that normally carries 'url', 'summary' and
    'full_content'; the full content is deliberately left out of the result.

    Args:
        crumbs_list: Crumb records to aggregate. ``None`` or an empty list
            yields an empty string.

    Returns:
        One "URL: ...\\nSummary: ..." pair per crumb, joined by newlines.
    """
    # .get() keeps a single malformed record (missing 'url' or 'summary')
    # from aborting the whole aggregation with a KeyError.
    aggregated = "\n".join(
        f"URL: {crumb.get('url', '')}\nSummary: {crumb.get('summary', '')}"
        for crumb in (crumbs_list or [])
    )
    logging.info("extract_summary_from_crumbs: Aggregated crumb summary created.")
    return aggregated
|
|
@@ -514,16 +497,12 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
|
|
| 514 |
"Formulate this as a new research query that could lead to innovative insights.")
|
| 515 |
disruptive_query = openai_call(new_prompt, model="gpt-4o-mini", max_tokens_param=500)
|
| 516 |
logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
|
| 517 |
-
|
| 518 |
-
# Generate tailored clarification questions for the disruptive query
|
| 519 |
clarifications_for_new = generate_tailored_questions(
|
| 520 |
os.getenv("OPENAI_API_KEY"),
|
| 521 |
-
disruptive_query + "\n\n IMPORTANT NOTE: in this
|
| 522 |
"", "", "", ""
|
| 523 |
)
|
| 524 |
logging.info(f"generate_surprise_report: Clarification questions for new query: {clarifications_for_new}")
|
| 525 |
-
|
| 526 |
-
# Run iterative deep research for the disruptive query
|
| 527 |
generator = iterative_deep_research_gen(
|
| 528 |
disruptive_query, reportstyle, breadth, depth, followup_clarifications,
|
| 529 |
include_domains, exclude_keywords, additional_clarifications,
|
|
@@ -538,7 +517,8 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
|
|
| 538 |
appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
|
| 539 |
return appended_report
|
| 540 |
|
| 541 |
-
|
|
|
|
| 542 |
followup_clarifications: str,
|
| 543 |
include_domains: str,
|
| 544 |
exclude_keywords: str,
|
|
@@ -558,90 +538,93 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
|
|
| 558 |
references_list = []
|
| 559 |
followup_suggestions = []
|
| 560 |
logging.info("iterative_deep_research_gen: Research started.")
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
mod_query += f"
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
if url.lower().endswith(".pdf"):
|
| 595 |
-
content = process_pdf(url)
|
| 596 |
-
if "Error processing PDF" in content:
|
| 597 |
continue
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
analysis_summary = content[:200] + "..." if len(content) > 200 else content
|
| 614 |
-
crumbs_list.append({
|
| 615 |
-
"url": url,
|
| 616 |
-
"summary": analysis_summary,
|
| 617 |
-
"full_content": content
|
| 618 |
-
})
|
| 619 |
-
if analysis.get("relevant", "no").lower() == "yes":
|
| 620 |
-
if url.startswith("http://") or url.startswith("https://"):
|
| 621 |
-
link_str = f" <a href='{url}'>[{ref_counter}]</a>"
|
| 622 |
else:
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
|
| 647 |
prompt = (
|
|
@@ -649,21 +632,21 @@ def assess_report_alignment(report: str, initial_query: str, clarifications: str
|
|
| 649 |
"and the clarification Q&A provided. Ensure that the report covers key points of the topic.\n\n"
|
| 650 |
"Initial Query: " + initial_query + "\nClarifications: " + clarifications + "\n\n"
|
| 651 |
"Research Report:\n" + report + "\n\n"
|
| 652 |
-
"Provide a short
|
| 653 |
)
|
| 654 |
assessment = openai_call(prompt, model="gpt-3.5-turbo", max_tokens_param=200)
|
| 655 |
logging.info(f"assess_report_alignment: Assessment result: {assessment}")
|
| 656 |
return assessment
|
| 657 |
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
|
|
|
| 663 |
if not openai_api_key or not serpapi_api_key:
|
| 664 |
-
logging.error("
|
| 665 |
-
return "Please input valid API keys", "", "", ""
|
| 666 |
-
|
| 667 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 668 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 669 |
|
|
@@ -675,50 +658,42 @@ def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query:
|
|
| 675 |
if existing_crumbs:
|
| 676 |
extra_context += f"Existing Crumbs:\n{existing_crumbs}\n"
|
| 677 |
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
| 679 |
final_report = ""
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
logging.info("run_deep_research: Starting deep research process.")
|
| 683 |
-
for progress, rep, proc_log, crumbs in iterative_deep_research_gen(
|
| 684 |
-
initial_query, reportstyle, breadth, depth, followup_clarifications,
|
| 685 |
-
include_domains, exclude_keywords, additional_clarifications,
|
| 686 |
-
extra_context, selected_engines, results_per_query, go_deeper=int(pages)):
|
| 687 |
if rep is None:
|
| 688 |
-
|
| 689 |
-
|
| 690 |
else:
|
| 691 |
final_report = rep
|
| 692 |
-
|
| 693 |
-
final_crumbs = crumbs
|
| 694 |
break
|
| 695 |
if surprise_me:
|
| 696 |
-
extended_report = generate_surprise_report(
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
results_per_query, selected_engines
|
| 700 |
-
)
|
| 701 |
final_report = extended_report
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
except Exception as e:
|
| 720 |
-
logging.error(f"load_example: Error loading {filename}: {e}")
|
| 721 |
-
return ""
|
| 722 |
|
| 723 |
def main():
|
| 724 |
custom_css = """
|
|
@@ -772,16 +747,16 @@ def main():
|
|
| 772 |
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API Key here...", type="password")
|
| 773 |
serpapi_api_key_input = gr.Textbox(label="SERPAPI API Key", placeholder="Enter your SERPAPI API Key here...", type="password")
|
| 774 |
gr.Markdown("[Create OpenAI API Key](https://platform.openai.com/account/api-keys) | [Create SERPAPI API Key](https://serpapi.com/manage-api-key)")
|
| 775 |
-
gr.Markdown("
|
| 776 |
|
| 777 |
-
with gr.Accordion
|
| 778 |
with gr.Row():
|
| 779 |
research_query = gr.Textbox(label="Research Query", placeholder="Enter your research query here...", lines=2, elem_id="research-query", scale=4)
|
| 780 |
refine_query_button = gr.Button("Refine my Query", scale=1)
|
| 781 |
|
| 782 |
with gr.Accordion("3] Q&A", open=False):
|
| 783 |
with gr.Row():
|
| 784 |
-
clarification_text = gr.Textbox(label="Clarification / Follow-Up Questions", placeholder="Tailored clarifying suggestions will appear here...", lines=6, scale
|
| 785 |
gen_followups = gr.Button("Generate Tailored Clarification Questions", scale=1)
|
| 786 |
|
| 787 |
with gr.Accordion("4] Search Parameters", open=False):
|
|
@@ -822,7 +797,7 @@ def main():
|
|
| 822 |
with gr.Accordion("5] Report", open=False, elem_classes="folder"):
|
| 823 |
progress_display = gr.Markdown("", elem_id="progress-display")
|
| 824 |
run_btn = gr.Button("Generate report")
|
| 825 |
-
final_report = gr.Markdown(label="Final Report (Markdown)", height
|
| 826 |
with gr.Accordion("Generate PDF", open=False, elem_classes="folder"):
|
| 827 |
with gr.Column():
|
| 828 |
query_name = gr.Textbox(label="Query name", placeholder="Enter query name...", lines=1)
|
|
|
|
| 12 |
import logging
|
| 13 |
import markdown
|
| 14 |
import unicodedata
|
| 15 |
+
import asyncio
|
| 16 |
+
import aiohttp
|
| 17 |
from datetime import datetime
|
| 18 |
from reportlab.lib.pagesizes import A4
|
| 19 |
from xhtml2pdf import pisa
|
|
|
|
| 29 |
"(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"}
|
| 30 |
|
| 31 |
# =============================================================================
|
| 32 |
+
# Helper functions for external APIs, PDF Processing and Asynchronous Requests
|
| 33 |
# =============================================================================
|
| 34 |
|
| 35 |
def display_image():
|
|
|
|
| 178 |
prompt = (f"""
|
| 179 |
Using the following learnings and merged reference details from a deep research process on '{initial_query}', produce a comprehensive research report in Markdown format.
|
| 180 |
The report should be very detailed and lengthy — approximately the equivalent of {pages} pages (or {word_count} words) when printed.
|
| 181 |
+
It must include inline citations (e.g., [1], [2], etc.) and follow this writing style: {reportstyle}.
|
| 182 |
+
Include at least {round(pages/3,0)} tables from the sources used (citations added if necessary).
|
| 183 |
+
The structure should have:
|
|
|
|
|
|
|
| 184 |
- Abstract
|
| 185 |
- Table of contents
|
| 186 |
- Introduction
|
| 187 |
+
- [Sections and sub-sections as needed]
|
| 188 |
- Conclusion
|
| 189 |
+
- References
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
+
Important: Number titles and lists as 1., 1.1, etc.
|
| 192 |
Learnings:
|
| 193 |
{json.dumps(learnings, indent=2)}
|
|
|
|
| 194 |
Merged Reference Details:
|
| 195 |
{aggregated_crumbs}"""
|
| 196 |
)
|
| 197 |
tokentarget = word_count * 3 # rough multiplier for token target
|
| 198 |
report = openai_call(prompt, model="o3-mini", max_tokens_param=tokentarget)
|
|
|
|
| 199 |
if len(report) > MAX_MESSAGE_LENGTH:
|
| 200 |
report = compress_text(report, MAX_MESSAGE_LENGTH)
|
| 201 |
if report.startswith("Error calling OpenAI API"):
|
|
|
|
| 205 |
return report
|
| 206 |
|
| 207 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 208 |
+
# Filter out already seen results by URL and domain (robust deduplication)
|
| 209 |
new_results = []
|
| 210 |
candidate_indexes = []
|
| 211 |
+
seen_domains = set()
|
| 212 |
for idx, res in enumerate(results):
|
| 213 |
url = res.get("link", "")
|
| 214 |
if url and url not in visited_urls:
|
| 215 |
+
domain = url.split("/")[2] if "://" in url else url
|
| 216 |
+
if domain in seen_domains:
|
| 217 |
+
continue
|
| 218 |
new_results.append(res)
|
| 219 |
candidate_indexes.append(idx)
|
| 220 |
+
seen_domains.add(domain)
|
| 221 |
if not new_results:
|
| 222 |
return []
|
|
|
|
| 223 |
results_text = ""
|
| 224 |
for idx, res in enumerate(new_results):
|
| 225 |
title = res.get("title", "No Title")
|
|
|
|
| 229 |
prompt = (
|
| 230 |
f"The following search results were obtained for the query '{query}' with clarifications:\n"
|
| 231 |
f"{clarifications}\n\n"
|
| 232 |
+
"For each result, decide if it might be relevant for deeper research. Return a JSON object with keys as result indices and values as 'yes' or 'no'.\n"
|
| 233 |
+
f"Results:{results_text}\nOutput only the JSON object."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
)
|
| 235 |
llm_response = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=200)
|
| 236 |
try:
|
| 237 |
decision_map = json.loads(llm_response)
|
| 238 |
except Exception as e:
|
| 239 |
logging.error(f"filter_search_results: JSON decode error: {e}; Full response: {llm_response}")
|
|
|
|
| 240 |
decision_map = {}
|
| 241 |
filtered = []
|
| 242 |
for idx, res in enumerate(new_results):
|
| 243 |
url = res.get("link", "")
|
|
|
|
| 244 |
visited_urls.add(url)
|
| 245 |
decision = decision_map.get(str(idx), "no").strip().lower()
|
| 246 |
if decision == "yes":
|
|
|
|
| 249 |
return filtered
|
| 250 |
|
| 251 |
def make_multilingual_query(query: str, context: str, languagesdetected: str) -> str:
|
| 252 |
+
finalquery = f"({query})" # original query in parentheses
|
| 253 |
languages_detected_list = languagesdetected.split(",")
|
| 254 |
for lang in languages_detected_list:
|
| 255 |
prompt2 = f"""The research query is: "{query}".
|
| 256 |
+
Based on this query and context: "{context}", and using the detected language {lang}, provide a translated version (less than 20 words) preserving search operators.
|
|
|
|
| 257 |
Output only the translated query."""
|
| 258 |
translatedquery = openai_call(prompt2, model="gpt-4o-mini", max_tokens_param=50)
|
| 259 |
finalquery += f" OR ({translatedquery})"
|
|
|
|
| 261 |
return finalquery
|
| 262 |
|
| 263 |
def generate_query_tree(initial_query: str, breadth: int, depth: int) -> list:
    """Build up to ``breadth`` search-query variants of ``initial_query``.

    The variants are the stripped query itself followed by the same query
    suffixed with " detailed analysis", " review" and " case study", in that
    order.

    Args:
        initial_query: The user's research query; surrounding whitespace is
            stripped before the variants are built.
        breadth: Maximum number of variants to return. Values of zero or
            below yield an empty list; values above four yield all four.
        depth: Accepted for interface compatibility; not used here.

    Returns:
        A list of query strings, at most four long.
    """
    base_terms = initial_query.strip()
    variants = [
        base_terms,
        base_terms + " detailed analysis",
        base_terms + " review",
        base_terms + " case study",
    ]
    # A negative slice bound means "drop from the end" (breadth=-1 would
    # silently return three variants), so clamp at zero. int() also accepts
    # integral floats such as 2.0, which are not valid slice bounds.
    limit = max(int(breadth), 0)
    final_queries = variants[:limit]
    logging.info(f"generate_query_tree: Generated queries: {final_queries}")
    return final_queries
|
| 274 |
|
|
|
|
| 276 |
selected_engines=None, results_per_query: int = 10) -> list:
|
| 277 |
queries = generate_query_tree(initial_query, breadth, depth)
|
| 278 |
prompt = f"""The research query is: "{initial_query}".
|
| 279 |
+
Based on the context: "{context}", suggest non-English languages (if any) relevant.
|
| 280 |
Output either:
|
| 281 |
- "No local attributes detected"
|
| 282 |
+
- A comma-separated list (e.g., "Spanish,Italian")
|
| 283 |
+
Output only the result."""
|
|
|
|
|
|
|
| 284 |
languages_detected = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=20)
|
| 285 |
if languages_detected != "No local attributes detected":
|
| 286 |
queries = [make_multilingual_query(q, context, languages_detected) for q in queries]
|
|
|
|
| 288 |
prompt_engines = f"""
|
| 289 |
Examine these queries:
|
| 290 |
{queries}
|
| 291 |
+
Considering the context:
|
| 292 |
{context}
|
| 293 |
+
Identify among these search engines: google,google_jobs_listing,google_trends,google_news,google_scholar,google_ai_overview,bing,bing_news,baidu,baidu_news,yandex,youtube_video,linkedin,linkedin_profile,duckduckgo_news,yelp_reviews.
|
| 294 |
+
Return a comma separated list (default "google" if none)."""
|
|
|
|
|
|
|
|
|
|
| 295 |
identified_engines = openai_call(prompt_engines, model="gpt-4o-mini", max_tokens_param=20)
|
| 296 |
selected_engines = identified_engines.split(",")
|
|
|
|
|
|
|
| 297 |
final_queries = []
|
| 298 |
for q in queries:
|
| 299 |
for engine in selected_engines:
|
|
|
|
| 379 |
logging.info(f"refine_query: Refined query: {refined}")
|
| 380 |
return refined
|
| 381 |
|
| 382 |
+
# --- New Asynchronous Helper for Parallel URL Fetching --- #
|
| 383 |
+
async def async_fetch_url(session: aiohttp.ClientSession, url: str, timeout: float = 10) -> str:
    """Fetch ``url`` asynchronously with aiohttp and return the body as text.

    This is a best-effort helper: any failure while requesting or decoding
    the page (connection error, non-success HTTP status, timeout, undecodable
    body) is logged and reported as an empty string instead of being raised.

    Args:
        session: Open aiohttp client session used to issue the request.
        url: Address to download.
        timeout: Total time budget for the request, in seconds.

    Returns:
        The decoded response body, or ``""`` if the fetch failed.
    """
    # Use an explicit ClientTimeout instead of a bare number so the meaning
    # (total time for the whole request) is unambiguous.
    request_timeout = aiohttp.ClientTimeout(total=timeout)
    try:
        async with session.get(url, headers=HEADERS, timeout=request_timeout) as response:
            response.raise_for_status()
            text = await response.text()
    except Exception as e:
        # Deliberately broad: one bad page must not abort the whole batch.
        logging.error(f"async_fetch_url: Error retrieving content from {url}: {e}")
        return ""
    logging.info(f"async_fetch_url: Fetched content from {url}")
    return text
|
| 394 |
+
|
| 395 |
+
# =============================================================================
|
| 396 |
+
# ReportGenerator and PDF generation (Enhanced CSS added)
|
| 397 |
+
# =============================================================================
|
| 398 |
+
|
| 399 |
class ReportGenerator:
|
| 400 |
def __init__(self):
|
| 401 |
pass
|
|
|
|
| 406 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
| 407 |
# Remove markdown hyperlink syntax: replace [text](link) with just text.
|
| 408 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
| 409 |
+
# Convert markdown to HTML using the "extra" and "tables" extensions.
|
| 410 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
| 411 |
+
# Insert explicit page breaks before key headings (with added CSS for dynamic styling).
|
| 412 |
html_content = html_content.replace("<h2>Table of Contents</h2>",
|
| 413 |
"<div style='page-break-before: always;'></div><h2>Table of Contents</h2>")
|
| 414 |
html_content = html_content.replace("<h2>Introduction</h2>",
|
|
|
|
| 417 |
"<div style='page-break-before: always;'></div><h2>Conclusion</h2>")
|
| 418 |
html_content = html_content.replace("<h2>References</h2>",
|
| 419 |
"<div style='page-break-before: always;'></div><h2>References</h2>")
|
|
|
|
| 420 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>",
|
| 421 |
"<div style='page-break-before: always;'></div><h2>Surprise-Me Extension Report</h2>")
|
|
|
|
| 422 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 423 |
header = ""
|
| 424 |
if metadata:
|
|
|
|
| 426 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 427 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 428 |
<hr/>"""
|
|
|
|
| 429 |
full_html = f"""
|
| 430 |
<html>
|
| 431 |
<head>
|
| 432 |
<meta charset="utf-8" />
|
| 433 |
<style>
|
| 434 |
+
body {{ font-family: Helvetica, sans-serif; margin: 40px; background: #fefefe; }}
|
| 435 |
h1 {{ font-size: 24pt; margin-bottom: 12px; }}
|
| 436 |
h2 {{ font-size: 20pt; margin-bottom: 10px; }}
|
| 437 |
h3 {{ font-size: 18pt; margin-bottom: 8px; }}
|
| 438 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; }}
|
| 439 |
+
ol, ul {{ font-size: 11pt; margin-left: 20px; line-height: 1.5; }}
|
|
|
|
| 440 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
| 441 |
+
table {{ border-collapse: collapse; width: 100%; margin-bottom: 10px; }}
|
| 442 |
+
th, td {{ border: 1px solid #ccc; padding: 8px; text-align: left; }}
|
| 443 |
+
th {{ background-color: #f2f2f2; }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
</style>
|
| 445 |
</head>
|
| 446 |
<body>
|
|
|
|
| 449 |
</body>
|
| 450 |
</html>
|
| 451 |
"""
|
|
|
|
| 452 |
pdf_buffer = io.BytesIO()
|
| 453 |
pisa_status = pisa.CreatePDF(full_html, dest=pdf_buffer)
|
| 454 |
if pisa_status.err:
|
|
|
|
| 469 |
final_report = compress_text(final_report, MAX_MESSAGE_LENGTH)
|
| 470 |
pdf_bytes = report_generator.generate_report_pdf_html(solution_content=final_report,
|
| 471 |
metadata=metadata)
|
|
|
|
| 472 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 473 |
tmp_file.write(pdf_bytes)
|
| 474 |
tmp_path = tmp_file.name
|
|
|
|
| 479 |
return f"Error generating report: {str(e)}", None
|
| 480 |
|
| 481 |
def extract_summary_from_crumbs(crumbs_list: list) -> str:
    """Join the URL and summary of every crumb record into one text block.

    Args:
        crumbs_list: Crumb records (dicts read for their 'url' and 'summary'
            keys). ``None`` or an empty list yields an empty string.

    Returns:
        One "URL: ...\\nSummary: ..." pair per crumb, joined by newlines.
    """
    # .get() keeps a single malformed record (missing 'url' or 'summary')
    # from aborting the whole aggregation with a KeyError.
    aggregated = "\n".join(
        f"URL: {crumb.get('url', '')}\nSummary: {crumb.get('summary', '')}"
        for crumb in (crumbs_list or [])
    )
    logging.info("extract_summary_from_crumbs: Aggregated crumb summary created.")
    return aggregated
|
|
|
|
| 497 |
"Formulate this as a new research query that could lead to innovative insights.")
|
| 498 |
disruptive_query = openai_call(new_prompt, model="gpt-4o-mini", max_tokens_param=500)
|
| 499 |
logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
|
|
|
|
|
|
|
| 500 |
clarifications_for_new = generate_tailored_questions(
|
| 501 |
os.getenv("OPENAI_API_KEY"),
|
| 502 |
+
disruptive_query + "\n\n IMPORTANT NOTE: in this iteration, generate also simulated responses for the questions asked",
|
| 503 |
"", "", "", ""
|
| 504 |
)
|
| 505 |
logging.info(f"generate_surprise_report: Clarification questions for new query: {clarifications_for_new}")
|
|
|
|
|
|
|
| 506 |
generator = iterative_deep_research_gen(
|
| 507 |
disruptive_query, reportstyle, breadth, depth, followup_clarifications,
|
| 508 |
include_domains, exclude_keywords, additional_clarifications,
|
|
|
|
| 517 |
appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
|
| 518 |
return appended_report
|
| 519 |
|
| 520 |
+
# --- Adaptive and Parallel Organized Research (Dynamic Agent Orchestration) --- #
|
| 521 |
+
async def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: int, depth: int,
|
| 522 |
followup_clarifications: str,
|
| 523 |
include_domains: str,
|
| 524 |
exclude_keywords: str,
|
|
|
|
| 538 |
references_list = []
|
| 539 |
followup_suggestions = []
|
| 540 |
logging.info("iterative_deep_research_gen: Research started.")
|
| 541 |
+
|
| 542 |
+
# Create a single aiohttp session for parallel page fetching
|
| 543 |
+
async with aiohttp.ClientSession() as session:
|
| 544 |
+
for iteration in range(1, depth + 1):
|
| 545 |
+
process_log += f"\n--- Iteration {iteration} ---\n"
|
| 546 |
+
logging.info(f"iterative_deep_research_gen: Starting iteration {iteration}.")
|
| 547 |
+
combined_context = overall_context
|
| 548 |
+
if followup_suggestions:
|
| 549 |
+
unique_suggestions = list(set(followup_suggestions))
|
| 550 |
+
combined_context += "\nFollow-up suggestions: " + ", ".join(unique_suggestions)
|
| 551 |
+
queries = generate_serp_queries(combined_context, breadth, depth, initial_query, selected_engines, results_per_query)
|
| 552 |
+
process_log += f"Generated queries: {queries}\n"
|
| 553 |
+
iteration_learnings = []
|
| 554 |
+
followup_suggestions = [] # reset for current iteration
|
| 555 |
+
|
| 556 |
+
# For each query, perform SERPAPI search and fetch pages concurrently:
|
| 557 |
+
for query_str, engine in queries:
|
| 558 |
+
mod_query = query_str
|
| 559 |
+
if include_domains.strip():
|
| 560 |
+
domains = [d.strip() for d in include_domains.split(",") if d.strip()]
|
| 561 |
+
domain_str = " OR ".join([f"site:{d}" for d in domains])
|
| 562 |
+
mod_query += f" ({domain_str})"
|
| 563 |
+
if exclude_keywords.strip():
|
| 564 |
+
for ex in [ex.strip() for ex in exclude_keywords.split(",") if ex.strip()]:
|
| 565 |
+
mod_query += f" -{ex}"
|
| 566 |
+
process_log += f"\nPerforming SERPAPI search with query: {mod_query} using engine: {engine}\n"
|
| 567 |
+
results = perform_serpapi_search(mod_query, engine, results_per_query)
|
| 568 |
+
filtered_results = filter_search_results(results, visited_urls, initial_query, followup_clarifications)
|
| 569 |
+
process_log += f"After filtering, {len(filtered_results)} results remain for processing.\n"
|
| 570 |
+
async_tasks = []
|
| 571 |
+
for res in filtered_results:
|
| 572 |
+
url = res.get("link", "")
|
| 573 |
+
if not url:
|
|
|
|
|
|
|
|
|
|
| 574 |
continue
|
| 575 |
+
if url.lower().endswith(".pdf"):
|
| 576 |
+
content = process_pdf(url)
|
| 577 |
+
process_log += f"Extracted PDF content from {url}\n"
|
| 578 |
+
# Process synchronously for PDFs
|
| 579 |
+
analysis = analyze_with_gpt4o(initial_query, content)
|
| 580 |
+
analysis_summary = analysis.get("summary", "").strip() or (content[:200] + "..." if len(content) > 200 else content)
|
| 581 |
+
crumbs_list.append({"url": url, "summary": analysis_summary, "full_content": content})
|
| 582 |
+
if analysis.get("relevant", "no").lower() == "yes":
|
| 583 |
+
link_str = f" <a href='{url}'>[{ref_counter}]</a>"
|
| 584 |
+
summary_with_ref = analysis_summary + link_str
|
| 585 |
+
iteration_learnings.append(summary_with_ref)
|
| 586 |
+
references_list.append((ref_counter, url))
|
| 587 |
+
ref_counter += 1
|
| 588 |
+
if isinstance(analysis.get("followups"), list):
|
| 589 |
+
followup_suggestions.extend(analysis.get("followups"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
else:
|
| 591 |
+
# Schedule asynchronous fetching for non-PDF pages
|
| 592 |
+
async_tasks.append(async_fetch_url(session, url))
|
| 593 |
+
# Wait for asynchronous fetches to complete
|
| 594 |
+
if async_tasks:
|
| 595 |
+
fetched_contents = await asyncio.gather(*async_tasks)
|
| 596 |
+
for content in fetched_contents:
|
| 597 |
+
if not content:
|
| 598 |
+
continue
|
| 599 |
+
analysis = analyze_with_gpt4o(initial_query, content)
|
| 600 |
+
analysis_summary = analysis.get("summary", "").strip() or (content[:200] + "..." if len(content) > 200 else content)
|
| 601 |
+
# Here we do not re-fetch URL since it is already processed
|
| 602 |
+
crumbs_list.append({"url": "async_url", "summary": analysis_summary, "full_content": content})
|
| 603 |
+
if analysis.get("relevant", "no").lower() == "yes":
|
| 604 |
+
link_str = f" [*]" # Mark asynchronous fetched URLs.
|
| 605 |
+
summary_with_ref = analysis_summary + link_str
|
| 606 |
+
iteration_learnings.append(summary_with_ref)
|
| 607 |
+
if isinstance(analysis.get("followups"), list):
|
| 608 |
+
followup_suggestions.extend(analysis.get("followups"))
|
| 609 |
+
process_log += f"Iteration {iteration} extracted {len(iteration_learnings)} learnings.\n"
|
| 610 |
+
logging.info(f"iterative_deep_research_gen: Iteration {iteration} extracted {len(iteration_learnings)} learnings.")
|
| 611 |
+
overall_learnings.extend(iteration_learnings)
|
| 612 |
+
overall_context += f"\nIteration {iteration} learnings:\n" + "\n".join(iteration_learnings) + "\n"
|
| 613 |
+
if additional_clarifications.strip():
|
| 614 |
+
overall_context += "\nAdditional Clarifications from user: " + additional_clarifications.strip() + "\n"
|
| 615 |
+
process_log += "Appended additional clarifications to the context.\n"
|
| 616 |
+
# Adaptive follow-up: if new followup suggestions emerged, call tailored questions generator
|
| 617 |
+
if followup_suggestions:
|
| 618 |
+
extra_questions = generate_tailored_questions(os.getenv("OPENAI_API_KEY"), initial_query, "", "", "", "")
|
| 619 |
+
overall_context += "\nAdaptive Follow-Up Questions:\n" + extra_questions + "\n"
|
| 620 |
+
progress_pct = int((iteration / depth) * 100)
|
| 621 |
+
yield (f"Progress: {progress_pct}%", None, process_log, None)
|
| 622 |
+
aggregated_crumbs = "\n".join([f"URL: {c['url']}\nSummary: {c['summary']}" for c in crumbs_list])
|
| 623 |
+
final_report = generate_final_report(initial_query, reportstyle, overall_learnings, list(visited_urls), aggregated_crumbs, references_list, pages=go_deeper)
|
| 624 |
+
alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
|
| 625 |
+
final_report += "\n\n\n\n\n**Report alignment assessment:**\n" + alignment_assessment
|
| 626 |
+
logging.info("iterative_deep_research_gen: Final report generated.")
|
| 627 |
+
yield ("", final_report, process_log, crumbs_list)
|
| 628 |
|
| 629 |
def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
|
| 630 |
prompt = (
|
|
|
|
| 632 |
"and the clarification Q&A provided. Ensure that the report covers key points of the topic.\n\n"
|
| 633 |
"Initial Query: " + initial_query + "\nClarifications: " + clarifications + "\n\n"
|
| 634 |
"Research Report:\n" + report + "\n\n"
|
| 635 |
+
"Provide a short paragraph assessment on how well the report aligns with these requirements."
|
| 636 |
)
|
| 637 |
assessment = openai_call(prompt, model="gpt-3.5-turbo", max_tokens_param=200)
|
| 638 |
logging.info(f"assess_report_alignment: Assessment result: {assessment}")
|
| 639 |
return assessment
|
| 640 |
|
| 641 |
+
# --- Main Deep Research Orchestrator (Wrapper for async execution) --- #
|
| 642 |
+
async def orchestrate_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query: str, reportstyle: str,
|
| 643 |
+
breadth: int, depth: int, followup_clarifications: str, include_domains: str,
|
| 644 |
+
exclude_keywords: str, additional_clarifications: str, results_per_query: int,
|
| 645 |
+
selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
|
| 646 |
+
pages: str, surprise_me: bool):
|
| 647 |
if not openai_api_key or not serpapi_api_key:
|
| 648 |
+
logging.error("orchestrate_deep_research: Invalid API keys provided.")
|
| 649 |
+
return "Please input valid API keys", "", "", ""
|
|
|
|
| 650 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 651 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 652 |
|
|
|
|
| 658 |
if existing_crumbs:
|
| 659 |
extra_context += f"Existing Crumbs:\n{existing_crumbs}\n"
|
| 660 |
|
| 661 |
+
loop = asyncio.get_event_loop()
|
| 662 |
+
researcher = iterative_deep_research_gen(initial_query, reportstyle, breadth, depth, followup_clarifications,
|
| 663 |
+
include_domains, exclude_keywords, additional_clarifications,
|
| 664 |
+
extra_context, selected_engines, results_per_query, go_deeper=int(pages))
|
| 665 |
final_report = ""
|
| 666 |
+
process_log = ""
|
| 667 |
+
async for progress, rep, proc_log, crumbs in researcher:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
if rep is None:
|
| 669 |
+
current_progress = progress
|
| 670 |
+
# You could yield intermediate progress if needed.
|
| 671 |
else:
|
| 672 |
final_report = rep
|
| 673 |
+
process_log = proc_log
|
|
|
|
| 674 |
break
|
| 675 |
if surprise_me:
|
| 676 |
+
extended_report = generate_surprise_report(final_report, crumbs, initial_query, reportstyle, breadth, depth,
|
| 677 |
+
followup_clarifications, include_domains, exclude_keywords,
|
| 678 |
+
additional_clarifications, results_per_query, selected_engines)
|
|
|
|
|
|
|
| 679 |
final_report = extended_report
|
| 680 |
+
return final_report, process_log, extra_context
|
| 681 |
+
|
| 682 |
+
def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query: str, reportstyle: str, breadth: int, depth: int,
                      followup_clarifications: str, include_domains: str, exclude_keywords: str, additional_clarifications: str,
                      results_per_query: int, selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
                      pages: str, surprise_me: bool):
    """Synchronous wrapper that runs the async research orchestrator to completion.

    All arguments are forwarded unchanged, in order, to
    ``orchestrate_deep_research``.

    Returns:
        A 5-tuple ``(status, final_report, existing_report, existing_log,
        existing_crumbs)``. ``status`` is ``"Progress: 100%"`` on success, or
        ``"Please input valid API keys"`` (with an empty report) when either
        API key is missing.
    """
    # Validate the keys here: the orchestrator's missing-key path returns four
    # values while its success path returns three, so unpacking its result
    # below would raise ValueError instead of surfacing the message.
    if not openai_api_key or not serpapi_api_key:
        logging.error("run_deep_research: Invalid API keys provided.")
        return ("Please input valid API keys", "", existing_report, existing_log, existing_crumbs)

    # asyncio.run creates a fresh event loop, drives the coroutine to
    # completion and closes the loop afterwards.
    final_report, _proc_log, _extra_context = asyncio.run(
        orchestrate_deep_research(openai_api_key, serpapi_api_key, initial_query, reportstyle, breadth, depth,
                                  followup_clarifications, include_domains, exclude_keywords, additional_clarifications,
                                  results_per_query, selected_engines, existing_crumbs, existing_report, existing_log,
                                  pages, surprise_me)
    )
    # NOTE(review): the freshly produced process log and context are not
    # returned; the caller-supplied existing_* values are echoed back instead.
    # Kept as-is to preserve the output contract - confirm this is intended.
    return ("Progress: 100%", final_report, existing_report, existing_log, existing_crumbs)
|
| 693 |
+
|
| 694 |
+
# =============================================================================
|
| 695 |
+
# Gradio Interface using gr.Blocks with Custom CSS
|
| 696 |
+
# =============================================================================
|
|
|
|
|
|
|
|
|
|
| 697 |
|
| 698 |
def main():
|
| 699 |
custom_css = """
|
|
|
|
| 747 |
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API Key here...", type="password")
|
| 748 |
serpapi_api_key_input = gr.Textbox(label="SERPAPI API Key", placeholder="Enter your SERPAPI API Key here...", type="password")
|
| 749 |
gr.Markdown("[Create OpenAI API Key](https://platform.openai.com/account/api-keys) | [Create SERPAPI API Key](https://serpapi.com/manage-api-key)")
|
| 750 |
+
gr.Markdown("API keys are not stored or logged.")
|
| 751 |
|
| 752 |
+
with gr.Accordion("2] Research topic", open=False):
|
| 753 |
with gr.Row():
|
| 754 |
research_query = gr.Textbox(label="Research Query", placeholder="Enter your research query here...", lines=2, elem_id="research-query", scale=4)
|
| 755 |
refine_query_button = gr.Button("Refine my Query", scale=1)
|
| 756 |
|
| 757 |
with gr.Accordion("3] Q&A", open=False):
|
| 758 |
with gr.Row():
|
| 759 |
+
clarification_text = gr.Textbox(label="Clarification / Follow-Up Questions", placeholder="Tailored clarifying suggestions will appear here...", lines=6, scale=4)
|
| 760 |
gen_followups = gr.Button("Generate Tailored Clarification Questions", scale=1)
|
| 761 |
|
| 762 |
with gr.Accordion("4] Search Parameters", open=False):
|
|
|
|
| 797 |
with gr.Accordion("5] Report", open=False, elem_classes="folder"):
|
| 798 |
progress_display = gr.Markdown("", elem_id="progress-display")
|
| 799 |
run_btn = gr.Button("Generate report")
|
| 800 |
+
final_report = gr.Markdown(label="Final Report (Markdown)", height=800, min_height=50)
|
| 801 |
with gr.Accordion("Generate PDF", open=False, elem_classes="folder"):
|
| 802 |
with gr.Column():
|
| 803 |
query_name = gr.Textbox(label="Query name", placeholder="Enter query name...", lines=1)
|