Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,8 +12,6 @@ import tempfile
|
|
| 12 |
import logging
|
| 13 |
import markdown
|
| 14 |
import unicodedata
|
| 15 |
-
import asyncio
|
| 16 |
-
import aiohttp
|
| 17 |
from datetime import datetime
|
| 18 |
from reportlab.lib.pagesizes import A4
|
| 19 |
from xhtml2pdf import pisa
|
|
@@ -29,7 +27,7 @@ HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/
|
|
| 29 |
"(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"}
|
| 30 |
|
| 31 |
# =============================================================================
|
| 32 |
-
# Helper functions for external APIs
|
| 33 |
# =============================================================================
|
| 34 |
|
| 35 |
def display_image():
|
|
@@ -178,24 +176,32 @@ def generate_final_report(initial_query: str, reportstyle: str, learnings: list,
|
|
| 178 |
prompt = (f"""
|
| 179 |
Using the following learnings and merged reference details from a deep research process on '{initial_query}', produce a comprehensive research report in Markdown format.
|
| 180 |
The report should be very detailed and lengthy — approximately the equivalent of {pages} pages (or {word_count} words) when printed.
|
| 181 |
-
It must include inline citations (e.g., [1], [2], etc.)
|
| 182 |
-
|
| 183 |
-
The
|
|
|
|
|
|
|
| 184 |
- Abstract
|
| 185 |
- Table of contents
|
| 186 |
- Introduction
|
| 187 |
-
- [Sections and sub-sections
|
| 188 |
- Conclusion
|
| 189 |
-
- References
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
-
Important: Number titles and lists as 1., 1.1, etc.
|
| 192 |
Learnings:
|
| 193 |
{json.dumps(learnings, indent=2)}
|
|
|
|
| 194 |
Merged Reference Details:
|
| 195 |
{aggregated_crumbs}"""
|
| 196 |
)
|
| 197 |
tokentarget = word_count * 3 # rough multiplier for token target
|
| 198 |
report = openai_call(prompt, model="o3-mini", max_tokens_param=tokentarget)
|
|
|
|
| 199 |
if len(report) > MAX_MESSAGE_LENGTH:
|
| 200 |
report = compress_text(report, MAX_MESSAGE_LENGTH)
|
| 201 |
if report.startswith("Error calling OpenAI API"):
|
|
@@ -205,21 +211,17 @@ Merged Reference Details:
|
|
| 205 |
return report
|
| 206 |
|
| 207 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 208 |
-
# Filter out already seen results
|
| 209 |
new_results = []
|
| 210 |
candidate_indexes = []
|
| 211 |
-
seen_domains = set()
|
| 212 |
for idx, res in enumerate(results):
|
| 213 |
url = res.get("link", "")
|
| 214 |
if url and url not in visited_urls:
|
| 215 |
-
domain = url.split("/")[2] if "://" in url else url
|
| 216 |
-
if domain in seen_domains:
|
| 217 |
-
continue
|
| 218 |
new_results.append(res)
|
| 219 |
candidate_indexes.append(idx)
|
| 220 |
-
seen_domains.add(domain)
|
| 221 |
if not new_results:
|
| 222 |
return []
|
|
|
|
| 223 |
results_text = ""
|
| 224 |
for idx, res in enumerate(new_results):
|
| 225 |
title = res.get("title", "No Title")
|
|
@@ -229,18 +231,25 @@ def filter_search_results(results: list, visited_urls: set, query: str, clarific
|
|
| 229 |
prompt = (
|
| 230 |
f"The following search results were obtained for the query '{query}' with clarifications:\n"
|
| 231 |
f"{clarifications}\n\n"
|
| 232 |
-
"For each result, decide
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
)
|
| 235 |
llm_response = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=200)
|
| 236 |
try:
|
| 237 |
decision_map = json.loads(llm_response)
|
| 238 |
except Exception as e:
|
| 239 |
logging.error(f"filter_search_results: JSON decode error: {e}; Full response: {llm_response}")
|
|
|
|
| 240 |
decision_map = {}
|
| 241 |
filtered = []
|
| 242 |
for idx, res in enumerate(new_results):
|
| 243 |
url = res.get("link", "")
|
|
|
|
| 244 |
visited_urls.add(url)
|
| 245 |
decision = decision_map.get(str(idx), "no").strip().lower()
|
| 246 |
if decision == "yes":
|
|
@@ -249,11 +258,12 @@ def filter_search_results(results: list, visited_urls: set, query: str, clarific
|
|
| 249 |
return filtered
|
| 250 |
|
| 251 |
def make_multilingual_query(query: str, context: str, languagesdetected: str) -> str:
|
| 252 |
-
finalquery = f"({query})" # original query in parentheses
|
| 253 |
languages_detected_list = languagesdetected.split(",")
|
| 254 |
for lang in languages_detected_list:
|
| 255 |
prompt2 = f"""The research query is: "{query}".
|
| 256 |
-
Based on this query and context: "{context}", and
|
|
|
|
| 257 |
Output only the translated query."""
|
| 258 |
translatedquery = openai_call(prompt2, model="gpt-4o-mini", max_tokens_param=50)
|
| 259 |
finalquery += f" OR ({translatedquery})"
|
|
@@ -261,14 +271,11 @@ Output only the translated query."""
|
|
| 261 |
return finalquery
|
| 262 |
|
| 263 |
def generate_query_tree(initial_query: str, breadth: int, depth: int) -> list:
|
| 264 |
-
# Generate several variants of the query based on the desired breadth.
|
| 265 |
base_terms = initial_query.strip()
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
# Return only as many as needed (up to 'breadth')
|
| 271 |
-
final_queries = variants[:min(len(variants), breadth)]
|
| 272 |
logging.info(f"generate_query_tree: Generated queries: {final_queries}")
|
| 273 |
return final_queries
|
| 274 |
|
|
@@ -276,11 +283,13 @@ def generate_serp_queries(context: str, breadth: int, depth: int, initial_query:
|
|
| 276 |
selected_engines=None, results_per_query: int = 10) -> list:
|
| 277 |
queries = generate_query_tree(initial_query, breadth, depth)
|
| 278 |
prompt = f"""The research query is: "{initial_query}".
|
| 279 |
-
Based on the context: "{context}", suggest
|
| 280 |
Output either:
|
| 281 |
- "No local attributes detected"
|
| 282 |
-
-
|
| 283 |
-
|
|
|
|
|
|
|
| 284 |
languages_detected = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=20)
|
| 285 |
if languages_detected != "No local attributes detected":
|
| 286 |
queries = [make_multilingual_query(q, context, languages_detected) for q in queries]
|
|
@@ -288,12 +297,17 @@ Output only the result."""
|
|
| 288 |
prompt_engines = f"""
|
| 289 |
Examine these queries:
|
| 290 |
{queries}
|
| 291 |
-
|
| 292 |
{context}
|
| 293 |
-
Identify among these search engines:
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
| 295 |
identified_engines = openai_call(prompt_engines, model="gpt-4o-mini", max_tokens_param=20)
|
| 296 |
selected_engines = identified_engines.split(",")
|
|
|
|
|
|
|
| 297 |
final_queries = []
|
| 298 |
for q in queries:
|
| 299 |
for engine in selected_engines:
|
|
@@ -379,23 +393,6 @@ def refine_query(query: str, openai_api_key: str) -> str:
|
|
| 379 |
logging.info(f"refine_query: Refined query: {refined}")
|
| 380 |
return refined
|
| 381 |
|
| 382 |
-
# --- New Asynchronous Helper for Parallel URL Fetching --- #
|
| 383 |
-
async def async_fetch_url(session: aiohttp.ClientSession, url: str) -> str:
|
| 384 |
-
"""Fetch the URL asynchronously using aiohttp."""
|
| 385 |
-
try:
|
| 386 |
-
async with session.get(url, headers=HEADERS, timeout=10) as response:
|
| 387 |
-
response.raise_for_status()
|
| 388 |
-
text = await response.text()
|
| 389 |
-
logging.info(f"async_fetch_url: Fetched content from {url}")
|
| 390 |
-
return text
|
| 391 |
-
except Exception as e:
|
| 392 |
-
logging.error(f"async_fetch_url: Error retrieving content from {url}: {e}")
|
| 393 |
-
return ""
|
| 394 |
-
|
| 395 |
-
# =============================================================================
|
| 396 |
-
# ReportGenerator and PDF generation (Enhanced CSS added)
|
| 397 |
-
# =============================================================================
|
| 398 |
-
|
| 399 |
class ReportGenerator:
|
| 400 |
def __init__(self):
|
| 401 |
pass
|
|
@@ -406,9 +403,9 @@ class ReportGenerator:
|
|
| 406 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
| 407 |
# Remove markdown hyperlink syntax: replace [text](link) with just text.
|
| 408 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
| 409 |
-
# Convert markdown to HTML using the "extra" and "tables" extensions.
|
| 410 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
| 411 |
-
# Insert explicit page breaks before
|
| 412 |
html_content = html_content.replace("<h2>Table of Contents</h2>",
|
| 413 |
"<div style='page-break-before: always;'></div><h2>Table of Contents</h2>")
|
| 414 |
html_content = html_content.replace("<h2>Introduction</h2>",
|
|
@@ -417,8 +414,10 @@ class ReportGenerator:
|
|
| 417 |
"<div style='page-break-before: always;'></div><h2>Conclusion</h2>")
|
| 418 |
html_content = html_content.replace("<h2>References</h2>",
|
| 419 |
"<div style='page-break-before: always;'></div><h2>References</h2>")
|
|
|
|
| 420 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>",
|
| 421 |
"<div style='page-break-before: always;'></div><h2>Surprise-Me Extension Report</h2>")
|
|
|
|
| 422 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 423 |
header = ""
|
| 424 |
if metadata:
|
|
@@ -426,21 +425,33 @@ class ReportGenerator:
|
|
| 426 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 427 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 428 |
<hr/>"""
|
|
|
|
| 429 |
full_html = f"""
|
| 430 |
<html>
|
| 431 |
<head>
|
| 432 |
<meta charset="utf-8" />
|
| 433 |
<style>
|
| 434 |
-
body {{ font-family: Helvetica, sans-serif; margin: 40px;
|
| 435 |
h1 {{ font-size: 24pt; margin-bottom: 12px; }}
|
| 436 |
h2 {{ font-size: 20pt; margin-bottom: 10px; }}
|
| 437 |
h3 {{ font-size: 18pt; margin-bottom: 8px; }}
|
| 438 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; }}
|
| 439 |
-
ol
|
|
|
|
| 440 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
| 441 |
-
table {{
|
| 442 |
-
|
| 443 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
</style>
|
| 445 |
</head>
|
| 446 |
<body>
|
|
@@ -449,6 +460,7 @@ class ReportGenerator:
|
|
| 449 |
</body>
|
| 450 |
</html>
|
| 451 |
"""
|
|
|
|
| 452 |
pdf_buffer = io.BytesIO()
|
| 453 |
pisa_status = pisa.CreatePDF(full_html, dest=pdf_buffer)
|
| 454 |
if pisa_status.err:
|
|
@@ -469,6 +481,7 @@ def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
|
| 469 |
final_report = compress_text(final_report, MAX_MESSAGE_LENGTH)
|
| 470 |
pdf_bytes = report_generator.generate_report_pdf_html(solution_content=final_report,
|
| 471 |
metadata=metadata)
|
|
|
|
| 472 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 473 |
tmp_file.write(pdf_bytes)
|
| 474 |
tmp_path = tmp_file.name
|
|
@@ -479,6 +492,10 @@ def handle_generate_report(query_name: str, user_name: str, final_report: str):
|
|
| 479 |
return f"Error generating report: {str(e)}", None
|
| 480 |
|
| 481 |
def extract_summary_from_crumbs(crumbs_list: list) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
aggregated = "\n".join([f"URL: {c['url']}\nSummary: {c['summary']}" for c in crumbs_list])
|
| 483 |
logging.info("extract_summary_from_crumbs: Aggregated crumb summary created.")
|
| 484 |
return aggregated
|
|
@@ -497,12 +514,16 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
|
|
| 497 |
"Formulate this as a new research query that could lead to innovative insights.")
|
| 498 |
disruptive_query = openai_call(new_prompt, model="gpt-4o-mini", max_tokens_param=500)
|
| 499 |
logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
|
|
|
|
|
|
|
| 500 |
clarifications_for_new = generate_tailored_questions(
|
| 501 |
os.getenv("OPENAI_API_KEY"),
|
| 502 |
-
disruptive_query + "\n\n IMPORTANT NOTE: in this iteration, generate also
|
| 503 |
"", "", "", ""
|
| 504 |
)
|
| 505 |
logging.info(f"generate_surprise_report: Clarification questions for new query: {clarifications_for_new}")
|
|
|
|
|
|
|
| 506 |
generator = iterative_deep_research_gen(
|
| 507 |
disruptive_query, reportstyle, breadth, depth, followup_clarifications,
|
| 508 |
include_domains, exclude_keywords, additional_clarifications,
|
|
@@ -517,8 +538,7 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
|
|
| 517 |
appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
|
| 518 |
return appended_report
|
| 519 |
|
| 520 |
-
|
| 521 |
-
async def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: int, depth: int,
|
| 522 |
followup_clarifications: str,
|
| 523 |
include_domains: str,
|
| 524 |
exclude_keywords: str,
|
|
@@ -538,93 +558,90 @@ async def iterative_deep_research_gen(initial_query: str, reportstyle: str, brea
|
|
| 538 |
references_list = []
|
| 539 |
followup_suggestions = []
|
| 540 |
logging.info("iterative_deep_research_gen: Research started.")
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
mod_query += f"
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
|
|
|
|
|
|
|
|
|
| 574 |
continue
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
else:
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
if additional_clarifications.strip():
|
| 614 |
-
overall_context += "\nAdditional Clarifications from user: " + additional_clarifications.strip() + "\n"
|
| 615 |
-
process_log += "Appended additional clarifications to the context.\n"
|
| 616 |
-
# Adaptive follow-up: if new followup suggestions emerged, call tailored questions generator
|
| 617 |
-
if followup_suggestions:
|
| 618 |
-
extra_questions = generate_tailored_questions(os.getenv("OPENAI_API_KEY"), initial_query, "", "", "", "")
|
| 619 |
-
overall_context += "\nAdaptive Follow-Up Questions:\n" + extra_questions + "\n"
|
| 620 |
-
progress_pct = int((iteration / depth) * 100)
|
| 621 |
-
yield (f"Progress: {progress_pct}%", None, process_log, None)
|
| 622 |
-
aggregated_crumbs = "\n".join([f"URL: {c['url']}\nSummary: {c['summary']}" for c in crumbs_list])
|
| 623 |
-
final_report = generate_final_report(initial_query, reportstyle, overall_learnings, list(visited_urls), aggregated_crumbs, references_list, pages=go_deeper)
|
| 624 |
-
alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
|
| 625 |
-
final_report += "\n\n\n\n\n**Report alignment assessment:**\n" + alignment_assessment
|
| 626 |
-
logging.info("iterative_deep_research_gen: Final report generated.")
|
| 627 |
-
yield ("", final_report, process_log, crumbs_list)
|
| 628 |
|
| 629 |
def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
|
| 630 |
prompt = (
|
|
@@ -632,21 +649,21 @@ def assess_report_alignment(report: str, initial_query: str, clarifications: str
|
|
| 632 |
"and the clarification Q&A provided. Ensure that the report covers key points of the topic.\n\n"
|
| 633 |
"Initial Query: " + initial_query + "\nClarifications: " + clarifications + "\n\n"
|
| 634 |
"Research Report:\n" + report + "\n\n"
|
| 635 |
-
"Provide a short
|
| 636 |
)
|
| 637 |
assessment = openai_call(prompt, model="gpt-3.5-turbo", max_tokens_param=200)
|
| 638 |
logging.info(f"assess_report_alignment: Assessment result: {assessment}")
|
| 639 |
return assessment
|
| 640 |
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
pages: str, surprise_me: bool):
|
| 647 |
if not openai_api_key or not serpapi_api_key:
|
| 648 |
-
logging.error("
|
| 649 |
-
return "Please input valid API keys", "", "", ""
|
|
|
|
| 650 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 651 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 652 |
|
|
@@ -658,26 +675,33 @@ async def orchestrate_deep_research(openai_api_key: str, serpapi_api_key: str, i
|
|
| 658 |
if existing_crumbs:
|
| 659 |
extra_context += f"Existing Crumbs:\n{existing_crumbs}\n"
|
| 660 |
|
| 661 |
-
|
| 662 |
-
researcher = iterative_deep_research_gen(initial_query, reportstyle, breadth, depth, followup_clarifications,
|
| 663 |
-
include_domains, exclude_keywords, additional_clarifications,
|
| 664 |
-
extra_context, selected_engines, results_per_query, go_deeper=int(pages))
|
| 665 |
final_report = ""
|
| 666 |
-
|
| 667 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
if rep is None:
|
| 669 |
-
|
| 670 |
-
|
| 671 |
else:
|
| 672 |
final_report = rep
|
| 673 |
-
|
|
|
|
| 674 |
break
|
| 675 |
if surprise_me:
|
| 676 |
-
extended_report = generate_surprise_report(
|
| 677 |
-
|
| 678 |
-
|
|
|
|
|
|
|
| 679 |
final_report = extended_report
|
| 680 |
-
|
|
|
|
|
|
|
| 681 |
|
| 682 |
def load_example(example_choice: str) -> str:
|
| 683 |
filename = ""
|
|
@@ -696,22 +720,6 @@ def load_example(example_choice: str) -> str:
|
|
| 696 |
logging.error(f"load_example: Error loading {filename}: {e}")
|
| 697 |
return ""
|
| 698 |
|
| 699 |
-
def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query: str, reportstyle: str, breadth: int, depth: int,
|
| 700 |
-
followup_clarifications: str, include_domains: str, exclude_keywords: str, additional_clarifications: str,
|
| 701 |
-
results_per_query: int, selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
|
| 702 |
-
pages: str, surprise_me: bool):
|
| 703 |
-
final_report, proc_log, extra_context = asyncio.run(
|
| 704 |
-
orchestrate_deep_research(openai_api_key, serpapi_api_key, initial_query, reportstyle, breadth, depth,
|
| 705 |
-
followup_clarifications, include_domains, exclude_keywords, additional_clarifications,
|
| 706 |
-
results_per_query, selected_engines, existing_crumbs, existing_report, existing_log,
|
| 707 |
-
pages, surprise_me)
|
| 708 |
-
)
|
| 709 |
-
return ("Progress: 100%", final_report, existing_report, existing_log, existing_crumbs)
|
| 710 |
-
|
| 711 |
-
# =============================================================================
|
| 712 |
-
# Gradio Interface using gr.Blocks with Custom CSS
|
| 713 |
-
# =============================================================================
|
| 714 |
-
|
| 715 |
def main():
|
| 716 |
custom_css = """
|
| 717 |
/* Overall container customization */
|
|
@@ -764,16 +772,16 @@ def main():
|
|
| 764 |
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API Key here...", type="password")
|
| 765 |
serpapi_api_key_input = gr.Textbox(label="SERPAPI API Key", placeholder="Enter your SERPAPI API Key here...", type="password")
|
| 766 |
gr.Markdown("[Create OpenAI API Key](https://platform.openai.com/account/api-keys) | [Create SERPAPI API Key](https://serpapi.com/manage-api-key)")
|
| 767 |
-
gr.Markdown("API keys are
|
| 768 |
|
| 769 |
-
with gr.Accordion("2] Research topic", open=False):
|
| 770 |
with gr.Row():
|
| 771 |
research_query = gr.Textbox(label="Research Query", placeholder="Enter your research query here...", lines=2, elem_id="research-query", scale=4)
|
| 772 |
refine_query_button = gr.Button("Refine my Query", scale=1)
|
| 773 |
|
| 774 |
with gr.Accordion("3] Q&A", open=False):
|
| 775 |
with gr.Row():
|
| 776 |
-
clarification_text = gr.Textbox(label="Clarification / Follow-Up Questions", placeholder="Tailored clarifying suggestions will appear here...", lines=6, scale=4)
|
| 777 |
gen_followups = gr.Button("Generate Tailored Clarification Questions", scale=1)
|
| 778 |
|
| 779 |
with gr.Accordion("4] Search Parameters", open=False):
|
|
@@ -814,7 +822,7 @@ def main():
|
|
| 814 |
with gr.Accordion("5] Report", open=False, elem_classes="folder"):
|
| 815 |
progress_display = gr.Markdown("", elem_id="progress-display")
|
| 816 |
run_btn = gr.Button("Generate report")
|
| 817 |
-
final_report = gr.Markdown(label="Final Report (Markdown)", height=800, min_height=50)
|
| 818 |
with gr.Accordion("Generate PDF", open=False, elem_classes="folder"):
|
| 819 |
with gr.Column():
|
| 820 |
query_name = gr.Textbox(label="Query name", placeholder="Enter query name...", lines=1)
|
|
|
|
| 12 |
import logging
|
| 13 |
import markdown
|
| 14 |
import unicodedata
|
|
|
|
|
|
|
| 15 |
from datetime import datetime
|
| 16 |
from reportlab.lib.pagesizes import A4
|
| 17 |
from xhtml2pdf import pisa
|
|
|
|
| 27 |
"(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"}
|
| 28 |
|
| 29 |
# =============================================================================
|
| 30 |
+
# Helper functions for external APIs and PDF Processing
|
| 31 |
# =============================================================================
|
| 32 |
|
| 33 |
def display_image():
|
|
|
|
| 176 |
prompt = (f"""
|
| 177 |
Using the following learnings and merged reference details from a deep research process on '{initial_query}', produce a comprehensive research report in Markdown format.
|
| 178 |
The report should be very detailed and lengthy — approximately the equivalent of {pages} pages (or {word_count} words) when printed.
|
| 179 |
+
It must include inline citations (e.g., [1], [2], etc.).
|
| 180 |
+
It must follow this writing style {reportstyle}.
|
| 181 |
+
The report must include at least {round(pages/3,0)} tables from the sources used (add citations if necessary) and use facts and figures extensively to ground the analysis.
|
| 182 |
+
|
| 183 |
+
The structure of the report should be:
|
| 184 |
- Abstract
|
| 185 |
- Table of contents
|
| 186 |
- Introduction
|
| 187 |
+
- [Sections and sub-sections, depending on the size and relevant topic]
|
| 188 |
- Conclusion
|
| 189 |
+
- References of the documents used in the inline citations
|
| 190 |
+
|
| 191 |
+
Important: For the numbering of titles or numbered lists, use numbers (ex: 1.) and sub-units (1.1, 1.2... 1.1.1...,1.1.2...). This is to avoid issues when converting markdown to html.
|
| 192 |
+
You should still use markdown for the stryling (titles levels, bold, italic), tables...
|
| 193 |
+
|
| 194 |
+
Output the report directly without any introductory meta comments.
|
| 195 |
|
|
|
|
| 196 |
Learnings:
|
| 197 |
{json.dumps(learnings, indent=2)}
|
| 198 |
+
|
| 199 |
Merged Reference Details:
|
| 200 |
{aggregated_crumbs}"""
|
| 201 |
)
|
| 202 |
tokentarget = word_count * 3 # rough multiplier for token target
|
| 203 |
report = openai_call(prompt, model="o3-mini", max_tokens_param=tokentarget)
|
| 204 |
+
# If the report is too long, compress it.
|
| 205 |
if len(report) > MAX_MESSAGE_LENGTH:
|
| 206 |
report = compress_text(report, MAX_MESSAGE_LENGTH)
|
| 207 |
if report.startswith("Error calling OpenAI API"):
|
|
|
|
| 211 |
return report
|
| 212 |
|
| 213 |
def filter_search_results(results: list, visited_urls: set, query: str, clarifications: str) -> list:
|
| 214 |
+
# Filter out already seen results
|
| 215 |
new_results = []
|
| 216 |
candidate_indexes = []
|
|
|
|
| 217 |
for idx, res in enumerate(results):
|
| 218 |
url = res.get("link", "")
|
| 219 |
if url and url not in visited_urls:
|
|
|
|
|
|
|
|
|
|
| 220 |
new_results.append(res)
|
| 221 |
candidate_indexes.append(idx)
|
|
|
|
| 222 |
if not new_results:
|
| 223 |
return []
|
| 224 |
+
# Build the prompt with relaxed criteria.
|
| 225 |
results_text = ""
|
| 226 |
for idx, res in enumerate(new_results):
|
| 227 |
title = res.get("title", "No Title")
|
|
|
|
| 231 |
prompt = (
|
| 232 |
f"The following search results were obtained for the query '{query}' with clarifications:\n"
|
| 233 |
f"{clarifications}\n\n"
|
| 234 |
+
"For each result, decide whether it might be of interest for deeper research. "
|
| 235 |
+
"Even if not completely certain, lean towards including more potential references. "
|
| 236 |
+
"Return your decision as a JSON object where each key is the result index (as an integer) and the value is either 'yes' or 'no'. "
|
| 237 |
+
"For example: {\"0\": \"yes\", \"1\": \"no\", \"2\": \"yes\"}.\n"
|
| 238 |
+
"Consider the title, snippet, and URL in your decision."
|
| 239 |
+
f"\nResults:{results_text}\n"
|
| 240 |
+
"Output only the JSON object."
|
| 241 |
)
|
| 242 |
llm_response = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=200)
|
| 243 |
try:
|
| 244 |
decision_map = json.loads(llm_response)
|
| 245 |
except Exception as e:
|
| 246 |
logging.error(f"filter_search_results: JSON decode error: {e}; Full response: {llm_response}")
|
| 247 |
+
# In case of error, default to no results selected.
|
| 248 |
decision_map = {}
|
| 249 |
filtered = []
|
| 250 |
for idx, res in enumerate(new_results):
|
| 251 |
url = res.get("link", "")
|
| 252 |
+
# Add each URL to visited regardless of decision.
|
| 253 |
visited_urls.add(url)
|
| 254 |
decision = decision_map.get(str(idx), "no").strip().lower()
|
| 255 |
if decision == "yes":
|
|
|
|
| 258 |
return filtered
|
| 259 |
|
| 260 |
def make_multilingual_query(query: str, context: str, languagesdetected: str) -> str:
|
| 261 |
+
finalquery = f"({query})" # original query is wrapped in parentheses
|
| 262 |
languages_detected_list = languagesdetected.split(",")
|
| 263 |
for lang in languages_detected_list:
|
| 264 |
prompt2 = f"""The research query is: "{query}".
|
| 265 |
+
Based on this query and context: "{context}", and with the detected language {lang}, provide the translated version of the query in that language.
|
| 266 |
+
The translation must be less than 20 words and preserve search operators like AND, OR, parenthesis, quotation marks, and exclusion hyphens.
|
| 267 |
Output only the translated query."""
|
| 268 |
translatedquery = openai_call(prompt2, model="gpt-4o-mini", max_tokens_param=50)
|
| 269 |
finalquery += f" OR ({translatedquery})"
|
|
|
|
| 271 |
return finalquery
|
| 272 |
|
| 273 |
def generate_query_tree(initial_query: str, breadth: int, depth: int) -> list:
    """Build the list of base search queries for a research run.

    At present the "tree" holds a single entry: the whitespace-stripped
    initial query. The result is capped at ``breadth`` entries.

    Args:
        initial_query: Raw query text; surrounding whitespace is removed.
        breadth: Maximum number of queries to return. Values <= 0 yield
            an empty list.
        depth: Accepted for interface compatibility; not used here.

    Returns:
        A list of query strings (empty when the query is blank or
        ``breadth`` is not positive).
    """
    base_terms = initial_query.strip()
    # Here you may add refinements if necessary to keep queries short.
    # A blank query is dropped: it would only produce an empty search request.
    queries = [base_terms] if base_terms else []
    # If topics are to be added, you can extend this list.
    # Clamp at zero so a negative breadth never becomes a slice-from-the-end
    # that silently drops trailing queries once the list grows.
    limit = max(0, min(len(queries), int(breadth)))
    final_queries = queries[:limit]
    logging.info("generate_query_tree: Generated queries: %s", final_queries)
    return final_queries
|
| 281 |
|
|
|
|
| 283 |
selected_engines=None, results_per_query: int = 10) -> list:
|
| 284 |
queries = generate_query_tree(initial_query, breadth, depth)
|
| 285 |
prompt = f"""The research query is: "{initial_query}".
|
| 286 |
+
Based on this query and the context: "{context}", suggest one or several languages (other than English) that might be relevant.
|
| 287 |
Output either:
|
| 288 |
- "No local attributes detected"
|
| 289 |
+
- One language (e.g., "Spanish")
|
| 290 |
+
- Multiple languages comma separated (e.g., "Italian,Putonghua,Cantonese")
|
| 291 |
+
Output only the result.
|
| 292 |
+
"""
|
| 293 |
languages_detected = openai_call(prompt, model="gpt-4o-mini", max_tokens_param=20)
|
| 294 |
if languages_detected != "No local attributes detected":
|
| 295 |
queries = [make_multilingual_query(q, context, languages_detected) for q in queries]
|
|
|
|
| 297 |
prompt_engines = f"""
|
| 298 |
Examine these queries:
|
| 299 |
{queries}
|
| 300 |
+
and considering the research context:
|
| 301 |
{context}
|
| 302 |
+
Identify among these search engines:
|
| 303 |
+
google,google_jobs_listing,google_trends,google_news,google_scholar,google_ai_overview,bing,bing_news,baidu,baidu_news,yandex,youtube_video,linkedin,linkedin_profile,duckduckgo_news,yelp_reviews
|
| 304 |
+
Which are most relevant? Output a comma separated list (e.g., "google,baidu").
|
| 305 |
+
If none are found, output "google".
|
| 306 |
+
"""
|
| 307 |
identified_engines = openai_call(prompt_engines, model="gpt-4o-mini", max_tokens_param=20)
|
| 308 |
selected_engines = identified_engines.split(",")
|
| 309 |
+
else:
|
| 310 |
+
selected_engines = selected_engines
|
| 311 |
final_queries = []
|
| 312 |
for q in queries:
|
| 313 |
for engine in selected_engines:
|
|
|
|
| 393 |
logging.info(f"refine_query: Refined query: {refined}")
|
| 394 |
return refined
|
| 395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
class ReportGenerator:
|
| 397 |
def __init__(self):
|
| 398 |
pass
|
|
|
|
| 403 |
solution_content = re.sub(r'[\u2010\u2011\u2012\u2013\u2014\u2015]', "-", solution_content)
|
| 404 |
# Remove markdown hyperlink syntax: replace [text](link) with just text.
|
| 405 |
solution_content = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', solution_content)
|
| 406 |
+
# Convert markdown to HTML using the "extra" and "tables" extensions to support numbering and table syntax.
|
| 407 |
html_content = markdown.markdown(solution_content, extensions=['extra', 'tables'])
|
| 408 |
+
# Insert explicit page breaks before specific headings for main report sections.
|
| 409 |
html_content = html_content.replace("<h2>Table of Contents</h2>",
|
| 410 |
"<div style='page-break-before: always;'></div><h2>Table of Contents</h2>")
|
| 411 |
html_content = html_content.replace("<h2>Introduction</h2>",
|
|
|
|
| 414 |
"<div style='page-break-before: always;'></div><h2>Conclusion</h2>")
|
| 415 |
html_content = html_content.replace("<h2>References</h2>",
|
| 416 |
"<div style='page-break-before: always;'></div><h2>References</h2>")
|
| 417 |
+
# For the Surprise-Me section, ensure it starts on a new page.
|
| 418 |
html_content = html_content.replace("<h2>Surprise-Me Extension Report</h2>",
|
| 419 |
"<div style='page-break-before: always;'></div><h2>Surprise-Me Extension Report</h2>")
|
| 420 |
+
# Build header using metadata if provided.
|
| 421 |
date_str = datetime.now().strftime("%Y-%m-%d")
|
| 422 |
header = ""
|
| 423 |
if metadata:
|
|
|
|
| 425 |
<p>Author: {metadata.get('User name', 'N/A')}</p>
|
| 426 |
<p>Date: {metadata.get('Date', date_str)}</p>
|
| 427 |
<hr/>"""
|
| 428 |
+
# Build a complete HTML document with CSS.
|
| 429 |
full_html = f"""
|
| 430 |
<html>
|
| 431 |
<head>
|
| 432 |
<meta charset="utf-8" />
|
| 433 |
<style>
|
| 434 |
+
body {{ font-family: Helvetica, sans-serif; margin: 40px; }}
|
| 435 |
h1 {{ font-size: 24pt; margin-bottom: 12px; }}
|
| 436 |
h2 {{ font-size: 20pt; margin-bottom: 10px; }}
|
| 437 |
h3 {{ font-size: 18pt; margin-bottom: 8px; }}
|
| 438 |
p {{ font-size: 11pt; line-height: 1.5; margin-bottom: 10px; }}
|
| 439 |
+
ol {{ font-size: 11pt; margin-left: 20px; margin-top: 0; margin-bottom: 10px; line-height: 1.5; }}
|
| 440 |
+
ul {{ font-size: 11pt; margin-left: 20px; margin-top: 0; margin-bottom: 10px; line-height: 1.5; }}
|
| 441 |
hr {{ border: 1px solid #ccc; margin: 20px 0; }}
|
| 442 |
+
table {{
|
| 443 |
+
border-collapse: collapse;
|
| 444 |
+
width: 100%;
|
| 445 |
+
margin-bottom: 10px;
|
| 446 |
+
}}
|
| 447 |
+
th, td {{
|
| 448 |
+
border: 1px solid #ccc;
|
| 449 |
+
padding: 8px;
|
| 450 |
+
text-align: left;
|
| 451 |
+
}}
|
| 452 |
+
th {{
|
| 453 |
+
background-color: #f2f2f2;
|
| 454 |
+
}}
|
| 455 |
</style>
|
| 456 |
</head>
|
| 457 |
<body>
|
|
|
|
| 460 |
</body>
|
| 461 |
</html>
|
| 462 |
"""
|
| 463 |
+
# Generate PDF from HTML using xhtml2pdf (pisa)
|
| 464 |
pdf_buffer = io.BytesIO()
|
| 465 |
pisa_status = pisa.CreatePDF(full_html, dest=pdf_buffer)
|
| 466 |
if pisa_status.err:
|
|
|
|
| 481 |
final_report = compress_text(final_report, MAX_MESSAGE_LENGTH)
|
| 482 |
pdf_bytes = report_generator.generate_report_pdf_html(solution_content=final_report,
|
| 483 |
metadata=metadata)
|
| 484 |
+
# Create a temporary file for PDF download
|
| 485 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 486 |
tmp_file.write(pdf_bytes)
|
| 487 |
tmp_path = tmp_file.name
|
|
|
|
| 492 |
return f"Error generating report: {str(e)}", None
|
| 493 |
|
| 494 |
def extract_summary_from_crumbs(crumbs_list: list) -> str:
    """Aggregate the URL and summary of every crumb record into one string.

    Args:
        crumbs_list: Crumb records, each a mapping with at least the keys
            'url' and 'summary'. Records also carry 'full_content', which is
            left out of the aggregate. ``None`` is treated as an empty list.

    Returns:
        One entry per crumb, each made of a "URL: ..." line followed by a
        "Summary: ..." line, with entries joined by newlines. An empty
        string is returned when there are no crumbs.

    Raises:
        KeyError: If a record lacks the 'url' or 'summary' key.
    """
    # str.join accepts any iterable, so a generator avoids building a
    # throwaway list; `or []` keeps a None argument from raising TypeError.
    aggregated = "\n".join(
        f"URL: {crumb['url']}\nSummary: {crumb['summary']}"
        for crumb in (crumbs_list or [])
    )
    logging.info("extract_summary_from_crumbs: Aggregated crumb summary created.")
    return aggregated
|
|
|
|
| 514 |
"Formulate this as a new research query that could lead to innovative insights.")
|
| 515 |
disruptive_query = openai_call(new_prompt, model="gpt-4o-mini", max_tokens_param=500)
|
| 516 |
logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
|
| 517 |
+
|
| 518 |
+
# Generate tailored clarification questions for the disruptive query
|
| 519 |
clarifications_for_new = generate_tailored_questions(
|
| 520 |
os.getenv("OPENAI_API_KEY"),
|
| 521 |
+
disruptive_query + "\n\n IMPORTANT NOTE: in this specific iteration, generate also the responses for the questions asked (simulated)",
|
| 522 |
"", "", "", ""
|
| 523 |
)
|
| 524 |
logging.info(f"generate_surprise_report: Clarification questions for new query: {clarifications_for_new}")
|
| 525 |
+
|
| 526 |
+
# Run iterative deep research for the disruptive query
|
| 527 |
generator = iterative_deep_research_gen(
|
| 528 |
disruptive_query, reportstyle, breadth, depth, followup_clarifications,
|
| 529 |
include_domains, exclude_keywords, additional_clarifications,
|
|
|
|
| 538 |
appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
|
| 539 |
return appended_report
|
| 540 |
|
| 541 |
+
def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: int, depth: int,
|
|
|
|
| 542 |
followup_clarifications: str,
|
| 543 |
include_domains: str,
|
| 544 |
exclude_keywords: str,
|
|
|
|
| 558 |
references_list = []
|
| 559 |
followup_suggestions = []
|
| 560 |
logging.info("iterative_deep_research_gen: Research started.")
|
| 561 |
+
for iteration in range(1, depth + 1):
|
| 562 |
+
process_log += f"\n--- Iteration {iteration} ---\n"
|
| 563 |
+
logging.info(f"iterative_deep_research_gen: Starting iteration {iteration}.")
|
| 564 |
+
combined_context = overall_context
|
| 565 |
+
if followup_suggestions:
|
| 566 |
+
# Deduplicate follow-up suggestions before adding them to context.
|
| 567 |
+
unique_suggestions = list(set(followup_suggestions))
|
| 568 |
+
combined_context += "\nFollow-up suggestions: " + ", ".join(unique_suggestions)
|
| 569 |
+
queries = generate_serp_queries(combined_context, breadth, depth, initial_query, selected_engines, results_per_query)
|
| 570 |
+
process_log += f"Generated queries: {queries}\n"
|
| 571 |
+
iteration_learnings = []
|
| 572 |
+
followup_suggestions = [] # reset for current iteration
|
| 573 |
+
for query_tuple in queries:
|
| 574 |
+
query_str, engine = query_tuple
|
| 575 |
+
mod_query = query_str
|
| 576 |
+
if include_domains.strip():
|
| 577 |
+
domains = [d.strip() for d in include_domains.split(",") if d.strip()]
|
| 578 |
+
domain_str = " OR ".join([f"site:{d}" for d in domains])
|
| 579 |
+
mod_query += f" ({domain_str})"
|
| 580 |
+
if exclude_keywords.strip():
|
| 581 |
+
for ex in [ex.strip() for ex in exclude_keywords.split(",") if ex.strip()]:
|
| 582 |
+
mod_query += f" -{ex}"
|
| 583 |
+
process_log += f"\nPerforming SERPAPI search with query: {mod_query} using engine: {engine}\n"
|
| 584 |
+
results = perform_serpapi_search(mod_query, engine, results_per_query)
|
| 585 |
+
|
| 586 |
+
# Instead of processing all results one-by-one, first filter them
|
| 587 |
+
filtered_results = filter_search_results(results, visited_urls, initial_query, followup_clarifications)
|
| 588 |
+
process_log += f"After filtering, {len(filtered_results)} results remain for processing.\n"
|
| 589 |
+
for res in filtered_results:
|
| 590 |
+
url = res.get("link", "")
|
| 591 |
+
if not url:
|
| 592 |
+
continue
|
| 593 |
+
content = ""
|
| 594 |
+
if url.lower().endswith(".pdf"):
|
| 595 |
+
content = process_pdf(url)
|
| 596 |
+
if "Error processing PDF" in content:
|
| 597 |
continue
|
| 598 |
+
process_log += f"Extracted PDF content from {url}\n"
|
| 599 |
+
else:
|
| 600 |
+
try:
|
| 601 |
+
response = requests.get(url, headers=HEADERS)
|
| 602 |
+
response.raise_for_status()
|
| 603 |
+
content = response.text
|
| 604 |
+
process_log += f"Extracted full page content from {url}\n"
|
| 605 |
+
except Exception as e:
|
| 606 |
+
logging.error(f"Error retrieving content from {url}: {e}")
|
| 607 |
+
process_log += f"Error retrieving content from {url}: {e}\n"
|
| 608 |
+
continue
|
| 609 |
+
analysis = analyze_with_gpt4o(initial_query, content)
|
| 610 |
+
analysis_summary = analysis.get("summary", "").strip()
|
| 611 |
+
process_log += (f"Summary: {analysis.get('summary')}, Follow-ups: {analysis.get('followups')}\n")
|
| 612 |
+
if not analysis_summary:
|
| 613 |
+
analysis_summary = content[:200] + "..." if len(content) > 200 else content
|
| 614 |
+
crumbs_list.append({
|
| 615 |
+
"url": url,
|
| 616 |
+
"summary": analysis_summary,
|
| 617 |
+
"full_content": content
|
| 618 |
+
})
|
| 619 |
+
if analysis.get("relevant", "no").lower() == "yes":
|
| 620 |
+
if url.startswith("http://") or url.startswith("https://"):
|
| 621 |
+
link_str = f" <a href='{url}'>[{ref_counter}]</a>"
|
| 622 |
else:
|
| 623 |
+
link_str = f" [{ref_counter}]"
|
| 624 |
+
summary_with_ref = analysis_summary + link_str
|
| 625 |
+
iteration_learnings.append(summary_with_ref)
|
| 626 |
+
references_list.append((ref_counter, url))
|
| 627 |
+
ref_counter += 1
|
| 628 |
+
if isinstance(analysis.get("followups"), list):
|
| 629 |
+
followup_suggestions.extend(analysis.get("followups"))
|
| 630 |
+
process_log += f"Iteration {iteration} extracted {len(iteration_learnings)} learnings.\n"
|
| 631 |
+
logging.info(f"iterative_deep_research_gen: Iteration {iteration} extracted {len(iteration_learnings)} learnings.")
|
| 632 |
+
overall_learnings.extend(iteration_learnings)
|
| 633 |
+
overall_context += f"\nIteration {iteration} learnings:\n" + "\n".join(iteration_learnings) + "\n"
|
| 634 |
+
if additional_clarifications.strip():
|
| 635 |
+
overall_context += "\nAdditional Clarifications from user: " + additional_clarifications.strip() + "\n"
|
| 636 |
+
process_log += "Appended additional clarifications to the context.\n"
|
| 637 |
+
progress_pct = int((iteration / depth) * 100)
|
| 638 |
+
yield (f"Progress: {progress_pct}%", None, None, None)
|
| 639 |
+
aggregated_crumbs = "\n".join([f"URL: {c['url']}\nSummary: {c['summary']}" for c in crumbs_list])
|
| 640 |
+
final_report = generate_final_report(initial_query, reportstyle, overall_learnings, list(visited_urls), aggregated_crumbs, references_list, pages=go_deeper)
|
| 641 |
+
alignment_assessment = assess_report_alignment(final_report, initial_query, followup_clarifications)
|
| 642 |
+
final_report += "\n\n\n\n\n**Report alignment assessment:**\n" + alignment_assessment
|
| 643 |
+
logging.info("iterative_deep_research_gen: Final report generated.")
|
| 644 |
+
yield ("", final_report, process_log, crumbs_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
|
| 647 |
prompt = (
|
|
|
|
| 649 |
"and the clarification Q&A provided. Ensure that the report covers key points of the topic.\n\n"
|
| 650 |
"Initial Query: " + initial_query + "\nClarifications: " + clarifications + "\n\n"
|
| 651 |
"Research Report:\n" + report + "\n\n"
|
| 652 |
+
"Provide a short assessment in one paragraph on how well the report aligns with these requirements."
|
| 653 |
)
|
| 654 |
assessment = openai_call(prompt, model="gpt-3.5-turbo", max_tokens_param=200)
|
| 655 |
logging.info(f"assess_report_alignment: Assessment result: {assessment}")
|
| 656 |
return assessment
|
| 657 |
|
| 658 |
+
def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query: str, reportstyle: str, breadth: int, depth: int,
|
| 659 |
+
followup_clarifications: str, include_domains: str,
|
| 660 |
+
exclude_keywords: str, additional_clarifications: str,
|
| 661 |
+
results_per_query: int, selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
|
| 662 |
+
pages: str, surprise_me: bool):
|
|
|
|
| 663 |
if not openai_api_key or not serpapi_api_key:
|
| 664 |
+
logging.error("run_deep_research: Invalid API keys provided.")
|
| 665 |
+
return "Please input valid API keys", "", "", "", ""
|
| 666 |
+
|
| 667 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 668 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 669 |
|
|
|
|
| 675 |
if existing_crumbs:
|
| 676 |
extra_context += f"Existing Crumbs:\n{existing_crumbs}\n"
|
| 677 |
|
| 678 |
+
final_progress = ""
|
|
|
|
|
|
|
|
|
|
| 679 |
final_report = ""
|
| 680 |
+
final_process_log = ""
|
| 681 |
+
final_crumbs = ""
|
| 682 |
+
logging.info("run_deep_research: Starting deep research process.")
|
| 683 |
+
for progress, rep, proc_log, crumbs in iterative_deep_research_gen(
|
| 684 |
+
initial_query, reportstyle, breadth, depth, followup_clarifications,
|
| 685 |
+
include_domains, exclude_keywords, additional_clarifications,
|
| 686 |
+
extra_context, selected_engines, results_per_query, go_deeper=int(pages)):
|
| 687 |
if rep is None:
|
| 688 |
+
final_progress = progress
|
| 689 |
+
yield final_progress, None, None, None, None
|
| 690 |
else:
|
| 691 |
final_report = rep
|
| 692 |
+
final_process_log = proc_log
|
| 693 |
+
final_crumbs = crumbs
|
| 694 |
break
|
| 695 |
if surprise_me:
|
| 696 |
+
extended_report = generate_surprise_report(
|
| 697 |
+
final_report, final_crumbs, initial_query, reportstyle, breadth, depth,
|
| 698 |
+
followup_clarifications, include_domains, exclude_keywords, additional_clarifications,
|
| 699 |
+
results_per_query, selected_engines
|
| 700 |
+
)
|
| 701 |
final_report = extended_report
|
| 702 |
+
final_progress = "Progress: 100% (\"Surprise Me\" extension complete)"
|
| 703 |
+
logging.info("run_deep_research: Deep research process completed.")
|
| 704 |
+
yield (final_progress, final_report, final_report, final_process_log, final_crumbs)
|
| 705 |
|
| 706 |
def load_example(example_choice: str) -> str:
|
| 707 |
filename = ""
|
|
|
|
| 720 |
logging.error(f"load_example: Error loading {filename}: {e}")
|
| 721 |
return ""
|
| 722 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 723 |
def main():
|
| 724 |
custom_css = """
|
| 725 |
/* Overall container customization */
|
|
|
|
| 772 |
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API Key here...", type="password")
|
| 773 |
serpapi_api_key_input = gr.Textbox(label="SERPAPI API Key", placeholder="Enter your SERPAPI API Key here...", type="password")
|
| 774 |
gr.Markdown("[Create OpenAI API Key](https://platform.openai.com/account/api-keys) | [Create SERPAPI API Key](https://serpapi.com/manage-api-key)")
|
| 775 |
+
gr.Markdown("You can check the open-source code - None of the user API keys are stored or logged.")
|
| 776 |
|
| 777 |
+
with gr.Accordion ("2] Research topic", open=False):
|
| 778 |
with gr.Row():
|
| 779 |
research_query = gr.Textbox(label="Research Query", placeholder="Enter your research query here...", lines=2, elem_id="research-query", scale=4)
|
| 780 |
refine_query_button = gr.Button("Refine my Query", scale=1)
|
| 781 |
|
| 782 |
with gr.Accordion("3] Q&A", open=False):
|
| 783 |
with gr.Row():
|
| 784 |
+
clarification_text = gr.Textbox(label="Clarification / Follow-Up Questions", placeholder="Tailored clarifying suggestions will appear here...", lines=6, scale = 4)
|
| 785 |
gen_followups = gr.Button("Generate Tailored Clarification Questions", scale=1)
|
| 786 |
|
| 787 |
with gr.Accordion("4] Search Parameters", open=False):
|
|
|
|
| 822 |
with gr.Accordion("5] Report", open=False, elem_classes="folder"):
|
| 823 |
progress_display = gr.Markdown("", elem_id="progress-display")
|
| 824 |
run_btn = gr.Button("Generate report")
|
| 825 |
+
final_report = gr.Markdown(label="Final Report (Markdown)", height = 800, min_height = 50)
|
| 826 |
with gr.Accordion("Generate PDF", open=False, elem_classes="folder"):
|
| 827 |
with gr.Column():
|
| 828 |
query_name = gr.Textbox(label="Query name", placeholder="Enter query name...", lines=1)
|