Guiyom committed on
Commit
18cc689
·
verified ·
1 Parent(s): 3d57a74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -25
app.py CHANGED
@@ -110,9 +110,11 @@ G --> H[Performance Evaluation - 45% Speed Improvement, 35% Risk Profiling, 50%
110
  - Take a deep breath, think step by step and think it well.
111
 
112
  // Examples
 
 
113
  -- flowchart --
114
  Important:
115
- - If the flow is "broader" than deep, choose LR (Left Right)
116
  - If the flow is "deeper" than broad (>3 levels), choose TD (Top Down)
117
 
118
  Top Down:
@@ -393,6 +395,12 @@ def openai_call(prompt: str, messages: list = None, model: str = "o3-mini",
393
  return err_msg
394
 
395
  def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000) -> dict:
 
 
 
 
 
 
396
  client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
397
  prompt = (f"""Analyze the following content from a query result:
398
 
@@ -423,6 +431,9 @@ Note: General Optimization Guidelines:
423
  For example: "Artificial intelligence" AND (mathematics OR geometry) -algebra,science AND history AND mathematics,...
424
  Return the result as a JSON object with the keys 'relevant', 'structure', and 'followups'. The 'structure' value should itself be a JSON object with keys 'Key Facts', 'Key Figures', 'Key Arguments', 'Key Quotes' and 'Summary'.
425
 
 
 
 
426
  Proceed."""
427
  )
428
  try:
@@ -533,7 +544,17 @@ def generate_final_report(initial_query: str, context: str, reportstyle: str, le
533
  word_count = pages * 500
534
  prompt = (f"""
535
  // Instructions:
536
- Using the following learnings and merged reference details from a deep research process on:
 
 
 
 
 
 
 
 
 
 
537
  '{initial_query}'
538
  Taking also into consideration the context:
539
  {context}
@@ -854,7 +875,7 @@ def generate_tailored_questions(openai_api_key: str, query: str, existing_qa: st
854
  def backup_fields(research_query: str,
855
  include_domains: str, exclude_keywords: str, additional_clarifications: str,
856
  selected_engines, results_per_query, breadth, depth, clarification_text: str,
857
- existing_report: str, existing_log: str, crumbs_box: str, final_report: str) -> str:
858
  data = {
859
  "openai_api_key": "",
860
  "serpapi_api_key": "",
@@ -870,7 +891,8 @@ def backup_fields(research_query: str,
870
  "existing_report": existing_report,
871
  "existing_log": existing_log,
872
  "crumbs_box": crumbs_box,
873
- "final_report": final_report
 
874
  }
875
  backup_json = json.dumps(data, indent=2)
876
  logging.info(f"backup_fields: Data backed up: {backup_json}")
@@ -894,10 +916,11 @@ def load_fields(backup_json: str):
894
  data.get("existing_report", ""),
895
  data.get("existing_log", ""),
896
  data.get("crumbs_box", ""),
897
- data.get("final_report", ""))
 
898
  except Exception as e:
899
  logging.error(f"load_fields error: {e}")
900
- return ("", "", "", "", "", "", [], 10, 4, 2, "", "", "", "", "")
901
 
902
  def refine_query(query: str, openai_api_key: str) -> str:
903
  os.environ["OPENAI_API_KEY"] = openai_api_key
@@ -1192,7 +1215,7 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
1192
  generator = iterative_deep_research_gen(
1193
  disruptive_query, reportstyle, breadth, depth, followup_clarifications,
1194
  include_domains, exclude_keywords, additional_clarifications,
1195
- extra_context="", selected_engines=selected_engines, results_per_query=results_per_query, go_deeper=1
1196
  )
1197
  extension_report = ""
1198
  for progress, rep, proc_log, new_crumbs in generator:
@@ -1203,6 +1226,32 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
1203
  appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
1204
  return appended_report
1205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1206
  def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: int, depth: int,
1207
  followup_clarifications: str,
1208
  include_domains: str,
@@ -1211,6 +1260,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1211
  extra_context: str = "",
1212
  selected_engines=None,
1213
  results_per_query: int = 10,
 
1214
  go_deeper: int = 8):
1215
  overall_context = extra_context + f"Initial Query: {initial_query}\n"
1216
  if followup_clarifications.strip():
@@ -1218,6 +1268,12 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1218
  process_log = "Starting research with context:\n" + overall_context + "\n"
1219
  overall_learnings = []
1220
  visited_urls = set()
 
 
 
 
 
 
1221
  crumbs_list = []
1222
  ref_counter = 1
1223
  references_list = []
@@ -1232,10 +1288,20 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1232
  unique_suggestions = list(set(followup_suggestions))
1233
  combined_context += "\nFollow-up suggestions: " + ", ".join(unique_suggestions)
1234
  queries = generate_serp_queries(combined_context, breadth, depth, initial_query, selected_engines, results_per_query)
1235
- process_log += f"Generated queries: {queries}\n"
 
 
 
 
 
 
 
 
 
 
1236
  iteration_learnings = []
1237
  followup_suggestions = [] # reset for current iteration
1238
- for query_tuple in queries:
1239
  query_str, engine = query_tuple
1240
  mod_query = query_str
1241
  if include_domains.strip():
@@ -1272,9 +1338,13 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1272
  logging.error(f"Error retrieving content from {url}: {e}")
1273
  process_log += f"Error retrieving content from {url}: {e}\n"
1274
  continue
1275
-
1276
- # Clean the raw content to obtain concise text
1277
- cleaned_content = clean_content(raw_content)
 
 
 
 
1278
 
1279
  # Analyze the cleaned content with GPT-4o-mini
1280
  analysis = analyze_with_gpt4o(initial_query, cleaned_content, breadth)
@@ -1317,7 +1387,17 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1317
  process_log += "Appended additional clarifications to the context.\n"
1318
  progress_pct = int((iteration / depth) * 100)
1319
  yield (f"Progress: {progress_pct}%", None, None, None)
1320
- aggregated_crumbs = "\n\n".join([f"Title: {c.get('title', 'No Title')}\nURL: {c['url']}\nSummary: {c['summary']}" for c in crumbs_list])
 
 
 
 
 
 
 
 
 
 
1321
  final_report = generate_final_report(initial_query, combined_context, reportstyle, overall_learnings, list(visited_urls), aggregated_crumbs, references_list, pages=go_deeper)
1322
 
1323
  # --- NEW STEP: Post-process final_report to replace visual and focus placeholders ---
@@ -1330,8 +1410,49 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
1330
  f"<p>---------</p><p><b>Report alignment assessment:</b> {alignment_assessment}</p> </div> </body></html>"
1331
  )
1332
  logging.info("iterative_deep_research_gen: Final report generated.")
1333
- yield ("", final_report, process_log, crumbs_list)
 
 
 
1334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1335
  def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
1336
  prompt = (
1337
  "Please assess the following research report in terms of its alignment with the initial user request "
@@ -1349,7 +1470,7 @@ def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query:
1349
  followup_clarifications: str, include_domains: str,
1350
  exclude_keywords: str, additional_clarifications: str,
1351
  results_per_query: int, selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
1352
- pages: str, surprise_me: bool):
1353
  if not openai_api_key or not serpapi_api_key:
1354
  logging.error("run_deep_research: Invalid API keys provided.")
1355
  return "Please input valid API keys", "", "", "", ""
@@ -1370,13 +1491,13 @@ def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query:
1370
  final_process_log = ""
1371
  final_crumbs = ""
1372
  logging.info("run_deep_research: Starting deep research process.")
1373
- for progress, rep, proc_log, crumbs in iterative_deep_research_gen(
1374
  initial_query, reportstyle, breadth, depth, followup_clarifications,
1375
  include_domains, exclude_keywords, additional_clarifications,
1376
- extra_context, selected_engines, results_per_query, go_deeper=int(pages)):
1377
  if rep is None:
1378
  final_progress = progress
1379
- yield final_progress, None, None, None, None
1380
  else:
1381
  final_report = rep
1382
  final_process_log = proc_log
@@ -1391,7 +1512,7 @@ def run_deep_research(openai_api_key: str, serpapi_api_key: str, initial_query:
1391
  final_report = extended_report
1392
  final_progress = "Progress: 100% (\"Surprise Me\" extension complete)"
1393
  logging.info("run_deep_research: Deep research process completed.")
1394
- yield (final_progress, final_report, final_report, final_process_log, final_crumbs)
1395
 
1396
  def load_example(example_choice: str) -> str:
1397
  filename = ""
@@ -1521,10 +1642,11 @@ def main():
1521
  report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
1522
  generate_button = gr.Button("Generate Report")
1523
 
1524
- with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log)", open=False):
1525
- existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
1526
- existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
1527
- crumbs_box = gr.Textbox(label="Existing Crumbs (All scraped sources, JSON)", placeholder="Paste existing crumbs JSON here...", lines=4)
 
1528
 
1529
  with gr.Accordion("7] Backup / Restore Fields", open=False):
1530
  backup_text = gr.Textbox(label="Backup JSON", placeholder="Backup output will appear here. You can also paste JSON here to load fields.", lines=6, interactive=True)
@@ -1550,9 +1672,9 @@ def main():
1550
  run_btn.click(
1551
  fn=run_deep_research,
1552
  inputs=[openai_api_key_input, serpapi_api_key_input, research_query, reportstyle, breadth, depth, clarification_text, include_domains, exclude_keywords,
1553
- additional_clarifications, results_per_query, selected_engines, existing_report, existing_log, crumbs_box,
1554
  pages_dropdown, surprise_me_checkbox],
1555
- outputs=[progress_display, final_report, existing_report, existing_log, crumbs_box],
1556
  show_progress=True,
1557
  api_name="deep_research"
1558
  )
 
110
  - Take a deep breath, think step by step and think it well.
111
 
112
  // Examples
113
+ Note: Pay attention for each example to what type of parenthesis / bracket is used and respect it scrupulously
114
+
115
  -- flowchart --
116
  Important:
117
+ - If the flow is "broader" than deep (>3 branches at the same level), choose LR (Left Right)
118
  - If the flow is "deeper" than broad (>3 levels), choose TD (Top Down)
119
 
120
  Top Down:
 
395
  return err_msg
396
 
397
  def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000) -> dict:
398
+ # measure snippet length
399
+ snippet_words = len(snippet.split())
400
+ # decide a proportional max tokens (cap at 3000 for example)
401
+ # e.g. 1 token ~ ~0.75 words, so we do something simplistic:
402
+ dynamic_tokens = min(3000, max(250, int(snippet_words * 0.5)))
403
+
404
  client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
405
  prompt = (f"""Analyze the following content from a query result:
406
 
 
431
  For example: "Artificial intelligence" AND (mathematics OR geometry) -algebra,science AND history AND mathematics,...
432
  Return the result as a JSON object with the keys 'relevant', 'structure', and 'followups'. The 'structure' value should itself be a JSON object with keys 'Key Facts', 'Key Figures', 'Key Arguments', 'Key Quotes' and 'Summary'.
433
 
434
+ 4. Ensure that the summary length and level of detail is proportional to the source length.
435
+ Source length: {snippet_words} words. You may produce a more detailed summary if the text is long.
436
+
437
  Proceed."""
438
  )
439
  try:
 
544
  word_count = pages * 500
545
  prompt = (f"""
546
  // Instructions:
547
+ - We want to incorporate as many relevant numbers, statistics, factual references, quotes from the sources,
548
+ - Explicit mentions of organizations, tools, projects, or people from the crumb data as possible.
549
+ - In your writing, do the following:
550
+ 1. Integrate numbers, quotes, and factual references systematically.
551
+ 2. Whenever you mention a figure or quote, add an inline reference [x] matching its source from the references.
552
+ 3. Specifically name relevant organizations, tools, project names, and people encountered in the crumbs or learnings.
553
+ 4. This is for academic purposes, so thorough citations and referencing are essential.
554
+ Note: Do not be shy to use the names (organizations, people, project, application, tools...) mentioned in the sources, we need this for academic correctness
555
+
556
+ // Sources
557
+ Use the following learnings and merged reference details from a deep research process on:
558
  '{initial_query}'
559
  Taking also into consideration the context:
560
  {context}
 
875
  def backup_fields(research_query: str,
876
  include_domains: str, exclude_keywords: str, additional_clarifications: str,
877
  selected_engines, results_per_query, breadth, depth, clarification_text: str,
878
+ existing_report: str, existing_log: str, crumbs_box: str, final_report: str, existing_queries_box: str) -> str:
879
  data = {
880
  "openai_api_key": "",
881
  "serpapi_api_key": "",
 
891
  "existing_report": existing_report,
892
  "existing_log": existing_log,
893
  "crumbs_box": crumbs_box,
894
+ "final_report": final_report,
895
+ "existing_queries": existing_queries_box
896
  }
897
  backup_json = json.dumps(data, indent=2)
898
  logging.info(f"backup_fields: Data backed up: {backup_json}")
 
916
  data.get("existing_report", ""),
917
  data.get("existing_log", ""),
918
  data.get("crumbs_box", ""),
919
+ data.get("final_report", ""),
920
+ data.get("existing_queries",""))
921
  except Exception as e:
922
  logging.error(f"load_fields error: {e}")
923
+ return ("", "", "", "", "", "", [], 10, 4, 2, "", "", "", "", "", "")
924
 
925
  def refine_query(query: str, openai_api_key: str) -> str:
926
  os.environ["OPENAI_API_KEY"] = openai_api_key
 
1215
  generator = iterative_deep_research_gen(
1216
  disruptive_query, reportstyle, breadth, depth, followup_clarifications,
1217
  include_domains, exclude_keywords, additional_clarifications,
1218
+ extra_context="", selected_engines=selected_engines, results_per_query=results_per_query, existing_queries=existing_queries, go_deeper=1
1219
  )
1220
  extension_report = ""
1221
- for progress, rep, proc_log, new_crumbs in generator:
+ for progress, rep, proc_log, new_crumbs, _processed_queries in generator:
 
1226
  appended_report = previous_report + "\n\n<div style='page-break-before: always;'></div>\n<h2>Surprise-Me Extension Report</h2>\n\n" + clarifications_for_new + "\n\n" + extension_report
1227
  return appended_report
1228
 
1229
+ def extract_structured_insights(html_text: str) -> str:
1230
+ """
1231
+ Extract only facts, figures, arguments, and quotes in a concise manner.
1232
+ Use BeautifulSoup to parse and remove anything not relevant to these categories.
1233
+ This function returns a short text suitable for summarization by the LLM.
1234
+ """
1235
+ soup = BeautifulSoup(html_text, "html.parser")
1236
+
1237
+ # We can decide to keep paragraphs that contain digits (numbers),
1238
+ # or words like "claim", "argument", "quote", etc. This is just an example heuristic.
1239
+ paragraphs = soup.find_all('p')
1240
+ curated_excerpts = []
1241
+ for p in paragraphs:
1242
+ text = p.get_text().strip()
1243
+ # If it has digits or certain keywords, we keep it
1244
+ if re.search(r'\d+', text) or re.search(r'\bargument\b|\bfact\b|\bfigure\b|\bstudy\b|\bquote\b', text, re.IGNORECASE):
1245
+ curated_excerpts.append(text)
1246
+
1247
+ # Combine them into a shorter snippet
1248
+ snippet = "\n".join(curated_excerpts)
1249
+ # If snippet is too short, fallback to the entire cleaned text
1250
+ if len(snippet.split()) < 30:
1251
+ snippet = clean_content(html_text)[:2000] # or some fallback length
1252
+
1253
+ return snippet
1254
+
1255
  def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: int, depth: int,
1256
  followup_clarifications: str,
1257
  include_domains: str,
 
1260
  extra_context: str = "",
1261
  selected_engines=None,
1262
  results_per_query: int = 10,
1263
+ existing_queries: str = "",
1264
  go_deeper: int = 8):
1265
  overall_context = extra_context + f"Initial Query: {initial_query}\n"
1266
  if followup_clarifications.strip():
 
1268
  process_log = "Starting research with context:\n" + overall_context + "\n"
1269
  overall_learnings = []
1270
  visited_urls = set()
1271
+ # Parse previously processed queries from existing_queries if provided
1272
+ processed_queries = set()
1273
+ for q_line in existing_queries.splitlines():
1274
+ q_line = q_line.strip()
1275
+ if q_line:
1276
+ processed_queries.add(q_line)
1277
  crumbs_list = []
1278
  ref_counter = 1
1279
  references_list = []
 
1288
  unique_suggestions = list(set(followup_suggestions))
1289
  combined_context += "\nFollow-up suggestions: " + ", ".join(unique_suggestions)
1290
  queries = generate_serp_queries(combined_context, breadth, depth, initial_query, selected_engines, results_per_query)
1291
+
1292
+ # ===================================================================
1293
+ # Skip queries already in processed_queries
1294
+ filtered_query_tuples = []
1295
+ for q_tuple in queries:
1296
+ q_text, eng = q_tuple
1297
+ if q_text not in processed_queries:
1298
+ filtered_query_tuples.append(q_tuple)
1299
+ processed_queries.add(q_text) # remember we've processed it
1300
+ # ===================================================================
1301
+ process_log += f"\nWill run {len(filtered_query_tuples)} new queries this iteration instead of {len(queries)} total.\n"
1302
  iteration_learnings = []
1303
  followup_suggestions = [] # reset for current iteration
1304
+ for query_tuple in filtered_query_tuples:
1305
  query_str, engine = query_tuple
1306
  mod_query = query_str
1307
  if include_domains.strip():
 
1338
  logging.error(f"Error retrieving content from {url}: {e}")
1339
  process_log += f"Error retrieving content from {url}: {e}\n"
1340
  continue
1341
+
1342
+ # 1) Clean and do minimal parse
1343
+ cleaned_html = clean_content(raw_content)
1344
+ # 2) Extract structured data
1345
+ semantically_rich_snippet = extract_structured_insights(cleaned_html)
1346
+ # 3) Summarize with LLM
1347
+ analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth)
1348
 
1349
- # Analyze the cleaned content with GPT-4o-mini
1350
- analysis = analyze_with_gpt4o(initial_query, cleaned_content, breadth)
 
1387
  process_log += "Appended additional clarifications to the context.\n"
1388
  progress_pct = int((iteration / depth) * 100)
1389
- yield (f"Progress: {progress_pct}%", None, None, None)
+ yield (f"Progress: {progress_pct}%", None, None, None, None)
1390
+
1391
+ # chunk and filter all crumbs if breadth>3 and depth>2
1392
+ filtered_crumbs_list = crumbs_list
1393
+ if breadth > 3 and depth > 2:
1394
+ filtered_crumbs_list = filter_crumbs_in_batches(crumbs_list, initial_query, followup_clarifications)
1395
+
1396
+ # Now build aggregated crumb text from filtered_crumbs_list only
1397
+ aggregated_crumbs = "\n\n".join([
1398
+ f"Title: {c.get('title','No Title')}\nURL: {c['url']}\nSummary: {c['summary']}"
1399
+ for c in filtered_crumbs_list
1400
+ ])
1401
  final_report = generate_final_report(initial_query, combined_context, reportstyle, overall_learnings, list(visited_urls), aggregated_crumbs, references_list, pages=go_deeper)
1402
 
1403
  # --- NEW STEP: Post-process final_report to replace visual and focus placeholders ---
 
1410
  f"<p>---------</p><p><b>Report alignment assessment:</b> {alignment_assessment}</p> </div> </body></html>"
1411
  )
1412
  logging.info("iterative_deep_research_gen: Final report generated.")
1413
+ # We convert processed_queries to a string suitable for storing
1414
+ all_processed_queries_str = "\n".join(sorted(processed_queries))
1415
+
1416
+ yield ("", final_report, process_log, crumbs_list, all_processed_queries_str)
1417
 
1418
+ def filter_crumbs_in_batches(crumbs_list: list, initial_query: str, clarifications: str) -> list:
1419
+ """
1420
+ Splits crumbs into batches of 20, calls an LLM to decide keep/ignore each crumb.
1421
+ Returns the final list of accepted crumbs.
1422
+ """
1423
+ accepted = []
1424
+ batch_size = 20
1425
+ for i in range(0, len(crumbs_list), batch_size):
1426
+ batch = crumbs_list[i:i+batch_size]
1427
+ # Build a prompt describing each crumb
1428
+ prompt = "We have a set of crumbs. For each crumb, decide if it significantly adds new facts, figures, references, or quotes.\n"
1429
+ prompt += "Mark 'yes' if it is valuable for the final report, otherwise 'no'. Output JSON.\n\n"
1430
+ listing = []
1431
+ for idx, c in enumerate(batch):
1432
+ snippet_for_prompt = c["summary"][:500] # short snippet
1433
+ listing.append(f"Crumb {idx}: {snippet_for_prompt}")
1434
+ prompt += "\n".join(listing)
1435
+
1436
+ prompt += """
1437
+ Return a JSON object with structure:
1438
+ {
1439
+ "0": "yes" or "no",
1440
+ "1": "yes" or "no",
1441
+ ...
1442
+ }
1443
+ """
1444
+ decision_str = openai_call(prompt, model="o3-mini", max_tokens_param=1500)
1445
+ # parse JSON
1446
+ try:
1447
+ decisions = json.loads(decision_str)
1448
+ except:
1449
+ decisions = {}
1450
+ for idx, c in enumerate(batch):
1451
+ d = decisions.get(str(idx), "no").lower()
1452
+ if d == "yes":
1453
+ accepted.append(c)
1454
+ return accepted
1455
+
1456
  def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
1457
  prompt = (
1458
  "Please assess the following research report in terms of its alignment with the initial user request "
 
1470
  followup_clarifications: str, include_domains: str,
1471
  exclude_keywords: str, additional_clarifications: str,
1472
  results_per_query: int, selected_engines, existing_crumbs: str, existing_report: str, existing_log: str,
1473
+ existing_queries: str, pages: str, surprise_me: bool):
1474
  if not openai_api_key or not serpapi_api_key:
1475
  logging.error("run_deep_research: Invalid API keys provided.")
1476
- return "Please input valid API keys", "", "", "", ""
+ return "Please input valid API keys", "", "", "", "", ""
 
1491
  final_process_log = ""
1492
  final_crumbs = ""
1493
  logging.info("run_deep_research: Starting deep research process.")
1494
+ for progress, rep, proc_log, crumbs, all_processed_queries_str in iterative_deep_research_gen(
1495
  initial_query, reportstyle, breadth, depth, followup_clarifications,
1496
  include_domains, exclude_keywords, additional_clarifications,
1497
+ extra_context, selected_engines, results_per_query, existing_queries, go_deeper=int(pages)):
1498
  if rep is None:
1499
  final_progress = progress
1500
+ yield final_progress, None, None, None, None, all_processed_queries_str
1501
  else:
1502
  final_report = rep
1503
  final_process_log = proc_log
 
1512
  final_report = extended_report
1513
  final_progress = "Progress: 100% (\"Surprise Me\" extension complete)"
1514
  logging.info("run_deep_research: Deep research process completed.")
1515
+ yield (final_progress, final_report, final_report, final_process_log, final_crumbs, all_processed_queries_str)
1516
 
1517
  def load_example(example_choice: str) -> str:
1518
  filename = ""
 
1642
  report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
1643
  generate_button = gr.Button("Generate Report")
1644
 
1645
+ with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log, Processed Queries)", open=False):
1646
+ existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
1647
+ existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
1648
+ crumbs_box = gr.Textbox(label="Existing Crumbs (All sources, JSON)", placeholder="Paste existing crumbs JSON here...", lines=4)
1649
+ existing_queries_box = gr.Textbox(label="Existing Queries (processed queries)", placeholder="Paste processed queries here...", lines=4)
1650
 
1651
  with gr.Accordion("7] Backup / Restore Fields", open=False):
1652
  backup_text = gr.Textbox(label="Backup JSON", placeholder="Backup output will appear here. You can also paste JSON here to load fields.", lines=6, interactive=True)
 
1672
  run_btn.click(
1673
  fn=run_deep_research,
1674
  inputs=[openai_api_key_input, serpapi_api_key_input, research_query, reportstyle, breadth, depth, clarification_text, include_domains, exclude_keywords,
1675
+ additional_clarifications, results_per_query, selected_engines, crumbs_box, existing_report, existing_log, existing_queries_box,
1676
  pages_dropdown, surprise_me_checkbox],
1677
+ outputs=[progress_display, final_report, existing_report, existing_log, crumbs_box, existing_queries_box],
1678
  show_progress=True,
1679
  api_name="deep_research"
1680
  )