Guiyom committed on
Commit
48496cb
·
verified ·
1 Parent(s): 6dbee56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -36
app.py CHANGED
@@ -1422,24 +1422,17 @@ Output only the translated query."""
1422
  logging.info(f"make_multilingual_query: Transformed query: {finalquery}")
1423
  return finalquery
1424
 
1425
- def generate_query_tree(context: str, breadth: int, depth: int) -> list:
1426
- """
1427
- Generates a list of queries relevant to the provided context.
1428
-
1429
- Args:
1430
- context: The context string for which to generate queries.
1431
- breadth: The maximum number of queries to generate.
1432
- depth: This parameter is not used in the current implementation,
1433
- but it is kept for potential future extensions of this function.
1434
-
1435
- Returns:
1436
- A list of query strings.
1437
- """
1438
-
1439
  prompt = f"""
1440
- Generate a list of {breadth} search queries relevant to the following context:
 
1441
 
1442
- "{context}"
 
1443
 
1444
  // Requirements
1445
  - The queries should be suitable for a search engine.
@@ -1449,22 +1442,24 @@ def generate_query_tree(context: str, breadth: int, depth: int) -> list:
1449
 
1450
  // IMPORTANT:
1451
  - Return only valid JSON without any markdown code fences (```) or mention of json
 
1452
 
1453
  // EXAMPLE (if breadth = 4):
1454
 
1455
  {{
1456
  "queries": [
1457
- (Artificial Intelligence OR data science) AND mathematics,
1458
- (geometry OR algebra) AND research AND machine learning,
1459
- (calculus OR "differential equations") AND "AI applications",
1460
- "Statistics" AND "data analysis" AND "machine learning algorithms"
 
1461
  ]
1462
  }}
1463
 
1464
  Now generate the result.
1465
  """
1466
  messages = [] # Use prompt directly in openai_call, messages is handled there.
1467
- llm_response = openai_call(prompt=prompt, messages=messages)
1468
  cleaned_response = llm_response.strip()
1469
  if cleaned_response.startswith("```"):
1470
  cleaned_response = cleaned_response.strip("`").strip()
@@ -1481,7 +1476,7 @@ Now generate the result.
1481
 
1482
  def generate_serp_queries(context: str, breadth: int, depth: int, initial_query: str,
1483
  selected_engines=None, results_per_query: int = 10) -> list:
1484
- queries = generate_query_tree(context, breadth, depth)
1485
  prompt = f"""The research topic is: "{initial_query}".
1486
  Based on this query and the overall context: "{context}", suggest one or several languages (other than English) that might be relevant.
1487
 
@@ -1505,7 +1500,7 @@ google,google_scholar,google_trends,google_news,google_ai_overview,bing,bing_new
1505
  Which are most relevant? Output a comma separated list (e.g., "google,baidu").
1506
  If none are found, output "google".
1507
  """
1508
- identified_engines = openai_call(prompt_engines, model="gpt-4o-mini", max_tokens_param=20)
1509
  # Split and strip engines; if result is empty (or all empty strings), default to ["google"]
1510
  selected_engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
1511
  if not selected_engines:
@@ -1530,7 +1525,7 @@ def generate_tailored_questions(openai_api_key: str, query: str, existing_qa: st
1530
  prompt = (
1531
  f"Context:\nSearch Topic: {query}\nExisting Clarification Q&A: {existing_qa}\nExisting Report: {existing_report}\n"
1532
  f"Existing Process Log: {existing_log}\nCrumbs: {crumbs}\n\n"
1533
- "Based on the above, generate FIVE new clarification questions, each phrased as either 'Would you like to know more about ...?' or open questions like 'What objectives would you like to reach with this search?' or 'Are there specific aspects you would like to search in particular such as xxx, yyy, ... (all related to the topic)?' "
1534
  "Each question should be followed by 'Response:' then 2 empty lines."
1535
  )
1536
  new_questions = openai_call(prompt, model="gpt-4o", max_tokens_param=300)
@@ -1867,7 +1862,7 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
1867
  f"And the initial query: '{initial_query}',\n\n"
1868
  "please propose a disruptive, experimental, and ambitious new research hypothesis that goes beyond conventional boundaries (Overton-window) related to the topic. "
1869
  "Formulate this as a new research query that could lead to innovative insights.")
1870
- disruptive_query = openai_call(new_prompt, model="gpt-4o-mini", max_tokens_param=500)
1871
  logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
1872
 
1873
  # Generate tailored clarification questions for the disruptive query
@@ -2097,8 +2092,10 @@ def filter_crumbs_in_batches(crumbs_list: list, initial_query: str, clarificatio
2097
  for i in range(0, len(crumbs_list), batch_size):
2098
  batch = crumbs_list[i:i+batch_size]
2099
  # Build a prompt describing each crumb
2100
- prompt = "We have a set of crumbs. For each crumb, decide if it significantly adds new facts, figures, references, or quotes. Don't be too selective, we still need to keep between 10 and 15 in each batch.\n"
2101
- prompt += "Mark 'yes' if it is valuable for the final report, otherwise 'no'. Output JSON.\n\n"
 
 
2102
  listing = []
2103
  for idx, c in enumerate(batch):
2104
  snippet_for_prompt = c["summary"][:500] # short snippet
@@ -2126,15 +2123,23 @@ Return a JSON object with structure:
2126
  return accepted
2127
 
2128
  def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
2129
- prompt = (
2130
- "Please assess the following research report in terms of its alignment with the initial user request "
2131
- "and the clarification Q&A provided. Ensure that the report covers key points of the topic.\n\n"
2132
- "Initial Query: " + initial_query + "\nClarifications: " + clarifications + "\n\n"
2133
- "Research Report:\n" + report + "\n\n"
2134
- "Provide a short assessment in one paragraph on how well the report aligns with these requirements, you can put in html tags for bold or italic."
2135
- "The output will be integrated in a html page for rendering"
2136
- )
2137
- assessment = openai_call(prompt, model="gpt-4o", max_tokens_param=400)
 
 
 
 
 
 
 
 
2138
  logging.info(f"assess_report_alignment: Assessment result: {assessment}")
2139
  return assessment
2140
 
 
1422
  logging.info(f"make_multilingual_query: Transformed query: {finalquery}")
1423
  return finalquery
1424
 
1425
+ def generate_query_tree(context: str, breadth: int, depth: int, selected_engines: list) -> list:
1426
+ if selected_engines == None:
1427
+ list_engines = "all relevant search engines"
1428
+ else:
1429
+ list_engines = ','.join(map(str, selected_engines))
 
 
 
 
 
 
 
 
 
1430
  prompt = f"""
1431
+ Generate a list of {breadth} search queries relevant to the following context:
1432
+ {context}
1433
 
1434
+ The list of engines selected is the following:
1435
+ {list_engines}
1436
 
1437
  // Requirements
1438
  - The queries should be suitable for a search engine.
 
1442
 
1443
  // IMPORTANT:
1444
  - Return only valid JSON without any markdown code fences (```) or mention of json
1445
+ - Think tactically on what would be the best query to get the most relevant search results for the query
1446
 
1447
  // EXAMPLE (if breadth = 4):
1448
 
1449
  {{
1450
  "queries": [
1451
+ '(Artificial Intelligence OR data science) AND mathematics',
1452
+ '(geometry OR algebra) AND research AND machine learning',
1453
+ '(calculus OR "differential equations") AND "Artificial intelligence"',
1454
+ 'filetype:pdf "artificial intelligence" OR "machine learning"',
1455
+ 'Statistics AND "data analysis" AND "machine learning"'
1456
  ]
1457
  }}
1458
 
1459
  Now generate the result.
1460
  """
1461
  messages = [] # Use prompt directly in openai_call, messages is handled there.
1462
+ llm_response = openai_call(prompt=prompt, model="o3-mini", messages=messages)
1463
  cleaned_response = llm_response.strip()
1464
  if cleaned_response.startswith("```"):
1465
  cleaned_response = cleaned_response.strip("`").strip()
 
1476
 
1477
  def generate_serp_queries(context: str, breadth: int, depth: int, initial_query: str,
1478
  selected_engines=None, results_per_query: int = 10) -> list:
1479
+ queries = generate_query_tree(context, breadth, depth, selected_engines)
1480
  prompt = f"""The research topic is: "{initial_query}".
1481
  Based on this query and the overall context: "{context}", suggest one or several languages (other than English) that might be relevant.
1482
 
 
1500
  Which are most relevant? Output a comma separated list (e.g., "google,baidu").
1501
  If none are found, output "google".
1502
  """
1503
+ identified_engines = openai_call(prompt_engines, model="o3-mini", max_tokens_param=20)
1504
  # Split and strip engines; if result is empty (or all empty strings), default to ["google"]
1505
  selected_engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
1506
  if not selected_engines:
 
1525
  prompt = (
1526
  f"Context:\nSearch Topic: {query}\nExisting Clarification Q&A: {existing_qa}\nExisting Report: {existing_report}\n"
1527
  f"Existing Process Log: {existing_log}\nCrumbs: {crumbs}\n\n"
1528
+ "Based on the above, generate FIVE new clarification questions, preferably open questions like 'What objectives would you like to reach with this search?' or 'Are there specific aspects you would like to search in particular such as xxx, yyy, ...?' "
1529
  "Each question should be followed by 'Response:' then 2 empty lines."
1530
  )
1531
  new_questions = openai_call(prompt, model="gpt-4o", max_tokens_param=300)
 
1862
  f"And the initial query: '{initial_query}',\n\n"
1863
  "please propose a disruptive, experimental, and ambitious new research hypothesis that goes beyond conventional boundaries (Overton-window) related to the topic. "
1864
  "Formulate this as a new research query that could lead to innovative insights.")
1865
+ disruptive_query = openai_call(new_prompt, model="o3-mini", max_tokens_param=500)
1866
  logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
1867
 
1868
  # Generate tailored clarification questions for the disruptive query
 
2092
  for i in range(0, len(crumbs_list), batch_size):
2093
  batch = crumbs_list[i:i+batch_size]
2094
  # Build a prompt describing each crumb
2095
+ prompt = """
2096
+ We have a set of crumbs. For each crumb, decide if it significantly adds new facts, figures, references, or quotes to the topic being investigated. Don't be too selective, we still need to keep between 10 and 15 in each batch.
2097
+ Mark 'yes' if it is valuable for the final report, otherwise 'no'. Output JSON.
2098
+ """
2099
  listing = []
2100
  for idx, c in enumerate(batch):
2101
  snippet_for_prompt = c["summary"][:500] # short snippet
 
2123
  return accepted
2124
 
2125
  def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
2126
+ prompt = f"""
2127
+ Please assess the following research report in terms of its alignment with the initial user request and the clarification Q&A provided.
2128
+ Ensure that the report covers key points of the topic.
2129
+
2130
+ Initial Query:
2131
+ {initial_query}
2132
+
2133
+ Clarifications:
2134
+ {clarifications}
2135
+
2136
+ Research Report:
2137
+ {report}
2138
+
2139
+ Provide a short assessment in one paragraph (between 2 html div tags) on how well the report aligns with these requirements, you can put in html tags for bold or italic.
2140
+ The output will be integrated in a html page for rendering.
2141
+ """
2142
+ assessment = openai_call(prompt, model="o3-mini", max_tokens_param=400)
2143
  logging.info(f"assess_report_alignment: Assessment result: {assessment}")
2144
  return assessment
2145