Update app.py

app.py CHANGED
@@ -1422,24 +1422,17 @@ Output only the translated query."""
     logging.info(f"make_multilingual_query: Transformed query: {finalquery}")
     return finalquery
 
-def generate_query_tree(context: str, breadth: int, depth: int) -> list:
-    """
-    Generates a list of search queries based on the given context.
-
-    Args:
-        context: The context string for which to generate queries.
-        breadth: The maximum number of queries to generate.
-        depth: This parameter is not used in the current implementation,
-            but it is kept for potential future extensions of this function.
-
-    Returns:
-        A list of query strings.
-    """
-
+def generate_query_tree(context: str, breadth: int, depth: int, selected_engines: list) -> list:
+    if selected_engines == None:
+        list_engines = "all relevant search engines"
+    else:
+        list_engines = ','.join(map(str, selected_engines))
     prompt = f"""
-
+Generate a list of {breadth} search queries relevant to the following context:
+{context}
 
-
+The list of engines selected is the following:
+{list_engines}
 
 // Requirements
 - The queries should be suitable for a search engine.
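A quick aside on the new parameter handling: `selected_engines == None` works, but PEP 8 recommends an identity check for None. A minimal standalone sketch of the same normalization (the helper name describe_engines is ours, not from app.py):

    def describe_engines(selected_engines) -> str:
        # Same normalization as the added code above, written with "is None"
        # (PEP 8) instead of "== None"; behavior is identical here.
        if selected_engines is None:
            return "all relevant search engines"
        return ",".join(map(str, selected_engines))

    assert describe_engines(None) == "all relevant search engines"
    assert describe_engines(["google", "baidu"]) == "google,baidu"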
@@ -1449,22 +1442,24 @@ def generate_query_tree(context: str, breadth: int, depth: int) -> list:
 
 // IMPORTANT:
 - Return only valid JSON without any markdown code fences (```) or mention of json
+- Think tactically about what would be the best query to get the most relevant search results
 
 // EXAMPLE (if breadth = 4):
 
 {{
   "queries": [
-      (Artificial Intelligence OR data science) AND mathematics,
-      (geometry OR algebra) AND research AND machine learning,
-      (calculus OR "differential equations") AND "
-
+      '(Artificial Intelligence OR data science) AND mathematics',
+      '(geometry OR algebra) AND research AND machine learning',
+      '(calculus OR "differential equations") AND "Artificial intelligence"',
+      'filetype:pdf "artificial intelligence" OR "machine learning"',
+      'Statistics AND "data analysis" AND "machine learning"'
   ]
 }}
 
 Now generate the result.
 """
     messages = []  # Use prompt directly in openai_call, messages is handled there.
-    llm_response = openai_call(prompt=prompt, messages=messages)
+    llm_response = openai_call(prompt=prompt, model="o3-mini", messages=messages)
     cleaned_response = llm_response.strip()
     if cleaned_response.startswith("```"):
         cleaned_response = cleaned_response.strip("`").strip()
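The hunk cuts off right after the code-fence stripping, before the response is actually parsed. A minimal sketch of the step that presumably follows; the json.loads call and the "queries" key are assumptions based on the prompt's JSON example, not code visible in this diff:

    import json
    import logging

    def parse_query_tree_response(cleaned_response: str, breadth: int) -> list:
        # Hypothetical continuation: parse the model's JSON and cap at breadth.
        try:
            queries = json.loads(cleaned_response).get("queries", [])
        except json.JSONDecodeError:
            logging.warning("generate_query_tree: model returned invalid JSON")
            queries = []
        return queries[:breadth]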
@@ -1481,7 +1476,7 @@ Now generate the result.
 
 def generate_serp_queries(context: str, breadth: int, depth: int, initial_query: str,
                           selected_engines=None, results_per_query: int = 10) -> list:
-    queries = generate_query_tree(context, breadth, depth)
+    queries = generate_query_tree(context, breadth, depth, selected_engines)
     prompt = f"""The research topic is: "{initial_query}".
 Based on this query and the overall context: "{context}", suggest one or several languages (other than English) that might be relevant.
 
@@ -1505,7 +1500,7 @@ google,google_scholar,google_trends,google_news,google_ai_overview,bing,bing_new
 Which are most relevant? Output a comma separated list (e.g., "google,baidu").
 If none are found, output "google".
 """
-    identified_engines = openai_call(prompt_engines, model="
+    identified_engines = openai_call(prompt_engines, model="o3-mini", max_tokens_param=20)
     # Split and strip engines; if result is empty (or all empty strings), default to ["google"]
     selected_engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
     if not selected_engines:
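The split-and-default pattern above also quietly handles whitespace-only model output; a quick standalone check of the same logic:

    def parse_engines(identified_engines: str) -> list:
        # Same pattern as in the diff: split on commas, drop empty entries,
        # and fall back to Google when nothing usable remains.
        engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
        return engines or ["google"]

    assert parse_engines("google, baidu") == ["google", "baidu"]
    assert parse_engines("  , ,") == ["google"]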
@@ -1530,7 +1525,7 @@ def generate_tailored_questions(openai_api_key: str, query: str, existing_qa: st
     prompt = (
         f"Context:\nSearch Topic: {query}\nExisting Clarification Q&A: {existing_qa}\nExisting Report: {existing_report}\n"
         f"Existing Process Log: {existing_log}\nCrumbs: {crumbs}\n\n"
-        "Based on the above, generate FIVE new clarification questions,
+        "Based on the above, generate FIVE new clarification questions, preferably open questions like 'What objectives would you like to reach with this search?' or 'Are there specific aspects you would like to search in particular, such as xxx, yyy, ...?' "
         "Each question should be followed by 'Response:' then 2 empty lines."
     )
     new_questions = openai_call(prompt, model="gpt-4o", max_tokens_param=300)
@@ -1867,7 +1862,7 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
         f"And the initial query: '{initial_query}',\n\n"
         "please propose a disruptive, experimental, and ambitious new research hypothesis that goes beyond conventional boundaries (Overton-window) related to the topic. "
         "Formulate this as a new research query that could lead to innovative insights.")
-    disruptive_query = openai_call(new_prompt, model="
+    disruptive_query = openai_call(new_prompt, model="o3-mini", max_tokens_param=500)
     logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
 
     # Generate tailored clarification questions for the disruptive query
@@ -2097,8 +2092,10 @@ def filter_crumbs_in_batches(crumbs_list: list, initial_query: str, clarificatio
     for i in range(0, len(crumbs_list), batch_size):
         batch = crumbs_list[i:i+batch_size]
         # Build a prompt describing each crumb
-        prompt = "
-
+        prompt = """
+        We have a set of crumbs. For each crumb, decide if it significantly adds new facts, figures, references, or quotes to the topic being investigated. Don't be too selective; we still need to keep between 10 and 15 in each batch.
+        Mark 'yes' if it is valuable for the final report, otherwise 'no'. Output JSON.
+        """
         listing = []
         for idx, c in enumerate(batch):
             snippet_for_prompt = c["summary"][:500]  # short snippet
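The 'Output JSON' instruction implies a per-crumb yes/no decision that is filtered back into accepted further down; the exact schema is specified later in the prompt (the next hunk's context line reads 'Return a JSON object with structure:') and is not visible here. A hedged sketch of how such a response might be applied to a batch, with the decisions shape being a pure assumption:

    import json

    def apply_decisions(llm_json: str, batch: list) -> list:
        # Assumed response shape: {"decisions": [{"index": 0, "keep": "yes"}, ...]}.
        # The real schema lives in the part of the prompt this diff does not show.
        decisions = json.loads(llm_json).get("decisions", [])
        kept = {d["index"] for d in decisions if d.get("keep") == "yes"}
        return [crumb for i, crumb in enumerate(batch) if i in kept]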
@@ -2126,15 +2123,23 @@ Return a JSON object with structure:
     return accepted
 
 def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
-    prompt =
-
-
-
-
-
-
-
-
+    prompt = f"""
+    Please assess the following research report in terms of its alignment with the initial user request and the clarification Q&A provided.
+    Ensure that the report covers the key points of the topic.
+
+    Initial Query:
+    {initial_query}
+
+    Clarifications:
+    {clarifications}
+
+    Research Report:
+    {report}
+
+    Provide a short assessment in one paragraph (between two HTML <div> tags) on how well the report aligns with these requirements; you may use HTML tags for bold or italic.
+    The output will be integrated into an HTML page for rendering.
+    """
+    assessment = openai_call(prompt, model="o3-mini", max_tokens_param=400)
     logging.info(f"assess_report_alignment: Assessment result: {assessment}")
     return assessment
 
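Since the assessment string is embedded directly in an HTML page, a small guard may be worth adding in case the model ignores the <div> instruction. This wrapper is a suggestion, not part of the commit:

    def ensure_div(assessment: str) -> str:
        # Normalize the model output before embedding it in the page:
        # wrap it in a <div> if the instruction to do so was ignored.
        text = assessment.strip()
        if not text.lower().startswith("<div"):
            text = f"<div>{text}</div>"
        return text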