Update app.py

app.py CHANGED
@@ -1422,24 +1422,17 @@ Output only the translated query."""
     logging.info(f"make_multilingual_query: Transformed query: {finalquery}")
     return finalquery
 
-def generate_query_tree(context: str, breadth: int, depth: int) -> list:
-    """
-    Generates a list of search queries based on the given context.
-
-    Args:
-        context: The context string for which to generate queries.
-        breadth: The maximum number of queries to generate.
-        depth: This parameter is not used in the current implementation,
-            but it is kept for potential future extensions of this function.
-
-    Returns:
-        A list of query strings.
-    """
-
+def generate_query_tree(context: str, breadth: int, depth: int, selected_engines: list) -> list:
+    if selected_engines == None:
+        list_engines = "all relevant search engines"
+    else:
+        list_engines = ','.join(map(str, selected_engines))
     prompt = f"""
-
+Generate a list of {breadth} search queries relevant to the following context:
+{context}
 
-
+The list of engines selected is the following:
+{list_engines}
 
 // Requirements
 - The queries should be suitable for a search engine.
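A quick aside on the new parameter handling: `selected_engines == None` works, but PEP 8 recommends an identity check for None. A minimal standalone sketch of the same normalization (the helper name describe_engines is ours, not from app.py):

    def describe_engines(selected_engines) -> str:
        # Same normalization as the added code above, written with "is None"
        # (PEP 8) instead of "== None"; behavior is identical here.
        if selected_engines is None:
            return "all relevant search engines"
        return ",".join(map(str, selected_engines))

    assert describe_engines(None) == "all relevant search engines"
    assert describe_engines(["google", "baidu"]) == "google,baidu"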
@@ -1449,22 +1442,24 @@ def generate_query_tree(context: str, breadth: int, depth: int) -> list:
 
 // IMPORTANT:
 - Return only valid JSON without any markdown code fences (```) or mention of json
+- Think tactically about what would be the best query to get the most relevant search results
 
 // EXAMPLE (if breadth = 4):
 
 {{
   "queries": [
-      (Artificial Intelligence OR data science) AND mathematics,
-      (geometry OR algebra) AND research AND machine learning,
-      (calculus OR "differential equations") AND "
-
+      '(Artificial Intelligence OR data science) AND mathematics',
+      '(geometry OR algebra) AND research AND machine learning',
+      '(calculus OR "differential equations") AND "Artificial intelligence"',
+      'filetype:pdf "artificial intelligence" OR "machine learning"',
+      'Statistics AND "data analysis" AND "machine learning"'
   ]
 }}
 
 Now generate the result.
 """
     messages = []  # Use prompt directly in openai_call, messages is handled there.
-    llm_response = openai_call(prompt=prompt, messages=messages)
+    llm_response = openai_call(prompt=prompt, model="o3-mini", messages=messages)
     cleaned_response = llm_response.strip()
     if cleaned_response.startswith("```"):
         cleaned_response = cleaned_response.strip("`").strip()
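The hunk cuts off right after the code-fence stripping, before the response is actually parsed. A minimal sketch of the step that presumably follows; the json.loads call and the "queries" key are assumptions based on the prompt's JSON example, not code visible in this diff:

    import json
    import logging

    def parse_query_tree_response(cleaned_response: str, breadth: int) -> list:
        # Hypothetical continuation: parse the model's JSON and cap at breadth.
        try:
            queries = json.loads(cleaned_response).get("queries", [])
        except json.JSONDecodeError:
            logging.warning("generate_query_tree: model returned invalid JSON")
            queries = []
        return queries[:breadth]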
@@ -1481,7 +1476,7 @@ Now generate the result.
 
 def generate_serp_queries(context: str, breadth: int, depth: int, initial_query: str,
                           selected_engines=None, results_per_query: int = 10) -> list:
-    queries = generate_query_tree(context, breadth, depth)
+    queries = generate_query_tree(context, breadth, depth, selected_engines)
     prompt = f"""The research topic is: "{initial_query}".
 Based on this query and the overall context: "{context}", suggest one or several languages (other than English) that might be relevant.
 
@@ -1505,7 +1500,7 @@ google,google_scholar,google_trends,google_news,google_ai_overview,bing,bing_new
 Which are most relevant? Output a comma separated list (e.g., "google,baidu").
 If none are found, output "google".
 """
-    identified_engines = openai_call(prompt_engines, model="
+    identified_engines = openai_call(prompt_engines, model="o3-mini", max_tokens_param=20)
     # Split and strip engines; if result is empty (or all empty strings), default to ["google"]
     selected_engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
     if not selected_engines:
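The split-and-default pattern above also quietly handles whitespace-only model output; a quick standalone check of the same logic:

    def parse_engines(identified_engines: str) -> list:
        # Same pattern as in the diff: split on commas, drop empty entries,
        # and fall back to Google when nothing usable remains.
        engines = [e.strip() for e in identified_engines.split(",") if e.strip()]
        return engines or ["google"]

    assert parse_engines("google, baidu") == ["google", "baidu"]
    assert parse_engines("  , ,") == ["google"]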
@@ -1530,7 +1525,7 @@ def generate_tailored_questions(openai_api_key: str, query: str, existing_qa: st
     prompt = (
         f"Context:\nSearch Topic: {query}\nExisting Clarification Q&A: {existing_qa}\nExisting Report: {existing_report}\n"
         f"Existing Process Log: {existing_log}\nCrumbs: {crumbs}\n\n"
-        "Based on the above, generate FIVE new clarification questions,
+        "Based on the above, generate FIVE new clarification questions, preferably open questions like 'What objectives would you like to reach with this search?' or 'Are there specific aspects you would like to search in particular, such as xxx, yyy, ...?' "
         "Each question should be followed by 'Response:' then 2 empty lines."
     )
     new_questions = openai_call(prompt, model="gpt-4o", max_tokens_param=300)
@@ -1867,7 +1862,7 @@ def generate_surprise_report(previous_report: str, crumbs_list: list, initial_qu
         f"And the initial query: '{initial_query}',\n\n"
         "please propose a disruptive, experimental, and ambitious new research hypothesis that goes beyond conventional boundaries (Overton-window) related to the topic. "
         "Formulate this as a new research query that could lead to innovative insights.")
-    disruptive_query = openai_call(new_prompt, model="
+    disruptive_query = openai_call(new_prompt, model="o3-mini", max_tokens_param=500)
     logging.info(f"generate_surprise_report: Disruptive new query generated: {disruptive_query}")
 
     # Generate tailored clarification questions for the disruptive query
@@ -2097,8 +2092,10 @@ def filter_crumbs_in_batches(crumbs_list: list, initial_query: str, clarificatio
     for i in range(0, len(crumbs_list), batch_size):
         batch = crumbs_list[i:i+batch_size]
         # Build a prompt describing each crumb
-        prompt = "
-
+        prompt = """
+        We have a set of crumbs. For each crumb, decide if it significantly adds new facts, figures, references, or quotes to the topic being investigated. Don't be too selective; we still need to keep between 10 and 15 in each batch.
+        Mark 'yes' if it is valuable for the final report, otherwise 'no'. Output JSON.
+        """
         listing = []
         for idx, c in enumerate(batch):
             snippet_for_prompt = c["summary"][:500]  # short snippet
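The 'Output JSON' instruction implies a per-crumb yes/no decision that is filtered back into accepted further down; the exact schema is specified later in the prompt (the next hunk's context line reads 'Return a JSON object with structure:') and is not visible here. A hedged sketch of how such a response might be applied to a batch, with the decisions shape being a pure assumption:

    import json

    def apply_decisions(llm_json: str, batch: list) -> list:
        # Assumed response shape: {"decisions": [{"index": 0, "keep": "yes"}, ...]}.
        # The real schema lives in the part of the prompt this diff does not show.
        decisions = json.loads(llm_json).get("decisions", [])
        kept = {d["index"] for d in decisions if d.get("keep") == "yes"}
        return [crumb for i, crumb in enumerate(batch) if i in kept]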
@@ -2126,15 +2123,23 @@ Return a JSON object with structure:
     return accepted
 
 def assess_report_alignment(report: str, initial_query: str, clarifications: str) -> str:
-    prompt =
-
-
-
-
-
-
-
-
+    prompt = f"""
+    Please assess the following research report in terms of its alignment with the initial user request and the clarification Q&A provided.
+    Ensure that the report covers the key points of the topic.
+
+    Initial Query:
+    {initial_query}
+
+    Clarifications:
+    {clarifications}
+
+    Research Report:
+    {report}
+
+    Provide a short assessment in one paragraph (between two HTML <div> tags) on how well the report aligns with these requirements; you may use HTML tags for bold or italic.
+    The output will be integrated into an HTML page for rendering.
+    """
+    assessment = openai_call(prompt, model="o3-mini", max_tokens_param=400)
     logging.info(f"assess_report_alignment: Assessment result: {assessment}")
     return assessment
 
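Since the assessment string is embedded directly in an HTML page, a small guard may be worth adding in case the model ignores the <div> instruction. This wrapper is a suggestion, not part of the commit:

    def ensure_div(assessment: str) -> str:
        # Normalize the model output before embedding it in the page:
        # wrap it in a <div> if the instruction to do so was ignored.
        text = assessment.strip()
        if not text.lower().startswith("<div"):
            text = f"<div>{text}</div>"
        return text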