Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -232,7 +232,24 @@ Note: General Optimization Guidelines:
|
|
| 232 |
4. Ensure that the summary length and level of detail are proportional to the source length.
|
| 233 |
Source length: {snippet_words} words. You may produce a more detailed summary if the text is long.
|
| 234 |
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
- "relevant": "yes" or "no"
|
| 237 |
- url: full url
|
| 238 |
- title: title
|
|
@@ -2439,33 +2456,43 @@ def generate_query_tree(context: str, breadth: int, depth: int, selected_engines
|
|
| 2439 |
# If selected_engines is None, provide a fallback string
|
| 2440 |
list_engines = "all relevant search engines" if selected_engines is None else ','.join(map(str, selected_engines))
|
| 2441 |
|
| 2442 |
-
|
| 2443 |
-
Generate
|
| 2444 |
-
|
| 2445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2446 |
|
| 2447 |
-
// Requirements
|
| 2448 |
-
- The queries should be suitable for a search engine.
|
| 2449 |
-
- Each query should combine terms using logical operators (AND, OR) where appropriate.
|
| 2450 |
-
- Do not include explanations or introductory phrases.
|
| 2451 |
-
- Just output a JSON object containing a list of strings named 'queries'.
|
| 2452 |
-
|
| 2453 |
-
// IMPORTANT:
|
| 2454 |
-
- Return only valid JSON without any markdown code fences (```) or mention of json
|
| 2455 |
-
|
| 2456 |
-
// EXAMPLE (if breadth = 4):
|
| 2457 |
-
{{
|
| 2458 |
-
"queries": [
|
| 2459 |
-
(Artificial Intelligence OR data science) AND mathematics,
|
| 2460 |
-
(geometry OR algebra) AND research AND machine learning,
|
| 2461 |
-
(calculus OR "differential equations") AND "AI applications",
|
| 2462 |
-
"Statistics" AND "data analysis" AND "machine learning algorithms"
|
| 2463 |
-
]
|
| 2464 |
-
}}
|
| 2465 |
-
|
| 2466 |
-
Do not include any extra text, markdown formatting, or commentary. Output the JSON starting from "{{" and ending with "}}".
|
| 2467 |
-
Now generate the result.
|
| 2468 |
-
"""
|
| 2469 |
messages = []
|
| 2470 |
llm_response = llm_call(prompt=prompt, messages=messages, model="o3-mini", temperature=0, max_tokens_param=1500)
|
| 2471 |
logging.info(f"Generated query tree: {llm_response}")
|
|
|
|
| 232 |
4. Ensure that the summary length and level of detail are proportional to the source length.
|
| 233 |
Source length: {snippet_words} words. You may produce a more detailed summary if the text is long.
|
| 234 |
|
| 235 |
+
// Special guidance for follow-up search queries
|
| 236 |
+
1. Query Progression:
|
| 237 |
+
- Begin with foundational/conceptual queries
|
| 238 |
+
- Progress to methodological/technical terms
|
| 239 |
+
- Culminate in specialized/applied combinations
|
| 240 |
+
|
| 241 |
+
2. Term Optimization:
|
| 242 |
+
- Use Boolean logic (AND/OR) strategically
|
| 243 |
+
- Include both general terminology AND domain-specific jargon
|
| 244 |
+
- Add temporal filters when relevant (e.g., "since 2018", "2020-2023")
|
| 245 |
+
- Consider geographical/cultural modifiers if applicable
|
| 246 |
+
|
| 247 |
+
3. Query Structure:
|
| 248 |
+
- Prioritize conceptual combinations over simple keyword matching
|
| 249 |
+
- Use quotation marks for exact phrases and hyphenation for compound terms
|
| 250 |
+
- Include emerging terminology variants (e.g., "LLMs" OR "large language models")
|
| 251 |
+
|
| 252 |
+
// IMPORTANT: Format your response as a proper JSON object with these fields:
|
| 253 |
- "relevant": "yes" or "no"
|
| 254 |
- url: full url
|
| 255 |
- title: title
|
|
|
|
| 2456 |
# If selected_engines is None, provide a fallback string
|
| 2457 |
list_engines = "all relevant search engines" if selected_engines is None else ','.join(map(str, selected_engines))
|
| 2458 |
|
| 2459 |
+
prompt = f"""
|
| 2460 |
+
Generate {breadth} search queries for academic research exploring: "{context}"
|
| 2461 |
+
|
| 2462 |
+
// Research Strategy Requirements
|
| 2463 |
+
1. Query Progression:
|
| 2464 |
+
- Begin with foundational/conceptual queries
|
| 2465 |
+
- Progress to methodological/technical terms
|
| 2466 |
+
- Culminate in specialized/applied combinations
|
| 2467 |
+
|
| 2468 |
+
2. Term Optimization:
|
| 2469 |
+
- Use Boolean logic (AND/OR) strategically
|
| 2470 |
+
- Include both general terminology AND domain-specific jargon
|
| 2471 |
+
- Add temporal filters when relevant (e.g., "since 2018", "2020-2023")
|
| 2472 |
+
- Consider geographical/cultural modifiers if applicable
|
| 2473 |
+
|
| 2474 |
+
3. Query Structure:
|
| 2475 |
+
- Prioritize conceptual combinations over simple keyword matching
|
| 2476 |
+
- Use quotation marks for exact phrases and hyphenation for compound terms
|
| 2477 |
+
- Include emerging terminology variants (e.g., "LLMs" OR "large language models")
|
| 2478 |
+
|
| 2479 |
+
// Output Requirements
|
| 2480 |
+
- Strictly valid JSON format: {{"queries": ["query1", "query2"]}}
|
| 2481 |
+
- No Markdown, code fences, or supplementary text
|
| 2482 |
+
- Clean string formatting without special characters
|
| 2483 |
+
|
| 2484 |
+
// Example (breadth=4):
|
| 2485 |
+
{{
|
| 2486 |
+
"queries": [
|
| 2487 |
+
"Fundamental theories AND (Artificial Intelligence OR machine learning)",
|
| 2488 |
+
"(Computational mathematics OR statistical modeling) AND research paradigms",
|
| 2489 |
+
"\"Deep learning architectures\" AND (optimization techniques OR neural networks)",
|
| 2490 |
+
"\"Generative AI\" AND industrial applications AND (2020-2024 OR recent developments)"
|
| 2491 |
+
]
|
| 2492 |
+
}}
|
| 2493 |
+
|
| 2494 |
+
Generate queries that systematically explore the research landscape from multiple conceptual angles."""
|
| 2495 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2496 |
messages = []
|
| 2497 |
llm_response = llm_call(prompt=prompt, messages=messages, model="o3-mini", temperature=0, max_tokens_param=1500)
|
| 2498 |
logging.info(f"Generated query tree: {llm_response}")
|