Guiyom committed on
Commit
60c0305
·
verified ·
1 Parent(s): c75cae4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -27
app.py CHANGED
@@ -232,7 +232,24 @@ Note: General Optimization Guidelines:
232
  4. Ensure that the summary length and level of detail is proportional to the source length.
233
  Source length: {snippet_words} words. You may produce a more detailed summary if the text is long.
234
 
235
- IMPORTANT: Format your response as a proper JSON object with these fields:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  - "relevant": "yes" or "no"
237
  - url: full url
238
  - title: title
@@ -2439,33 +2456,43 @@ def generate_query_tree(context: str, breadth: int, depth: int, selected_engines
2439
  # If selected_engines is None, provide a fallback string
2440
  list_engines = "all relevant search engines" if selected_engines is None else ','.join(map(str, selected_engines))
2441
 
2442
- prompt = f"""
2443
- Generate a list of {breadth} search queries relevant to the following context:
2444
-
2445
- "{context}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2446
 
2447
- // Requirements
2448
- - The queries should be suitable for a search engine.
2449
- - Each query should combine terms using logical operators (AND, OR) where appropriate.
2450
- - Do not include explanations or introductory phrases,
2451
- - Just output a JSON object containing a list of strings named 'queries'.
2452
-
2453
- // IMPORTANT:
2454
- - Return only valid JSON without any markdown code fences (```) or mention of json
2455
-
2456
- // EXAMPLE (if breadth = 4):
2457
- {{
2458
- "queries": [
2459
- (Artificial Intelligence OR data science) AND mathematics,
2460
- (geometry OR algebra) AND research AND machine learning,
2461
- (calculus OR "differential equations") AND "AI applications",
2462
- "Statistics" AND "data analysis" AND "machine learning algorithms"
2463
- ]
2464
- }}
2465
-
2466
- Do not include any extra text, markdown formatting, or commentary. Output the JSON starting from "{{" and ending with "}}".
2467
- Now generate the result.
2468
- """
2469
  messages = []
2470
  llm_response = llm_call(prompt=prompt, messages=messages, model="o3-mini", temperature=0, max_tokens_param=1500)
2471
  logging.info(f"Generated query tree: {llm_response}")
 
232
  4. Ensure that the summary length and level of detail is proportional to the source length.
233
  Source length: {snippet_words} words. You may produce a more detailed summary if the text is long.
234
 
235
+ // Special guidance for follow-up search queries
236
+ 1. Query Progression:
237
+ - Begin with foundational/conceptual queries
238
+ - Progress to methodological/technical terms
239
+ - Culminate in specialized/applied combinations
240
+
241
+ 2. Term Optimization:
242
+ - Use Boolean logic (AND/OR) strategically
243
+ - Include both general terminology AND domain-specific jargon
244
+ - Add temporal filters when relevant (e.g., "since 2018", "2020-2023")
245
+ - Consider geographical/cultural modifiers if applicable
246
+
247
+ 3. Query Structure:
248
+ - Prioritize conceptual combinations over simple keyword matching
249
+ - Use quotation marks for exact phrases and hyphenation for compound terms
250
+ - Include emerging terminology variants (e.g., "LLMs" OR "large language models")
251
+
252
+ // IMPORTANT: Format your response as a proper JSON object with these fields:
253
  - "relevant": "yes" or "no"
254
  - url: full url
255
  - title: title
 
2456
  # If selected_engines is None, provide a fallback string
2457
  list_engines = "all relevant search engines" if selected_engines is None else ','.join(map(str, selected_engines))
2458
 
2459
+ prompt = f"""
2460
+ Generate {breadth} search queries for academic research exploring: "{context}"
2461
+
2462
+ // Research Strategy Requirements
2463
+ 1. Query Progression:
2464
+ - Begin with foundational/conceptual queries
2465
+ - Progress to methodological/technical terms
2466
+ - Culminate in specialized/applied combinations
2467
+
2468
+ 2. Term Optimization:
2469
+ - Use Boolean logic (AND/OR) strategically
2470
+ - Include both general terminology AND domain-specific jargon
2471
+ - Add temporal filters when relevant (e.g., "since 2018", "2020-2023")
2472
+ - Consider geographical/cultural modifiers if applicable
2473
+
2474
+ 3. Query Structure:
2475
+ - Prioritize conceptual combinations over simple keyword matching
2476
+ - Use quotation marks for exact phrases and hyphenation for compound terms
2477
+ - Include emerging terminology variants (e.g., "LLMs" OR "large language models")
2478
+
2479
+ // Output Requirements
2480
+ - Strictly valid JSON format: {{"queries": ["query1", "query2"]}}
2481
+ - No Markdown, code fences, or supplementary text
2482
+ - Clean string formatting without special characters
2483
+
2484
+ // Example (breadth=4):
2485
+ {{
2486
+ "queries": [
2487
+ "Fundamental theories AND (Artificial Intelligence OR machine learning)",
2488
+ "(Computational mathematics OR statistical modeling) AND research paradigms",
2489
+ "\"Deep learning architectures\" AND (optimization techniques OR neural networks)",
2490
+ "\"Generative AI\" AND industrial applications AND (2020-2024 OR recent developments)"
2491
+ ]
2492
+ }}
2493
+
2494
+ Generate queries that systematically explore the research landscape from multiple conceptual angles."""
2495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2496
  messages = []
2497
  llm_response = llm_call(prompt=prompt, messages=messages, model="o3-mini", temperature=0, max_tokens_param=1500)
2498
  logging.info(f"Generated query tree: {llm_response}")