derkaal committed on
Commit
216d3ae
·
verified ·
1 Parent(s): 3bfcda8

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. gaiaX/question_handlers.py +38 -228
  2. requirements.txt +1 -11
gaiaX/question_handlers.py CHANGED
@@ -27,48 +27,29 @@ def detect_question_type(question_text: str) -> str:
27
  # Convert to lowercase for case-insensitive matching
28
  text = question_text.lower()
29
 
30
- # Check for media content questions (videos, YouTube, audio, etc.)
31
- if any(keyword in text for keyword in ["video", "youtube", "watch", "channel", "podcast",
32
- "stream", "streaming", "media", "transcript",
33
- "audio", "recording", "listen", "sound", "speech",
34
- "voice", "mp3", "wav", "spoken", "transcribe"]):
35
- return "media_content"
36
-
37
- # Check for current events or real-time information questions
38
- if any(keyword in text for keyword in ["current", "recent", "latest", "news", "today",
39
- "this year", "this month", "this week", "update"]):
40
- return "current_events"
41
-
42
  # Check for mathematical questions
43
- if any(keyword in text for keyword in ["calculate", "compute", "equation", "formula", "derivative",
44
- "integral", "probability", "statistics", "math"]):
45
  return "mathematical"
46
 
47
  # Check for technical implementation questions
48
- if any(keyword in text for keyword in ["implement", "code", "algorithm", "function", "class",
49
- "method", "programming", "pseudocode", "complexity"]):
50
  return "technical"
51
 
52
  # Check for context-based questions
53
- if any(keyword in text for keyword in ["context", "file", "document", "text", "analyze",
54
- "based on", "according to", "refer to"]):
55
  return "context_based"
56
 
57
- # Check for categorization questions
58
- if any(keyword in text for keyword in ["categorize", "classify", "sort", "group", "list of",
59
- "which are", "identify the", "separate", "distinguish between",
60
- "fruits", "vegetables", "animals", "plants", "types of",
61
- "categories of", "examples of", "create a list", "make a list"]):
62
- return "categorization"
63
-
64
  # Check for ethical/societal questions
65
- if any(keyword in text for keyword in ["ethics", "ethical", "society", "impact", "bias",
66
- "fairness", "responsible", "governance"]):
67
  return "ethical"
68
 
69
  # Check for factual knowledge questions
70
- if any(keyword in text for keyword in ["define", "explain", "describe", "what is", "who is",
71
- "when was", "history", "concept"]):
72
  return "factual"
73
 
74
  # Default to general if no specific type is detected
@@ -93,14 +74,36 @@ def handle_factual_question(agent: Any, question: dict, context: str = None) ->
93
  enhanced_question = question.copy()
94
 
95
  question_text = question.get("question", "")
96
- enhanced_text = f"""
97
- [FACTUAL KNOWLEDGE QUESTION]
98
 
99
- {question_text}
 
 
100
 
101
- Please provide a precise, accurate answer based on established facts and knowledge.
102
- Include relevant examples and cite important research or developments when applicable.
103
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  enhanced_question["question"] = enhanced_text
106
 
@@ -271,193 +274,6 @@ def handle_general_question(agent: Any, question: dict, context: str = None) ->
271
  return get_agent_response(agent, enhanced_question)
272
 
273
 
274
- def handle_current_events_question(agent: Any, question: dict, context: str = None) -> str:
275
- """
276
- Handle questions about current events or real-time information.
277
-
278
- Args:
279
- agent: Initialized LangChain agent
280
- question: Dictionary containing question data
281
- context: Optional context text
282
-
283
- Returns:
284
- Agent's response as a string
285
- """
286
- logger.info("Handling current events question")
287
-
288
- # Enhance the question with specific instructions for current events questions
289
- enhanced_question = question.copy()
290
-
291
- question_text = question.get("question", "")
292
- enhanced_text = f"""
293
- [CURRENT EVENTS QUESTION]
294
-
295
- {question_text}
296
-
297
- Please provide an up-to-date answer by:
298
- - Using search tools to find the most recent information
299
- - Citing sources and their publication dates
300
- - Synthesizing information from multiple sources when appropriate
301
- - Clearly distinguishing between facts and opinions
302
- - Indicating any uncertainties or conflicting information
303
-
304
- Make sure to use search tools to verify the most current information before answering.
305
- """
306
-
307
- if context:
308
- enhanced_text += f"\n\nContext:\n{context}"
309
-
310
- enhanced_question["question"] = enhanced_text
311
-
312
- # Get response from the agent
313
- return get_agent_response(agent, enhanced_question)
314
-
315
-
316
- def handle_media_content_question(agent: Any, question: dict, context: str = None) -> str:
317
- """
318
- Handle questions about media content (videos, podcasts, audio files, etc.).
319
-
320
- Args:
321
- agent: Initialized LangChain agent
322
- question: Dictionary containing question data
323
- context: Optional context text
324
-
325
- Returns:
326
- Agent's response as a string
327
- """
328
- logger.info("Handling media content question")
329
-
330
- # Detect if this is an audio-specific question
331
- question_text = question.get("question", "")
332
- is_audio_question = any(keyword in question_text.lower() for keyword in
333
- ["audio", "sound", "listen", "recording", "speech", "voice",
334
- "podcast", "mp3", "wav", "spoken", "transcribe", "recipe audio"])
335
-
336
- # Check if context contains audio file detection message
337
- has_audio_file = False
338
- audio_file_path = None
339
- if context and "Audio file detected" in context:
340
- has_audio_file = True
341
- # Try to extract the file path
342
- import re
343
- path_match = re.search(r"path: (.*?)($|\n)", context)
344
- if path_match:
345
- audio_file_path = path_match.group(1).strip()
346
-
347
- # Enhance the question with specific instructions for media content questions
348
- enhanced_question = question.copy()
349
-
350
- if is_audio_question or has_audio_file:
351
- # Audio-specific instructions
352
- enhanced_text = f"""
353
- [AUDIO CONTENT QUESTION]
354
-
355
- {question_text}
356
-
357
- Please provide a comprehensive answer by:
358
- - Using audio transcription tools if an audio file is provided
359
- - For recipe audio, extracting ingredients and steps using specialized tools
360
- - Analyzing the transcribed content in relation to the question
361
- - Formatting the response according to any specific request in the question
362
- - Providing clear, structured information extracted from the audio
363
-
364
- """
365
-
366
- if audio_file_path:
367
- enhanced_text += f"\nAn audio file has been detected. Use the transcribe_audio tool with the path: {audio_file_path}\n"
368
-
369
- # Check if it's a recipe question
370
- if "recipe" in question_text.lower() or "ingredient" in question_text.lower():
371
- enhanced_text += f"\nThis appears to be a recipe-related question. After transcription, use the extract_ingredients_from_audio tool with the path: {audio_file_path}\n"
372
- else:
373
- # Video/general media instructions
374
- enhanced_text = f"""
375
- [MEDIA CONTENT QUESTION]
376
-
377
- {question_text}
378
-
379
- Please provide a comprehensive answer by:
380
- - Using YouTube search tools to find relevant videos if needed
381
- - Retrieving and analyzing video transcripts when appropriate
382
- - Summarizing key points from the media content
383
- - Connecting the media content to the specific question being asked
384
- - Citing the source, creator, and publication date of the media
385
- - Formatting the response according to any specific request in the question
386
-
387
- Make sure to use YouTube tools to search for and analyze relevant videos before answering.
388
- """
389
-
390
- if context:
391
- enhanced_text += f"\n\nContext:\n{context}"
392
-
393
- enhanced_question["question"] = enhanced_text
394
-
395
- # Get response from the agent
396
- return get_agent_response(agent, enhanced_question)
397
-
398
-
399
- def handle_categorization_question(agent: Any, question: dict, context: str = None) -> str:
400
- """
401
- Handle categorization questions (e.g., classifying items into groups).
402
-
403
- Args:
404
- agent: Initialized LangChain agent
405
- question: Dictionary containing question data
406
- context: Optional context text
407
-
408
- Returns:
409
- Agent's response as a string
410
- """
411
- logger.info("Handling categorization question")
412
-
413
- # Enhance the question with specific instructions for categorization questions
414
- enhanced_question = question.copy()
415
-
416
- question_text = question.get("question", "")
417
- enhanced_text = f"""
418
- [CATEGORIZATION QUESTION]
419
-
420
- {question_text}
421
-
422
- Please provide a careful and accurate categorization by:
423
- - Paying close attention to the specific classification system requested (botanical, culinary, etc.)
424
- - For botanical categorization:
425
- * Fruits develop from the flower of a plant and contain seeds
426
- * Vegetables come from other parts of the plant (leaves, stems, roots, bulbs)
427
- * Some botanical fruits are culinarily considered vegetables (tomatoes, bell peppers, cucumbers, etc.)
428
- * The following items are botanically fruits (develop from flowers and contain seeds):
429
- - Green beans (legume fruits)
430
- - Bell peppers (berry fruits)
431
- - Zucchini (pepo fruits)
432
- - Corn kernels (grain fruits/caryopsis)
433
- - Whole allspice (berry fruits)
434
- - Tomatoes (berry fruits)
435
- - Eggplants (berry fruits)
436
- - Cucumbers (pepo fruits)
437
- - Pumpkins (pepo fruits)
438
- - Avocados (berry fruits)
439
- - Olives (drupe fruits)
440
- - For culinary categorization:
441
- * Sweet or tart items served as dessert or snacks are typically considered fruits
442
- * Items used in savory dishes are typically considered vegetables
443
- * Many culinary vegetables are botanically fruits (tomatoes, eggplants, bell peppers, etc.)
444
- - When in doubt about classification systems, default to the most common usage unless specified otherwise
445
- - Herbs like basil, cilantro, and parsley are considered vegetables in culinary contexts
446
- - Sweet potatoes are root vegetables (true botanical vegetables)
447
- - Broccoli, celery, and lettuce are true botanical vegetables (not fruits)
448
-
449
- Ensure your categorization is complete and accurate according to the specified criteria.
450
- """
451
-
452
- if context:
453
- enhanced_text += f"\n\nContext:\n{context}"
454
-
455
- enhanced_question["question"] = enhanced_text
456
-
457
- # Get response from the agent
458
- return get_agent_response(agent, enhanced_question)
459
-
460
-
461
  def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_URL) -> dict:
462
  """
463
  Process a single question using the appropriate handler.
@@ -502,12 +318,6 @@ def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_UR
502
  answer = handle_mathematical_question(agent, question, context)
503
  elif question_type == "context_based":
504
  answer = handle_context_based_question(agent, question, context)
505
- elif question_type == "current_events":
506
- answer = handle_current_events_question(agent, question, context)
507
- elif question_type == "media_content":
508
- answer = handle_media_content_question(agent, question, context)
509
- elif question_type == "categorization":
510
- answer = handle_categorization_question(agent, question, context)
511
  else:
512
  answer = handle_general_question(agent, question, context)
513
 
 
27
  # Convert to lowercase for case-insensitive matching
28
  text = question_text.lower()
29
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Check for mathematical questions
31
+ if any(keyword in text for keyword in ["calculate", "compute", "equation", "formula", "derivative",
32
+ "integral", "probability", "statistics", "math"]):
33
  return "mathematical"
34
 
35
  # Check for technical implementation questions
36
+ if any(keyword in text for keyword in ["implement", "code", "algorithm", "function", "class",
37
+ "method", "programming", "pseudocode", "complexity"]):
38
  return "technical"
39
 
40
  # Check for context-based questions
41
+ if any(keyword in text for keyword in ["context", "file", "document", "text", "analyze",
42
+ "based on", "according to", "refer to"]):
43
  return "context_based"
44
 
 
 
 
 
 
 
 
45
  # Check for ethical/societal questions
46
+ if any(keyword in text for keyword in ["ethics", "ethical", "society", "impact", "bias",
47
+ "fairness", "responsible", "governance"]):
48
  return "ethical"
49
 
50
  # Check for factual knowledge questions
51
+ if any(keyword in text for keyword in ["define", "explain", "describe", "what is", "who is",
52
+ "when was", "history", "concept"]):
53
  return "factual"
54
 
55
  # Default to general if no specific type is detected
 
74
  enhanced_question = question.copy()
75
 
76
  question_text = question.get("question", "")
 
 
77
 
78
+ # Check if this is a counting question
79
+ is_counting_question = any(keyword in question_text.lower() for keyword in
80
+ ["how many", "count", "number of", "total number", "quantity"])
81
 
82
+ if is_counting_question:
83
+ enhanced_text = f"""
84
+ [FACTUAL COUNTING QUESTION]
85
+
86
+ {question_text}
87
+
88
+ This is a counting question. Please:
89
+ 1. Be precise and verify information from multiple sources when possible
90
+ 2. Carefully distinguish between different categories (e.g., studio albums vs. live albums vs. compilations)
91
+ 3. Pay careful attention to date ranges and ensure items fall within the specified period
92
+ 4. Count only the items that exactly match all criteria in the question
93
+ 5. When using Wikipedia as a source, make sure to check the entire article for complete information
94
+ 6. For discographies, verify the type of each album before counting it
95
+ 7. List all items you're counting to ensure accuracy
96
+ 8. Double-check your count before providing the final answer
97
+ """
98
+ else:
99
+ enhanced_text = f"""
100
+ [FACTUAL KNOWLEDGE QUESTION]
101
+
102
+ {question_text}
103
+
104
+ Please provide a precise, accurate answer based on established facts and knowledge.
105
+ Include relevant examples and cite important research or developments when applicable.
106
+ """
107
 
108
  enhanced_question["question"] = enhanced_text
109
 
 
274
  return get_agent_response(agent, enhanced_question)
275
 
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  def process_question(agent: Any, question: dict, api_base_url: str = API_BASE_URL) -> dict:
278
  """
279
  Process a single question using the appropriate handler.
 
318
  answer = handle_mathematical_question(agent, question, context)
319
  elif question_type == "context_based":
320
  answer = handle_context_based_question(agent, question, context)
 
 
 
 
 
 
321
  else:
322
  answer = handle_general_question(agent, question, context)
323
 
requirements.txt CHANGED
@@ -15,14 +15,4 @@ pandas>=2.0.0
15
  # Utility dependencies
16
  tqdm>=4.66.1
17
  pydantic>=2.4.0
18
- tenacity>=8.2.3
19
-
20
- # Audio processing dependencies
21
- pydub>=0.25.1
22
- SpeechRecognition>=3.10.0
23
-
24
- # External information sources dependencies
25
- google-api-python-client>=2.100.0 # For YouTube API
26
- youtube-transcript-api>=0.6.1 # For YouTube transcripts
27
- google-search-results>=2.4.2 # For SerpAPI
28
- tavily-python>=0.2.6 # For Tavily search
 
15
  # Utility dependencies
16
  tqdm>=4.66.1
17
  pydantic>=2.4.0
18
+ tenacity>=8.2.3