gabejavitt commited on
Commit
1d49677
·
verified ·
1 Parent(s): 2541381

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +520 -601
app.py CHANGED
@@ -41,7 +41,7 @@ from langchain_core.documents import Document
41
  # CONFIGURATION
42
  # =============================================================================
43
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
44
- MAX_TURNS = 25 # Increased for planning/reflection
45
  MAX_MESSAGE_LENGTH = 8000
46
  REFLECT_EVERY_N_TURNS = 5
47
 
@@ -143,7 +143,7 @@ def find_file(path: str) -> Optional[Path]:
143
  # =============================================================================
144
 
145
  class ThinkInput(BaseModel):
146
- reasoning: str = Field(description="Your step-by-step reasoning for a logic puzzle (keep under 200 chars)")
147
 
148
  @tool(args_schema=ThinkInput)
149
  def think_through_logic(reasoning: str) -> str:
@@ -155,150 +155,101 @@ def think_through_logic(reasoning: str) -> str:
155
  - You need to reason through a logical problem
156
  - No external information is needed, just thinking
157
 
158
- After thinking through the logic, use calculator if math is involved,
159
- then validate_answer and final_answer_tool.
160
-
161
- NOTE: Keep reasoning summary brief (under 200 chars).
162
  """
163
- print(f"🧠 Thinking through logic: {reasoning[:100]}...")
164
 
165
- return f"""✅ Logic reasoning recorded: {reasoning}
 
 
 
 
 
166
 
167
- Now:
168
- 1. If there's any math to calculate, use calculator()
169
- 2. Once you have the answer, call validate_answer()
170
- 3. Then call final_answer_tool() with just the answer"""
171
 
172
 
173
  class PlanInput(BaseModel):
174
- question: str = Field(description="Brief summary of the task (keep under 100 chars)")
175
 
176
  @tool(args_schema=PlanInput)
177
- def create_plan(question: str) -> str:
178
  """
179
- Creates a step-by-step plan for answering a question.
180
- CRITICAL: Call this FIRST for any multi-step or complex question.
181
-
182
- This helps you think through:
183
- 1. What information do you need?
184
- 2. In what order should you gather it?
185
- 3. What tools will you use?
186
-
187
- After calling this, execute the plan step-by-step.
188
-
189
- NOTE: Keep the question summary brief (under 100 chars) to avoid errors.
190
  """
191
- print(f"📋 Planning phase initiated for: {question[:100]}...")
192
 
193
- return f"""✅ Plan Created. Now execute these steps methodically:
194
 
195
- PLANNING FRAMEWORK:
196
- 1. GOAL: What exact answer format is needed?
197
- 2. REQUIREMENTS: What data/information is required?
198
- 3. STRATEGY: What's the most efficient path?
199
- 4. EXECUTION: List concrete actions in order
200
 
201
- Now proceed with Step 1 of your plan."""
202
 
203
 
204
  class ReflectInput(BaseModel):
205
- current_situation: str = Field(description="What you've tried so far (keep brief, under 100 chars)")
206
 
207
  @tool(args_schema=ReflectInput)
208
- def reflect_on_progress(current_situation: str) -> str:
209
  """
210
- Reflects on your progress and suggests what to do next.
211
-
212
- Call this when:
213
- - You feel stuck or uncertain
214
- - Tools keep failing
215
- - You're not making progress
216
- - You've taken 5+ steps without getting closer to the answer
217
-
218
- This helps you step back and reconsider your approach.
219
-
220
- NOTE: Keep the situation summary brief (under 100 chars).
221
  """
222
- print(f"🤔 Reflection initiated: {current_situation[:100]}...")
223
 
224
- return f"""🔍 REFLECTION ANALYSIS:
225
-
226
- Current situation: {current_situation}
227
 
228
- CRITICAL QUESTIONS TO ASK YOURSELF:
229
- 1. Have I gathered the information I actually need?
230
- 2. Am I using the right tools for this task?
231
- 3. Am I going in circles (repeating similar actions)?
232
- 4. Should I try a completely different approach?
233
- 5. Do I have enough information to answer now?
234
 
235
- NEXT STEPS:
236
- - If stuck: Try a different tool or search query
237
- - If missing info: Identify exactly what's missing
238
- - If have info: Proceed to final_answer_tool
239
- - If uncertain: Break problem into smaller pieces
240
-
241
- Take a different approach now."""
242
 
243
 
244
  class ValidateInput(BaseModel):
245
- proposed_answer: str = Field(description="The answer you plan to submit")
246
- original_question: str = Field(description="The original question")
247
 
248
  @tool(args_schema=ValidateInput)
249
  def validate_answer(proposed_answer: str, original_question: str) -> str:
250
  """
251
- Validates your proposed answer before submission.
252
- CRITICAL: ALWAYS call this before final_answer_tool.
253
-
254
- Checks:
255
- - Does the answer match what was asked?
256
- - Is it in the correct format?
257
- - Are there any obvious issues?
258
-
259
- If validation passes, then call final_answer_tool.
260
- If validation fails, gather more information or correct the format.
261
  """
262
- print(f"✓ Validating answer: '{proposed_answer[:50]}...'")
263
 
264
  issues = []
265
  warnings = []
266
 
267
  # Check for conversational fluff
268
- fluff_phrases = ["the answer is", "based on", "according to", "i found that", "here is", "final answer"]
269
- if any(phrase in proposed_answer.lower() for phrase in fluff_phrases):
270
- issues.append("❌ Remove conversational text. Provide ONLY the answer.")
271
-
272
- # Check for number format if question asks for numbers
273
- number_keywords = ["how many", "what number", "count", "total", "sum"]
274
- if any(kw in original_question.lower() for kw in number_keywords):
275
- if not any(char.isdigit() for char in proposed_answer):
276
- warnings.append("⚠️ Question seems to ask for a number, but answer contains no digits.")
277
-
278
- # Check for list format
279
- if "list" in original_question.lower() and "," not in proposed_answer:
280
- warnings.append("⚠️ Question asks for a list, consider comma-separated format.")
281
-
282
- # Check for yes/no questions
283
- if original_question.lower().strip().startswith(("is ", "are ", "was ", "were ", "do ", "does ", "did ", "can ", "will ")):
284
- if proposed_answer.lower() not in ["yes", "no", "true", "false"]:
285
- warnings.append("⚠️ This looks like a yes/no question. Consider simple yes/no answer.")
286
-
287
- # Check for code fences or markdown
288
  if "```" in proposed_answer:
289
- issues.append("❌ Remove code fences (```) from the answer.")
290
 
291
  # Check length
292
  if len(proposed_answer) > 500:
293
- warnings.append("⚠️ Answer is quite long. Are you sure this is just the answer and not an explanation?")
 
 
 
 
 
294
 
295
  if issues:
296
- return "🚫 VALIDATION FAILED:\n" + "\n".join(issues) + "\n\nFix these issues before calling final_answer_tool."
297
 
298
  if warnings:
299
- return "⚠️ VALIDATION WARNINGS:\n" + "\n".join(warnings) + "\n\nConsider these points, but you may proceed if confident."
300
 
301
- return "✅ VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
302
 
303
 
304
  # =============================================================================
@@ -306,48 +257,31 @@ def validate_answer(proposed_answer: str, original_question: str) -> str:
306
  # =============================================================================
307
 
308
  class SearchInput(BaseModel):
309
- query: str = Field(description="The search query.")
310
 
311
  @tool(args_schema=SearchInput)
312
  def search_tool(query: str) -> str:
313
- """
314
- Searches the web using DuckDuckGo.
315
- Use for: recent information, facts, general web searches.
316
-
317
- Tips:
318
- - Keep queries concise and specific
319
- - Include year for time-sensitive queries (e.g., "GDP Brazil 2016")
320
- - Try different phrasings if first search doesn't help
321
- """
322
  if not isinstance(query, str) or not query.strip():
323
- return "Error: Invalid input. 'query' must be a non-empty string."
324
 
325
  print(f"🔍 Searching: {query}")
326
  try:
327
  search = DuckDuckGoSearchRun()
328
  result = search.run(query)
329
- if len(result) > MAX_MESSAGE_LENGTH:
330
- result = result[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(result)} total chars]"
331
- return result
332
  except Exception as e:
333
- return f"Error running search for '{query}': {str(e)}"
334
 
335
 
336
  class CalcInput(BaseModel):
337
- expression: str = Field(description="Mathematical expression to evaluate (e.g., '2 + 2', 'sqrt(16)', '45 * 1.2')")
338
 
339
  @tool(args_schema=CalcInput)
340
  def calculator(expression: str) -> str:
341
  """
342
- Evaluates mathematical expressions.
343
- Use this for ANY calculations instead of code_interpreter.
344
-
345
- Supports: +, -, *, /, **, sqrt, sin, cos, tan, log, exp, pi, e, abs, round
346
-
347
- Examples:
348
- - calculator("127 * 83")
349
- - calculator("sqrt(144)")
350
- - calculator("(45 + 23) / 2")
351
  """
352
  if not isinstance(expression, str) or not expression.strip():
353
  return "Error: Invalid expression."
@@ -355,7 +289,6 @@ def calculator(expression: str) -> str:
355
  print(f"🧮 Calculating: {expression}")
356
 
357
  try:
358
- # Create safe namespace with math functions
359
  import math
360
  safe_dict = {
361
  'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
@@ -365,47 +298,33 @@ def calculator(expression: str) -> str:
365
  }
366
 
367
  result = eval(expression, {"__builtins__": {}}, safe_dict)
368
- return f"{result}"
369
  except Exception as e:
370
- return f"Error evaluating '{expression}': {str(e)}\nMake sure to use proper syntax (e.g., sqrt(16), not sqrt 16)"
371
 
372
 
373
  class CodeInput(BaseModel):
374
- code: str = Field(description="Python code to execute. MUST include print() for output.")
375
 
376
  @tool(args_schema=CodeInput)
377
  def code_interpreter(code: str) -> str:
378
  """
379
- Executes Python code for complex data processing.
380
-
381
- WHEN TO USE:
382
- - Data analysis (CSV, Excel files)
383
- - Complex calculations with loops/conditionals
384
- - String manipulation
385
- - Date/time calculations
386
-
387
- WHEN NOT TO USE:
388
- - Simple math (use calculator instead)
389
- - Web searches (use search_tool)
390
-
391
- Available libraries: pandas as pd, numpy as np, json, re, datetime
392
-
393
  CRITICAL: Always use print() to output results!
394
  """
395
  if not isinstance(code, str):
396
- return "Error: Invalid input. 'code' must be a string."
397
 
398
  # Safety checks
399
- dangerous_patterns = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system', 'exec(']
400
- code_lower = code.lower()
401
- for pattern in dangerous_patterns:
402
- if pattern in code_lower:
403
- return f"Error: Potentially dangerous operation '{pattern}' is not allowed."
404
 
405
- if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
406
- return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
407
 
408
- print(f"💻 Executing code...")
409
  output_stream = io.StringIO()
410
  error_stream = io.StringIO()
411
 
@@ -424,126 +343,113 @@ def code_interpreter(code: str) -> str:
424
  stderr = error_stream.getvalue()
425
 
426
  if stderr:
427
- return f"Error in execution:\n{stderr}\n\nStdout (if any):\n{stdout}"
428
 
429
  if stdout:
430
- if len(stdout) > MAX_MESSAGE_LENGTH:
431
- stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
432
- return f"{stdout}"
433
 
434
- return "Code executed but produced no output. Remember to use print() to display results!"
435
 
436
  except Exception as e:
437
- tb_str = traceback.format_exc()
438
- return f"Execution failed:\n{tb_str}"
439
 
440
 
441
  class ReadFileInput(BaseModel):
442
- path: str = Field(description="Path to the file to read")
443
 
444
  @tool(args_schema=ReadFileInput)
445
  def read_file(path: str) -> str:
446
- """Reads a file from the filesystem."""
447
  if not isinstance(path, str) or not path.strip():
448
- return "Error: Invalid input. 'path' must be a non-empty string."
449
 
450
- print(f"📄 Reading file: {path}")
451
 
452
  file_path = find_file(path)
453
  if not file_path:
454
- cwd_files = os.listdir(".")
455
- return (f"Error: File not found: '{path}'\n"
456
- f"Files in current directory: {cwd_files}")
457
 
458
  try:
459
  content = file_path.read_text(encoding='utf-8')
460
  return truncate_if_needed(content)
461
  except UnicodeDecodeError:
462
- size = file_path.stat().st_size
463
- ext = file_path.suffix
464
- return (f"File appears to be binary ({size} bytes). Cannot display as text.\n"
465
- f"File type: {ext}\n"
466
- f"Consider using audio_transcription_tool for audio files.")
467
  except Exception as e:
468
- return f"Error reading file: {str(e)}"
469
 
470
 
471
  class WriteFileInput(BaseModel):
472
- path: str = Field(description="Path where file should be written")
473
- content: str = Field(description="Content to write to the file")
474
 
475
  @tool(args_schema=WriteFileInput)
476
  def write_file(path: str, content: str) -> str:
477
- """Writes content to a file."""
478
- if not isinstance(path, str) or not path.strip():
479
- return "Error: Invalid input. 'path' must be a non-empty string."
480
- if not isinstance(content, str):
481
- return "Error: Invalid input. 'content' must be a string."
482
 
483
- print(f"✍️ Writing file: {path}")
484
 
485
  try:
486
  file_path = Path.cwd() / path
487
  file_path.parent.mkdir(parents=True, exist_ok=True)
488
  file_path.write_text(content, encoding='utf-8')
489
- return f"Successfully wrote {len(content)} characters to '{path}'."
490
  except Exception as e:
491
- return f"Error writing file '{path}': {str(e)}"
492
 
493
 
494
  class ListDirInput(BaseModel):
495
- path: str = Field(description="Directory path to list", default=".")
496
-
497
 
498
  @tool(args_schema=ListDirInput)
499
  def list_directory(path: str = ".") -> str:
500
- """Lists files and directories in a path."""
501
- print(f"📁 Listing directory: {path}")
502
 
503
  try:
504
  dir_path = Path.cwd() / path if path != "." else Path.cwd()
505
 
506
  if not dir_path.is_dir():
507
- return f"Error: '{path}' is not a valid directory."
508
 
509
  items = sorted(dir_path.iterdir())
510
 
511
  if not items:
512
  return f"Directory '{path}' is empty."
513
 
514
- files, directories = [], []
515
 
516
  for item in items:
517
  if item.is_dir():
518
- directories.append(f"📁 {item.name}/")
519
  else:
520
- size = item.stat().st_size
521
- files.append(f"📄 {item.name} ({size} bytes)")
522
 
523
  result = f"Contents of '{path}':\n\n"
524
- if directories:
525
- result += "Directories:\n" + "\n".join(directories) + "\n\n"
526
  if files:
527
  result += "Files:\n" + "\n".join(files)
528
 
529
  return result
530
  except Exception as e:
531
- return f"Error listing directory '{path}': {str(e)}"
532
 
533
 
534
  class AudioInput(BaseModel):
535
- file_path: str = Field(description="Path to audio file to transcribe")
536
 
537
  @tool(args_schema=AudioInput)
538
  def audio_transcription_tool(file_path: str) -> str:
539
- """Transcribes audio files to text using Whisper."""
540
- if not isinstance(file_path, str) or not file_path.strip():
541
- return "Error: Invalid input. 'file_path' must be a non-empty string."
542
 
543
- print(f"🎤 Transcribing audio: {file_path}")
544
 
545
  if asr_pipeline is None:
546
- return "Error: ASR pipeline is not available."
547
 
548
  audio_path = find_file(file_path)
549
  if not audio_path:
@@ -554,23 +460,23 @@ def audio_transcription_tool(file_path: str) -> str:
554
  result_text = transcription.get("text", "")
555
 
556
  if not result_text:
557
- return "Error: Transcription produced no text."
558
 
559
  return f"Transcription:\n{truncate_if_needed(result_text)}"
560
  except Exception as e:
561
- return f"Error transcribing '{file_path}': {str(e)}"
562
 
563
 
564
  class YoutubeInput(BaseModel):
565
- video_url: str = Field(description="YouTube video URL")
566
 
567
  @tool(args_schema=YoutubeInput)
568
  def get_youtube_transcript(video_url: str) -> str:
569
- """Fetches transcript/captions from a YouTube video."""
570
- if not isinstance(video_url, str) or not video_url.strip():
571
- return "Error: Invalid input. 'video_url' must be a non-empty string."
572
 
573
- print(f"📺 Getting YouTube transcript: {video_url}")
574
 
575
  try:
576
  video_id = None
@@ -580,125 +486,101 @@ def get_youtube_transcript(video_url: str) -> str:
580
  video_id = video_url.split("youtu.be/")[1].split("?")[0]
581
 
582
  if not video_id:
583
- return f"Error: Could not extract YouTube video ID from '{video_url}'."
584
 
585
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
586
 
587
  if not transcript_list:
588
- return "Error: No transcript found for this video."
589
 
590
  full_transcript = " ".join([item["text"] for item in transcript_list])
591
- return f"YouTube Transcript:\n{truncate_if_needed(full_transcript)}"
592
  except Exception as e:
593
- return f"Error getting transcript for '{video_url}': {str(e)}"
594
 
595
 
596
  class ScrapeInput(BaseModel):
597
- url: str = Field(description="URL to scrape (must start with http:// or https://)")
598
- query: str = Field(description="Specific question or information to find on the page")
599
 
600
  @tool(args_schema=ScrapeInput)
601
  def scrape_and_retrieve(url: str, query: str) -> str:
602
  """
603
- Scrapes a webpage and uses RAG to find relevant information.
604
-
605
- Use when:
606
- - You need specific information from a known webpage
607
- - Search results give you a URL that contains the answer
608
- - You need to extract data from a specific website
609
  """
610
- if not (url.lower().startswith(('http://', 'https://'))):
611
- return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
612
- if not query or not query.strip():
613
- return "Error: A query is required to search the page content."
614
 
615
  if global_embeddings is None or global_text_splitter is None:
616
  if not initialize_rag_components():
617
- return "Error: RAG components could not be initialized."
618
 
619
- print(f"🌐 Scraping & retrieving from: {url}")
620
 
621
  try:
622
- headers = {
623
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
624
- }
625
  response = requests.get(url, headers=headers, timeout=20)
626
  response.raise_for_status()
627
 
628
  soup = BeautifulSoup(response.text, 'html.parser')
629
 
630
- for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe", "noscript"]):
631
  tag.extract()
632
 
633
- main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main|article', re.I)) or soup.body
634
 
635
- if not main_content:
636
- return "Error: Could not find main content on the page."
637
 
638
- text = main_content.get_text(separator='\n', strip=True)
639
- lines = [line.strip() for line in text.splitlines()]
640
- text = '\n'.join(line for line in lines if line)
641
 
642
- if not text or len(text) < 50:
643
- return f"Error: Scraped content was too short or empty (length: {len(text)})."
644
 
645
  chunks = global_text_splitter.split_text(text)
646
 
647
  if not chunks:
648
- return "Error: Text could not be split into chunks."
649
 
650
- docs = [Document(page_content=chunk, metadata={"source": url}) for chunk in chunks]
651
 
652
  db = FAISS.from_documents(docs, global_embeddings)
653
-
654
  retriever = db.as_retriever(search_kwargs={"k": 5})
655
- retrieved_docs = retriever.invoke(query)
656
-
657
- if not retrieved_docs:
658
- return f"No relevant information found on {url} for query: '{query}'\n\nThe page was successfully scraped but doesn't seem to contain information matching your query."
659
-
660
- context_parts = []
661
- for i, doc in enumerate(retrieved_docs, 1):
662
- context_parts.append(f"[Chunk {i}]\n{doc.page_content}")
663
 
664
- context = "\n\n---\n\n".join(context_parts)
 
665
 
666
- result = f"Relevant information from {url}:\n\n{context}"
667
 
668
- return truncate_if_needed(result)
669
 
670
  except requests.RequestException as e:
671
- return f"Error fetching URL {url}: {str(e)}\n\nThe website may be blocking requests or may be temporarily unavailable."
672
  except Exception as e:
673
- tb_str = traceback.format_exc()
674
- return f"Error processing {url}: {str(e)}\n\nDetails:\n{tb_str}"
675
 
676
 
677
  class FinalAnswerInput(BaseModel):
678
- answer: str = Field(description="The final answer - EXACTLY what was asked for, nothing more")
679
 
680
  @tool(args_schema=FinalAnswerInput)
681
  def final_answer_tool(answer: str) -> str:
682
  """
683
- Submit your final answer.
684
-
685
- CRITICAL RULES:
686
- 1. ALWAYS call validate_answer() before this
687
- 2. The answer must be EXACTLY what was asked for
688
- 3. NO conversational text (no "The answer is...", etc.)
689
  4. NO explanations
690
- 5. Match the requested format exactly
691
-
692
- Examples:
693
- - If asked for a number: "42" (not "The answer is 42")
694
- - If asked for a list: "red, blue, green" (not "The colors are: red, blue, green")
695
- - If asked yes/no: "yes" (not "Yes, it is true")
696
  """
697
  if not isinstance(answer, str):
698
- try:
699
- answer = str(answer)
700
- except:
701
- return "Error: Invalid input. 'answer' must be a string."
702
 
703
  print(f"✅ FINAL ANSWER SUBMITTED: {answer}")
704
  return answer
@@ -708,8 +590,8 @@ def final_answer_tool(answer: str) -> str:
708
  # DEFINED TOOLS LIST
709
  # =============================================================================
710
  defined_tools = [
711
- # Planning & Reflection (use these strategically!)
712
- think_through_logic, # NEW: For logic puzzles
713
  create_plan,
714
  reflect_on_progress,
715
  validate_answer,
@@ -724,17 +606,16 @@ defined_tools = [
724
  write_file,
725
  list_directory,
726
 
727
- # Specialized tools
728
  audio_transcription_tool,
729
  get_youtube_transcript,
730
  scrape_and_retrieve,
731
 
732
- # Final answer
733
  final_answer_tool
734
  ]
735
 
736
 
737
-
738
  # =============================================================================
739
  # AGENT STATE
740
  # =============================================================================
@@ -744,152 +625,155 @@ class AgentState(TypedDict):
744
  has_plan: bool
745
  consecutive_errors: int
746
  tool_history: List[str]
 
747
 
748
 
749
  # =============================================================================
750
- # FALLBACK PARSER
751
  # =============================================================================
752
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
753
- """Parses malformed tool call strings from an LLM response."""
754
- print(f"Fallback parsing LLM content (first 500 chars):\n{content[:500]}")
 
755
  tool_name = None
756
  tool_input = None
757
- cleaned_str = None
758
-
759
- # STRATEGY 1: Parse Groq's <function=name{...}> format
760
- groq_match = re.search(
761
- r"<function=(\w+)\s*(\{.*?\})\s*(?:>|</function>)",
762
- content,
763
- re.DOTALL
764
- )
765
 
 
 
766
  if groq_match:
767
  try:
768
  tool_name = groq_match.group(1).strip()
769
  json_str = groq_match.group(2).strip()
770
-
771
- # Unescape unicode and clean up
772
  json_str = json_str.encode().decode('unicode_escape')
773
-
774
  tool_input = json.loads(json_str)
775
- print(f"🔧 Fallback: Parsed Groq format for '{tool_name}'")
776
-
777
- except Exception as e:
778
- print(f"⚠️ Fallback: Failed to parse Groq format: {e}")
779
  tool_name = None
780
-
781
- # STRATEGY 2: Try original <function(tool_name)>...{json_string}... format
782
  if not tool_name:
783
- func_match = re.search(
784
- r"<function[(=]\s*([^)]+)\s*[)>](.*)",
785
- content,
786
- re.DOTALL | re.IGNORECASE
787
- )
788
-
789
  if func_match:
790
  try:
791
  tool_name = func_match.group(1).strip().replace("'", "").replace('"', '')
792
- remaining_content = func_match.group(2)
793
-
794
- json_start_index = remaining_content.find('{')
795
- if json_start_index != -1:
796
- json_str = remaining_content[json_start_index:]
797
- cleaned_str = json_str.strip()
798
- cleaned_str = ''.join(c for c in cleaned_str if c.isprintable() or c in '\n\r\t')
799
- cleaned_str = cleaned_str.strip().rstrip(',')
800
-
801
- tool_input = json.loads(cleaned_str)
802
- print(f"🔧 Fallback: Parsed standard format for '{tool_name}'")
803
- else:
804
- print(f"⚠️ Fallback: Found <function> but no JSON blob.")
805
- tool_name = None
806
-
807
- except json.JSONDecodeError as e:
808
- print(f"⚠️ Fallback: json.loads failed, trying ast.literal_eval.")
809
- try:
810
- if cleaned_str:
811
- potential_input = ast.literal_eval(cleaned_str)
812
- if isinstance(potential_input, dict):
813
- tool_input = potential_input
814
- print(f"🔧 Fallback: Parsed with ast.literal_eval for '{tool_name}'")
815
- else:
816
- tool_name = None
817
- else:
818
- tool_name = None
819
- except:
820
- tool_name = None
821
 
822
- # STRATEGY 3: Look for simple tool mentions and create default calls
823
- if not tool_name and content:
824
- # Look for tool name mentions
825
  for tool in tools:
826
- if tool.name in content.lower():
827
  tool_name = tool.name
828
- # Create minimal valid input
829
  tool_input = {}
 
 
830
  if tool.args_schema:
831
  schema = tool.args_schema.model_json_schema()
832
- for prop, details in schema.get('properties', {}).items():
833
  if prop in schema.get('required', []):
834
- # Extract value from content if possible
835
- tool_input[prop] = "summarized_input"
836
- print(f"🔧 Fallback: Created default call for mentioned tool '{tool_name}'")
 
837
  break
838
 
839
- # FINAL VALIDATION
 
 
 
 
 
 
 
 
840
  if tool_name and tool_input is not None:
841
- if any(t.name == tool_name for t in tools):
842
- tool_call = ToolCall(
843
- name=tool_name,
844
- args=tool_input,
845
- id=str(uuid.uuid4())
846
- )
847
- print(f"✅ Successfully created tool call: {tool_name}")
848
- return [tool_call]
849
  else:
850
- print(f"❌ Tool '{tool_name}' not found in available tools")
851
 
852
- print("❌ Failed to parse any valid tool call from content")
853
  return []
854
 
855
 
856
  # =============================================================================
857
- # CONDITIONAL EDGE FUNCTION
858
  # =============================================================================
859
  def should_continue(state: AgentState):
860
- """Decide whether to continue, call tools, or end."""
861
- last_message = state['messages'][-1]
 
 
 
 
862
  current_turn = state.get('turn', 0)
863
-
864
- # Check for final_answer_tool
865
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
866
- for tool_call in last_message.tool_calls:
867
- if tool_call.get("name") == "final_answer_tool":
868
- print("--- Condition: final_answer_tool called, ending. ---")
869
- return END
870
-
871
- # Check turn limit
872
  if current_turn >= MAX_TURNS:
873
- print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
874
  return END
875
-
876
- # Route to tools if tool calls exist
 
 
 
 
 
877
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
878
- print("--- Condition: Tools called, routing to tools node. ---")
 
 
 
 
 
 
 
 
 
 
 
879
  return "tools"
880
 
881
- # Loop prevention
882
- if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
883
- print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
884
- return END
885
-
886
- # Loop back to agent
887
- print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
 
 
 
 
 
888
  return "agent"
889
 
890
 
891
  # =============================================================================
892
- # ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
893
  # =============================================================================
894
  class PlanningReflectionAgent:
895
  def __init__(self):
@@ -897,148 +781,100 @@ class PlanningReflectionAgent:
897
 
898
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
899
  if not GROQ_API_KEY:
900
- raise ValueError("GROQ_API_KEY environment variable is not set!")
901
 
902
  self.tools = defined_tools
903
 
904
- # Initialize RAG Components
905
  if not initialize_rag_components():
906
- print("⚠️ Warning: RAG components failed to initialize.")
907
 
908
  # Build tool descriptions
909
  tool_desc_list = []
910
  for tool in self.tools:
911
  if tool.args_schema:
912
  schema = tool.args_schema.model_json_schema()
913
- args_desc = []
914
- for prop, details in schema.get('properties', {}).items():
915
- desc = details.get('description', '')
916
- args_desc.append(f" - {prop}: {desc}")
917
- args_str = "\n".join(args_desc)
918
- desc = f"- {tool.name}:\n {tool.description}\n Args:\n{args_str}"
919
  else:
920
  desc = f"- {tool.name}: {tool.description}"
921
  tool_desc_list.append(desc)
922
  tool_descriptions = "\n".join(tool_desc_list)
923
 
924
- # Enhanced System Prompt with Planning & Reflection
925
- self.system_prompt = f"""You are an elite AI agent designed for the GAIA benchmark - the most challenging question-answering tasks.
926
-
927
- 🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
928
 
929
  ═══════════════════════════════════════════════════════════════
930
- 📋 QUESTION TYPES & STRATEGIES:
931
  ═══════════════════════════════════════════════════════════════
932
 
933
- **TYPE 1: LOGIC PUZZLES / RIDDLES** (No tools needed)
934
- - Riddles, brain teasers, logical reasoning problems
935
- - Strategy: Think through the logic, use calculator for any math
936
- - Example: "If all but 30 of 200 coins are face-up, make equal face-down piles"
937
- This is pure logic. Think it through, then use final_answer_tool
938
-
939
- **TYPE 2: FACTUAL QUESTIONS** (Need web search)
940
- - Who, what, when, where questions about real world
941
- - Strategy: search_tool → scrape_and_retrieve if needed
942
- - Example: "What was Einstein's birthplace population in 1900?"
943
-
944
- **TYPE 3: DATA ANALYSIS** (Need files + code)
945
- - Questions about CSV, Excel, or other data files
946
- - Strategy: list_directory → read_file → code_interpreter
947
- - Example: "What's the average of column X in data.csv?"
948
-
949
- **TYPE 4: CALCULATIONS** (Need calculator/code)
950
- - Math problems, computations
951
- - Strategy: calculator for simple math, code_interpreter for complex
952
- - Example: "What is 127 × 83 + sqrt(144)?"
953
 
954
  ═══════════════════════════════════════════════════════════════
955
- 📋 MANDATORY PROTOCOL:
956
  ════════��══════════════════════════════════════════════════════
957
 
958
- **PHASE 1: IDENTIFY QUESTION TYPE**
959
- ├─ Is this a logic puzzle? → Think through it, use calculator if needed
960
- ├─ Need real-world facts?Use search/scrape tools
961
- ├─ Need to analyze files? Use file/code tools
962
- └─ Just math? Use calculator
963
-
964
- **PHASE 2: FOR TOOL-BASED QUESTIONS**
965
- ├─ 1. Call create_plan() for multi-step questions
966
- ├─ 2. Execute ONE step at a time
967
- ├─ 3. After EACH tool, evaluate the result
968
- └─ 4. Ask: "Do I have enough to answer now?"
969
-
970
- **PHASE 3: FOR LOGIC PUZZLES**
971
- ├─ 1. Think through the logic step-by-step
972
- ├─ 2. Use calculator ONLY if there's arithmetic
973
- ├─ 3. Once you've solved it, call validate_answer()
974
- └─ 4. Then call final_answer_tool()
975
-
976
- **PHASE 4: REFLECTION (If stuck)**
977
- ├─ If no progress after 3-5 turns → call reflect_on_progress()
978
- ├─ If tools keep failing → try different approach
979
- └─ If going in circles → step back and reconsider
980
-
981
- **PHASE 5: VALIDATION & SUBMISSION**
982
- ├─ 1. When you have the answer → call validate_answer()
983
- ├─ 2. If validation passes → call final_answer_tool()
984
- └─ 3. If validation fails → fix the issue first
985
 
986
- ═══════════════════════════════════════════════════════════════
987
- 🎓 EXAMPLES - LEARN FROM THESE:
988
- ═══════════════════════════════════════════════════════════════
989
 
990
- **Example 1: Logic Puzzle (NO TOOLS EXCEPT CALCULATOR/FINAL)**
991
- Q: If you have 200 coins with 30 face-down, and divide into 2 piles with equal face-down...
992
- Turn 1: Think through: If I take 30 coins and flip them all, one pile has X face-down...
993
- Turn 2: calculator("30") → 30
994
- Turn 3: validate_answer("30", original_q) → ✅ Pass
995
- Turn 4: final_answer_tool("30")
996
-
997
- **Example 2: Simple Math**
998
- Q: What is 127 × 83?
999
- Turn 1: calculator("127 * 83") → 10541
1000
- Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
1001
- Turn 3: final_answer_tool("10541")
1002
-
1003
- **Example 3: Multi-step Research**
1004
- Q: What was the population of Einstein's birthplace in 1900?
1005
- Turn 1: create_plan("Brief: Einstein birthplace pop 1900")
1006
- Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
1007
- Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
1008
- Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
1009
- Turn 5: final_answer_tool("50000")
1010
-
1011
- **Example 4: File + Calculation**
1012
- Q: What's the average of the 'score' column in data.csv?
1013
- Turn 1: list_directory(".") → [files shown]
1014
- Turn 2: read_file("data.csv") → [content]
1015
- Turn 3: code_interpreter("import pandas as pd; df = pd.read_csv('data.csv'); print(df['score'].mean())")
1016
- → 78.5
1017
- Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
1018
- Turn 5: final_answer_tool("78.5")
1019
-
1020
- **Example 5: Getting Unstuck**
1021
- Q: What's the GDP of the 2016 Olympics host?
1022
- Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
1023
- Turn 2: search_tool("Olympics 2016 location") → [still unclear]
1024
- Turn 3: reflect_on_progress("Searching but not getting host country")
1025
- → Try: "2016 Summer Olympics host country"
1026
- Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
1027
- Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
1028
- Turn 6: validate_answer("1.796 trillion", original_q) → ✅ Pass
1029
- Turn 7: final_answer_tool("1.796 trillion")
1030
 
1031
  ═══════════════════════════════════════════════════════════════
1032
- ⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
1033
  ═══════════════════════════════════════════════════════════════
1034
 
1035
- 1. **IDENTIFY QUESTION TYPE FIRST**: Logic puzzle vs. factual vs. data vs. math
1036
- 2. **LOGIC PUZZLES**: Don't use search/file tools. Just think + validate + final_answer
1037
- 3. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn
1038
- 4. **EXACT FORMAT**: Answer must be EXACTLY what was asked for
1039
- 5. **NO FLUFF**: Never add "The answer is" or explanations in final answer
1040
- 6. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
1041
- 7. **DON'T LOOP**: If 2 consecutive turns produce no tool calls, you're stuck - call a tool!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1042
 
1043
  ═══════════════════════════════════════════════════════════════
1044
  📚 AVAILABLE TOOLS:
@@ -1047,29 +883,36 @@ Turn 7: final_answer_tool("1.796 trillion")
1047
  {tool_descriptions}
1048
 
1049
  ═══════════════════════════════════════════════════════════════
1050
- 🎯 REMEMBER:
1051
- - Logic puzzles: Think → Calculator (if needed) → Validate → Final Answer
1052
- - Factual questions: Plan → Search → Validate → Final Answer
1053
- - Always call a tool - never just output reasoning text!
 
 
 
 
 
 
1054
  ═══════════════════════════════════════════════════════════════
1055
  """
1056
 
1057
  print("Initializing Groq LLM...")
1058
  try:
 
1059
  self.llm_with_tools = ChatGroq(
1060
  temperature=0,
1061
  groq_api_key=GROQ_API_KEY,
1062
  model_name="llama-3.3-70b-versatile",
1063
  max_tokens=4096,
1064
  timeout=60
1065
- ).bind_tools(self.tools, tool_choice="auto")
1066
- print("✅ LLM initialized.")
1067
 
1068
  except Exception as e:
1069
  print(f"❌ Error initializing Groq: {e}")
1070
  raise
1071
 
1072
- # Agent Node with Enhanced Logic
1073
  def agent_node(state: AgentState):
1074
  current_turn = state.get('turn', 0) + 1
1075
  print(f"\n{'='*70}")
@@ -1078,46 +921,57 @@ Turn 7: final_answer_tool("1.796 trillion")
1078
 
1079
  if current_turn > MAX_TURNS:
1080
  return {
1081
- "messages": [SystemMessage(content="Max turns reached. Submitting best available answer.")],
1082
  "turn": current_turn
1083
  }
1084
 
1085
- # Check if we should auto-trigger reflection
1086
- should_reflect = False
1087
  consecutive_errors = state.get('consecutive_errors', 0)
 
1088
 
1089
- if current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0:
1090
- should_reflect = True
1091
- print("🤔 Auto-triggering reflection (periodic check)")
1092
 
1093
- if consecutive_errors >= 3:
1094
- should_reflect = True
1095
- print("🤔 Auto-triggering reflection (multiple errors)")
 
 
 
 
 
 
1096
 
1097
  # Add reflection hint if needed
1098
- messages_to_send = state["messages"].copy()
1099
- if should_reflect and not state.get('has_plan', False):
1100
  hint = SystemMessage(
1101
- content="⚠️ SYSTEM HINT: You've been working for several turns. Consider calling reflect_on_progress() to evaluate your approach."
1102
  )
1103
  messages_to_send.append(hint)
 
1104
 
1105
- # Invoke LLM with better error handling
1106
  max_retries = 3
1107
  ai_message = None
 
1108
  for attempt in range(max_retries):
1109
  try:
1110
  ai_message = self.llm_with_tools.invoke(messages_to_send)
1111
- break
 
 
 
 
 
 
 
1112
  except Exception as e:
1113
  error_str = str(e)
1114
  print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {error_str[:200]}")
1115
 
1116
- # If it's a tool_use_failed error, try without forcing tools
1117
  if "tool_use_failed" in error_str and attempt < max_retries - 1:
1118
- print("🔧 Retrying without strict tool enforcement...")
1119
  try:
1120
- # Try with a simpler LLM call
1121
  simple_llm = ChatGroq(
1122
  temperature=0,
1123
  groq_api_key=os.getenv("GROQ_API_KEY"),
@@ -1125,32 +979,56 @@ Turn 7: final_answer_tool("1.796 trillion")
1125
  max_tokens=4096,
1126
  timeout=60
1127
  )
1128
- ai_message = simple_llm.invoke(messages_to_send)
1129
- # Manually parse for tool calls
1130
- if ai_message.content:
1131
- parsed_calls = parse_tool_call_from_string(ai_message.content, self.tools)
1132
- if parsed_calls:
1133
- ai_message.tool_calls = parsed_calls
 
 
 
 
 
 
1134
  ai_message.content = ""
1135
- break
 
1136
  except Exception as e2:
1137
- print(f"⚠️ Simple LLM also failed: {e2}")
1138
 
1139
  if attempt == max_retries - 1:
1140
- # Last resort: return a message asking to proceed differently
 
1141
  ai_message = AIMessage(
1142
- content="I need to approach this differently. Let me try a more direct method."
 
 
 
 
 
1143
  )
1144
  else:
1145
  time.sleep(2 ** attempt)
1146
 
1147
- # Fallback Parsing
1148
- if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
1149
- parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
1150
- if parsed_tool_calls:
1151
- print("🔧 Fallback: Successfully rebuilt tool call")
1152
- ai_message.tool_calls = parsed_tool_calls
1153
- ai_message.content = ""
 
 
 
 
 
 
 
 
 
 
 
1154
 
1155
  # Track tool usage
1156
  tool_history = state.get('tool_history', [])
@@ -1164,34 +1042,45 @@ Turn 7: final_answer_tool("1.796 trillion")
1164
  if tool_name == "create_plan":
1165
  has_plan = True
1166
  else:
1167
- print(f"💭 Reasoning: {ai_message.content[:200]}...")
 
1168
 
1169
  return {
1170
  "messages": [ai_message],
1171
  "turn": current_turn,
1172
  "has_plan": has_plan,
1173
- "tool_history": tool_history
 
1174
  }
1175
 
1176
- # Tool Node with Error Tracking
1177
  def tool_node_wrapper(state: AgentState):
1178
- """Wraps tool execution to track errors"""
1179
- tool_node = ToolNode(self.tools)
1180
- result = tool_node(state)
1181
 
1182
- # Check if last message is a tool error
1183
- if result['messages']:
 
 
 
 
 
 
 
 
1184
  last_msg = result['messages'][-1]
1185
- if isinstance(last_msg, ToolMessage) and "Error" in last_msg.content:
1186
- consecutive_errors = state.get('consecutive_errors', 0) + 1
1187
- result['consecutive_errors'] = consecutive_errors
1188
- else:
1189
- result['consecutive_errors'] = 0
 
1190
 
 
1191
  return result
1192
 
1193
- # Build Graph
1194
- print("Building Planning & Reflection Agent graph...")
1195
  graph_builder = StateGraph(AgentState)
1196
 
1197
  graph_builder.add_node("agent", agent_node)
@@ -1212,10 +1101,10 @@ Turn 7: final_answer_tool("1.796 trillion")
1212
  graph_builder.add_edge("tools", "agent")
1213
 
1214
  self.graph = graph_builder.compile()
1215
- print("✅ Planning & Reflection Agent graph compiled successfully.")
1216
-
1217
 
1218
  def __call__(self, question: str) -> str:
 
1219
  print(f"\n{'='*70}")
1220
  print(f"🎯 NEW QUESTION")
1221
  print(f"{'='*70}")
@@ -1230,99 +1119,129 @@ Turn 7: final_answer_tool("1.796 trillion")
1230
  "turn": 0,
1231
  "has_plan": False,
1232
  "consecutive_errors": 0,
1233
- "tool_history": []
 
1234
  }
1235
 
1236
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
 
 
1237
  try:
1238
  config = {"recursion_limit": MAX_TURNS + 10}
 
1239
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
1240
-
1241
  if not event.get('messages'):
1242
  continue
1243
 
1244
- last_message = event["messages"][-1]
 
1245
 
1246
- # Check for final answer extraction
1247
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
1248
- if last_message.tool_calls[0].get("name") == "final_answer_tool":
1249
- final_answer_args = last_message.tool_calls[0].get('args', {})
1250
- if 'answer' in final_answer_args:
1251
- final_answer = final_answer_args['answer']
1252
- print(f"\n{'='*70}")
1253
- print(f"✅ FINAL ANSWER CAPTURED: '{final_answer}'")
1254
- print(f"{'='*70}\n")
1255
- break
1256
- else:
1257
- print(f"⚠️ final_answer_tool called without 'answer' argument")
1258
- final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
1259
- break
1260
 
1261
  elif isinstance(last_message, ToolMessage):
1262
- result_preview = last_message.content[:300].replace('\n', ' ')
1263
- print(f"📊 Tool Result: {result_preview}...")
 
1264
  elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
1265
- print(f"💭 AI Reasoning: {last_message.content[:300]}...")
1266
-
1267
- # Final Answer Cleaning
1268
- cleaned_answer = str(final_answer).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1269
 
1270
- # Remove common prefixes
1271
- prefixes_to_remove = [
1272
- "The answer is:", "Here is the answer:", "Based on the information:",
1273
- "Final Answer:", "Answer:", "The final answer is:", "My answer is:",
1274
- "According to", "I found that", "The result is:"
 
1275
  ]
1276
- for prefix in prefixes_to_remove:
1277
- if cleaned_answer.lower().startswith(prefix.lower()):
1278
- potential_answer = cleaned_answer[len(prefix):].strip()
1279
- if potential_answer:
1280
- cleaned_answer = potential_answer
1281
  break
1282
 
1283
- # Remove code fences
1284
- cleaned_answer = remove_fences_simple(cleaned_answer)
1285
 
1286
- # Remove surrounding backticks
1287
- while cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
1288
- cleaned_answer = cleaned_answer[1:-1].strip()
1289
 
1290
- # Remove quotes if they wrap the entire answer
1291
- if (cleaned_answer.startswith('"') and cleaned_answer.endswith('"')) or \
1292
- (cleaned_answer.startswith("'") and cleaned_answer.endswith("'")):
1293
- cleaned_answer = cleaned_answer[1:-1].strip()
1294
 
1295
- # Remove trailing periods for non-sentence answers
1296
- if cleaned_answer.endswith('.') and len(cleaned_answer.split()) < 10:
1297
- cleaned_answer = cleaned_answer[:-1]
1298
 
1299
  print(f"\n{'='*70}")
1300
- print(f"🎉 FINAL CLEANED ANSWER")
1301
  print(f"{'='*70}")
1302
- print(f"{cleaned_answer}")
1303
  print(f"{'='*70}\n")
1304
 
1305
- return cleaned_answer
1306
 
1307
  except Exception as e:
1308
- print(f"❌ Error running agent graph: {e}")
1309
- tb_str = traceback.format_exc()
1310
- print(tb_str)
1311
- return f"AGENT GRAPH ERROR: {e}"
1312
 
1313
 
1314
  # =============================================================================
1315
  # GLOBAL AGENT INSTANTIATION
1316
  # =============================================================================
 
 
1317
  try:
1318
  initialize_rag_components()
1319
 
1320
  agent = PlanningReflectionAgent()
1321
- print("✅ Global PlanningReflectionAgent instantiated successfully.")
 
 
 
 
 
 
 
 
1322
  if asr_pipeline is None:
1323
- print("⚠️ Global ASR Pipeline failed to load.")
 
1324
  except Exception as e:
1325
- print(f"❌ FATAL: Could not instantiate global agent: {e}")
1326
  traceback.print_exc()
1327
  agent = None
1328
 
 
41
  # CONFIGURATION
42
  # =============================================================================
43
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
44
+ MAX_TURNS = 25
45
  MAX_MESSAGE_LENGTH = 8000
46
  REFLECT_EVERY_N_TURNS = 5
47
 
 
143
  # =============================================================================
144
 
145
  class ThinkInput(BaseModel):
146
+ reasoning: str = Field(description="Brief reasoning summary (under 150 chars)")
147
 
148
  @tool(args_schema=ThinkInput)
149
  def think_through_logic(reasoning: str) -> str:
 
155
  - You need to reason through a logical problem
156
  - No external information is needed, just thinking
157
 
158
+ After thinking, use calculator if math is involved, then validate and submit answer.
 
 
 
159
  """
160
+ print(f"🧠 Thinking: {reasoning[:100]}...")
161
 
162
+ return f"""✅ Logic reasoning recorded.
163
+
164
+ Next steps:
165
+ 1. If math needed → use calculator()
166
+ 2. Once you have answer → use validate_answer()
167
+ 3. Then → use final_answer_tool()
168
 
169
+ Remember: You MUST call another tool. Do not output reasoning text."""
 
 
 
170
 
171
 
172
  class PlanInput(BaseModel):
173
+ task_summary: str = Field(description="Very brief task summary (under 80 chars)")
174
 
175
  @tool(args_schema=PlanInput)
176
+ def create_plan(task_summary: str) -> str:
177
  """
178
+ Creates a plan for multi-step questions. Use for complex tasks only.
179
+ Keep the summary VERY brief to avoid errors.
 
 
 
 
 
 
 
 
 
180
  """
181
+ print(f"📋 Planning: {task_summary[:80]}...")
182
 
183
+ return f"""✅ Plan created for: {task_summary}
184
 
185
+ FRAMEWORK:
186
+ 1. What info do I need?
187
+ 2. What tools will I use?
188
+ 3. In what order?
 
189
 
190
+ Now execute step 1. You MUST call a tool next."""
191
 
192
 
193
  class ReflectInput(BaseModel):
194
+ situation: str = Field(description="Brief situation summary (under 80 chars)")
195
 
196
  @tool(args_schema=ReflectInput)
197
+ def reflect_on_progress(situation: str) -> str:
198
  """
199
+ Reflects on progress when stuck. Use after 5+ turns without progress.
200
+ Keep situation summary VERY brief.
 
 
 
 
 
 
 
 
 
201
  """
202
+ print(f"🤔 Reflecting: {situation[:80]}...")
203
 
204
+ return f"""🔍 REFLECTION on: {situation}
 
 
205
 
206
+ QUESTIONS:
207
+ 1. Am I using the right approach?
208
+ 2. Should I try a different tool?
209
+ 3. Do I actually have the answer already?
 
 
210
 
211
+ Take a DIFFERENT approach now. You MUST call a tool next."""
 
 
 
 
 
 
212
 
213
 
214
  class ValidateInput(BaseModel):
215
+ proposed_answer: str = Field(description="The answer to validate")
216
+ original_question: str = Field(description="Original question (first 100 chars)")
217
 
218
  @tool(args_schema=ValidateInput)
219
  def validate_answer(proposed_answer: str, original_question: str) -> str:
220
  """
221
+ Validates answer format before submission. ALWAYS use before final_answer_tool.
 
 
 
 
 
 
 
 
 
222
  """
223
+ print(f"✓ Validating: '{proposed_answer[:50]}...'")
224
 
225
  issues = []
226
  warnings = []
227
 
228
  # Check for conversational fluff
229
+ fluff = ["the answer is", "based on", "according to", "i found", "here is"]
230
+ if any(p in proposed_answer.lower() for p in fluff):
231
+ issues.append("❌ Remove conversational text. Answer only.")
232
+
233
+ # Check for code fences
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  if "```" in proposed_answer:
235
+ issues.append("❌ Remove code fences (```).")
236
 
237
  # Check length
238
  if len(proposed_answer) > 500:
239
+ warnings.append("⚠️ Answer very long. Just the answer?")
240
+
241
+ # Check for number questions
242
+ if any(k in original_question.lower() for k in ["how many", "what number", "count"]):
243
+ if not any(c.isdigit() for c in proposed_answer):
244
+ warnings.append("⚠️ Question asks for number but answer has no digits.")
245
 
246
  if issues:
247
+ return "🚫 VALIDATION FAILED:\n" + "\n".join(issues) + "\n\nFix then retry."
248
 
249
  if warnings:
250
+ return "⚠️ WARNINGS:\n" + "\n".join(warnings) + "\n\nConsider fixing, or proceed if confident."
251
 
252
+ return "✅ VALIDATION PASSED! Now call final_answer_tool() with this answer."
253
 
254
 
255
  # =============================================================================
 
257
  # =============================================================================
258
 
259
  class SearchInput(BaseModel):
260
+ query: str = Field(description="Search query (concise)")
261
 
262
  @tool(args_schema=SearchInput)
263
  def search_tool(query: str) -> str:
264
+ """Searches web via DuckDuckGo. Use for facts, recent info."""
 
 
 
 
 
 
 
 
265
  if not isinstance(query, str) or not query.strip():
266
+ return "Error: Invalid query."
267
 
268
  print(f"🔍 Searching: {query}")
269
  try:
270
  search = DuckDuckGoSearchRun()
271
  result = search.run(query)
272
+ return truncate_if_needed(result)
 
 
273
  except Exception as e:
274
+ return f"Search error: {str(e)}"
275
 
276
 
277
  class CalcInput(BaseModel):
278
+ expression: str = Field(description="Math expression (e.g., '2+2', 'sqrt(16)')")
279
 
280
  @tool(args_schema=CalcInput)
281
  def calculator(expression: str) -> str:
282
  """
283
+ Evaluates math expressions. Use for ANY calculations.
284
+ Supports: +, -, *, /, **, sqrt, sin, cos, log, pi, e, etc.
 
 
 
 
 
 
 
285
  """
286
  if not isinstance(expression, str) or not expression.strip():
287
  return "Error: Invalid expression."
 
289
  print(f"🧮 Calculating: {expression}")
290
 
291
  try:
 
292
  import math
293
  safe_dict = {
294
  'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
 
298
  }
299
 
300
  result = eval(expression, {"__builtins__": {}}, safe_dict)
301
+ return str(result)
302
  except Exception as e:
303
+ return f"Calculation error for '{expression}': {str(e)}"
304
 
305
 
306
  class CodeInput(BaseModel):
307
+ code: str = Field(description="Python code (MUST include print() for output)")
308
 
309
  @tool(args_schema=CodeInput)
310
  def code_interpreter(code: str) -> str:
311
  """
312
+ Executes Python code. Use for data processing, complex logic.
313
+ Available: pandas, numpy, json, re, datetime
 
 
 
 
 
 
 
 
 
 
 
 
314
  CRITICAL: Always use print() to output results!
315
  """
316
  if not isinstance(code, str):
317
+ return "Error: code must be string."
318
 
319
  # Safety checks
320
+ dangerous = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system', 'exec(']
321
+ if any(d in code.lower() for d in dangerous):
322
+ return f"Error: Dangerous operation not allowed."
 
 
323
 
324
+ if 'open(' in code.lower() and any(m in code for m in ["'w'", '"w"', "'a'", '"a"']):
325
+ return "Error: File writing not allowed. Use write_file tool."
326
 
327
+ print(f"💻 Executing code ({len(code)} chars)...")
328
  output_stream = io.StringIO()
329
  error_stream = io.StringIO()
330
 
 
343
  stderr = error_stream.getvalue()
344
 
345
  if stderr:
346
+ return f"Error:\n{stderr}\n\nStdout:\n{stdout}"
347
 
348
  if stdout:
349
+ return truncate_if_needed(stdout)
 
 
350
 
351
+ return "Code executed but no output. Remember to use print()!"
352
 
353
  except Exception as e:
354
+ return f"Execution failed:\n{traceback.format_exc()}"
 
355
 
356
 
357
  class ReadFileInput(BaseModel):
358
+ path: str = Field(description="File path")
359
 
360
  @tool(args_schema=ReadFileInput)
361
  def read_file(path: str) -> str:
362
+ """Reads file content."""
363
  if not isinstance(path, str) or not path.strip():
364
+ return "Error: Invalid path."
365
 
366
+ print(f"📄 Reading: {path}")
367
 
368
  file_path = find_file(path)
369
  if not file_path:
370
+ return f"Error: File not found: '{path}'\nCWD files: {os.listdir('.')}"
 
 
371
 
372
  try:
373
  content = file_path.read_text(encoding='utf-8')
374
  return truncate_if_needed(content)
375
  except UnicodeDecodeError:
376
+ return f"Error: Binary file. Size: {file_path.stat().st_size} bytes. Try audio_transcription_tool for audio."
 
 
 
 
377
  except Exception as e:
378
+ return f"Read error: {str(e)}"
379
 
380
 
381
  class WriteFileInput(BaseModel):
382
+ path: str = Field(description="File path")
383
+ content: str = Field(description="Content to write")
384
 
385
  @tool(args_schema=WriteFileInput)
386
  def write_file(path: str, content: str) -> str:
387
+ """Writes content to file."""
388
+ if not path or not isinstance(content, str):
389
+ return "Error: Invalid inputs."
 
 
390
 
391
+ print(f"✍️ Writing: {path}")
392
 
393
  try:
394
  file_path = Path.cwd() / path
395
  file_path.parent.mkdir(parents=True, exist_ok=True)
396
  file_path.write_text(content, encoding='utf-8')
397
+ return f"Wrote {len(content)} chars to '{path}'."
398
  except Exception as e:
399
+ return f"Write error: {str(e)}"
400
 
401
 
402
  class ListDirInput(BaseModel):
403
+ path: str = Field(description="Directory path", default=".")
 
404
 
405
  @tool(args_schema=ListDirInput)
406
  def list_directory(path: str = ".") -> str:
407
+ """Lists directory contents."""
408
+ print(f"📁 Listing: {path}")
409
 
410
  try:
411
  dir_path = Path.cwd() / path if path != "." else Path.cwd()
412
 
413
  if not dir_path.is_dir():
414
+ return f"Error: '{path}' not a directory."
415
 
416
  items = sorted(dir_path.iterdir())
417
 
418
  if not items:
419
  return f"Directory '{path}' is empty."
420
 
421
+ files, dirs = [], []
422
 
423
  for item in items:
424
  if item.is_dir():
425
+ dirs.append(f"📁 {item.name}/")
426
  else:
427
+ files.append(f"📄 {item.name} ({item.stat().st_size} bytes)")
 
428
 
429
  result = f"Contents of '{path}':\n\n"
430
+ if dirs:
431
+ result += "Directories:\n" + "\n".join(dirs) + "\n\n"
432
  if files:
433
  result += "Files:\n" + "\n".join(files)
434
 
435
  return result
436
  except Exception as e:
437
+ return f"List error: {str(e)}"
438
 
439
 
440
  class AudioInput(BaseModel):
441
+ file_path: str = Field(description="Audio file path")
442
 
443
  @tool(args_schema=AudioInput)
444
  def audio_transcription_tool(file_path: str) -> str:
445
+ """Transcribes audio using Whisper."""
446
+ if not file_path:
447
+ return "Error: Invalid file path."
448
 
449
+ print(f"🎤 Transcribing: {file_path}")
450
 
451
  if asr_pipeline is None:
452
+ return "Error: ASR not available."
453
 
454
  audio_path = find_file(file_path)
455
  if not audio_path:
 
460
  result_text = transcription.get("text", "")
461
 
462
  if not result_text:
463
+ return "Error: Transcription empty."
464
 
465
  return f"Transcription:\n{truncate_if_needed(result_text)}"
466
  except Exception as e:
467
+ return f"Transcription error: {str(e)}"
468
 
469
 
470
  class YoutubeInput(BaseModel):
471
+ video_url: str = Field(description="YouTube URL")
472
 
473
  @tool(args_schema=YoutubeInput)
474
  def get_youtube_transcript(video_url: str) -> str:
475
+ """Fetches YouTube video transcript."""
476
+ if not video_url:
477
+ return "Error: Invalid URL."
478
 
479
+ print(f"📺 YouTube transcript: {video_url}")
480
 
481
  try:
482
  video_id = None
 
486
  video_id = video_url.split("youtu.be/")[1].split("?")[0]
487
 
488
  if not video_id:
489
+ return f"Error: Could not extract video ID."
490
 
491
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
492
 
493
  if not transcript_list:
494
+ return "Error: No transcript found."
495
 
496
  full_transcript = " ".join([item["text"] for item in transcript_list])
497
+ return f"Transcript:\n{truncate_if_needed(full_transcript)}"
498
  except Exception as e:
499
+ return f"Transcript error: {str(e)}"
500
 
501
 
502
  class ScrapeInput(BaseModel):
503
+ url: str = Field(description="URL (must start with http:// or https://)")
504
+ query: str = Field(description="What to find on the page")
505
 
506
  @tool(args_schema=ScrapeInput)
507
  def scrape_and_retrieve(url: str, query: str) -> str:
508
  """
509
+ Scrapes webpage and uses RAG to find relevant info.
510
+ Use when you need specific info from a known URL.
 
 
 
 
511
  """
512
+ if not url.startswith(('http://', 'https://')):
513
+ return f"Error: Invalid URL format."
514
+ if not query:
515
+ return "Error: Query required."
516
 
517
  if global_embeddings is None or global_text_splitter is None:
518
  if not initialize_rag_components():
519
+ return "Error: RAG not initialized."
520
 
521
+ print(f"🌐 Scraping: {url}")
522
 
523
  try:
524
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
 
 
525
  response = requests.get(url, headers=headers, timeout=20)
526
  response.raise_for_status()
527
 
528
  soup = BeautifulSoup(response.text, 'html.parser')
529
 
530
+ for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe"]):
531
  tag.extract()
532
 
533
+ main = soup.find('main') or soup.find('article') or soup.body
534
 
535
+ if not main:
536
+ return "Error: No main content found."
537
 
538
+ text = main.get_text(separator='\n', strip=True)
539
+ lines = [l.strip() for l in text.splitlines() if l.strip()]
540
+ text = '\n'.join(lines)
541
 
542
+ if len(text) < 50:
543
+ return f"Error: Content too short ({len(text)} chars)."
544
 
545
  chunks = global_text_splitter.split_text(text)
546
 
547
  if not chunks:
548
+ return "Error: Could not chunk text."
549
 
550
+ docs = [Document(page_content=c, metadata={"source": url}) for c in chunks]
551
 
552
  db = FAISS.from_documents(docs, global_embeddings)
 
553
  retriever = db.as_retriever(search_kwargs={"k": 5})
554
+ retrieved = retriever.invoke(query)
 
 
 
 
 
 
 
555
 
556
+ if not retrieved:
557
+ return f"No relevant info found for: '{query}'"
558
 
559
+ context = "\n\n---\n\n".join([f"[Chunk {i+1}]\n{d.page_content}" for i, d in enumerate(retrieved)])
560
 
561
+ return truncate_if_needed(f"From {url}:\n\n{context}")
562
 
563
  except requests.RequestException as e:
564
+ return f"Fetch error: {str(e)}"
565
  except Exception as e:
566
+ return f"Scrape error: {str(e)}\n{traceback.format_exc()}"
 
567
 
568
 
569
  class FinalAnswerInput(BaseModel):
570
+ answer: str = Field(description="Final answer - EXACTLY what was asked, nothing more")
571
 
572
  @tool(args_schema=FinalAnswerInput)
573
  def final_answer_tool(answer: str) -> str:
574
  """
575
+ Submit final answer. CRITICAL RULES:
576
+ 1. ALWAYS call validate_answer() first
577
+ 2. Answer must be EXACTLY what was asked
578
+ 3. NO conversational text
 
 
579
  4. NO explanations
580
+ 5. Match requested format exactly
 
 
 
 
 
581
  """
582
  if not isinstance(answer, str):
583
+ answer = str(answer)
 
 
 
584
 
585
  print(f"✅ FINAL ANSWER SUBMITTED: {answer}")
586
  return answer
 
590
  # DEFINED TOOLS LIST
591
  # =============================================================================
592
  defined_tools = [
593
+ # Planning & Reflection
594
+ think_through_logic,
595
  create_plan,
596
  reflect_on_progress,
597
  validate_answer,
 
606
  write_file,
607
  list_directory,
608
 
609
+ # Specialized
610
  audio_transcription_tool,
611
  get_youtube_transcript,
612
  scrape_and_retrieve,
613
 
614
+ # Final
615
  final_answer_tool
616
  ]
617
 
618
 
 
619
  # =============================================================================
620
  # AGENT STATE
621
  # =============================================================================
 
625
  has_plan: bool
626
  consecutive_errors: int
627
  tool_history: List[str]
628
+ last_tool_was_thinking: bool
629
 
630
 
631
  # =============================================================================
632
+ # ENHANCED FALLBACK PARSER
633
  # =============================================================================
634
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
635
+ """Enhanced parser with multiple strategies."""
636
+ print(f"🔧 Fallback parsing (first 300 chars):\n{content[:300]}")
637
+
638
  tool_name = None
639
  tool_input = None
 
 
 
 
 
 
 
 
640
 
641
+ # STRATEGY 1: Groq's <function=name{...}> format
642
+ groq_match = re.search(r"<function=(\w+)\s*(\{.*?\})\s*(?:>|</function>)", content, re.DOTALL)
643
  if groq_match:
644
  try:
645
  tool_name = groq_match.group(1).strip()
646
  json_str = groq_match.group(2).strip()
 
 
647
  json_str = json_str.encode().decode('unicode_escape')
 
648
  tool_input = json.loads(json_str)
649
+ print(f" Parsed Groq format: {tool_name}")
650
+ except:
 
 
651
  tool_name = None
652
+
653
+ # STRATEGY 2: Standard <function(name)>{...} format
654
  if not tool_name:
655
+ func_match = re.search(r"<function[(=]\s*([^)]+)\s*[)>](.*)", content, re.DOTALL | re.IGNORECASE)
 
 
 
 
 
656
  if func_match:
657
  try:
658
  tool_name = func_match.group(1).strip().replace("'", "").replace('"', '')
659
+ remaining = func_match.group(2)
660
+ json_start = remaining.find('{')
661
+ if json_start != -1:
662
+ json_str = remaining[json_start:].strip().rstrip(',')
663
+ tool_input = json.loads(json_str)
664
+ print(f"✓ Parsed standard format: {tool_name}")
665
+ except:
666
+ tool_name = None
667
+
668
+ # STRATEGY 3: Tool mention with code block → wrap in code_interpreter
669
+ if not tool_name and "```python" in content:
670
+ try:
671
+ code_match = re.search(r"```python\n(.*?)```", content, re.DOTALL)
672
+ if code_match:
673
+ code = code_match.group(1).strip()
674
+ tool_name = "code_interpreter"
675
+ tool_input = {"code": code}
676
+ print(f"✓ Extracted Python code → code_interpreter")
677
+ except:
678
+ pass
 
 
 
 
 
 
 
 
 
679
 
680
+ # STRATEGY 4: Direct tool mention create minimal valid call
681
+ if not tool_name:
 
682
  for tool in tools:
683
+ if tool.name.lower() in content.lower():
684
  tool_name = tool.name
 
685
  tool_input = {}
686
+
687
+ # Try to extract arguments from content
688
  if tool.args_schema:
689
  schema = tool.args_schema.model_json_schema()
690
+ for prop in schema.get('properties', {}).keys():
691
  if prop in schema.get('required', []):
692
+ # Use placeholder
693
+ tool_input[prop] = "auto_extracted"
694
+
695
+ print(f"✓ Found mention of '{tool_name}' → creating default call")
696
  break
697
 
698
+ # STRATEGY 5: Emergency - if no tool detected, force a reasonable one
699
+ if not tool_name:
700
+ # If content looks like reasoning, use think_through_logic
701
+ if len(content) > 50 and not any(kw in content.lower() for kw in ["error", "failed", "invalid"]):
702
+ tool_name = "think_through_logic"
703
+ tool_input = {"reasoning": content[:150]}
704
+ print(f"⚠️ No tool detected → forcing think_through_logic")
705
+
706
+ # Validate and create tool call
707
  if tool_name and tool_input is not None:
708
+ matching_tools = [t for t in tools if t.name == tool_name]
709
+ if matching_tools:
710
+ return [ToolCall(name=tool_name, args=tool_input, id=str(uuid.uuid4()))]
 
 
 
 
 
711
  else:
712
+ print(f"❌ Tool '{tool_name}' not in available tools")
713
 
714
+ print("❌ All parsing strategies failed")
715
  return []
716
 
717
 
718
  # =============================================================================
719
+ # CONDITIONAL EDGE FUNCTION (FIXED)
720
  # =============================================================================
721
  def should_continue(state: AgentState):
722
+ """Decide next step with robust logic."""
723
+ messages = state.get('messages', [])
724
+ if not messages:
725
+ return "agent"
726
+
727
+ last_message = messages[-1]
728
  current_turn = state.get('turn', 0)
729
+
730
+ # Debug: Print what we're checking
731
+ msg_type = type(last_message).__name__
732
+ print(f"📍 Conditional check - Turn {current_turn}, Last msg type: {msg_type}")
733
+
734
+ # 1. Check turn limit
 
 
 
735
  if current_turn >= MAX_TURNS:
736
+ print(f"🛑 Max turns ({MAX_TURNS}) reached")
737
  return END
738
+
739
+ # 2. If last message is ToolMessage, agent needs to process it
740
+ if isinstance(last_message, ToolMessage):
741
+ print(f"📨 Tool result received from '{last_message.name}' → back to agent")
742
+ return "agent"
743
+
744
+ # 3. If last message is AIMessage with tool calls
745
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
746
+ # Check each tool call explicitly
747
+ for tc in last_message.tool_calls:
748
+ tool_name = tc.get("name", "")
749
+ print(f"🔧 Tool call detected: '{tool_name}'")
750
+
751
+ # ONLY end if it's explicitly final_answer_tool
752
+ if tool_name == "final_answer_tool":
753
+ print(f"✅ final_answer_tool confirmed → ending")
754
+ return END
755
+
756
+ # Not final answer, route to tools
757
+ print(f"🔄 Routing to tools node")
758
  return "tools"
759
 
760
+ # 4. If AIMessage but no tool calls (reasoning text)
761
+ if isinstance(last_message, AIMessage) and not last_message.tool_calls:
762
+ # Check for consecutive AI messages (loop)
763
+ if len(messages) >= 2 and isinstance(messages[-2], AIMessage) and not messages[-2].tool_calls:
764
+ print(f"⚠️ Loop detected: 2 consecutive AI messages without tools")
765
+ return END
766
+
767
+ print(f"💭 AI message without tool call → continuing to agent (will force tool)")
768
+ return "agent"
769
+
770
+ # 5. Default: continue to agent
771
+ print(f"🔄 Default → continuing to agent")
772
  return "agent"
773
 
774
 
775
  # =============================================================================
776
+ # ENHANCED AGENT CLASS
777
  # =============================================================================
778
  class PlanningReflectionAgent:
779
  def __init__(self):
 
781
 
782
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
783
  if not GROQ_API_KEY:
784
+ raise ValueError("GROQ_API_KEY not set!")
785
 
786
  self.tools = defined_tools
787
 
788
+ # Initialize RAG
789
  if not initialize_rag_components():
790
+ print("⚠️ RAG components failed to initialize.")
791
 
792
  # Build tool descriptions
793
  tool_desc_list = []
794
  for tool in self.tools:
795
  if tool.args_schema:
796
  schema = tool.args_schema.model_json_schema()
797
+ args_desc = [f" - {p}: {d.get('description', '')}"
798
+ for p, d in schema.get('properties', {}).items()]
799
+ desc = f"- {tool.name}:\n {tool.description}\n" + "\n".join(args_desc)
 
 
 
800
  else:
801
  desc = f"- {tool.name}: {tool.description}"
802
  tool_desc_list.append(desc)
803
  tool_descriptions = "\n".join(tool_desc_list)
804
 
805
+ # ULTRA-AGGRESSIVE SYSTEM PROMPT
806
+ self.system_prompt = f"""You are an elite AI agent for GAIA benchmark. Your ONLY job: provide the EXACT answer requested.
 
 
807
 
808
  ═══════════════════════════════════════════════════════════════
809
+ ⚠️ ABSOLUTE RULES - VIOLATE THESE AND YOU FAIL:
810
  ═══════════════════════════════════════════════════════════════
811
 
812
+ 1. **EVERY TURN MUST CALL EXACTLY ONE TOOL** - No exceptions
813
+ 2. **NEVER OUTPUT REASONING TEXT WITHOUT A TOOL CALL** - You will fail
814
+ 3. **IDENTIFY QUESTION TYPE FIRST** - Logic? Factual? Data? Math?
815
+ 4. **LOGIC PUZZLES**: think_through_logic → calculator (if needed) → validate → final_answer
816
+ 5. **FACTUAL QUESTIONS**: search_tool → validate → final_answer
817
+ 6. **DATA QUESTIONS**: read_file → code_interpreter → validate → final_answer
818
+ 7. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
819
+ 8. **FINAL ANSWER FORMAT**: EXACTLY what was asked. NO "The answer is..." or explanations
 
 
 
 
 
 
 
 
 
 
 
 
820
 
821
  ═══════════════════════════════════════════════════════════════
822
+ 📋 QUESTION TYPE GUIDE:
823
  ═══════════════════════════════════════════════════════════════
824
 
825
+ **RIDDLES/LOGIC PUZZLES** (No web search needed):
826
+ - Brain teasers, puzzles, logical deduction
827
+ - Strategy: think_through_logic → calculator (if math) → validate → final_answer
828
+ - Example: "If 200 coins, 30 face-down, divide into equal piles..."
829
+ Turn 1: think_through_logic("Adventurer takes 30 coins and flips them")
830
+ Turn 2: calculator("30") [if needed]
831
+ Turn 3: validate_answer("30", question)
832
+ Turn 4: final_answer_tool("30")
833
+
834
+ **FACTUAL/RESEARCH** (Need web):
835
+ - Who, what, when, where questions
836
+ - Strategy: search_tool → scrape_and_retrieve → validate → final_answer
837
+ - Example: "What was Einstein's birthplace population in 1900?"
838
+ Turn 1: search_tool("Albert Einstein birthplace")
839
+ Turn 2: search_tool("Ulm Germany population 1900")
840
+ Turn 3: validate_answer("50000", question)
841
+ Turn 4: final_answer_tool("50000")
 
 
 
 
 
 
 
 
 
 
842
 
843
+ **DATA ANALYSIS** (Need files):
844
+ - CSV/Excel questions
845
+ - Strategy: list_directory → read_file → code_interpreter → validate → final_answer
846
 
847
+ **SIMPLE MATH**:
848
+ - Calculations
849
+ - Strategy: calculator() → validate_answer() → final_answer_tool()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
 
851
  ═══════════════════════════════════════════════════════════════
852
+ 🎓 CRITICAL EXAMPLES:
853
  ═══════════════════════════════════════════════════════════════
854
 
855
+ Example 1: Logic Puzzle
856
+ Q: "Coin riddle with 200 coins, 30 face-down..."
857
+ ✅ CORRECT:
858
+ Turn 1: think_through_logic("Take 30 coins, flip all")
859
+ Turn 2: validate_answer("30", "coin riddle...")
860
+ Turn 3: final_answer_tool("30")
861
+
862
+ ❌ WRONG:
863
+ Turn 1: [reasoning text without tool] ← FAILS!
864
+
865
+ Example 2: Letter Bank Puzzle
866
+ Q: "Use letters to spell sentences, which letters need changing?"
867
+ ✅ CORRECT:
868
+ Turn 1: code_interpreter("code to count letters...")
869
+ Turn 2: validate_answer("A, B, C", question)
870
+ Turn 3: final_answer_tool("A, B, C")
871
+
872
+ Example 3: Math Problem
873
+ Q: "System of equations to solve..."
874
+ ✅ CORRECT:
875
+ Turn 1: code_interpreter("import numpy; solve equations...")
876
+ Turn 2: validate_answer("0, 1, 2", question)
877
+ Turn 3: final_answer_tool("0, 1, 2")
878
 
879
  ═══════════════════════════════════════════════════════════════
880
  📚 AVAILABLE TOOLS:
 
883
  {tool_descriptions}
884
 
885
  ═══════════════════════════════════════════════════════════════
886
+ EXECUTION RULES:
887
+ ═══════════════════════════════════════════════════════════════
888
+
889
+ - If you output text without a tool call, you have FAILED
890
+ - If you're unsure, use think_through_logic() to organize thoughts
891
+ - ALWAYS call a tool - preferably the right one for the question type
892
+ - After EVERY tool result, decide: "Do I have the answer? → validate → submit"
893
+ - If stuck after 3 turns: call reflect_on_progress()
894
+
895
+ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
896
  ═══════════════════════════════════════════════════════════════
897
  """
898
 
899
  print("Initializing Groq LLM...")
900
  try:
901
+ # Use tool_choice="any" to FORCE tool usage
902
  self.llm_with_tools = ChatGroq(
903
  temperature=0,
904
  groq_api_key=GROQ_API_KEY,
905
  model_name="llama-3.3-70b-versatile",
906
  max_tokens=4096,
907
  timeout=60
908
+ ).bind_tools(self.tools, tool_choice="any") # FORCE tool calls
909
+ print("✅ LLM initialized with FORCED tool usage.")
910
 
911
  except Exception as e:
912
  print(f"❌ Error initializing Groq: {e}")
913
  raise
914
 
915
+ # Agent Node with AGGRESSIVE tool forcing
916
  def agent_node(state: AgentState):
917
  current_turn = state.get('turn', 0) + 1
918
  print(f"\n{'='*70}")
 
921
 
922
  if current_turn > MAX_TURNS:
923
  return {
924
+ "messages": [SystemMessage(content="Max turns reached.")],
925
  "turn": current_turn
926
  }
927
 
928
+ # Check if we should force reflection
 
929
  consecutive_errors = state.get('consecutive_errors', 0)
930
+ should_reflect = (current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0) or consecutive_errors >= 3
931
 
932
+ messages_to_send = state["messages"].copy()
 
 
933
 
934
+ # Add tool-forcing message if last turn had no tool call
935
+ if len(messages_to_send) >= 2:
936
+ last_msg = messages_to_send[-1]
937
+ if isinstance(last_msg, AIMessage) and not last_msg.tool_calls:
938
+ force_msg = SystemMessage(
939
+ content="⚠️ CRITICAL: You MUST call a tool this turn. NO reasoning text. Pick the most appropriate tool and call it now."
940
+ )
941
+ messages_to_send.append(force_msg)
942
+ print("🚨 Injecting tool-forcing message")
943
 
944
  # Add reflection hint if needed
945
+ if should_reflect:
 
946
  hint = SystemMessage(
947
+ content="⚠️ HINT: Multiple turns without progress. Consider calling reflect_on_progress() or try a different approach."
948
  )
949
  messages_to_send.append(hint)
950
+ print("🤔 Injecting reflection hint")
951
 
952
+ # Invoke LLM with retries and fallback
953
  max_retries = 3
954
  ai_message = None
955
+
956
  for attempt in range(max_retries):
957
  try:
958
  ai_message = self.llm_with_tools.invoke(messages_to_send)
959
+
960
+ # If we got a valid response with tool calls, break
961
+ if ai_message.tool_calls:
962
+ break
963
+
964
+ # If no tool calls, this is a problem
965
+ print(f"⚠️ LLM returned no tool calls on attempt {attempt+1}")
966
+
967
  except Exception as e:
968
  error_str = str(e)
969
  print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {error_str[:200]}")
970
 
971
+ # If tool_use_failed, try without strict binding
972
  if "tool_use_failed" in error_str and attempt < max_retries - 1:
973
+ print("🔧 Trying without strict tool enforcement...")
974
  try:
 
975
  simple_llm = ChatGroq(
976
  temperature=0,
977
  groq_api_key=os.getenv("GROQ_API_KEY"),
 
979
  max_tokens=4096,
980
  timeout=60
981
  )
982
+
983
+ # Add explicit tool forcing to the message
984
+ force_tool_msg = SystemMessage(
985
+ content="You MUST call a tool. Respond with a tool call, not reasoning text."
986
+ )
987
+ ai_message = simple_llm.invoke(messages_to_send + [force_tool_msg])
988
+
989
+ # Try to parse tool calls from content
990
+ if ai_message.content and not ai_message.tool_calls:
991
+ parsed = parse_tool_call_from_string(ai_message.content, self.tools)
992
+ if parsed:
993
+ ai_message.tool_calls = parsed
994
  ai_message.content = ""
995
+ print("✓ Fallback parsing succeeded")
996
+ break
997
  except Exception as e2:
998
+ print(f"⚠️ Fallback also failed: {e2}")
999
 
1000
  if attempt == max_retries - 1:
1001
+ # Last resort: inject a default tool call
1002
+ print("🚨 All attempts failed - forcing think_through_logic")
1003
  ai_message = AIMessage(
1004
+ content="",
1005
+ tool_calls=[ToolCall(
1006
+ name="think_through_logic",
1007
+ args={"reasoning": "Processing question"},
1008
+ id=str(uuid.uuid4())
1009
+ )]
1010
  )
1011
  else:
1012
  time.sleep(2 ** attempt)
1013
 
1014
+ # If still no tool calls after all attempts, force one
1015
+ if not ai_message.tool_calls:
1016
+ if isinstance(ai_message.content, str) and ai_message.content.strip():
1017
+ # Try one more parse
1018
+ parsed = parse_tool_call_from_string(ai_message.content, self.tools)
1019
+ if parsed:
1020
+ ai_message.tool_calls = parsed
1021
+ ai_message.content = ""
1022
+ print("✓ Final parse succeeded")
1023
+ else:
1024
+ # Absolute last resort
1025
+ print("🚨 EMERGENCY: Forcing think_through_logic")
1026
+ ai_message.tool_calls = [ToolCall(
1027
+ name="think_through_logic",
1028
+ args={"reasoning": "analyzing question"},
1029
+ id=str(uuid.uuid4())
1030
+ )]
1031
+ ai_message.content = ""
1032
 
1033
  # Track tool usage
1034
  tool_history = state.get('tool_history', [])
 
1042
  if tool_name == "create_plan":
1043
  has_plan = True
1044
  else:
1045
+ print(f"⚠️ No tool call (this shouldn't happen!)")
1046
+ print(f"💭 Content: {ai_message.content[:200]}...")
1047
 
1048
  return {
1049
  "messages": [ai_message],
1050
  "turn": current_turn,
1051
  "has_plan": has_plan,
1052
+ "tool_history": tool_history,
1053
+ "last_tool_was_thinking": ai_message.tool_calls and ai_message.tool_calls[0]['name'] == 'think_through_logic'
1054
  }
1055
 
1056
+ # Tool Node with Error Tracking (FIXED)
1057
  def tool_node_wrapper(state: AgentState):
1058
+ """Executes tools and tracks errors."""
1059
+ print(f"🔧 Executing tools...")
 
1060
 
1061
+ # Create fresh ToolNode instance
1062
+ tool_executor = ToolNode(self.tools)
1063
+
1064
+ # Invoke properly
1065
+ result = tool_executor.invoke(state)
1066
+
1067
+ # Track errors
1068
+ consecutive_errors = state.get('consecutive_errors', 0)
1069
+
1070
+ if result.get('messages'):
1071
  last_msg = result['messages'][-1]
1072
+ if isinstance(last_msg, ToolMessage):
1073
+ if "Error" in last_msg.content or "error" in last_msg.content.lower():
1074
+ consecutive_errors += 1
1075
+ print(f"⚠️ Tool error detected (consecutive: {consecutive_errors})")
1076
+ else:
1077
+ consecutive_errors = 0
1078
 
1079
+ result['consecutive_errors'] = consecutive_errors
1080
  return result
1081
 
1082
+ # Build Graph
1083
+ print("Building graph...")
1084
  graph_builder = StateGraph(AgentState)
1085
 
1086
  graph_builder.add_node("agent", agent_node)
 
1101
  graph_builder.add_edge("tools", "agent")
1102
 
1103
  self.graph = graph_builder.compile()
1104
+ print("✅ Graph compiled successfully.")
 
1105
 
1106
  def __call__(self, question: str) -> str:
1107
+ """Execute agent on a question."""
1108
  print(f"\n{'='*70}")
1109
  print(f"🎯 NEW QUESTION")
1110
  print(f"{'='*70}")
 
1119
  "turn": 0,
1120
  "has_plan": False,
1121
  "consecutive_errors": 0,
1122
+ "tool_history": [],
1123
+ "last_tool_was_thinking": False
1124
  }
1125
 
1126
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
1127
+ all_messages = []
1128
+
1129
  try:
1130
  config = {"recursion_limit": MAX_TURNS + 10}
1131
+
1132
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
 
1133
  if not event.get('messages'):
1134
  continue
1135
 
1136
+ all_messages = event["messages"]
1137
+ last_message = all_messages[-1]
1138
 
1139
+ # Check for final answer
1140
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
1141
+ for tool_call in last_message.tool_calls:
1142
+ if tool_call.get("name") == "final_answer_tool":
1143
+ args = tool_call.get('args', {})
1144
+ if 'answer' in args:
1145
+ final_answer = args['answer']
1146
+ print(f"\n{'='*70}")
1147
+ print(f"✅ FINAL ANSWER: '{final_answer}'")
1148
+ print(f"{'='*70}\n")
1149
+ break
 
 
 
1150
 
1151
  elif isinstance(last_message, ToolMessage):
1152
+ preview = last_message.content[:200].replace('\n', ' ')
1153
+ print(f"📊 Tool '{last_message.name}' result: {preview}...")
1154
+
1155
  elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
1156
+ print(f"💭 AI: {last_message.content[:200]}...")
1157
+
1158
+ # If no final answer, try to extract from tool messages
1159
+ if final_answer == "AGENT FAILED TO PRODUCE ANSWER":
1160
+ print("⚠️ No final_answer_tool called. Checking tool results...")
1161
+
1162
+ for msg in reversed(all_messages):
1163
+ if isinstance(msg, ToolMessage):
1164
+ if msg.name in ["calculator", "think_through_logic", "code_interpreter"]:
1165
+ content = msg.content.strip()
1166
+ # Look for short, answer-like content
1167
+ if content and len(content) < 200 and not content.startswith("Error"):
1168
+ # Extract just the result part
1169
+ lines = content.split('\n')
1170
+ for line in reversed(lines):
1171
+ if line.strip() and not line.startswith(('✅', '⚠️', 'Next', 'Remember')):
1172
+ final_answer = line.strip()
1173
+ print(f"📝 Extracted from {msg.name}: '{final_answer}'")
1174
+ break
1175
+ break
1176
+
1177
+ # Clean the answer
1178
+ cleaned = str(final_answer).strip()
1179
 
1180
+ # Remove prefixes
1181
+ prefixes = [
1182
+ "the answer is:", "here is the answer:", "based on",
1183
+ "final answer:", "answer:", "the final answer is:",
1184
+ "my answer is:", "according to", "i found that",
1185
+ "the result is:", "result:"
1186
  ]
1187
+ for prefix in prefixes:
1188
+ if cleaned.lower().startswith(prefix.lower()):
1189
+ potential = cleaned[len(prefix):].strip()
1190
+ if potential:
1191
+ cleaned = potential
1192
  break
1193
 
1194
+ # Remove code fences and quotes
1195
+ cleaned = remove_fences_simple(cleaned)
1196
 
1197
+ while cleaned.startswith("`") and cleaned.endswith("`"):
1198
+ cleaned = cleaned[1:-1].strip()
 
1199
 
1200
+ if (cleaned.startswith('"') and cleaned.endswith('"')) or \
1201
+ (cleaned.startswith("'") and cleaned.endswith("'")):
1202
+ cleaned = cleaned[1:-1].strip()
 
1203
 
1204
+ # Remove trailing period for short answers
1205
+ if cleaned.endswith('.') and len(cleaned.split()) < 10:
1206
+ cleaned = cleaned[:-1]
1207
 
1208
  print(f"\n{'='*70}")
1209
+ print(f"🎉 RETURNING ANSWER")
1210
  print(f"{'='*70}")
1211
+ print(f"{cleaned}")
1212
  print(f"{'='*70}\n")
1213
 
1214
+ return cleaned
1215
 
1216
  except Exception as e:
1217
+ print(f"❌ Graph error: {e}")
1218
+ print(traceback.format_exc())
1219
+ return f"AGENT ERROR: {e}"
 
1220
 
1221
 
1222
  # =============================================================================
1223
  # GLOBAL AGENT INSTANTIATION
1224
  # =============================================================================
1225
+ agent = None
1226
+
1227
  try:
1228
  initialize_rag_components()
1229
 
1230
  agent = PlanningReflectionAgent()
1231
+ print("✅ Global PlanningReflectionAgent instantiated.")
1232
+
1233
+ # Verify it's callable
1234
+ if not callable(agent):
1235
+ print("❌ ERROR: Agent not callable!")
1236
+ agent = None
1237
+ else:
1238
+ print("✅ Agent is callable.")
1239
+
1240
  if asr_pipeline is None:
1241
+ print("⚠️ ASR Pipeline not loaded.")
1242
+
1243
  except Exception as e:
1244
+ print(f"❌ FATAL: Agent initialization failed: {e}")
1245
  traceback.print_exc()
1246
  agent = None
1247