Final_Assignment_Template1

Sleeping

App Files Files Community

Mehedi2 committed on Oct 2, 2025

Commit

794795e

verified ·

1 Parent(s): 07c5695

Update agent.py

Browse files

Files changed (1) hide show

agent.py +163 -126

agent.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import os
 import time
 from pathlib import Path
 from typing import Optional, Union
-from itertools import cycle
 import pandas as pd
 from dotenv import load_dotenv
@@ -22,60 +22,6 @@ from smolagents.tools import Tool
 load_dotenv()
-class MultiModelManager:
-    """Manages multiple Groq models with rotation and fallback."""
-    def __init__(self):
-        # Alternative: Use a proven working Groq model
-        # If GPT-OSS still has issues, uncomment the line below:
-        # self.models = ["groq/llama-3.3-70b-versatile"]
-        # Current: Trying GPT-OSS 120B with groq/ prefix
-        self.models = [
-            "groq/openai/gpt-oss-120b",  # GPT OSS 120B via Groq
-        ]
-        self.api_key = os.getenv("GROQ_API_KEY")
-        self.model_cycle = cycle(self.models)
-        self.current_model_name = self.models[0]
-    def get_next_model(self):
-        """Get the next model in rotation."""
-        self.current_model_name = next(self.model_cycle)
-        return LiteLLMModel(
-            model_id=self.current_model_name,
-            api_key=self.api_key,
-        )
-    def get_model_by_complexity(self, complexity: str = "high"):
-        """
-        Get a model based on task complexity.
-        Args:
-            complexity: "high", "medium", or "low"
-        """
-        if complexity == "high":
-            model_id = self.models[0]  # llama-3.3-70b
-        elif complexity == "medium":
-            model_id = self.models[2]  # mixtral-8x7b
-        else:  # low
-            model_id = self.models[3]  # llama-3.1-8b
-        self.current_model_name = model_id
-        return LiteLLMModel(
-            model_id=model_id,
-            api_key=self.api_key,
-        )
-    def get_primary_model(self):
-        """Get the primary (best) model."""
-        self.current_model_name = self.models[0]
-        return LiteLLMModel(
-            model_id=self.models[0],
-            api_key=self.api_key,
-        )
 class ExcelToTextTool(Tool):
     """Render an Excel worksheet as a Markdown table."""
@@ -123,21 +69,35 @@ class ExcelToTextTool(Tool):
 class GaiaAgent:
-    """An agent using only GPT-OSS 120B for maximum performance."""
-    def __init__(self, strategy: str = "primary"):
-        """
-        Initialize agent with GPT-OSS 120B.
-        Args:
-            strategy: Kept for compatibility but only uses GPT-OSS 120B
-        """
-        print(f"✅ GaiaAgent initialized with GPT-OSS 120B.")
-        self.strategy = strategy
-        self.model_manager = MultiModelManager()
-        self.retry_count = 0
-        self.max_retries = 2
         # Initialize tools
         self.tools = [
@@ -148,102 +108,179 @@ class GaiaAgent:
             FinalAnswerTool(),
         ]
-        # Rate limiting
         self.last_call_time = 0
-        self.min_delay = 10  # 10 seconds between tasks to avoid rate limits
-        self.tokens_used_in_window = 0
-        self.window_start_time = time.time()
-        # Initialize agent with primary model
-        self._reinitialize_agent()
-    def _reinitialize_agent(self):
-        """Reinitialize the agent with GPT-OSS 120B."""
-        model = self.model_manager.get_primary_model()
-        print(f"🤖 Using model: {self.model_manager.current_model_name}")
-        self.agent = CodeAgent(
-            model=model,
-            tools=self.tools,
-            add_base_tools=True,
-            additional_authorized_imports=["pandas", "numpy", "csv", "subprocess"],
-        )
-    def _detect_complexity(self, question: str) -> str:
-        """Detect question complexity based on keywords."""
-        question_lower = question.lower()
-        # High complexity indicators
-        high_keywords = ["analyze", "complex", "multiple", "calculate", "prove",
-                        "demonstrate", "derive", "algorithm"]
-        if any(keyword in question_lower for keyword in high_keywords):
-            return "high"
-        # Low complexity indicators
-        low_keywords = ["what is", "who is", "when", "define", "list"]
-        if any(keyword in question_lower for keyword in low_keywords):
-            return "low"
-        return "medium"
     def __call__(self, task_id: str, question: str) -> str:
-        # Apply rate limiting
         elapsed = time.time() - self.last_call_time
         if elapsed < self.min_delay:
             wait_time = self.min_delay - elapsed
-            print(f"⏳ Rate limiting: waiting {wait_time:.1f}s...")
             time.sleep(wait_time)
-        print(f"🔹 Task ID: {task_id}")
-        print(f"🔹 Question: {question[:100]}...")
-        # Try to get answer with retry logic and exponential backoff
         answer = None
         for attempt in range(self.max_retries + 1):
             try:
                 answer = self.agent.run(question)
                 if answer:
                     break
             except Exception as e:
                 error_str = str(e)
-                print(f"⚠️ Attempt {attempt + 1} failed: {error_str[:150]}")
                 # Check if it's a rate limit error
-                if "rate_limit" in error_str.lower() or "Rate limit" in error_str:
-                    # Extract wait time if available
-                    import re
-                    wait_match = re.search(r'(\d+\.?\d*)\s*s', error_str)
-                    if wait_match:
-                        wait_time = float(wait_match.group(1)) + 2  # Add 2s buffer
-                    else:
-                        wait_time = 15 * (attempt + 1)  # Exponential backoff: 15s, 30s, 45s
-                    print(f"⏳ Rate limit hit. Waiting {wait_time:.1f}s before retry...")
-                    time.sleep(wait_time)
                     if attempt < self.max_retries:
-                        print(f"🔄 Retrying (attempt {attempt + 2}/{self.max_retries + 1})...")
                         continue
                 else:
-                    # Non-rate-limit error
                     if attempt < self.max_retries:
-                        print(f"🔄 Retrying with fresh agent...")
-                        self._reinitialize_agent()
-                        time.sleep(2)
                     else:
-                        answer = f"⚠️ Agent failed after {self.max_retries + 1} attempts: {e}"
         if not answer:
             answer = "⚠️ Sorry, I could not generate a valid response."
-        # Update last call time
         self.last_call_time = time.time()
-        print(f"✅ Answer: {str(answer)[:100]}...")
         return answer
 # Example usage:
-# Simply initialize the agent - it will always use GPT-OSS 120B
-# agent = GaiaAgent()

 import os
 import time
+import re
 from pathlib import Path
 from typing import Optional, Union
 import pandas as pd
 from dotenv import load_dotenv
 load_dotenv()
 class ExcelToTextTool(Tool):
     """Render an Excel worksheet as a Markdown table."""
 class GaiaAgent:
+    """
+    An agent optimized for Llama 4 Scout with multimodal capabilities.
+    Features:
+    - Uses Llama 4 Scout (30K TPM, 500K context, multimodal)
+    - Intelligent rate limiting with exponential backoff
+    - Automatic retry logic for rate limit errors
+    - Support for text and image inputs
+    """
+    def __init__(self):
+        """Initialize agent with Llama 4 Scout model."""
+        print("✅ GaiaAgent initialized with Llama 4 Scout (30K TPM, Multimodal)")
+        # Model configuration
+        self.model_id = "groq/meta-llama/llama-4-scout-17b-16e-instruct"
+        self.api_key = os.getenv("GROQ_API_KEY")
+        if not self.api_key:
+            raise ValueError("GROQ_API_KEY not found in environment variables")
+        # Initialize model
+        self.model = LiteLLMModel(
+            model_id=self.model_id,
+            api_key=self.api_key,
+        )
+        print(f"🤖 Using model: {self.model_id}")
+        print(f"📊 Limits: 30K TPM | 1K RPM | 500K context")
         # Initialize tools
         self.tools = [
             FinalAnswerTool(),
         ]
+        # Create agent
+        self.agent = CodeAgent(
+            model=self.model,
+            tools=self.tools,
+            add_base_tools=True,
+            additional_authorized_imports=["pandas", "numpy", "csv", "subprocess", "PIL", "requests"],
+        )
+        # Rate limiting configuration (optimized for 30K TPM)
         self.last_call_time = 0
+        self.min_delay = 3  # 3 seconds between tasks (generous with 30K TPM)
+        self.max_retries = 3
+        # Statistics tracking
+        self.total_tasks = 0
+        self.successful_tasks = 0
+        self.failed_tasks = 0
+        self.rate_limit_hits = 0
+    def _wait_for_rate_limit(self, wait_time: float):
+        """Wait for rate limit with progress indicator."""
+        print(f"⏳ Rate limit: waiting {wait_time:.1f}s...", end="", flush=True)
+        # Show countdown
+        for remaining in range(int(wait_time), 0, -1):
+            print(f"\r⏳ Rate limit: waiting {remaining}s...  ", end="", flush=True)
+            time.sleep(1)
+        print("\r✓ Ready to retry                    ")
+    def _extract_wait_time(self, error_str: str) -> float:
+        """Extract wait time from rate limit error message."""
+        # Look for patterns like "3.675499999s" or "try again in 3.6s"
+        patterns = [
+            r'(\d+\.?\d*)\s*s',
+            r'try again in (\d+\.?\d*)',
+            r'retry in (\d+\.?\d*)',
+        ]
+        for pattern in patterns:
+            match = re.search(pattern, error_str)
+            if match:
+                return float(match.group(1)) + 2  # Add 2s buffer
+        return 15  # Default fallback
+    def _handle_rate_limit_error(self, error_str: str, attempt: int) -> float:
+        """Handle rate limit error and return wait time."""
+        self.rate_limit_hits += 1
+        # Try to extract wait time from error
+        wait_time = self._extract_wait_time(error_str)
+        # Apply exponential backoff if extraction failed
+        if wait_time == 15:
+            wait_time = 15 * (attempt + 1)  # 15s, 30s, 45s, 60s
+        return min(wait_time, 60)  # Cap at 60s
     def __call__(self, task_id: str, question: str) -> str:
+        """
+        Process a task with automatic rate limiting and retry logic.
+        Args:
+            task_id: Unique identifier for the task
+            question: The question to answer
+        Returns:
+            The answer string
+        """
+        self.total_tasks += 1
+        # Apply base rate limiting
         elapsed = time.time() - self.last_call_time
         if elapsed < self.min_delay:
             wait_time = self.min_delay - elapsed
+            print(f"⏳ Base rate limit: waiting {wait_time:.1f}s...")
             time.sleep(wait_time)
+        print(f"\n{'='*70}")
+        print(f"🔹 Task #{self.total_tasks} | ID: {task_id}")
+        print(f"🔹 Question: {question[:120]}{'...' if len(question) > 120 else ''}")
+        print(f"{'='*70}")
         answer = None
+        # Retry loop with exponential backoff
         for attempt in range(self.max_retries + 1):
             try:
+                print(f"\n🚀 Attempt {attempt + 1}/{self.max_retries + 1}...")
                 answer = self.agent.run(question)
                 if answer:
+                    self.successful_tasks += 1
+                    print(f"✅ Success!")
                     break
             except Exception as e:
                 error_str = str(e)
+                print(f"\n⚠️ Error on attempt {attempt + 1}:")
+                print(f"   {error_str[:200]}{'...' if len(error_str) > 200 else ''}")
                 # Check if it's a rate limit error
+                if any(keyword in error_str.lower() for keyword in ['rate_limit', 'rate limit', 'quota']):
+                    wait_time = self._handle_rate_limit_error(error_str, attempt)
                     if attempt < self.max_retries:
+                        print(f"🔄 Rate limit detected. Retrying after wait...")
+                        self._wait_for_rate_limit(wait_time)
                         continue
+                    else:
+                        answer = f"⚠️ Rate limit exceeded after {self.max_retries + 1} attempts. Please try again later."
+                        self.failed_tasks += 1
+                # Non-rate-limit error
                 else:
                     if attempt < self.max_retries:
+                        print(f"🔄 Retrying in 5s...")
+                        time.sleep(5)
+                        continue
                     else:
+                        answer = f"⚠️ Agent failed: {error_str[:300]}"
+                        self.failed_tasks += 1
+        # Fallback if no answer generated
         if not answer:
             answer = "⚠️ Sorry, I could not generate a valid response."
+            self.failed_tasks += 1
+        # Update timing
         self.last_call_time = time.time()
+        # Print results
+        print(f"\n{'='*70}")
+        print(f"📝 Answer: {str(answer)[:200]}{'...' if len(str(answer)) > 200 else ''}")
+        print(f"{'='*70}")
         return answer
+    def get_stats(self) -> dict:
+        """Get agent performance statistics."""
+        return {
+            "total_tasks": self.total_tasks,
+            "successful_tasks": self.successful_tasks,
+            "failed_tasks": self.failed_tasks,
+            "success_rate": f"{(self.successful_tasks / self.total_tasks * 100):.1f}%" if self.total_tasks > 0 else "0%",
+            "rate_limit_hits": self.rate_limit_hits,
+        }
+    def print_stats(self):
+        """Print agent performance statistics."""
+        stats = self.get_stats()
+        print(f"\n{'='*70}")
+        print(f"📊 AGENT STATISTICS")
+        print(f"{'='*70}")
+        print(f"Total Tasks:       {stats['total_tasks']}")
+        print(f"Successful:        {stats['successful_tasks']}")
+        print(f"Failed:            {stats['failed_tasks']}")
+        print(f"Success Rate:      {stats['success_rate']}")
+        print(f"Rate Limit Hits:   {stats['rate_limit_hits']}")
+        print(f"{'='*70}\n")
 # Example usage:
+if __name__ == "__main__":
+    # Initialize agent
+    agent = GaiaAgent()
+    # Test with a simple question
+    answer = agent(
+        task_id="test-001",
+        question="What is 2+2? Explain your reasoning."
+    )
+    # Print statistics
+    agent.print_stats()