Spaces:
Runtime error
Upped max steps to 20, updated context management
app.py CHANGED
@@ -463,26 +463,72 @@ def manage_context(prompt, max_allowed_tokens=30000):
 
 # Now update the try_model_call_with_fallbacks function to use this context management
 def try_model_call_with_fallbacks(prompt):
-    """Try to use the primary model first …
-    # First …
+    """Try to use the primary model first with aggressive context management."""
+    # First, ALWAYS apply context management, but more aggressively
     try:
-        # …
-        …
-        …
+        # Get a rough token count estimate
+        estimated_tokens = len(prompt.split())
+        print(f"Estimated input tokens: {estimated_tokens}")
+
+        # Start with 25000 as the maximum (leaving ~6K tokens buffer for the model limits)
+        managed_prompt = manage_context(prompt, max_allowed_tokens=25000)
+
+        # If still potentially too large, reduce further
+        if len(managed_prompt.split()) > 24000:
+            print("First context reduction still too large, reducing further...")
+            managed_prompt = manage_context(managed_prompt, max_allowed_tokens=22000)
+
+        # Final emergency truncation if needed
+        if len(managed_prompt.split()) > 22000:
+            print("Emergency truncation required")
+            words = managed_prompt.split()
+            # Keep first 5000 and last 15000 words with a note in between
+            managed_prompt = " ".join(words[:5000]) + "\n\n[CONTEXT SEVERELY TRUNCATED]\n\n" + " ".join(words[-15000:])
+
+        print(f"Final managed prompt size: {len(managed_prompt.split())} estimated tokens")
+
+        # Temporarily reduce output tokens even further if the prompt is large
+        temp_max_tokens = model.max_tokens
+        if len(managed_prompt.split()) > 20000:
+            print("Large prompt detected, temporarily reducing output tokens")
+            model.max_tokens = 750 # Temporarily reduce to 750 for this call
 
+        try:
+            result = original_call(managed_prompt)
+            model.max_tokens = temp_max_tokens # Restore original setting
+            return result
+        except Exception as call_error:
+            # Restore original setting before handling the error
+            model.max_tokens = temp_max_tokens
+            raise call_error
+
     except Exception as primary_error:
-        # If …
+        # If we still get a token limit error, try even more aggressive reduction
         if "Input validation error: inputs tokens + max_new_tokens" in str(primary_error):
             try:
-                print("Token limit exceeded …
-                …
-                …
+                print("Critical: Token limit exceeded despite context management. Implementing emergency measures...")
+                # Take a more drastic approach - keep only system instructions and last part
+                lines = prompt.strip().split('\n')
+                # Keep first 50 lines and last 100 lines only
+                emergency_prompt = "\n".join(lines[:50] + ["\n[MAJORITY OF CONTEXT REMOVED DUE TO TOKEN LIMITS]\n"] + lines[-100:])
+
+                # Reduce output tokens drastically
+                temp_max_tokens = model.max_tokens
+                model.max_tokens = 500
+                try:
+                    result = original_call(emergency_prompt)
+                    model.max_tokens = temp_max_tokens
+                    return result
+                except Exception:
+                    model.max_tokens = temp_max_tokens
+                    print("Emergency measures failed. Trying fallback models...")
             except Exception:
-                print(" …
+                print("Emergency context management failed. Proceeding to fallback models...")
 
         print(f"Primary model call failed: {str(primary_error)}")
         print("Trying fallback models...")
 
+        # Rest of fallback logic remains the same...
         # List of fallback models
         fallbacks = [
             {
@@ -502,16 +548,20 @@ def try_model_call_with_fallbacks(prompt):
         if not api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
 
-        # Try each fallback model in sequence
+        # Try each fallback model in sequence with highly aggressive context management
        for fallback in fallbacks:
             try:
                 print(f"Trying fallback model: {fallback['display_name']}")
                 client = InferenceClient(provider=fallback["provider"], api_key=api_key)
-                …
+
+                # Apply even more aggressive context management for fallbacks
+                emergency_prompt = manage_context(prompt, max_allowed_tokens=15000)
+                messages = [{"role": "user", "content": emergency_prompt}]
+
                 completion = client.chat.completions.create(
                     model=fallback["model_name"],
                     messages=messages,
-                    max_tokens=…
+                    max_tokens=1000, # Reduced tokens for output
                     temperature=0.5
                 )
                 print(f"Successfully used fallback model: {fallback['display_name']}")
@@ -520,13 +570,16 @@ def try_model_call_with_fallbacks(prompt):
                 print(f"Fallback model {fallback['display_name']} failed: {str(e)}")
                 continue
 
-        # If all fallbacks fail, …
-        …
+        # If all fallbacks fail, provide a useful error message
+        return "ERROR: Unable to process request due to context size limitations. Please break your request into smaller parts or simplify your query."
 
 # Monkey patch the model's __call__ method to use our fallback logic
 original_call = model.__call__
 model.__call__ = try_model_call_with_fallbacks
 
+# Reduce the model's output tokens immediately to improve chances of success
+model.max_tokens = 1000 # Reduce from 2096 to 1000 to stay under token limits
+
 # Import tool from Hub
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 
@@ -546,7 +599,7 @@ agent = CodeAgent(
         Check_Dataset_Validity,
         visit_webpage_tool, # This is correctly initialized as VisitWebpageTool()
     ],
-    max_steps=…
+    max_steps=20,
     verbosity_level=1,
     grammar=None,
     planning_interval=3,

(Deleted lines marked … could not be recovered from the page; the old max_tokens and max_steps values are not shown.)
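A note on manage_context: only its signature appears in the hunk header, so its body is not shown above. For orientation, a minimal sketch of a plausible shape, assuming it uses the same word-count heuristic as the rest of the diff; the actual implementation in app.py may differ:

    def manage_context(prompt, max_allowed_tokens=30000):
        # Word count as a rough token proxy, matching the heuristic used in the diff
        words = prompt.split()
        if len(words) <= max_allowed_tokens:
            return prompt
        # Keep the head (instructions) and the tail (recent turns), drop the middle
        head = max_allowed_tokens // 5
        tail = max_allowed_tokens - head
        return " ".join(words[:head]) + "\n\n[CONTEXT TRUNCATED]\n\n" + " ".join(words[-tail:])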
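The diff estimates tokens with len(prompt.split()), which undercounts for code-heavy prompts since tokenizers often emit several tokens per word. A sketch of a tighter estimate, assuming the transformers library is available; the model id below is a placeholder, not necessarily the one the Space uses:

    from transformers import AutoTokenizer

    # Placeholder model id - substitute the model actually configured in app.py
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")

    def estimated_token_count(text):
        # encode() returns token ids, so its length is the real token count
        return len(tokenizer.encode(text))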
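The save/restore dance around model.max_tokens is repeated in three places, each needing its own except branch to undo the change. A sketch of a context manager that centralizes the restore and makes it exception-safe (temporary_max_tokens is an illustrative name, not part of the commit):

    from contextlib import contextmanager

    @contextmanager
    def temporary_max_tokens(model, limit):
        saved = model.max_tokens
        model.max_tokens = limit
        try:
            yield
        finally:
            model.max_tokens = saved  # restored even if the call raises

Each try/except pair in try_model_call_with_fallbacks would then collapse to:

    with temporary_max_tokens(model, 750):
        result = original_call(managed_prompt)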
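One caveat on the monkey patch: assigning model.__call__ on the instance only intercepts explicit model.__call__(prompt) calls, because the model(prompt) syntax looks __call__ up on the type. Whether the patch takes effect therefore depends on how the agent framework invokes the model. A sketch of a class-level patch that covers both call styles (install_fallback is an illustrative name):

    def install_fallback(model):
        def patched(self, prompt, *args, **kwargs):
            # Route every invocation through the fallback-aware wrapper
            return try_model_call_with_fallbacks(prompt)
        # Special-method lookup happens on the type, so patch the class;
        # note this affects every instance of the class, not just this model.
        type(model).__call__ = patched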