SolshineMisfit committed
Commit d98a17f · verified · 1 Parent(s): f3ea50e

Upped max steps to 20, updated context management

Files changed (1): app.py (+69 −16)
app.py CHANGED
@@ -463,26 +463,72 @@ def manage_context(prompt, max_allowed_tokens=30000):
 
 # Now update the try_model_call_with_fallbacks function to use this context management
 def try_model_call_with_fallbacks(prompt):
-    """Try to use the primary model first, fall back to alternatives if it fails."""
-    # First attempt with primary model
+    """Try to use the primary model first with aggressive context management."""
+    # First, ALWAYS apply context management, but more aggressively
     try:
-        # Apply context management
-        managed_prompt = manage_context(prompt)
+        # Get a rough token count estimate
+        estimated_tokens = len(prompt.split())
+        print(f"Estimated input tokens: {estimated_tokens}")
+
+        # Start with 25000 as the maximum (leaving ~6K tokens buffer for the model limits)
+        managed_prompt = manage_context(prompt, max_allowed_tokens=25000)
+
+        # If still potentially too large, reduce further
+        if len(managed_prompt.split()) > 24000:
+            print("First context reduction still too large, reducing further...")
+            managed_prompt = manage_context(managed_prompt, max_allowed_tokens=22000)
+
+        # Final emergency truncation if needed
+        if len(managed_prompt.split()) > 22000:
+            print("Emergency truncation required")
+            words = managed_prompt.split()
+            # Keep first 5000 and last 15000 words with a note in between
+            managed_prompt = " ".join(words[:5000]) + "\n\n[CONTEXT SEVERELY TRUNCATED]\n\n" + " ".join(words[-15000:])
+
+        print(f"Final managed prompt size: {len(managed_prompt.split())} estimated tokens")
+
+        # Temporarily reduce output tokens even further if the prompt is large
+        temp_max_tokens = model.max_tokens
+        if len(managed_prompt.split()) > 20000:
+            print("Large prompt detected, temporarily reducing output tokens")
+            model.max_tokens = 750  # Temporarily reduce to 750 for this call
 
-        return original_call(managed_prompt)
+        try:
+            result = original_call(managed_prompt)
+            model.max_tokens = temp_max_tokens  # Restore original setting
+            return result
+        except Exception as call_error:
+            # Restore original setting before handling the error
+            model.max_tokens = temp_max_tokens
+            raise call_error
+
     except Exception as primary_error:
-        # If it's a token limit error, try more aggressive management
+        # If we still get a token limit error, try even more aggressive reduction
        if "Input validation error: inputs tokens + max_new_tokens" in str(primary_error):
            try:
-                print("Token limit exceeded. Trying more aggressive context management...")
-                more_managed_prompt = manage_context(prompt, max_allowed_tokens=20000)
-                return original_call(more_managed_prompt)
+                print("Critical: Token limit exceeded despite context management. Implementing emergency measures...")
+                # Take a more drastic approach - keep only system instructions and last part
+                lines = prompt.strip().split('\n')
+                # Keep first 50 lines and last 100 lines only
+                emergency_prompt = "\n".join(lines[:50] + ["\n[MAJORITY OF CONTEXT REMOVED DUE TO TOKEN LIMITS]\n"] + lines[-100:])
+
+                # Reduce output tokens drastically
+                temp_max_tokens = model.max_tokens
+                model.max_tokens = 500
+                try:
+                    result = original_call(emergency_prompt)
+                    model.max_tokens = temp_max_tokens
+                    return result
+                except Exception:
+                    model.max_tokens = temp_max_tokens
+                    print("Emergency measures failed. Trying fallback models...")
            except Exception:
-                print("Token reduction failed. Proceeding to fallback models...")
+                print("Emergency context management failed. Proceeding to fallback models...")
 
        print(f"Primary model call failed: {str(primary_error)}")
        print("Trying fallback models...")
 
+        # Rest of fallback logic remains the same...
        # List of fallback models
        fallbacks = [
            {
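Note on the temporary max_tokens reduction above: the new code saves and restores model.max_tokens by hand on both the success and the error path. A try/finally (or a small context manager) gives the same guarantee with a single restore site. A minimal sketch under the same assumptions as the diff (a model object exposing a mutable max_tokens attribute); capped_output_tokens is a hypothetical helper, not part of app.py:

from contextlib import contextmanager

@contextmanager
def capped_output_tokens(model, cap):
    # Temporarily lower model.max_tokens; the finally clause restores it
    # even when the wrapped call raises.
    saved = model.max_tokens
    model.max_tokens = min(saved, cap)
    try:
        yield
    finally:
        model.max_tokens = saved

# Usage inside try_model_call_with_fallbacks would then be:
#     with capped_output_tokens(model, 750):
#         result = original_call(managed_prompt)
#     return result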
@@ -502,16 +548,20 @@ def try_model_call_with_fallbacks(prompt):
        if not api_key:
            raise ValueError("No Hugging Face API key found in environment variables")
 
-        # Try each fallback model in sequence
+        # Try each fallback model in sequence with highly aggressive context management
        for fallback in fallbacks:
            try:
                print(f"Trying fallback model: {fallback['display_name']}")
                client = InferenceClient(provider=fallback["provider"], api_key=api_key)
-                messages = [{"role": "user", "content": manage_context(prompt, 25000)}]  # Apply context management for fallbacks too
+
+                # Apply even more aggressive context management for fallbacks
+                emergency_prompt = manage_context(prompt, max_allowed_tokens=15000)
+                messages = [{"role": "user", "content": emergency_prompt}]
+
                completion = client.chat.completions.create(
                    model=fallback["model_name"],
                    messages=messages,
-                    max_tokens=1800,
+                    max_tokens=1000,  # Reduced tokens for output
                    temperature=0.5
                )
                print(f"Successfully used fallback model: {fallback['display_name']}")
@@ -520,13 +570,16 @@ def try_model_call_with_fallbacks(prompt):
                print(f"Fallback model {fallback['display_name']} failed: {str(e)}")
                continue
 
-        # If all fallbacks fail, re-raise the original error
-        raise primary_error
+        # If all fallbacks fail, provide a useful error message
+        return "ERROR: Unable to process request due to context size limitations. Please break your request into smaller parts or simplify your query."
 
 # Monkey patch the model's __call__ method to use our fallback logic
 original_call = model.__call__
 model.__call__ = try_model_call_with_fallbacks
 
+# Reduce the model's output tokens immediately to improve chances of success
+model.max_tokens = 1000  # Reduce from 2096 to 1000 to stay under token limits
+
 # Import tool from Hub
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 
@@ -546,7 +599,7 @@ agent = CodeAgent(
        Check_Dataset_Validity,
        visit_webpage_tool,  # This is correctly initialized as VisitWebpageTool()
    ],
-    max_steps=6,
+    max_steps=20,
    verbosity_level=1,
    grammar=None,
    planning_interval=3,
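One caveat on the monkey patch in the third hunk: Python resolves dunder methods such as __call__ on the type, not the instance, so model.__call__ = try_model_call_with_fallbacks intercepts explicit model.__call__(prompt) calls but not plain model(prompt). Whether the patch takes effect therefore depends on how the agent invokes the model. A small, runnable demonstration of the lookup rule (generic Python, not app.py code):

class Model:
    def __call__(self, prompt):
        return f"original: {prompt}"

m = Model()
m.__call__ = lambda prompt: f"patched: {prompt}"  # instance attribute only

print(m.__call__("hi"))  # "patched: hi"  -- explicit attribute access finds it
print(m("hi"))           # "original: hi" -- m(...) looks up __call__ on type(m)

If plain-call interception is needed, patching the class (type(m).__call__ = ...) or wrapping the model object are the usual alternatives.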
 
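manage_context itself is defined immediately above the first hunk and is unchanged by this commit, so only its signature, manage_context(prompt, max_allowed_tokens=30000), is visible in the hunk header. For orientation, a hypothetical stand-in consistent with how the diff calls it (whitespace word count as a rough token proxy, head-and-tail truncation with a marker); the 1:4 head/tail split is an arbitrary assumption, not app.py's actual behavior:

def manage_context(prompt, max_allowed_tokens=30000):
    # Hypothetical stand-in -- the real manage_context body is not shown in this diff.
    # Rough size check: one whitespace-separated word ~ one token,
    # matching the len(prompt.split()) estimates used in the new code.
    words = prompt.split()
    if len(words) <= max_allowed_tokens:
        return prompt
    # Keep the head (instructions) and the tail (recent context), drop the middle.
    head = max_allowed_tokens // 5
    tail = max_allowed_tokens - head
    return " ".join(words[:head]) + "\n\n[CONTEXT TRUNCATED]\n\n" + " ".join(words[-tail:])

Worth noting: whitespace word counts usually undercount real tokens (a word is often more than one token), which is why the commit leaves several thousand tokens of headroom below the 30K input ceiling, as its "~6K tokens buffer" comment says.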