SolshineMisfit committed on
Commit
9991f02
·
verified ·
1 Parent(s): 8e59d51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -82
app.py CHANGED
@@ -350,100 +350,181 @@ def get_current_time_in_timezone(timezone: str) -> str:
350
 
351
  final_answer = FinalAnswerTool()
352
 
353
- # Define the primary DeepSeek model using InferenceClient
354
- def primary_deepseek_model(prompt):
355
- """Use the DeepSeek-R1-Distill-Qwen-32B model as the primary endpoint."""
356
- try:
357
- print("Using primary model: DeepSeek-R1-Distill-Qwen-32B via HF Inference API")
358
- # Get API key
359
- api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
360
- if not api_key:
361
- raise ValueError("No Hugging Face API key found in environment variables")
362
-
363
- client = InferenceClient(
364
- provider="hf-inference",
365
- api_key=api_key
366
- )
 
367
 
368
- messages = [{"role": "user", "content": prompt}]
369
- completion = client.chat.completions.create(
370
- model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
371
- messages=messages,
372
  max_tokens=2096,
373
- temperature=0.5
 
374
  )
375
- print("Primary model call successful")
376
- return completion.choices[0].message.content
377
- except Exception as e:
378
- print(f"Primary DeepSeek model failed: {str(e)}")
379
- raise e
380
-
381
- # Keep the original endpoint as a backup
382
- backup_model = HfApiModel(
383
- max_tokens=2096,
384
- temperature=0.5,
385
- model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
386
- custom_role_conversions=None
387
- )
388
-
389
- # Updated fallback mechanism that tries primary first, then original model, then additional fallbacks
390
- def try_model_call_with_fallbacks(prompt):
391
- """Try to use the primary model first, fall back to original model, then try other fallbacks if both fail."""
392
- # First attempt with primary DeepSeek model
393
- try:
394
- return primary_deepseek_model(prompt)
395
- except Exception as primary_error:
396
- print(f"Primary model call failed: {str(primary_error)}")
397
- print("Trying backup model...")
398
 
399
- # Second attempt with original backup model
400
- try:
401
- print("Using backup model: HfApiModel endpoint")
402
- result = backup_model(prompt)
403
- print("Backup model call successful")
404
- return result
405
- except Exception as backup_error:
406
- print(f"Backup model call failed: {str(backup_error)}")
407
- print("Trying additional fallback models...")
 
 
408
 
409
- # List of additional fallback models
410
- fallbacks = [
411
- {
412
- "provider": "sambanova",
413
- "model_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
414
- "display_name": "Qwen 2.5 Coder 32B"
415
- }
416
- # DeepSeek is now the primary model, so we removed it from fallbacks
417
- ]
 
 
418
 
419
  # Get API key
420
  api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
421
  if not api_key:
422
- raise ValueError("No Hugging Face API key found in environment variables")
423
 
424
- # Try each fallback model in sequence
425
- for fallback in fallbacks:
426
- try:
427
- print(f"Trying fallback model: {fallback['display_name']}")
428
- client = InferenceClient(provider=fallback["provider"], api_key=api_key)
429
- messages = [{"role": "user", "content": prompt}]
430
- completion = client.chat.completions.create(
431
- model=fallback["model_name"],
432
- messages=messages,
433
- max_tokens=2096,
434
- temperature=0.5
435
- )
436
- print(f"Successfully used fallback model: {fallback['display_name']}")
437
- return completion.choices[0].message.content
438
- except Exception as e:
439
- print(f"Fallback model {fallback['display_name']} failed: {str(e)}")
440
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
- # If all fallbacks fail, re-raise the original error
443
- raise primary_error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
 
445
- # Create a model wrapper that uses our fallback logic
446
- model = lambda prompt: try_model_call_with_fallbacks(prompt)
447
 
448
  # Import tool from Hub
449
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 
350
 
351
  final_answer = FinalAnswerTool()
352
 
353
+ # Create a custom model class that implements the HfApiModel interface
354
+ class FallbackChainModel:
355
+ """
356
+ A custom model class that implements the interface expected by smolagents,
357
+ with an automatic fallback chain from primary to backup models.
358
+
359
+ Following smolagents best practices:
360
+ - Simple implementation with robust logging
361
+ - Thorough error handling with detailed messages
362
+ - Support for all parameters used by CodeAgent
363
+ """
364
+ def __init__(self):
365
+ # Initialize token tracking for compatibility with smolagents
366
+ self.last_input_token_count = 0
367
+ self.last_output_token_count = 0
368
 
369
+ # Set up the backup model as a proper HfApiModel instance
370
+ self.backup_model = HfApiModel(
 
 
371
  max_tokens=2096,
372
+ temperature=0.5,
373
+ model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
374
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
+ print("Initialized FallbackChainModel with DeepSeek as primary and HfApiModel as backup")
377
+
378
+ def __call__(self, prompt, stop_sequences=None, temperature=0.5, max_tokens=2096):
379
+ """
380
+ Call method that matches the HfApiModel interface required by smolagents.
381
+
382
+ Args:
383
+ prompt: The input prompt (can be string or structured format)
384
+ stop_sequences: Optional list of sequences to stop generation
385
+ temperature: Controls randomness in generation
386
+ max_tokens: Maximum tokens to generate
387
 
388
+ Returns:
389
+ Generated text from the model
390
+ """
391
+ # Track all calls and parameters for debugging
392
+ print(f"Model call with temp={temperature}, max_tokens={max_tokens}")
393
+ if stop_sequences:
394
+ print(f"Using stop sequences: {stop_sequences}")
395
+
396
+ # Try the primary model (DeepSeek)
397
+ try:
398
+ print("🚀 Attempting to use primary model: DeepSeek-R1-Distill-Qwen-32B")
399
 
400
  # Get API key
401
  api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
402
  if not api_key:
403
+ raise ValueError("No Hugging Face API key found")
404
 
405
+ # Format the prompt appropriately for DeepSeek
406
+ # For structured prompts (needed by some agents), convert to proper format
407
+ if isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
408
+ # Handle chat format with roles
409
+ messages = prompt
410
+ print(f"Using structured chat format with {len(messages)} messages")
411
+ else:
412
+ # Simple string prompt
413
+ if isinstance(prompt, (dict, list)):
414
+ # If it's a dict or list but not in expected chat format, convert to string
415
+ import json
416
+ prompt_str = json.dumps(prompt)
417
+ print("Converting complex prompt to JSON string")
418
+ else:
419
+ prompt_str = str(prompt)
420
+
421
+ # Create a single user message
422
+ messages = [{"role": "user", "content": prompt_str}]
423
+
424
+ # Create the InferenceClient instance
425
+ client = InferenceClient(
426
+ provider="hf-inference",
427
+ api_key=api_key
428
+ )
429
+
430
+ # Call the DeepSeek model
431
+ completion = client.chat.completions.create(
432
+ model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
433
+ messages=messages,
434
+ max_tokens=max_tokens,
435
+ temperature=temperature,
436
+ stop=stop_sequences # Pass stop_sequences as stop parameter
437
+ )
438
+
439
+ # Extract the response text
440
+ response = completion.choices[0].message.content
441
 
442
+ # Track token usage (approximate for now)
443
+ prompt_text = str(prompt)
444
+ self.last_input_token_count = len(prompt_text.split()) * 1.3 # Rough approximation
445
+ self.last_output_token_count = len(response.split()) * 1.3 # Rough approximation
446
+
447
+ print("✅ Primary model call successful")
448
+ return response
449
+
450
+ except Exception as primary_error:
451
+ # Detailed error logging for the primary model
452
+ print(f"❌ Primary model error: {str(primary_error)}")
453
+
454
+ # Try the backup model (original endpoint)
455
+ try:
456
+ print("🔄 Falling back to backup model (HfApiModel endpoint)")
457
+ # Direct call to backup with all parameters
458
+ result = self.backup_model(
459
+ prompt,
460
+ stop_sequences=stop_sequences,
461
+ temperature=temperature,
462
+ max_tokens=max_tokens
463
+ )
464
+ print("✅ Backup model call successful")
465
+ return result
466
+
467
+ except Exception as backup_error:
468
+ # Log backup error and try final fallbacks
469
+ print(f"❌ Backup model error: {str(backup_error)}")
470
+ print("🔄 Trying additional fallback models...")
471
+
472
+ # Additional fallback options (last resort)
473
+ fallbacks = [
474
+ {
475
+ "provider": "sambanova",
476
+ "model_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
477
+ "display_name": "Qwen 2.5 Coder 32B"
478
+ }
479
+ ]
480
+
481
+ # Get API key again to ensure it's available
482
+ api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
483
+ if not api_key:
484
+ raise ValueError("No Hugging Face API key found")
485
+
486
+ # Try each fallback in sequence
487
+ for fallback in fallbacks:
488
+ try:
489
+ print(f"🔄 Trying fallback model: {fallback['display_name']}")
490
+ client = InferenceClient(provider=fallback["provider"], api_key=api_key)
491
+
492
+ # Format prompt for this model
493
+ if isinstance(prompt, (dict, list)):
494
+ import json
495
+ prompt_str = json.dumps(prompt)
496
+ else:
497
+ prompt_str = str(prompt)
498
+
499
+ messages = [{"role": "user", "content": prompt_str}]
500
+
501
+ # Call the fallback model
502
+ completion = client.chat.completions.create(
503
+ model=fallback["model_name"],
504
+ messages=messages,
505
+ max_tokens=max_tokens,
506
+ temperature=temperature,
507
+ stop=stop_sequences
508
+ )
509
+
510
+ print(f"✅ Successfully used fallback model: {fallback['display_name']}")
511
+ return completion.choices[0].message.content
512
+
513
+ except Exception as e:
514
+ print(f"❌ Fallback model {fallback['display_name']} failed: {str(e)}")
515
+ continue
516
+
517
+ # If all fallbacks failed, raise a comprehensive error
518
+ error_msg = (
519
+ f"All models in fallback chain failed. "
520
+ f"Primary error: {str(primary_error)}. "
521
+ f"Backup error: {str(backup_error)}"
522
+ )
523
+ print(f"❌ FATAL ERROR: {error_msg}")
524
+ raise RuntimeError(error_msg)
525
 
526
+ # Create an instance of our fallback chain model
527
+ model = FallbackChainModel()
528
 
529
  # Import tool from Hub
530
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)