SolshineMisfit committed
Commit 08a6143 · verified · 1 Parent(s): d7409ef

Update app.py

Files changed (1)
  1. app.py +69 -91
app.py CHANGED
@@ -350,122 +350,100 @@ def get_current_time_in_timezone(timezone: str) -> str:
 
 final_answer = FinalAnswerTool()
 
-# Create Perplexity R1 model implementation directly without referencing an undefined variable
-
-# Import necessary modules (already imported above)
-# from huggingface_hub import InferenceClient
-
-# Create a new model implementation that uses the larger context window model through InferenceClient
-class PerplexityR1Model:
-    def __init__(self, temperature=0.5, max_tokens=1500):
-        """Initialize Perplexity R1-1776 model with 128K context window."""
-        self.temperature = temperature
-        self.max_tokens = max_tokens
-        self.model_name = "perplexity-ai/r1-1776"
-        self.provider = "fireworks-ai"
-        self.last_input_token_count = 0
-        self.last_output_token_count = 0  # Added attribute for output tokens
-        # Get the API key
-        self.api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
-        if not self.api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
-        # Create the inference client
-        self.client = InferenceClient(provider=self.provider, api_key=self.api_key)
-        print("Initialized Perplexity R1-1776 model with 128K context window")
-
-    def __call__(self, prompt):
-        """Call the model with the prompt."""
-        # Determine message format and count tokens
-        if isinstance(prompt, list):
-            # Convert each message's content to a string to avoid nested lists
-            combined_prompt = " ".join(str(msg.get("content", "")) for msg in prompt)
-            self.last_input_token_count = len(combined_prompt.split())
-            messages = prompt  # Already in message format
-        elif isinstance(prompt, str):
-            self.last_input_token_count = len(prompt.split())
-            messages = [{"role": "user", "content": prompt}]
-        else:
-            prompt_str = str(prompt)
-            self.last_input_token_count = len(prompt_str.split())
-            messages = [{"role": "user", "content": prompt_str}]
-
-        print(f"Sending approximately {self.last_input_token_count} tokens to Perplexity R1-1776")
-
-        try:
-            completion = self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=self.temperature,
-                max_tokens=self.max_tokens
-            )
-            output = completion.choices[0].message.content
-            self.last_output_token_count = len(output.split())
-            return output
-        except Exception as e:
-            print(f"Error calling Perplexity R1-1776: {str(e)}")
-            # For context length errors, try simple truncation
-            if "context length" in str(e).lower() or "token limit" in str(e).lower():
-                print("Context length error with R1-1776 - truncating prompt and retrying")
-                if isinstance(prompt, str):
-                    truncated_prompt = prompt[-80000:] if len(prompt) > 80000 else prompt
-                    messages = [{"role": "user", "content": truncated_prompt}]
-                else:
-                    combined_prompt = " ".join(str(msg.get("content", "")) for msg in prompt)
-                    truncated_prompt = combined_prompt[-80000:] if len(combined_prompt) > 80000 else combined_prompt
-                    messages = [{"role": "user", "content": truncated_prompt}]
-
-                try:
-                    completion = self.client.chat.completions.create(
-                        model=self.model_name,
-                        messages=messages,
-                        temperature=self.temperature,
-                        max_tokens=self.max_tokens
-                    )
-                    output = completion.choices[0].message.content
-                    self.last_output_token_count = len(output.split())
-                    return output
-                except Exception as retry_error:
-                    print(f"Error on retry: {str(retry_error)}")
-                    return f"ERROR: Model call failed even with reduced context. Please try a shorter query."
-            else:
-                return f"ERROR: {str(e)}"
-
-# Initialize our model with Perplexity R1-1776
-model = PerplexityR1Model(temperature=0.5, max_tokens=1500)
-
-# Import tool from Hub - do this before using the tool in the agent
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 
-# Load prompt templates before using them in the agent
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
-
-# Initialize the agent with all required components already defined
 agent = CodeAgent(
     model=model,
     tools=[
         final_answer,
         Sonar_Web_Search_Tool,
-        primary_search_tool,
         get_current_time_in_timezone,
         image_generation_tool,
         Dataset_Creator_Tool,
         Check_Dataset_Validity,
-        visit_webpage_tool,
     ],
-    max_steps=12,
     verbosity_level=1,
     grammar=None,
-    planning_interval=2,
     name="Research Assistant",
-    description="""An AI assistant that can search the web, create datasets, and answer questions.
-    Using Perplexity R1-1776 model with 128K token context window.""",
     prompt_templates=prompt_templates
 )
 
-# Add informative message about the model
-print("Using Perplexity R1-1776 model with 128K token context window")
-
 # Add informative message about which search tool is being used
 print(f"Agent initialized with {search_tool_name} as primary search tool")
 print(f"Available tools: final_answer, Sonar_Web_Search_Tool, {search_tool_name}, get_current_time_in_timezone, image_generation_tool, Dataset_Creator_Tool, Check_Dataset_Validity, visit_webpage_tool")
 
 
 final_answer = FinalAnswerTool()
 
+# Remove the huggingface_api_key parameter - it's not supported
+model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # Using the backup endpoint
+    custom_role_conversions=None
+)
 
+# Add fallback logic that only activates if the primary model fails
+def try_model_call_with_fallbacks(prompt):
+    """Try to use the primary model first, fall back to alternatives if it fails."""
+    # First attempt with primary model
+    try:
+        return model(prompt)
+    except Exception as primary_error:
+        print(f"Primary model call failed: {str(primary_error)}")
+        print("Trying fallback models...")
+
+        # List of fallback models
+        fallbacks = [
+            {
+                "provider": "sambanova",
+                "model_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
+                "display_name": "Qwen 2.5 Coder 32B"
+            },
+            {
+                "provider": "hf-inference",
+                "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+                "display_name": "DeepSeek R1 Distill Qwen 32B"
+            }
+        ]
+
+        # Get API key
+        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
+        if not api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
+
+        # Try each fallback model in sequence
+        for fallback in fallbacks:
+            try:
+                print(f"Trying fallback model: {fallback['display_name']}")
+                client = InferenceClient(provider=fallback["provider"], api_key=api_key)
+                messages = [{"role": "user", "content": prompt}]
+                completion = client.chat.completions.create(
+                    model=fallback["model_name"],
+                    messages=messages,
+                    max_tokens=2096,
+                    temperature=0.5
+                )
+                print(f"Successfully used fallback model: {fallback['display_name']}")
+                return completion.choices[0].message.content
+            except Exception as e:
+                print(f"Fallback model {fallback['display_name']} failed: {str(e)}")
+                continue
+
+        # If all fallbacks fail, re-raise the original error
+        raise primary_error
 
+# Monkey patch the model's __call__ method to use our fallback logic
+original_call = model.__call__
+model.__call__ = try_model_call_with_fallbacks
 
+# Import tool from Hub
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
+
+# Initialize the agent using standard smolagents patterns
 agent = CodeAgent(
     model=model,
     tools=[
         final_answer,
         Sonar_Web_Search_Tool,
+        primary_search_tool,  # This is already set to either DuckDuckGo, Google, or fallback
         get_current_time_in_timezone,
         image_generation_tool,
         Dataset_Creator_Tool,
         Check_Dataset_Validity,
+        visit_webpage_tool,  # This is correctly initialized as VisitWebpageTool()
     ],
+    max_steps=6,
     verbosity_level=1,
     grammar=None,
+    planning_interval=3,
     name="Research Assistant",
+    description="""An AI assistant that can search the web, create datasets, and answer questions.
+    When working with queries that might exceed token limits, consider:
+    1. Breaking tasks into smaller sub-tasks
+    2. Limiting the amount of data returned by search tools
+    3. Using the planning_interval to enable more effective reasoning""",
     prompt_templates=prompt_templates
 )
 
 # Add informative message about which search tool is being used
 print(f"Agent initialized with {search_tool_name} as primary search tool")
 print(f"Available tools: final_answer, Sonar_Web_Search_Tool, {search_tool_name}, get_current_time_in_timezone, image_generation_tool, Dataset_Creator_Tool, Check_Dataset_Validity, visit_webpage_tool")