SolshineMisfit committed
Commit 7246f64 · verified · 1 Parent(s): 87ab6aa

Update app.py

Files changed (1): app.py (+32 -16)
app.py CHANGED
@@ -364,43 +364,58 @@ class PerplexityR1Model:
         self.model_name = "perplexity-ai/r1-1776"
         self.provider = "fireworks-ai"
         self.last_input_token_count = 0
+        self.last_output_token_count = 0  # Added attribute for output tokens
         # Get the API key
         self.api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
         if not self.api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
         # Create the inference client
         self.client = InferenceClient(provider=self.provider, api_key=self.api_key)
-        print(f"Initialized Perplexity R1-1776 model with 128K context window")
+        print("Initialized Perplexity R1-1776 model with 128K context window")

     def __call__(self, prompt):
         """Call the model with the prompt."""
-        # Simple token count estimation
-        self.last_input_token_count = len(prompt.split())
+        # Determine message format and count tokens
+        if isinstance(prompt, list):
+            # Combine all message contents for token counting
+            combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+            self.last_input_token_count = len(combined_prompt.split())
+            messages = prompt  # Already in message format
+        elif isinstance(prompt, str):
+            self.last_input_token_count = len(prompt.split())
+            messages = [{"role": "user", "content": prompt}]
+        else:
+            prompt_str = str(prompt)
+            self.last_input_token_count = len(prompt_str.split())
+            messages = [{"role": "user", "content": prompt_str}]
+
         print(f"Sending approximately {self.last_input_token_count} tokens to Perplexity R1-1776")

-        # Convert string prompt to messages format
-        messages = [{"role": "user", "content": prompt}]
-
         try:
-            # Call the model
             completion = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=messages,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens
             )
-
-            # Return just the content string to match HfApiModel's behavior
-            return completion.choices[0].message.content
+            output = completion.choices[0].message.content
+            self.last_output_token_count = len(output.split())
+            return output
         except Exception as e:
             print(f"Error calling Perplexity R1-1776: {str(e)}")
             # For context length errors, try simple truncation
             if "context length" in str(e).lower() or "token limit" in str(e).lower():
                 print("Context length error with R1-1776 - truncating prompt and retrying")
                 # Simple truncation - take the last ~80K characters which should be under the limit
-                truncated_prompt = prompt[-80000:] if len(prompt) > 80000 else prompt
-                messages = [{"role": "user", "content": truncated_prompt}]
-
+                if isinstance(prompt, str):
+                    truncated_prompt = prompt[-80000:] if len(prompt) > 80000 else prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+                else:
+                    # For list input, join and truncate then rebuild messages
+                    combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+                    truncated_prompt = combined_prompt[-80000:] if len(combined_prompt) > 80000 else combined_prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+
                 try:
                     completion = self.client.chat.completions.create(
                         model=self.model_name,
@@ -408,12 +423,13 @@ class PerplexityR1Model:
                         temperature=self.temperature,
                         max_tokens=self.max_tokens
                     )
-                    return completion.choices[0].message.content
+                    output = completion.choices[0].message.content
+                    self.last_output_token_count = len(output.split())
+                    return output
                 except Exception as retry_error:
                     print(f"Error on retry: {str(retry_error)}")
                     return f"ERROR: Model call failed even with reduced context. Please try a shorter query."
             else:
-                # For non-context errors
                 return f"ERROR: {str(e)}"

 # Initialize our model with Perplexity R1-1776
@@ -465,7 +481,7 @@ print(f"Available tools: final_answer, Sonar_Web_Search_Tool, {search_tool_name}")
 # To fix the TypeError in Gradio_UI.py, you would need to modify that file
 # For now, we'll just use the agent directly
 try:
-    GradioUI(agent).launch()
+    GradioUI(agent).launch(share=True)
 except TypeError as e:
     if "unsupported operand type(s) for +=" in str(e):
         print("Error: Token counting issue in Gradio UI")