Update app.py
app.py CHANGED
@@ -364,43 +364,58 @@ class PerplexityR1Model:
         self.model_name = "perplexity-ai/r1-1776"
         self.provider = "fireworks-ai"
         self.last_input_token_count = 0
+        self.last_output_token_count = 0  # Added attribute for output tokens
         # Get the API key
         self.api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
         if not self.api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
         # Create the inference client
         self.client = InferenceClient(provider=self.provider, api_key=self.api_key)
-        print(
+        print("Initialized Perplexity R1-1776 model with 128K context window")

     def __call__(self, prompt):
         """Call the model with the prompt."""
-        #
-
+        # Determine message format and count tokens
+        if isinstance(prompt, list):
+            # Combine all message contents for token counting
+            combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+            self.last_input_token_count = len(combined_prompt.split())
+            messages = prompt  # Already in message format
+        elif isinstance(prompt, str):
+            self.last_input_token_count = len(prompt.split())
+            messages = [{"role": "user", "content": prompt}]
+        else:
+            prompt_str = str(prompt)
+            self.last_input_token_count = len(prompt_str.split())
+            messages = [{"role": "user", "content": prompt_str}]
+
         print(f"Sending approximately {self.last_input_token_count} tokens to Perplexity R1-1776")

-        # Convert string prompt to messages format
-        messages = [{"role": "user", "content": prompt}]
-
         try:
-            # Call the model
             completion = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=messages,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens
             )
-
-
-            return
+            output = completion.choices[0].message.content
+            self.last_output_token_count = len(output.split())
+            return output
         except Exception as e:
             print(f"Error calling Perplexity R1-1776: {str(e)}")
             # For context length errors, try simple truncation
             if "context length" in str(e).lower() or "token limit" in str(e).lower():
                 print("Context length error with R1-1776 - truncating prompt and retrying")
                 # Simple truncation - take the last ~80K characters which should be under the limit
-
-
-
+                if isinstance(prompt, str):
+                    truncated_prompt = prompt[-80000:] if len(prompt) > 80000 else prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+                else:
+                    # For list input, join and truncate then rebuild messages
+                    combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+                    truncated_prompt = combined_prompt[-80000:] if len(combined_prompt) > 80000 else combined_prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+
                 try:
                     completion = self.client.chat.completions.create(
                         model=self.model_name,
@@ -408,12 +423,13 @@ class PerplexityR1Model:
                         temperature=self.temperature,
                         max_tokens=self.max_tokens
                     )
-
+                    output = completion.choices[0].message.content
+                    self.last_output_token_count = len(output.split())
+                    return output
                 except Exception as retry_error:
                     print(f"Error on retry: {str(retry_error)}")
                     return f"ERROR: Model call failed even with reduced context. Please try a shorter query."
             else:
-                # For non-context errors
                 return f"ERROR: {str(e)}"

 # Initialize our model with Perplexity R1-1776
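A note on the token accounting in this change: len(text.split()) counts whitespace-separated words, which for English text usually runs below the true BPE token count, so the printed figure is only a rough lower bound. A minimal sketch of a closer estimate, assuming the perplexity-ai/r1-1776 repository ships a tokenizer loadable through transformers (the committed code deliberately sticks with the cheaper word-count approximation):

from transformers import AutoTokenizer

# Assumption: the model repo exposes its tokenizer files; if not, any
# tokenizer from the same model family would give a similar estimate.
tokenizer = AutoTokenizer.from_pretrained("perplexity-ai/r1-1776")

def count_tokens(text: str) -> int:
    # encode() returns the token ids for the text; its length is the
    # exact token count under this tokenizer
    return len(tokenizer.encode(text))

print(count_tokens("Sending approximately how many tokens?"))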
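The retry path truncates by characters rather than tokens: keeping the last 80,000 characters assumes roughly 4 characters per token, on the order of 20K tokens, which the comment assumes lands under the provider's effective limit. Pulled out as a standalone helper for clarity (a sketch, not part of the commit):

def truncate_tail(text: str, max_chars: int = 80_000) -> str:
    # Keep the tail of the prompt: with agent scratchpads, the most
    # recent context is usually the part the model still needs.
    return text[-max_chars:] if len(text) > max_chars else text

assert len(truncate_tail("x" * 100_000)) == 80_000
assert truncate_tail("short") == "short"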
@@ -465,7 +481,7 @@ print(f"Available tools: final_answer, Sonar_Web_Search_Tool, {search_tool_name}
 # To fix the TypeError in Gradio_UI.py, you would need to modify that file
 # For now, we'll just use the agent directly
 try:
-    GradioUI(agent).launch()
+    GradioUI(agent).launch(share=True)
 except TypeError as e:
     if "unsupported operand type(s) for +=" in str(e):
         print("Error: Token counting issue in Gradio UI")
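launch(share=True) additionally requests a public gradio.live link; on a Space the app is already served publicly, so the flag mostly matters when running app.py locally. If the TypeError guard above fires, one possible fallback is to drive the agent from a console loop instead of the UI. A sketch, assuming agent follows the usual smolagents interface with a run(task) method (this diff does not show how the agent is constructed):

# Fallback sketch: skip the Gradio UI and query the agent directly.
# Assumes a smolagents-style agent exposing run(task).
while True:
    query = input("query> ").strip()
    if not query:
        break  # empty input exits the loop
    print(agent.run(query))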