Update app.py
app.py CHANGED
@@ -364,43 +364,58 @@ class PerplexityR1Model:
         self.model_name = "perplexity-ai/r1-1776"
         self.provider = "fireworks-ai"
         self.last_input_token_count = 0
+        self.last_output_token_count = 0  # Added attribute for output tokens
         # Get the API key
         self.api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
         if not self.api_key:
             raise ValueError("No Hugging Face API key found in environment variables")
         # Create the inference client
         self.client = InferenceClient(provider=self.provider, api_key=self.api_key)
-        print(
+        print("Initialized Perplexity R1-1776 model with 128K context window")

     def __call__(self, prompt):
         """Call the model with the prompt."""
-        #
-
+        # Determine message format and count tokens
+        if isinstance(prompt, list):
+            # Combine all message contents for token counting
+            combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+            self.last_input_token_count = len(combined_prompt.split())
+            messages = prompt  # Already in message format
+        elif isinstance(prompt, str):
+            self.last_input_token_count = len(prompt.split())
+            messages = [{"role": "user", "content": prompt}]
+        else:
+            prompt_str = str(prompt)
+            self.last_input_token_count = len(prompt_str.split())
+            messages = [{"role": "user", "content": prompt_str}]
+
         print(f"Sending approximately {self.last_input_token_count} tokens to Perplexity R1-1776")

-        # Convert string prompt to messages format
-        messages = [{"role": "user", "content": prompt}]
-
         try:
-            # Call the model
             completion = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=messages,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens
             )
-
-
-            return
+            output = completion.choices[0].message.content
+            self.last_output_token_count = len(output.split())
+            return output
         except Exception as e:
             print(f"Error calling Perplexity R1-1776: {str(e)}")
             # For context length errors, try simple truncation
             if "context length" in str(e).lower() or "token limit" in str(e).lower():
                 print("Context length error with R1-1776 - truncating prompt and retrying")
                 # Simple truncation - take the last ~80K characters which should be under the limit
-
-
-
+                if isinstance(prompt, str):
+                    truncated_prompt = prompt[-80000:] if len(prompt) > 80000 else prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+                else:
+                    # For list input, join and truncate then rebuild messages
+                    combined_prompt = " ".join(msg.get("content", "") for msg in prompt)
+                    truncated_prompt = combined_prompt[-80000:] if len(combined_prompt) > 80000 else combined_prompt
+                    messages = [{"role": "user", "content": truncated_prompt}]
+
                 try:
                     completion = self.client.chat.completions.create(
                         model=self.model_name,
@@ -408,12 +423,13 @@ class PerplexityR1Model:
                         temperature=self.temperature,
                         max_tokens=self.max_tokens
                     )
-
+                    output = completion.choices[0].message.content
+                    self.last_output_token_count = len(output.split())
+                    return output
                 except Exception as retry_error:
                     print(f"Error on retry: {str(retry_error)}")
                     return f"ERROR: Model call failed even with reduced context. Please try a shorter query."
             else:
-                # For non-context errors
                 return f"ERROR: {str(e)}"

 # Initialize our model with Perplexity R1-1776
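A note on the token accounting in this change: len(text.split()) counts whitespace-separated words, which for English text usually runs below the true BPE token count, so the printed figure is only a rough lower bound. A minimal sketch of a closer estimate, assuming the perplexity-ai/r1-1776 repository ships a tokenizer loadable through transformers (the committed code deliberately sticks with the cheaper word-count approximation):

from transformers import AutoTokenizer

# Assumption: the model repo exposes its tokenizer files; if not, any
# tokenizer from the same model family would give a similar estimate.
tokenizer = AutoTokenizer.from_pretrained("perplexity-ai/r1-1776")

def count_tokens(text: str) -> int:
    # encode() returns the token ids for the text; its length is the
    # exact token count under this tokenizer
    return len(tokenizer.encode(text))

print(count_tokens("Sending approximately how many tokens?"))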
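The retry path truncates by characters rather than tokens: keeping the last 80,000 characters assumes roughly 4 characters per token, on the order of 20K tokens, which the comment assumes lands under the provider's effective limit. Pulled out as a standalone helper for clarity (a sketch, not part of the commit):

def truncate_tail(text: str, max_chars: int = 80_000) -> str:
    # Keep the tail of the prompt: with agent scratchpads, the most
    # recent context is usually the part the model still needs.
    return text[-max_chars:] if len(text) > max_chars else text

assert len(truncate_tail("x" * 100_000)) == 80_000
assert truncate_tail("short") == "short"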
@@ -465,7 +481,7 @@ print(f"Available tools: final_answer, Sonar_Web_Search_Tool, {search_tool_name}
 # To fix the TypeError in Gradio_UI.py, you would need to modify that file
 # For now, we'll just use the agent directly
 try:
-    GradioUI(agent).launch()
+    GradioUI(agent).launch(share=True)
 except TypeError as e:
     if "unsupported operand type(s) for +=" in str(e):
         print("Error: Token counting issue in Gradio UI")
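launch(share=True) additionally requests a public gradio.live link; on a Space the app is already served publicly, so the flag mostly matters when running app.py locally. If the TypeError guard above fires, one possible fallback is to drive the agent from a console loop instead of the UI. A sketch, assuming agent follows the usual smolagents interface with a run(task) method (this diff does not show how the agent is constructed):

# Fallback sketch: skip the Gradio UI and query the agent directly.
# Assumes a smolagents-style agent exposing run(task).
while True:
    query = input("query> ").strip()
    if not query:
        break  # empty input exits the loop
    print(agent.run(query))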