Spaces:
Runtime error
Runtime error
| from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool | |
| import datetime | |
| import requests | |
| import pytz | |
| import yaml | |
| import os | |
| import json | |
| import uuid | |
| from datasets import Dataset | |
| from huggingface_hub import HfApi | |
| from openai import OpenAI | |
| from tools.final_answer import FinalAnswerTool | |
| from Gradio_UI import GradioUI | |
# Base system prompt sent with every Perplexity request.
Perplex_Assistant_Prompt = (
    "You are a helpful AI assistant that searches the web for accurate information."
)

# Mirror HUGGINGFACE_API_KEY into HUGGINGFACE_API_TOKEN (empty string when the
# key is not set).
# NOTE(review): confirm that the model client actually reads
# HUGGINGFACE_API_TOKEN; nothing in this file reads it back.
os.environ["HUGGINGFACE_API_TOKEN"] = os.environ.get("HUGGINGFACE_API_KEY", "")
#@weave.op()
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
    """Send a single-turn chat request to the Perplexity API and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system_messages: Extra instructions appended to the base system
            prompt. Only used when ``assistant_meta`` is true.
        model_name: Perplexity model identifier passed to the API.
        assistant_meta: When true, ``system_messages`` is appended to the base
            system prompt, separated by a blank line.

    Returns:
        The message content of the first choice in the completion.
    """
    perplexity = OpenAI(
        api_key=os.getenv("PERPLEXITY_API_KEY"),
        base_url="https://api.perplexity.ai",
    )

    if assistant_meta:
        system_content = Perplex_Assistant_Prompt + f"\n\n{system_messages}"
    else:
        system_content = Perplex_Assistant_Prompt

    conversation = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": prompt},
    ]

    # Only the parameters Perplexity needs; streaming is disabled so the full
    # reply is available on the returned object.
    completion = perplexity.chat.completions.create(
        model=model_name,
        messages=conversation,
        stream=False,
    )
    return completion.choices[0].message.content
# Registered with @tool because the agent's tool list needs tool objects,
# not bare functions.
@tool
def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
    """A tool that accesses Perplexity Sonar to search the web when the answer requires or would benefit from a real world web reference.

    Args:
        arg1: User Prompt
        arg2: Details on the desired web search results as system message for sonar web search

    Returns:
        The text of the Sonar response, or an error message if the call failed.
    """
    try:
        # assistant_meta must be enabled, otherwise tracked_perplexity_call
        # ignores its system_messages argument and arg2 is silently dropped.
        return tracked_perplexity_call(arg1, arg2, assistant_meta=bool(arg2))
    except Exception as e:
        # Tools report failures as text so the agent can react to them
        # instead of aborting the run.
        return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data: str) -> str:
    """Create a one-row dataset from the conversation and push it to the Hugging Face Hub.

    Args:
        dataset_name: Human-readable dataset name. Spaces are replaced with
            underscores and the result is lower-cased to form the repository
            name.
        username: Hub user or organisation that owns the repository. An empty
            value falls back to "Misfits-and-Machines".
        conversation_data: Conversation text stored in the "text" column.

    Returns:
        A success message containing the dataset URL, or an error message
        with troubleshooting hints. Exceptions are not propagated.
    """
    try:
        # Resolve the token first so a missing credential is reported before
        # any heavy import or network call. Both variable names advertised in
        # the error message are honoured.
        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."

        from datasets import Dataset, DatasetDict

        hf_api = HfApi(token=api_key)

        owner = username or "Misfits-and-Machines"
        safe_dataset_name = dataset_name.replace(" ", "_").lower()
        repo_id = f"{owner}/{safe_dataset_name}"
        print(f"Creating dataset repository: {repo_id}")

        # Build the single-row train split directly in memory; no temporary
        # file is needed because nothing reads it back.
        train_dataset = Dataset.from_dict({
            "text": [conversation_data],
            "timestamp": [datetime.datetime.now().isoformat()],
            "dataset_id": [str(uuid.uuid4())],
        })
        dataset_dict = DatasetDict({"train": train_dataset})
        print(f"Created dataset with {len(train_dataset)} rows")

        # Best effort: make sure the repository exists. exist_ok avoids the
        # check-then-create race; a failure here is only logged and the push
        # below is still attempted.
        try:
            hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
            print(f"Repository {repo_id} is ready")
        except Exception as repo_error:
            print(f"Repository creation error: {str(repo_error)}")

        print(f"Pushing dataset to {repo_id}")
        dataset_dict.push_to_hub(
            repo_id=repo_id,
            token=api_key,
            private=False,
        )
        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
        print(f"Dataset successfully pushed to: {dataset_url}")

        # Double-check that the repository is visible after the push.
        if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
            print(f"Verified: Repository {repo_id} exists")
        else:
            print(f"Warning: Repository {repo_id} not found after push")

        return f"Successfully created dataset at {dataset_url}"
    except Exception as e:
        import traceback

        error_trace = traceback.format_exc()
        print(f"Dataset creation error: {str(e)}\n{error_trace}")
        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name"
# Registered with @tool because the agent's tool list needs tool objects,
# not bare functions.
@tool
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
    """A tool that posts a new dataset of the current conversation to Hugging Face.

    Args:
        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
        conversation_data: String content to save to the dataset (no JSON conversion needed)

    Returns:
        Link to the created dataset or error message with troubleshooting steps
    """
    owner = "Misfits-and-Machines"
    try:
        print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
        print(f"Dataset will be created at {owner}/{dataset_name.replace(' ', '_').lower()}")
        # Dataset_Creator_Function takes (dataset_name, username,
        # conversation_data); the owner must be passed explicitly, otherwise
        # the conversation text is bound to the username parameter and the
        # call fails for a missing argument.
        result = Dataset_Creator_Function(dataset_name, owner, conversation_data)
        print(f"Dataset creation result: {result}")
        return result
    except Exception as e:
        import traceback

        error_trace = traceback.format_exc()
        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
# Registered with @tool because the agent's tool list needs tool objects,
# not bare functions.
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').

    Returns:
        A sentence with the current time formatted as YYYY-MM-DD HH:MM:SS, or an error message if the lookup failed.
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # Failures are reported as text so the agent can recover from them.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
final_answer = FinalAnswerTool()

# No huggingface_api_key parameter is passed here: it is not supported.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # Using the backup endpoint
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        Sonar_Web_Search_Tool,
        get_current_time_in_timezone,
        image_generation_tool,
        Dataset_Creator_Tool,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# A TypeError from token counting inside Gradio_UI.py can only be fixed in
# that file; here it is turned into a hint for the operator.
# NOTE(review): this handler only sees errors that propagate out of launch();
# confirm whether the token-counting error actually surfaces here.
try:
    GradioUI(agent).launch()
except TypeError as e:
    if "unsupported operand type(s) for +=" not in str(e):
        # Unrelated TypeError: re-raise with the original traceback intact.
        raise
    print("Error: Token counting issue in Gradio UI")
    print("To fix, edit Gradio_UI.py and change:")
    print("total_input_tokens += agent.model.last_input_token_count")
    print("To:")
    print("total_input_tokens += (agent.model.last_input_token_count or 0)")