Spaces:
Runtime error
Runtime error
agent can now use this tool with clearer feedback on what's happening, and users will be directed to the correct URL where they can verify if the dataset appeared on Hugging Face.
0be1b82 verified | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool | |
| import datetime | |
| import requests | |
| import pytz | |
| import yaml | |
| import os | |
| import json | |
| import uuid | |
| from datasets import Dataset | |
| from huggingface_hub import HfApi | |
| from openai import OpenAI | |
| from tools.final_answer import FinalAnswerTool | |
| from Gradio_UI import GradioUI | |
# Base system prompt shared by every Perplexity (Sonar) call.
Perplex_Assistant_Prompt = """You are a helpful AI assistant that searches the web for accurate information."""

# HfApiModel expects HUGGINGFACE_API_TOKEN; mirror the value stored under HUGGINGFACE_API_KEY
# (falls back to an empty string when neither is set).
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
#@weave.op()  # tracing decorator, currently disabled
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
    """Send one chat turn to the Perplexity API and return the reply text.

    Args:
        prompt: The user message to send.
        system_messages: Extra system guidance; appended to the base system
            prompt only when assistant_meta is True.
        model_name: Perplexity model identifier (default "sonar-pro").
        assistant_meta: Whether to append system_messages to the base prompt.

    Returns:
        The assistant reply content as a string.
    """
    # Perplexity exposes an OpenAI-compatible endpoint, so the OpenAI client is reused here.
    api = OpenAI(api_key=os.getenv("PERPLEXITY_API_KEY"), base_url="https://api.perplexity.ai")
    if assistant_meta:
        sys_text = f"{Perplex_Assistant_Prompt}\n\n{system_messages}"
    else:
        sys_text = Perplex_Assistant_Prompt
    conversation = [
        {"role": "system", "content": sys_text},
        {"role": "user", "content": prompt},
    ]
    # Minimal parameters for Perplexity; non-streaming so the full message is returned at once.
    completion = api.chat.completions.create(model=model_name, messages=conversation, stream=False)
    return completion.choices[0].message.content
@tool
def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
    """A tool that accesses Perplexity Sonar to search the web when the answer requires or would benefit from a real world web reference.
    Args:
        arg1: User Prompt
        arg2: Details on the desired web search results as system message for sonar web search
    """
    try:
        # assistant_meta=True is required, otherwise tracked_perplexity_call
        # silently discards arg2 (the search-specific system guidance).
        sonar_response = tracked_perplexity_call(arg1, arg2, assistant_meta=True)
        return sonar_response
    except Exception as e:
        # Return (rather than raise) the error so the agent can read it and retry.
        return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
    """Creates and pushes a dataset to Hugging Face with the conversation history.
    Args:
        dataset_name: Name for the dataset (will be prefixed with username)
        conversation_data: String representing the conversation data
    Returns:
        URL of the created dataset or error message
    """
    try:
        # Get API key from environment variables (HF_API_KEY takes precedence).
        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
        if not api_key:
            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
        # Force the username to be the known value
        username = "Misfits-and-Machines"
        hf_api = HfApi(token=api_key)
        # Sanitize dataset name: lowercase and replace spaces with underscores
        # (repo ids may not contain spaces).
        safe_dataset_name = dataset_name.replace(" ", "_").lower()
        repo_id = f"{username}/{safe_dataset_name}"
        print(f"Creating dataset repository: {repo_id}")
        # Single-row dataset: the conversation text plus provenance metadata.
        dataset_dict = {
            "text": [conversation_data],
            "timestamp": [datetime.datetime.now().isoformat()],
            "dataset_id": [str(uuid.uuid4())]
        }
        dataset = Dataset.from_dict(dataset_dict)
        # Check/create the repository up front. Failures here are non-fatal
        # because push_to_hub can also create the repo.
        try:
            if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
                print(f"Repository {repo_id} already exists")
            else:
                hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
                print(f"Repository {repo_id} created successfully")
        except Exception as repo_error:
            print(f"Repository check/creation error: {str(repo_error)}")
            # Continue anyway as push_to_hub might create the repo
        print(f"Pushing dataset to {repo_id}")
        # Surface the URL early so the user can monitor progress.
        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
        print(f"Dataset URL will be: {dataset_url}")
        dataset.push_to_hub(
            repo_id=repo_id,
            token=api_key,
            split="train",  # Use a proper split name
            commit_message=f"Upload dataset: {dataset_name}"
        )
        print(f"Dataset successfully pushed to: {dataset_url}")
        return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Dataset creation error: {str(e)}\n{error_trace}")
        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"
@tool
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
    """A tool that posts a new dataset of the current conversation to Hugging Face.
    Args:
        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
        conversation_data: String content to save to the dataset (no JSON conversion needed)
    Returns:
        Link to the created dataset or error message with troubleshooting steps
    """
    try:
        # Log what we are about to create; the repo id mirrors the sanitization
        # performed inside Dataset_Creator_Function.
        print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
        print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
        result = Dataset_Creator_Function(dataset_name, conversation_data)
        print(f"Dataset creation result: {result}")
        return result
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        # Return the traceback so the agent can surface actionable guidance.
        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Create timezone object; raises for unknown timezone names.
        tz = pytz.timezone(timezone)
        # Get current time in that timezone
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # Return the error as text so the agent can correct the timezone name.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
# Terminal tool: wraps the model's final response back to the user.
final_answer = FinalAnswerTool()

# Backup inference endpoint; HfApiModel reads HUGGINGFACE_API_TOKEN from the
# environment, so no key parameter is passed here.
model = HfApiModel(
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)
# Community text-to-image tool pulled from the Hugging Face Hub.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Load the agent's prompt templates from the bundled YAML file.
with open("prompts.yaml", 'r') as fh:
    prompt_templates = yaml.safe_load(fh)
# Everything the agent may call, final_answer first by convention.
toolbox = [
    final_answer,
    Sonar_Web_Search_Tool,
    get_current_time_in_timezone,
    image_generation_tool,
    Dataset_Creator_Tool,
]

# Assemble the code agent with the tools and prompt templates defined above.
agent = CodeAgent(
    model=model,
    tools=toolbox,
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)
# Launch the Gradio UI. A known token-counting TypeError inside Gradio_UI.py
# gets a targeted fix hint instead of a crash; any other TypeError propagates.
try:
    GradioUI(agent).launch()
except TypeError as e:
    if "unsupported operand type(s) for +=" in str(e):
        print("Error: Token counting issue in Gradio UI")
        print("To fix, edit Gradio_UI.py and change:")
        print("total_input_tokens += agent.model.last_input_token_count")
        print("To:")
        print("total_input_tokens += (agent.model.last_input_token_count or 0)")
    else:
        # Bare raise keeps the original traceback intact (vs. `raise e`).
        raise