SolshineMisfit's picture
agent can now use this tool with clearer feedback on what's happening, and users will be directed to the correct URL where they can verify if the dataset appeared on Hugging Face.
0be1b82 verified
raw
history blame
8.62 kB
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
import os
import json
import uuid
from datasets import Dataset
from huggingface_hub import HfApi
from openai import OpenAI
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
# Base system prompt sent with every Perplexity request.
Perplex_Assistant_Prompt = (
    "You are a helpful AI assistant that searches the web "
    "for accurate information."
)

# Mirror HUGGINGFACE_API_KEY into HUGGINGFACE_API_TOKEN (empty string when the
# key is unset); per the original note this is the name HfApiModel expects.
os.environ["HUGGINGFACE_API_TOKEN"] = os.environ.get("HUGGINGFACE_API_KEY", "")
#@weave.op()
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
    """Send one chat-completion request to Perplexity and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system_messages: Extra system instructions. They are appended to
            Perplex_Assistant_Prompt only when assistant_meta is true;
            otherwise this argument is not used.
        model_name: Model identifier passed to the API as ``model``.
        assistant_meta: Whether to append system_messages to the base
            system prompt.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    # Perplexity is reached through the OpenAI client pointed at its base URL.
    perplexity = OpenAI(
        api_key=os.getenv("PERPLEXITY_API_KEY"),
        base_url="https://api.perplexity.ai",
    )
    extra_instructions = f"\n\n{system_messages}" if assistant_meta else ""
    conversation = [
        {"role": "system", "content": Perplex_Assistant_Prompt + extra_instructions},
        {"role": "user", "content": prompt},
    ]
    # Only the minimal request parameters are sent; streaming is disabled.
    completion = perplexity.chat.completions.create(
        model=model_name,
        messages=conversation,
        stream=False,
    )
    return completion.choices[0].message.content
@tool
def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
    """A tool that accesses Perplexity Sonar to search the web when the answer requires or would benefit from a real world web reference.
    Args:
        arg1: User Prompt
        arg2: Details on the desired web search results as system message for sonar web search
    """
    # NOTE: the docstring above is read by the @tool decorator as the tool's
    # description, so it is intentionally left unchanged.
    try:
        # tracked_perplexity_call only appends its second argument to the
        # system prompt when assistant_meta is true, so it has to be enabled
        # here for arg2 to reach the model at all. Blank details are skipped
        # so the system prompt is not padded with empty lines.
        has_details = bool(arg2 and arg2.strip())
        return tracked_perplexity_call(arg1, arg2, assistant_meta=has_details)
    except Exception as e:
        # Report the failure as text so the agent receives a string result
        # instead of an exception.
        return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
    """Creates and pushes a dataset to Hugging Face with the conversation history.

    The dataset holds a single row with three columns: ``text`` (the
    conversation), ``timestamp`` (local ISO-8601 creation time) and
    ``dataset_id`` (a random UUID). It is pushed as the ``train`` split of
    ``Misfits-and-Machines/<sanitized name>``.

    Args:
        dataset_name: Name for the dataset (will be prefixed with username).
            Spaces are replaced with underscores and the name is lowercased.
        conversation_data: String representing the conversation data

    Returns:
        A success message containing the dataset URL, or an error message.
        Failures are reported in the returned string rather than raised.
    """
    try:
        # Get API key from environment variables (HF_API_KEY takes precedence)
        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
        if not api_key:
            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."

        # A blank name would yield an invalid repo id ("Misfits-and-Machines/")
        # or a repository made only of underscores, so reject it up front.
        if not dataset_name or not dataset_name.strip():
            return "Error: Dataset name is empty. Please provide a name using letters, numbers and underscores."

        # Force the username to be the known value
        username = "Misfits-and-Machines"

        # Initialize Hugging Face API
        hf_api = HfApi(token=api_key)

        # Sanitize dataset name: spaces become underscores, everything is
        # lowercased. Other characters are passed through unchanged.
        safe_dataset_name = dataset_name.replace(" ", "_").lower()
        repo_id = f"{username}/{safe_dataset_name}"
        print(f"Creating dataset repository: {repo_id}")

        # Single-row dataset; every column is a one-element list
        columns = {
            "text": [conversation_data],
            "timestamp": [datetime.datetime.now().isoformat()],
            "dataset_id": [str(uuid.uuid4())],
        }
        dataset = Dataset.from_dict(columns)

        # Check if the repository already exists, creating it when it does not
        try:
            repo_exists = hf_api.repo_exists(repo_id=repo_id, repo_type="dataset")
            if repo_exists:
                print(f"Repository {repo_id} already exists")
            else:
                hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
                print(f"Repository {repo_id} created successfully")
        except Exception as repo_error:
            # Best effort only: continue anyway as push_to_hub might create the repo
            print(f"Repository check/creation error: {str(repo_error)}")

        print(f"Pushing dataset to {repo_id}")

        # URL shown to the user so they can check that the dataset appeared
        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
        print(f"Dataset URL will be: {dataset_url}")

        # The row is uploaded as the conventional "train" split
        dataset.push_to_hub(
            repo_id=repo_id,
            token=api_key,
            split="train",
            commit_message=f"Upload dataset: {dataset_name}",
        )
        print(f"Dataset successfully pushed to: {dataset_url}")
        return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
    except Exception as e:
        # Log the full traceback to stdout, but return only a short message
        # with troubleshooting hints to the caller.
        import traceback
        error_trace = traceback.format_exc()
        print(f"Dataset creation error: {str(e)}\n{error_trace}")
        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"
@tool
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
    """A tool that posts a new dataset of the current conversation to Hugging Face.
    Args:
        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
        conversation_data: String content to save to the dataset (no JSON conversion needed)
    Returns:
        Link to the created dataset or error message with troubleshooting steps
    """
    # Agent-facing wrapper: logs what is about to happen, delegates the actual
    # upload to Dataset_Creator_Function and hands its message back unchanged.
    # The docstring above is read by the @tool decorator, so it is kept as-is.
    try:
        data_size = len(conversation_data)
        print(f"Creating dataset '{dataset_name}' with {data_size} characters of data")
        repo_slug = dataset_name.replace(' ', '_').lower()
        print(f"Dataset will be created at Misfits-and-Machines/{repo_slug}")
        outcome = Dataset_Creator_Function(dataset_name, conversation_data)
        print(f"Dataset creation result: {outcome}")
        return outcome
    except Exception as e:
        # Unexpected failures are returned as text, traceback included.
        import traceback
        trace_text = traceback.format_exc()
        return f"Error using Dataset Creator tool: {e!s}\n{trace_text}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # The docstring above is read by the @tool decorator, so it is kept as-is.
    try:
        # Resolve the zone name, then format "now" in that zone
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        # Any lookup/formatting failure is reported as text
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    else:
        return f"The current local time in {timezone} is: {stamp}"
# Tool the agent calls to deliver its final answer.
final_answer = FinalAnswerTool()

# Model served from a dedicated inference endpoint. No API-key argument is
# passed here (the original note says a huggingface_api_key parameter is not
# supported).
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # Using the backup endpoint
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates explicitly as UTF-8 so decoding does not depend on
# the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Assemble the agent with every tool defined or loaded in this file.
agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        Sonar_Web_Search_Tool,
        get_current_time_in_timezone,
        image_generation_tool,
        Dataset_Creator_Tool,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)
# Start the Gradio front end for the agent. One specific TypeError (a "+="
# on an unsupported operand, raised while counting tokens) is turned into
# printed instructions for patching Gradio_UI.py; any other TypeError is
# re-raised.
try:
    GradioUI(agent).launch()
except TypeError as e:
    if "unsupported operand type(s) for +=" not in str(e):
        raise e
    for hint in (
        "Error: Token counting issue in Gradio UI",
        "To fix, edit Gradio_UI.py and change:",
        "total_input_tokens += agent.model.last_input_token_count",
        "To:",
        "total_input_tokens += (agent.model.last_input_token_count or 0)",
    ):
        print(hint)