# Agent-Example / app.py
# Author: SolshineMisfit
# Commit b3b6478 (verified): "Put more under the control of the agent"
# File size: 8.75 kB
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
import os
import json
import uuid
from datasets import Dataset
from huggingface_hub import HfApi
from openai import OpenAI
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
# Base system prompt sent with every Perplexity request.
Perplex_Assistant_Prompt = (
    "You are a helpful AI assistant that searches the web for accurate information."
)
# Mirror HUGGINGFACE_API_KEY into HUGGINGFACE_API_TOKEN (empty string when the
# key is unset). The original author notes this is the variable HfApiModel
# expects -- TODO confirm against the smolagents version in use.
os.environ["HUGGINGFACE_API_TOKEN"] = os.environ.get("HUGGINGFACE_API_KEY", "")
#@weave.op()
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
    """Send a single non-streaming chat request to Perplexity and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system_messages: Extra instructions for the system message. Only used
            when ``assistant_meta`` is true.
        model_name: Model identifier passed to the API.
        assistant_meta: When true, ``system_messages`` is appended to
            ``Perplex_Assistant_Prompt`` after a blank line.

    Returns:
        The ``content`` of the first choice's message.
    """
    # Perplexity is reached through the OpenAI client pointed at its base URL.
    perplexity = OpenAI(
        api_key=os.getenv("PERPLEXITY_API_KEY"),
        base_url="https://api.perplexity.ai",
    )

    if assistant_meta:
        system_content = f"{Perplex_Assistant_Prompt}\n\n{system_messages}"
    else:
        system_content = Perplex_Assistant_Prompt

    chat = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": prompt},
    ]

    # Minimal parameters for Perplexity.
    completion = perplexity.chat.completions.create(
        model=model_name,
        messages=chat,
        stream=False,
    )
    return completion.choices[0].message.content
@tool
def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
    """A tool that accesses Perplexity Sonar to search the web when the answer requires or would benefit from a real world web reference.
    Args:
        arg1: User Prompt
        arg2: Details on the desired web search results as system message for sonar web search
    """
    # NOTE: the docstring above is presumably what the @tool decorator exposes
    # to the model as the tool description, so its wording is left unchanged.
    try:
        # tracked_perplexity_call only appends its second argument to the
        # system message when assistant_meta is true; enable it whenever arg2
        # is non-empty so the search details actually reach the API.
        return tracked_perplexity_call(arg1, arg2, assistant_meta=bool(arg2))
    except Exception as e:
        # Errors are reported to the agent as text instead of being raised.
        return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data: str) -> str:
    """Create a one-row dataset from conversation text and push it to the Hugging Face Hub.

    Args:
        dataset_name: Name for the dataset. Spaces are replaced with
            underscores and the result is lower-cased to form the repo name.
        username: Hub namespace that owns the repo, e.g. "Misfits-and-Machines".
            Required; this function supplies no default.
        conversation_data: String representing the conversation data; stored
            in the ``text`` column of the single row.

    Returns:
        A success message containing the dataset URL, or an error message with
        troubleshooting hints. Exceptions are caught and reported in the
        returned string rather than raised.
    """
    try:
        # Accept either variable name, matching what the error message below
        # tells the user to set.
        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."

        # Heavy third-party imports are deferred until a key is known to exist.
        from datasets import Dataset, DatasetDict
        import pandas as pd

        # Initialize Hugging Face API
        hf_api = HfApi(token=api_key)

        # Sanitize dataset name
        safe_dataset_name = dataset_name.replace(" ", "_").lower()
        repo_id = f"{username}/{safe_dataset_name}"
        print(f"Creating dataset repository: {repo_id}")

        # Single-row table: the conversation text plus a timestamp and a
        # random id generated at call time.
        df = pd.DataFrame({
            "text": [conversation_data],
            "timestamp": [datetime.datetime.now().isoformat()],
            "dataset_id": [str(uuid.uuid4())],
        })
        train_dataset = Dataset.from_pandas(df)

        # Create a DatasetDict with a train split
        dataset_dict = DatasetDict({"train": train_dataset})
        print(f"Created dataset with {len(train_dataset)} rows")

        # Create the repository explicitly if it doesn't exist
        try:
            if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
                hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
                print(f"Repository {repo_id} created")
            else:
                print(f"Repository {repo_id} already exists")
        except Exception as repo_error:
            # Best effort: the failure is logged and the push is still attempted.
            print(f"Repository creation error: {str(repo_error)}")

        # Push to Hugging Face Hub
        print(f"Pushing dataset to {repo_id}")
        dataset_dict.push_to_hub(
            repo_id=repo_id,
            token=api_key,
            private=False,
        )
        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
        print(f"Dataset successfully pushed to: {dataset_url}")

        # Double-check that the repo exists
        if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
            print(f"Verified: Repository {repo_id} exists")
        else:
            print(f"Warning: Repository {repo_id} not found after push")
        return f"Successfully created dataset at {dataset_url}"
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        # The full traceback goes to stdout; the caller gets a short message.
        print(f"Dataset creation error: {str(e)}\n{error_trace}")
        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name"
@tool
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
    """A tool that posts a new dataset of the current conversation to Hugging Face.
    Args:
        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
        conversation_data: String content to save to the dataset (no JSON conversion needed)
    Returns:
        Link to the created dataset or error message with troubleshooting steps
    """
    # NOTE: the docstring above is presumably what the @tool decorator exposes
    # to the model as the tool description, so its wording is left unchanged.
    try:
        print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
        print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
        # Dataset_Creator_Function takes (dataset_name, username,
        # conversation_data); the namespace must be passed explicitly or the
        # call fails with a TypeError for the missing argument.
        result = Dataset_Creator_Function(dataset_name, "Misfits-and-Machines", conversation_data)
        print(f"Dataset creation result: {result}")
        return result
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        # Errors are reported to the agent as text instead of being raised.
        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name, then format "now" in that zone.
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        formatted = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        # Any failure (e.g. an unknown zone name) is returned as text.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    return f"The current local time in {timezone} is: {formatted}"
# Instance of FinalAnswerTool registered with the agent below.
final_answer = FinalAnswerTool()

# No huggingface_api_key argument is passed -- it is not supported.
model = HfApiModel(
    model_id="https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud",  # Using the backup endpoint
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Prompt templates are read from prompts.yaml in the working directory.
with open("prompts.yaml", "r") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        Sonar_Web_Search_Tool,
        get_current_time_in_timezone,
        image_generation_tool,
        Dataset_Creator_Tool,
    ],
    prompt_templates=prompt_templates,
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
)

# A known TypeError in Gradio_UI.py can only be fixed in that file; here the
# launch is wrapped so that specific failure prints a hint instead.
try:
    GradioUI(agent).launch()
except TypeError as err:
    # Any other TypeError is propagated unchanged.
    if "unsupported operand type(s) for +=" not in str(err):
        raise err
    print(
        "Error: Token counting issue in Gradio UI",
        "To fix, edit Gradio_UI.py and change:",
        "total_input_tokens += agent.model.last_input_token_count",
        "To:",
        "total_input_tokens += (agent.model.last_input_token_count or 0)",
        sep="\n",
    )