Agent-Course-First_agent

Runtime error

The agent can now use this tool with clearer feedback on what's happening, and users are directed to the correct URL where they can verify that the dataset appeared on Hugging Face.

0be1b82 verified about 1 year ago

raw

history blame

8.62 kB

	from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
	import datetime
	import requests
	import pytz
	import yaml
	import os
	import json
	import uuid
	from datasets import Dataset
	from huggingface_hub import HfApi
	from openai import OpenAI
	from tools.final_answer import FinalAnswerTool

	from Gradio_UI import GradioUI

	# Base system prompt sent with every Perplexity (Sonar) request.
	Perplex_Assistant_Prompt = """You are a helpful AI assistant that searches the web for accurate information."""

	# Mirror the configured Hugging Face key into the process environment.
	# HUGGINGFACE_API_TOKEN is still written exactly as before for backward
	# compatibility. HF_TOKEN is the variable huggingface_hub documents for token
	# discovery, so it is filled in as well -- but only when a key is configured
	# and the deployment has not already provided its own HF_TOKEN.
	_hf_api_key = os.getenv("HUGGINGFACE_API_KEY", "")
	os.environ["HUGGINGFACE_API_TOKEN"] = _hf_api_key
	if _hf_api_key:
	    os.environ.setdefault("HF_TOKEN", _hf_api_key)

	#@weave.op()
	def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False) -> str:
	    """Send one chat-completion request to the Perplexity API and return the reply text.

	    Args:
	        prompt: User message to send.
	        system_messages: Extra instructions appended to the base system prompt;
	            only used when ``assistant_meta`` is true and the value is non-empty.
	        model_name: Perplexity model to query.
	        assistant_meta: When true, append ``system_messages`` to the base prompt.

	    Returns:
	        The ``content`` of the first choice in the response.

	    Raises:
	        ValueError: If the ``PERPLEXITY_API_KEY`` environment variable is not set.
	    """
	    api_key = os.getenv("PERPLEXITY_API_KEY")
	    if not api_key:
	        # Fail fast: given api_key=None the OpenAI client falls back to
	        # OPENAI_API_KEY, and that credential must not be sent to Perplexity.
	        raise ValueError("PERPLEXITY_API_KEY environment variable is not set")

	    client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")

	    system_message = Perplex_Assistant_Prompt
	    if assistant_meta and system_messages:
	        # Skipping empty/None values avoids appending the literal text "None".
	        system_message += f"\n\n{system_messages}"

	    # Minimal parameters for Perplexity
	    response = client.chat.completions.create(
	        model=model_name,
	        messages=[
	            {"role": "system", "content": system_message},
	            {"role": "user", "content": prompt},
	        ],
	        stream=False,
	    )
	    return response.choices[0].message.content

	# Agent-facing wrapper around tracked_perplexity_call. The docstring below is
	# left word-for-word because the @tool decorator is given this function
	# (docstring included) to build the tool the agent sees.
	@tool
	def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
	    """A tool that accesses Perplexity Sonar to search the web when the answer requires or would benefit from a real world web reference.
	    Args:
	        arg1: User Prompt
	        arg2: Details on the desired web search results as system message for sonar web search
	    """
	    try:
	        # assistant_meta has to be switched on, otherwise arg2 is never added to
	        # the system message and the caller's search guidance is silently lost.
	        return tracked_perplexity_call(arg1, arg2, assistant_meta=bool(arg2))
	    except Exception as e:
	        # Tool boundary: hand the failure back to the agent as text, not a crash.
	        return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"


	def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
	    """Creates and pushes a dataset to Hugging Face with the conversation history.

	    The dataset holds a single row with the conversation text, a creation
	    timestamp and a random id, and is pushed as the ``train`` split of
	    ``Misfits-and-Machines/<sanitized dataset_name>``.

	    Args:
	        dataset_name: Name for the dataset (will be prefixed with username).
	            Lower-cased; spaces and other unsupported characters become "_".
	        conversation_data: String representing the conversation data

	    Returns:
	        A success message containing the dataset URL, or an error message.
	        Failures are reported through the return value, never raised.
	    """
	    # Function-scope imports, following the file's existing local-import style.
	    import re
	    import traceback

	    try:
	        # Get API key from environment variables
	        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
	        if not api_key:
	            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."

	        # Force the username to be the known value
	        username = "Misfits-and-Machines"

	        # Sanitize the name. Every character outside [a-z0-9._-] is replaced one
	        # for one with "_", so names that only needed their spaces replaced map
	        # to the same repository as before. Leading/trailing "-" and "." are
	        # trimmed because the Hub's repo-id validation rejects them.
	        safe_dataset_name = re.sub(r"[^a-z0-9._-]", "_", dataset_name.lower()).strip("-.")
	        if not dataset_name.strip() or not safe_dataset_name:
	            return "Error: Dataset name is empty or has no usable characters. Please use letters, numbers, underscores, dashes or dots."
	        repo_id = f"{username}/{safe_dataset_name}"

	        # Initialize Hugging Face API
	        hf_api = HfApi(token=api_key)

	        print(f"Creating dataset repository: {repo_id}")

	        # One-row dataset: the conversation plus bookkeeping columns.
	        dataset = Dataset.from_dict(
	            {
	                "text": [conversation_data],
	                "timestamp": [datetime.datetime.now().isoformat()],
	                "dataset_id": [str(uuid.uuid4())],
	            }
	        )

	        # Check if the repository already exists
	        try:
	            if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
	                print(f"Repository {repo_id} already exists")
	            else:
	                # Create repo if it doesn't exist
	                hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
	                print(f"Repository {repo_id} created successfully")
	        except Exception as repo_error:
	            # Deliberately best-effort: continue anyway as push_to_hub might
	            # create the repo.
	            print(f"Repository check/creation error: {str(repo_error)}")

	        # Push dataset to the Hub with appropriate parameters
	        print(f"Pushing dataset to {repo_id}")

	        # Create URL for monitoring - shown to the user so they can check progress
	        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
	        print(f"Dataset URL will be: {dataset_url}")

	        dataset.push_to_hub(
	            repo_id=repo_id,
	            token=api_key,
	            split="train",  # Use a proper split name
	            commit_message=f"Upload dataset: {dataset_name}",
	        )

	        print(f"Dataset successfully pushed to: {dataset_url}")
	        return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
	    except Exception as e:
	        # Log the full traceback, but return only a short actionable message.
	        error_trace = traceback.format_exc()
	        print(f"Dataset creation error: {str(e)}\n{error_trace}")
	        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"

	@tool
	def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
	    """A tool that posts a new dataset of the current conversation to Hugging Face.

	    Args:
	        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
	        conversation_data: String content to save to the dataset (no JSON conversion needed)

	    Returns:
	        Link to the created dataset or error message with troubleshooting steps
	    """
	    # Thin logging wrapper: announce the request, delegate to
	    # Dataset_Creator_Function, echo its result, and turn any unexpected
	    # exception into an error string (with traceback) for the agent.
	    try:
	        char_count = len(conversation_data)
	        print(f"Creating dataset '{dataset_name}' with {char_count} characters of data")

	        # Preview of the target repository name (spaces -> underscores, lower-cased).
	        preview_name = dataset_name.replace(' ', '_').lower()
	        print(f"Dataset will be created at Misfits-and-Machines/{preview_name}")

	        outcome = Dataset_Creator_Function(dataset_name, conversation_data)
	        print(f"Dataset creation result: {outcome}")
	        return outcome
	    except Exception as err:
	        import traceback
	        trace_text = traceback.format_exc()
	        return f"Error using Dataset Creator tool: {str(err)}\n{trace_text}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."


	@tool
	def get_current_time_in_timezone(timezone: str) -> str:
	    """A tool that fetches the current local time in a specified timezone.
	    Args:
	        timezone: A string representing a valid timezone (e.g., 'America/New_York').
	    """
	    # Any failure (for example a name pytz rejects) is returned as an error
	    # string rather than raised.
	    try:
	        zone = pytz.timezone(timezone)
	        now_in_zone = datetime.datetime.now(zone)
	        stamp = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
	        return f"The current local time in {timezone} is: {stamp}"
	    except Exception as err:
	        return f"Error fetching time for timezone '{timezone}': {str(err)}"


	# Instance of the project's FinalAnswerTool; registered in the agent's tool list.
	final_answer = FinalAnswerTool()

	# Remove the huggingface_api_key parameter - it's not supported
	model = HfApiModel(
	    max_tokens=2096,
	    temperature=0.5,
	    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # Using the backup endpoint
	    custom_role_conversions=None,
	)

	# Import tool from Hub
	image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

	# Load the agent's prompt templates. The encoding is given explicitly so the
	# file decodes the same way regardless of the host's locale default.
	with open("prompts.yaml", 'r', encoding="utf-8") as stream:
	    prompt_templates = yaml.safe_load(stream)

	# Tools handed to the agent, in the same order as before.
	_agent_tools = [
	    final_answer,
	    Sonar_Web_Search_Tool,
	    get_current_time_in_timezone,
	    image_generation_tool,
	    Dataset_Creator_Tool,
	]

	# Build the code agent; every keyword argument carries the same value as before.
	agent = CodeAgent(
	    model=model,
	    tools=_agent_tools,
	    max_steps=6,
	    verbosity_level=1,
	    grammar=None,
	    planning_interval=None,
	    name=None,
	    description=None,
	    prompt_templates=prompt_templates,
	)

	# Start the Gradio front end for the agent.
	# A TypeError whose message mentions "+=" is treated as the known
	# token-counting problem in Gradio_UI.py: a hint describing the edit to make
	# in that file is printed instead of crashing. Any other TypeError propagates.
	try:
	    GradioUI(agent).launch()
	except TypeError as e:
	    if "unsupported operand type(s) for +=" not in str(e):
	        # Unrelated failure: bare raise re-raises the active exception as-is.
	        raise
	    print("Error: Token counting issue in Gradio UI")
	    print("To fix, edit Gradio_UI.py and change:")
	    print("total_input_tokens += agent.model.last_input_token_count")
	    print("To:")
	    print("total_input_tokens += (agent.model.last_input_token_count or 0)")