# test_agent / agent.py
# (Hugging Face Space file header: uploaded by GoReed, commit 9874d5f verified)
# agent.py
import os
import json
import pandas as pd
from smolagents import (
    CodeAgent,
    LiteLLMModel,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    WebSearchTool,
    tool,
    OpenAIServerModel
)
from langchain_community.document_loaders import ArxivLoader
from dotenv import load_dotenv
import requests
import yaml

# Read OPENAI_API_KEY_AG (used below) and any other settings from a local .env file.
load_dotenv()

# Custom tools
@tool
def arxiv_search(query: str) -> str:
    """
    Search Arxiv for a query and return up to 3 documents.

    Args:
        query (str): The search query to run on Arxiv.

    Returns:
        str: Formatted Arxiv document summaries.
    """
    docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted = []
    for doc in docs:
        source = doc.metadata["source"]
        page = doc.metadata.get("page", "")
        # Keep only the first 1000 characters of each document body.
        formatted.append(
            f'<Document source="{source}" page="{page}"/>\n'
            f"{doc.page_content[:1000]}\n</Document>"
        )
    return "\n\n---\n\n".join(formatted)
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract.

    Args:
        image_path (str): Path to the image file.

    Returns:
        str: Extracted text or error message.
    """
    # OCR dependencies are optional; fail soft with a readable message.
    try:
        import pytesseract
        from PIL import Image
    except ImportError:
        return "Error: pytesseract is not installed."
    try:
        extracted = pytesseract.image_to_string(Image.open(image_path))
    except Exception as e:
        return f"Error extracting text: {str(e)}"
    return f"Extracted text from image:\n\n{extracted}"
@tool
def read_python_file(file_name: str) -> str:
    """
    Read a Python (.py) file and return its content.

    Args:
        file_name (str): The file name of the Python script to read.

    Returns:
        str: The contents of the file as a string.
    """
    # Question attachments are stored under this fixed directory.
    full_path = os.path.join("data/question_files", file_name)
    with open(full_path, "r") as handle:
        return handle.read()
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Retrieve the transcript for a given YouTube video as plain text.

    Args:
        video_id: The ID of the YouTube video.

    Returns:
        str: The transcript text (one segment per line), or an error
        message string if the transcript could not be fetched.
    """
    # Imported locally (and guarded) so the module still loads when the
    # package is missing. The original code referenced YouTubeTranscriptApi
    # without importing it anywhere in the file, so every call previously
    # failed with a NameError (swallowed into the error string below).
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
    except ImportError:
        return "Error getting transcript: youtube_transcript_api is not installed."
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        # Flatten the [{'text': ..., 'start': ...}, ...] segments into one
        # string so the declared -> str return type actually holds.
        return "\n".join(seg["text"] for seg in segments)
    except Exception as e:
        return f"Error getting transcript: {e}"
@tool
def read_excel_file(file_name: str) -> str:
    """
    Read an Excel (.xlsx) file and return its tabular content as a string.

    Args:
        file_name (str): The Excel file to read.

    Returns:
        str: The content of the Excel file in plain text format.
    """
    # Attachments live in the same fixed directory as the other file tools.
    sheet = pd.read_excel(os.path.join("data/question_files", file_name))
    return sheet.to_string()
# Model and agent setup
# NOTE(review): MODEL_ID (a LiteLLM-style "openai/..." name) is defined but
# never used — OpenAIServerModel below hard-codes "gpt-4.1-nano". Presumably
# a leftover from an earlier LiteLLMModel setup; confirm before removing.
API_KEY = os.getenv("OPENAI_API_KEY_AG")  # loaded from .env by load_dotenv()
MODEL_ID = "openai/gpt-4.1-nano"
model = OpenAIServerModel(model_id="gpt-4.1-nano", api_key=API_KEY)

# Single CodeAgent combining built-in web/Wikipedia search tools with the
# custom Arxiv/file/OCR/transcript tools defined above.
agent = CodeAgent(
    model=model,
    tools=[
        WebSearchTool(),
        VisitWebpageTool(),
        WikipediaSearchTool(),
        arxiv_search,
        FinalAnswerTool(),
        extract_text_from_image,
        read_excel_file,
        read_python_file,
        get_youtube_transcript,
    ],
    planning_interval=3,  # re-plan every 3 steps
    max_steps=10,
    verbosity_level=-1,  # presumably suppresses smolagents logging — TODO confirm
    # Modules the agent's generated code is allowed to import.
    additional_authorized_imports=[
        "pandas", "numpy", "requests", "os", "math", "sympy", "scipy",
        "markdownify", "unicodedata", "stat", "datetime", "random", "itertools",
        "statistics", "queue", "time", "collections", "re"
    ],
    add_base_tools=True
)
def fetch_questions():
    """
    Fetch the question list from the Agents Course scoring API.

    Returns:
        The parsed JSON payload from the /questions endpoint, or an empty
        list if the request fails for any reason.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # Timeout added so a hung server cannot block the run forever.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return []
def fetch_file(task_id: str, file_name: str):
    """
    Download a question's attached file from the scoring API.

    Args:
        task_id (str): Task identifier used by the /files/{task_id} endpoint.
        file_name (str): Name under which to save the file locally.

    Returns:
        str | None: Local path of the downloaded file, or None on failure.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # Timeout added so a hung server cannot block the run forever.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        os.makedirs("data/question_files", exist_ok=True)
        # Build the path the same way the file-reading tools do.
        path = os.path.join("data/question_files", file_name)
        with open(path, "wb") as f:
            f.write(response.content)
        return path
    except Exception as e:
        print(f"Error fetching file: {e}")
        return None
def run_agent_on_question(q):
    """
    Run the agent on a single question record and return its answer.

    Args:
        q (dict): Question record with 'question', 'task_id', and an
            optional 'file_name' key.

    Returns:
        str: The agent's final answer as a stripped string.
    """
    # Assemble the prompt line by line; the file_path line only appears
    # when the question ships an attachment (downloaded via fetch_file).
    lines = ["You are a general AI assistant. Use tools and web search as needed."]
    lines.append(f"Question: {q['question']}")
    if q.get("file_name"):
        local_path = fetch_file(q["task_id"], q["file_name"])
        lines.append(f"file_path: {local_path}")
    lines.append("YOUR FINAL ANSWER should be a number OR few words OR comma-separated values. Follow instructions strictly.")

    result = agent.run("\n".join(lines))

    # Normalize whatever the agent returned into a plain string.
    if isinstance(result, str):
        return result.strip()
    if isinstance(result, dict):
        return result.get("final_answer", str(result))
    return str(result)
def submit_answers(answers):
    """
    POST the collected answers to the scoring API's /submit endpoint.

    Args:
        answers (list): Answer records to submit (shape defined by the
            scoring API).
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    request_payload = {
        "username": "GoReed",
        "agent_code": "test_answers_agent_code",
        "answers": answers,
    }
    # Pre-bind so the HTTPError handler can safely check whether a
    # response object exists before reading .text.
    response = None
    try:
        # The API expects a JSON body, hence json= (not data=).
        # Timeout added so a hung server cannot block the run forever.
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=60,
        )
        response.raise_for_status()
        # Mojibake in the original print strings ("βœ…"/"❌") repaired
        # to the intended UTF-8 emoji.
        print("✅ Submission success:", response.json())
    except requests.exceptions.HTTPError as http_err:
        print(f"❌ HTTP Error: {http_err}")
        if response is not None:
            print("Response text:", response.text)
    except Exception as e:
        print(f"❌ Submission Error: {e}")