Spaces:

GoReed
/

test_agent

Runtime error

App Files Files Community

test_agent / agent_1.py

GoReed

added_files

db622b0 6 months ago

raw

history blame contribute delete

8.66 kB

	import os
	import pandas as pd
	from smolagents import (
	CodeAgent,
	LiteLLMModel,
	DuckDuckGoSearchTool,
	FinalAnswerTool,
	VisitWebpageTool,
	WikipediaSearchTool,
	WebSearchTool,
	tool,
	OpenAIServerModel
	)
	from langchain_community.document_loaders import ArxivLoader
	from google.colab import userdata

	import requests
	import yaml
	from dotenv import load_dotenv
	load_dotenv()


	def fetch_questions():
	    """Fetch the evaluation questions from the scoring API.

	    Returns:
	        The decoded JSON payload of ``GET /questions``. When the payload is
	        empty an empty list is returned, so callers can always iterate over
	        the result.

	    Raises:
	        Exception: Any error raised while requesting or decoding the
	            response is printed and then re-raised unchanged.
	    """
	    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	    try:
	        # A timeout keeps the script from hanging forever on a stalled server.
	        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
	        response.raise_for_status()
	        questions_data = response.json()
	    except Exception as e:
	        print(f"Error fetching questions: {e}")
	        raise

	    if not questions_data:
	        print("Fetched questions list is empty.")
	        # Keep the return type consistent with the success path; the previous
	        # (message, None) tuple could not be iterated as a list of questions.
	        return []

	    print(f"Fetched {len(questions_data)} questions.")
	    return questions_data

	def fetch_file(task_id: str, file_name: str):
	    """Download the file attached to a task and store it on disk.

	    The file is requested from ``/files/{task_id}`` and written to
	    ``data/question_files/{file_name}``; the directory is created on demand.

	    Args:
	        task_id: Identifier of the task whose file is requested.
	        file_name: Name under which the file is saved locally.

	    Returns:
	        The raw bytes of the downloaded file.

	    Raises:
	        Exception: Any error raised while downloading or writing the file is
	            printed and then re-raised unchanged.
	    """
	    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	    target_path = f"data/question_files/{file_name}"
	    try:
	        # A timeout keeps the script from hanging forever on a stalled server.
	        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
	        response.raise_for_status()
	        file_content = response.content
	        # The target directory does not exist on a fresh checkout.
	        os.makedirs(os.path.dirname(target_path), exist_ok=True)
	        with open(target_path, "wb") as f:
	            f.write(file_content)
	        return file_content
	    except Exception as e:
	        print(f"Error fetching file: {e}")
	        raise


	def submit_answers(answers):
	    """Submit the collected answers to the scoring API.

	    Args:
	        answers: The value sent under the ``"answers"`` key of the request
	            payload.

	    Returns:
	        The decoded JSON response of ``POST /submit``, or ``None`` when the
	        request fails. Failures are printed, not raised, so callers must
	        check for ``None`` to detect them.
	    """
	    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	    request_payload = {
	        "username": "GoReed",
	        "agent_code": "test",
	        "answers": answers,
	    }
	    try:
	        # `json=` serialises the payload and sets the Content-Type header;
	        # the timeout keeps the script from hanging on a stalled server.
	        response = requests.post(
	            f"{DEFAULT_API_URL}/submit",
	            json=request_payload,
	            timeout=60,
	        )
	        response.raise_for_status()
	        json_response = response.json()
	    except Exception as e:
	        print(f"Error submitting answers: {e}")
	        return None

	    print(f"Response: {json_response}")
	    return json_response

	@tool
	def arxiv_search(query: str) -> str:
	    """Search Arxiv for a query and return maximum 3 result.

	    Args:
	        query: The search query.
	    """
	    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
	    # `.get` is used for both metadata fields so that a document without a
	    # "source" entry falls back to its "Title" instead of raising KeyError.
	    # The opening tag is no longer self-closed, so it pairs with </Document>.
	    formatted_search_docs = "\n\n---\n\n".join(
	        f'<Document source="{doc.metadata.get("source", doc.metadata.get("Title", ""))}" '
	        f'page="{doc.metadata.get("page", "")}">\n'
	        f"{doc.page_content[:1000]}\n</Document>"
	        for doc in search_docs
	    )
	    # Return the text itself: the signature promises `str`, not a dict.
	    return formatted_search_docs

	@tool
	def read_python_file(file_name: str) -> str:
	    """Read a python file and return the content.

	    Args:
	        file_name: The name of the file to read.

	    Returns:
	        The content of the file.
	    """
	    base_path = "data/question_files"
	    # Decode explicitly as UTF-8 (the default source encoding for Python
	    # files) so the result does not depend on the platform's locale.
	    with open(os.path.join(base_path, file_name), "r", encoding="utf-8") as f:
	        return f.read()

	@tool
	def read_excel_file(file_name: str) -> str:
	    """Read an excel file with xlsx extension and return the content.

	    Args:
	        file_name: The name of the file to handle.

	    Returns:
	        The content of the file.
	    """
	    # The workbook is looked up relative to the question-files directory.
	    workbook_path = os.path.join("data/question_files", file_name)
	    # Load the workbook with pandas and render the whole frame as text.
	    sheet = pd.read_excel(workbook_path)
	    rendered = sheet.to_string()
	    return rendered

	@tool
	def extract_text_from_image(image_path: str) -> str:
	    """
	    Extract text from an image using pytesseract (if available).

	    Args:
	        image_path: Path to the image file

	    Returns:
	        Extracted text or error message
	    """
	    try:
	        # Imported lazily so a missing OCR dependency becomes an error message
	        # for the caller instead of an import failure at module load.
	        import pytesseract
	        from PIL import Image

	        # The context manager closes the underlying file handle once OCR is
	        # done, including when image_to_string raises.
	        with Image.open(image_path) as image:
	            text = pytesseract.image_to_string(image)

	        print(f"Extracted text from image:\n\n{text}")
	        return f"Extracted text from image:\n\n{text}"
	    except ImportError:
	        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
	    except Exception as e:
	        # Failures are reported as text, not raised to the caller.
	        return f"Error extracting text from image: {str(e)}"

	# Identifier of the model in use; the part after the "/" is the name passed
	# to the OpenAI-compatible server.
	MODEL_ID = "openai/gpt-4.1-nano"

	# Prefer the key from the environment and only fall back to the Colab secret
	# store when the environment has none.
	API_KEY = os.getenv("OPENAI_API_KEY_AG")
	if not API_KEY:
	    API_KEY = userdata.get('OPENAI_API_KEY_AG')
	# Never echo the key itself; only report whether one was found.
	print("OPENAI_API_KEY_AG is set" if API_KEY else "OPENAI_API_KEY_AG is not set")

	model = OpenAIServerModel(model_id=MODEL_ID.split("/")[-1], api_key=API_KEY)

	# Prompt templates are loaded here; whether they are used depends on what is
	# passed to the agent constructor.
	with open("system_prompt.yaml", 'r') as stream:
	    prompt_templates = yaml.safe_load(stream)

	# Tools handed to the agent, in the order they are registered.
	# read_python_file and read_excel_file are defined in this file but are
	# intentionally not registered here.
	_agent_tools = [
	    WebSearchTool(),
	    VisitWebpageTool(),
	    WikipediaSearchTool(),
	    arxiv_search,
	    FinalAnswerTool(),
	    extract_text_from_image,
	]

	# Modules passed as `additional_authorized_imports`.
	_authorized_imports = [
	    "pandas", "numpy", "requests", "os", "math", "sympy",
	    "scipy", "markdownify", "unicodedata", "stat", "datetime", "random",
	    "itertools", "statistics", "queue", "time", "collections", "re",
	]

	# The loaded prompt_templates are deliberately not passed to the agent.
	agent = CodeAgent(
	    model=model,
	    tools=_agent_tools,
	    planning_interval=3,
	    max_steps=10,
	    verbosity_level=-1,
	    additional_authorized_imports=_authorized_imports,
	    add_base_tools=True,
	)
	import json

	# Shared prompt fragments; the two prompts differ only in the sentence that
	# announces the file and in the trailing question/file lines.
	_ASSISTANT_INTRO = "You are a general AI assistant.You can use the provided tools and websearch for finding answers."
	_ANSWER_RULES = "Report your thoughts, and finish your answer. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."

	# Results file; named after the model part of MODEL_ID.
	_answers_path = f"data/answers_with_prompt_{MODEL_ID.split('/')[-1]}_with_file_content_handling.json"
	os.makedirs("data", exist_ok=True)

	questions = fetch_questions()
	answers = []
	for question in questions:
	    if question['file_name']:
	        # Download the attachment first so the path given to the agent exists.
	        fetch_file(question['task_id'], question['file_name'])
	        file_path = os.path.join("data/question_files", question['file_name'])
	        answer = agent.run(
	            f"""{_ASSISTANT_INTRO} I will ask you a question and provide you with a file_name. {_ANSWER_RULES}
	question:{question['question']}
	file_path:{file_path}""",
	        )
	    else:
	        answer = agent.run(
	            f"""{_ASSISTANT_INTRO} I will ask you a question. {_ANSWER_RULES}
	Question:{question['question']}""",
	        )
	    print(f"Task ID: {question['task_id']} \nQuestion: {question['question']} \nAnswer: {answer}")
	    print()
	    answers.append(
	        {
	            "task_id": question['task_id'],
	            # Stored as text so the record can always be serialised to JSON,
	            # whatever type the agent returned.
	            "submitted_answer": str(answer),
	        }
	    )
	    # Checkpoint after every question so a later failure does not lose the
	    # answers collected so far.
	    with open(_answers_path, "w") as f:
	        json.dump(answers, f, indent=2)

	print("Submitting answers...")
	submission = submit_answers(answers)
	# submit_answers returns None when the request failed; only report success
	# when a response actually came back.
	if submission is None:
	    print("Answer submission failed")
	else:
	    print("Answers submitted successfully")