"""Run a smolagents ``CodeAgent`` over the agents-course Unit 4 questions.

The script fetches the question list from the scoring API, downloads any file
attached to a question, asks the agent each question, stores the answers as
JSON under ``data/`` and finally submits them to the scoring API.
"""

import json
import os

import pandas as pd
import requests
import yaml
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader
from smolagents import (
    CodeAgent,
    LiteLLMModel,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    WebSearchTool,
    tool,
    OpenAIServerModel,
)

try:
    # Only importable inside Google Colab; locally the key comes from .env.
    from google.colab import userdata
except ImportError:
    userdata = None

load_dotenv()

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTION_FILES_DIR = "data/question_files"
API_KEY_NAME = "OPENAI_API_KEY_AG"
# Seconds to wait for the scoring API before giving up instead of hanging.
REQUEST_TIMEOUT = 60


def _local_file_path(file_name: str) -> str:
    """Return the local path under ``QUESTION_FILES_DIR`` for *file_name*.

    Only the base name is used, so a name received from the API cannot point
    outside the download directory.
    """
    return os.path.join(QUESTION_FILES_DIR, os.path.basename(file_name))


def fetch_questions():
    """Fetch the questions from the scoring API.

    Returns:
        The decoded, non-empty JSON payload of ``GET /questions``. The main
        loop below treats it as a list of question dicts.

    Raises:
        requests.RequestException: If the request fails or returns an error
            status.
        ValueError: If the body is not valid JSON or the payload is empty.
    """
    try:
        response = requests.get(
            f"{DEFAULT_API_URL}/questions", timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        questions_data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching questions: {e}")
        raise

    if not questions_data:
        # Fail loudly: returning a placeholder here would only crash later,
        # when the caller iterates over something that is not a question list.
        print("Fetched questions list is empty.")
        raise ValueError("Fetched questions list is empty or invalid format.")

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data


def fetch_file(task_id: str, file_name: str):
    """Download the file attached to a task and store it locally.

    Args:
        task_id: Identifier used in the ``/files/{task_id}`` endpoint.
        file_name: Name under which the file is saved in ``QUESTION_FILES_DIR``.

    Returns:
        The raw bytes of the downloaded file.

    Raises:
        requests.RequestException: If the download fails.
        OSError: If the file cannot be written.
    """
    try:
        response = requests.get(
            f"{DEFAULT_API_URL}/files/{task_id}", timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        os.makedirs(QUESTION_FILES_DIR, exist_ok=True)
        with open(_local_file_path(file_name), "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        print(f"Error fetching file: {e}")
        raise
    return response.content


def submit_answers(answers):
    """Submit the collected answers to the scoring API.

    Args:
        answers: List of ``{"task_id": ..., "submitted_answer": ...}`` dicts.

    Returns:
        The decoded JSON response, or ``None`` when the submission failed
        (the error is printed, not raised).
    """
    request_payload = {
        "username": "GoReed",
        "agent_code": "test",
        "answers": answers,
    }
    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        json_response = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error submitting answers: {e}")
        return None

    print(f"Response: {json_response}")
    return json_response


@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return at most 3 results.

    Args:
        query: The search query.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Return plain text, as the annotated return type promises; each result is
    # truncated to its first 1000 characters.
    return "\n\n---\n\n".join(
        f"\n{doc.page_content[:1000]}\n" for doc in search_docs
    )


@tool
def read_python_file(file_name: str) -> str:
    """Read a python file and return the content.

    Args:
        file_name: The name of the file to read.

    Returns:
        The content of the file.
    """
    with open(
        os.path.join(QUESTION_FILES_DIR, file_name), "r", encoding="utf-8"
    ) as f:
        return f.read()


@tool
def read_excel_file(file_name: str) -> str:
    """Read an excel file with xlsx extension and return the content.

    Args:
        file_name: The name of the file to handle.

    Returns:
        The content of the file.
    """
    df = pd.read_excel(os.path.join(QUESTION_FILES_DIR, file_name))
    return df.to_string()


@tool
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so the rest of the script works without OCR support.
        import pytesseract
        from PIL import Image

        image = Image.open(image_path)
        text = pytesseract.image_to_string(image)
        print(f"Extracted text from image:\n\n{text}")
        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        # Deliberately broad: the agent gets an error message back instead of
        # a crashed tool call.
        return f"Error extracting text from image: {e}"


def _load_api_key(name: str = API_KEY_NAME):
    """Return the API key from the environment, else from Colab secrets.

    Returns ``None`` when the variable is unset and Colab is not available.
    """
    key = os.getenv(name)
    if key is None and userdata is not None:
        key = userdata.get(name)
    return key


# The part after the slash is the OpenAI model name and is also used in the
# name of the answers file.
MODEL_ID = "openai/gpt-4.1-nano"
API_KEY = _load_api_key()
# Never print the key itself; only report whether one was found.
print(f"{API_KEY_NAME} found: {API_KEY is not None}")

model = OpenAIServerModel(model_id=MODEL_ID.split("/")[-1], api_key=API_KEY)

# Optional custom prompt templates. They are loaded when the file exists but
# are currently not passed to the agent (see the commented argument below).
prompt_templates = None
if os.path.exists("system_prompt.yaml"):
    with open("system_prompt.yaml", "r") as stream:
        prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[
        WebSearchTool(),
        VisitWebpageTool(),
        WikipediaSearchTool(),
        arxiv_search,
        FinalAnswerTool(),
        extract_text_from_image,
        # read_python_file,
        # read_excel_file
    ],
    planning_interval=3,
    max_steps=10,
    verbosity_level=-1,
    additional_authorized_imports=[
        "pandas",
        "numpy",
        "requests",
        "os",
        "math",
        "sympy",
        "scipy",
        "markdownify",
        "unicodedata",
        "stat",
        "datetime",
        "random",
        "itertools",
        "statistics",
        "queue",
        "time",
        "collections",
        "re",
    ],
    add_base_tools=True,
    # prompt_templates=prompt_templates,
)

# Answer-format rules shared by both prompt variants.
_ANSWER_RULES = (
    "YOUR FINAL ANSWER should be a number OR as few words as possible OR a "
    "comma separated list of numbers and/or strings. If you are asked for a "
    "number, don't use comma to write your number neither use units such as "
    "$ or percent sign unless specified otherwise. If you are asked for a "
    "string, don't use articles, neither abbreviations (e.g. for cities), "
    "and write the digits in plain text unless specified otherwise. If you "
    "are asked for a comma separated list, apply the above rules depending "
    "of whether the element to be put in the list is a number or a string."
)


def build_prompt(question_text: str, file_path=None) -> str:
    """Build the task prompt handed to the agent.

    Args:
        question_text: The question to answer.
        file_path: Local path of the attached file, or ``None`` when the
            question has no attachment.

    Returns:
        The full prompt; the wording of both variants is kept as it was in
        the original inline prompts.
    """
    if file_path:
        intro = (
            "You are a general AI assistant.You can use the provided tools "
            "and websearch for finding answers. I will ask you a question "
            "and provide you with a file_name. Report your thoughts, and "
            "finish your answer. \n"
        )
        tail = f" question:{question_text} file_path:{file_path}"
    else:
        intro = (
            "You are a general AI assistant.You can use the provided tools "
            "and websearch for finding answers. I will ask you a question. "
            "Report your thoughts, and finish your answer. "
        )
        tail = f" Question:{question_text}"
    return intro + _ANSWER_RULES + tail


if __name__ == "__main__":
    questions = fetch_questions()
    answers = []

    for question in questions:
        file_name = question.get("file_name")
        if file_name:
            # Download the attachment so the agent's tools can open it.
            fetch_file(question["task_id"], file_name)
            prompt = build_prompt(
                question["question"], _local_file_path(file_name)
            )
        else:
            prompt = build_prompt(question["question"])

        answer = agent.run(prompt)

        print(
            f"Task ID: {question['task_id']} \n"
            f"Question: {question['question']} \n"
            f"Answer: {answer}"
        )
        print()
        answers.append(
            {
                "task_id": question["task_id"],
                "submitted_answer": answer,
            }
        )

    # Persist the answers before submitting so a failed submission loses nothing.
    os.makedirs("data", exist_ok=True)
    output_path = (
        f"data/answers_with_prompt_{MODEL_ID.split('/')[-1]}"
        "_with_file_content_handling.json"
    )
    with open(output_path, "w") as f:
        json.dump(answers, f, indent=2)

    print("Submitting answers...")
    if submit_answers(answers) is not None:
        print("Answers submitted successfully")
    else:
        print("Answer submission failed")