Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| from smolagents import ( | |
| CodeAgent, | |
| LiteLLMModel, | |
| DuckDuckGoSearchTool, | |
| FinalAnswerTool, | |
| VisitWebpageTool, | |
| WikipediaSearchTool, | |
| WebSearchTool, | |
| tool, | |
| OpenAIServerModel | |
| ) | |
| from langchain_community.document_loaders import ArxivLoader | |
| from google.colab import userdata | |
| import requests | |
| import yaml | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def fetch_questions():
    """Fetch the list of questions from the scoring API.

    Returns:
        The decoded JSON payload of the ``/questions`` endpoint, or an empty
        list when the endpoint returns an empty payload.

    Raises:
        Exception: Any error raised while performing the request or decoding
            the response is printed and re-raised unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        raise

    if not questions_data:
        print("Fetched questions list is empty.")
        # Always hand back an iterable of questions: the previous
        # (message, None) tuple was iterated by the caller as if it were
        # question records.
        return []

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
def fetch_file(task_id: str, file_name: str):
    """Download the content served at ``/files/{task_id}`` and save it locally.

    The bytes are written to ``data/question_files/<file_name>``; the
    directory is created when it does not exist yet.

    Args:
        task_id: Identifier used to build the download URL.
        file_name: Name of the local file to write inside
            ``data/question_files``.

    Returns:
        The raw bytes of the downloaded file.

    Raises:
        Exception: Any request or file-system error is printed and re-raised
            unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    target_dir = "data/question_files"
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        file_content = response.content
        # open() does not create missing parent directories, so make sure the
        # destination exists before writing.
        os.makedirs(target_dir, exist_ok=True)
        with open(os.path.join(target_dir, file_name), "wb") as f:
            f.write(file_content)
        return file_content
    except Exception as e:
        print(f"Error fetching file: {e}")
        raise
def submit_answers(answers, username="GoReed", agent_code="test"):
    """Submit the collected answers to the scoring API.

    Submission is best-effort: errors are printed and reported to the caller
    through a ``None`` return value instead of an exception.

    Args:
        answers: The answers payload, sent as the ``"answers"`` field.
        username: Value sent as the ``"username"`` field.
        agent_code: Value sent as the ``"agent_code"`` field.

    Returns:
        The decoded JSON response on success, or ``None`` when the request
        or the response decoding failed.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    request_payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    try:
        # ``json=`` serialises the payload and sets the Content-Type header.
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=60,
        )
        response.raise_for_status()
        json_response = response.json()
    except Exception as e:
        print(f"Error submitting answers: {e}")
        # Explicit failure signal so callers can tell success from failure.
        return None

    print(f"Response: {json_response}")
    return json_response
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        The matching documents rendered as ``<Document ...>`` blocks (each
        truncated to its first 1000 characters) and joined by ``---``
        separators.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    rendered_docs = []
    for doc in search_docs:
        # Look the label up defensively: an unguarded ["source"] lookup
        # raises KeyError when the loader does not provide that key.
        source = doc.metadata.get("source", doc.metadata.get("Title", ""))
        page = doc.metadata.get("page", "")
        rendered_docs.append(
            f'<Document source="{source}" page="{page}"/>\n'
            f"{doc.page_content[:1000]}\n</Document>"
        )
    # Return the text itself so the result matches the declared ``str``
    # return type (the previous version returned a one-key dict).
    return "\n\n---\n\n".join(rendered_docs)
def read_python_file(file_name: str) -> str:
    """Read a python file and return the content.

    Args:
        file_name: The name of the file to read, relative to
            ``data/question_files``.

    Returns:
        The content of the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    base_path = "data/question_files"
    # Decode explicitly as UTF-8 (the default source encoding for Python
    # files) instead of relying on the platform's locale encoding.
    with open(os.path.join(base_path, file_name), "r", encoding="utf-8") as f:
        return f.read()
def read_excel_file(file_name: str) -> str:
    """Load an ``.xlsx`` workbook and render it as plain text.

    Args:
        file_name: Name of the workbook, looked up inside
            ``data/question_files``.

    Returns:
        The string rendering of the DataFrame produced by ``pd.read_excel``.
    """
    workbook_path = os.path.join("data/question_files", file_name)
    return pd.read_excel(workbook_path).to_string()
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file.

    Returns:
        The extracted text prefixed with a short header, or an error message
        when pytesseract is missing or the extraction fails.
    """
    try:
        # Imported lazily so the rest of the script works without OCR support.
        import pytesseract
        from PIL import Image

        # The context manager closes the underlying file handle once the text
        # has been extracted, instead of leaking it.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        print(f"Extracted text from image:\n\n{text}")
        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
# Model identifier in "<provider>/<model>" form. The part after the slash is
# the model name passed to the OpenAI endpoint and is also used to name the
# answers file written at the end of the run.
MODEL_ID = "openai/gpt-4.1-nano"

# Prefer the environment variable and fall back to the Colab secret store
# (previously the secret was fetched but never used). The key is deliberately
# NOT printed: credentials must never end up in logs.
API_KEY = os.getenv("OPENAI_API_KEY_AG") or userdata.get("OPENAI_API_KEY_AG")

model = OpenAIServerModel(model_id=MODEL_ID.split("/")[-1], api_key=API_KEY)

# Prompt templates are loaded here; the agent construction below currently
# leaves its ``prompt_templates`` argument commented out.
with open("system_prompt.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Build the code-writing agent used to answer every question.
agent = CodeAgent(
    model=model,
    tools=[
        WebSearchTool(),
        VisitWebpageTool(),
        WikipediaSearchTool(),
        # Plain functions are not valid smolagents tools; wrap them with
        # ``tool`` so the agent receives proper Tool instances.
        tool(arxiv_search),
        FinalAnswerTool(),
        tool(extract_text_from_image),
        #read_python_file,
        #read_excel_file
    ],
    planning_interval=3,
    max_steps=10,
    verbosity_level=-1,
    # NOTE(review): "os" and "requests" give generated code file-system and
    # network access; confirm this is intended for the deployment target.
    additional_authorized_imports=[
        "pandas",
        "numpy",
        "requests",
        "os",
        "math",
        "sympy",
        "scipy",
        "markdownify",
        "unicodedata",
        "stat",
        "datetime",
        "random",
        "itertools",
        "statistics",
        "queue",
        "time",
        "collections",
        "re",
    ],
    add_base_tools=True,
    #prompt_templates=prompt_templates,
)
import json

# Shared prompt fragments; the wording is kept exactly as before so the
# agent's instructions do not change.
_PROMPT_INTRO = (
    "You are a general AI assistant.You can use the provided tools and "
    "websearch for finding answers. "
)
_ANSWER_RULES = (
    "Report your thoughts, and finish your answer. YOUR FINAL ANSWER should "
    "be a number OR as few words as possible OR a comma separated list of "
    "numbers and/or strings. If you are asked for a number, don't use comma "
    "to write your number neither use units such as $ or percent sign unless "
    "specified otherwise. If you are asked for a string, don't use articles, "
    "neither abbreviations (e.g. for cities), and write the digits in plain "
    "text unless specified otherwise. If you are asked for a comma separated "
    "list, apply the above rules depending of whether the element to be put "
    "in the list is a number or a string."
)


def _build_prompt(question_text, file_path=None):
    """Return the agent prompt for one question, naming file_path when given."""
    if file_path:
        return (
            f"{_PROMPT_INTRO}I will ask you a question and provide you with a file_name. "
            f"{_ANSWER_RULES}\n"
            f"question:{question_text}\n"
            f"file_path:{file_path}"
        )
    return (
        f"{_PROMPT_INTRO}I will ask you a question. {_ANSWER_RULES}\n"
        f"Question:{question_text}"
    )


questions = fetch_questions()
# Stop early when there is nothing usable to iterate over, instead of failing
# later with an obscure indexing error.
if not isinstance(questions, list) or not questions:
    raise SystemExit("No questions were fetched; nothing to answer or submit.")

answers = []
for question in questions:
    file_path = None
    if question.get('file_name'):
        # Download the attachment so the agent can open it from disk.
        fetch_file(question['task_id'], question['file_name'])
        file_path = os.path.join("data/question_files", question['file_name'])

    answer = agent.run(_build_prompt(question['question'], file_path))

    print(f"Task ID: {question['task_id']} \nQuestion: {question['question']} \nAnswer: {answer}")
    print()
    answers.append(
        {
            "task_id": question['task_id'],
            # The agent may return any Python object; store its text form so
            # the answers can always be serialised to JSON.
            "submitted_answer": str(answer),
        }
    )

# Persist the answers before submitting so a failed submission loses nothing.
os.makedirs("data", exist_ok=True)
answers_path = f"data/answers_with_prompt_{MODEL_ID.split('/')[-1]}_with_file_content_handling.json"
with open(answers_path, "w", encoding="utf-8") as f:
    json.dump(answers, f, indent=2)

print("Submitting answers...")
submission_result = submit_answers(answers)
# submit_answers returns None when the request failed; only report success
# when a response was actually received.
if submission_result is None:
    print("Answer submission failed; see the error above.")
else:
    print("Answers submitted successfully")