Spaces:

GoReed
/

test_agent

Runtime error

File size: 8,658 Bytes

import os
import pandas as pd
from smolagents import (
    CodeAgent, 
    LiteLLMModel, 
    DuckDuckGoSearchTool, 
    FinalAnswerTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    WebSearchTool,
    tool,
    OpenAIServerModel
)
from langchain_community.document_loaders import ArxivLoader
from google.colab import userdata
    
import requests
import yaml
from dotenv import load_dotenv
load_dotenv()


def fetch_questions():
    """Fetch the evaluation questions from the scoring API.

    Sends a GET request to ``{DEFAULT_API_URL}/questions`` and decodes the
    JSON body of the response.

    Returns:
        The decoded JSON payload. The main script iterates over it and reads
        ``task_id``, ``question`` and ``file_name`` from every item. When the
        API answers with an empty payload an empty list is returned, so the
        caller can always iterate over the result.

    Raises:
        Exception: Any error raised while performing the request or decoding
            the response is logged and re-raised unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the whole run indefinitely.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        # Bare raise keeps the original traceback intact.
        raise

    if not questions_data:
        print("Fetched questions list is empty.")
        # Return an empty list rather than an (error message, None) tuple:
        # the caller loops over the result and indexes each item by key.
        return []

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
    
def fetch_file(task_id: str, file_name: str):
    """Download the attachment of a task and store it on disk.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    written to ``data/question_files/{file_name}``.

    Args:
        task_id: Identifier of the task whose attachment is downloaded.
        file_name: Name under which the file is stored inside
            ``data/question_files``.

    Returns:
        The raw bytes of the downloaded file.

    Raises:
        Exception: Any error raised while downloading or writing the file is
            logged and re-raised unchanged.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    target_dir = "data/question_files"
    try:
        # requests has no default timeout; without one a stalled download
        # would block the whole run indefinitely.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        # The target directory may not exist on a fresh checkout.
        os.makedirs(target_dir, exist_ok=True)
        with open(f"{target_dir}/{file_name}", "wb") as f:
            f.write(response.content)
        return response.content
    except Exception as e:
        print(f"Error fetching file: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    

def submit_answers(answers):
    """Submit the collected answers to the scoring API.

    Posts a JSON payload containing the fixed username ``"GoReed"``, the
    fixed agent code ``"test"`` and ``answers`` to
    ``{DEFAULT_API_URL}/submit``.

    Args:
        answers: The answers to submit. The main script passes a list of
            dicts with the keys ``task_id`` and ``submitted_answer``.

    Returns:
        The decoded JSON response on success, or ``None`` when the request
        failed. Submission is best-effort: errors are printed, not raised,
        so callers must check for ``None`` to detect a failure.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    request_payload = {
        "username": "GoReed",
        "agent_code": "test",
        "answers": answers,
    }
    try:
        # requests has no default timeout; without one a stalled connection
        # would block forever. Scoring may take a while, hence the generous
        # limit.
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=60,
        )
        response.raise_for_status()
        json_response = response.json()
    except Exception as e:
        print(f"Error submitting answers: {e}")
        # Explicit failure signal (previously an implicit fall-through).
        return None
    print(f"Response: {json_response}")
    return json_response

@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        The matching documents formatted as a single string. Each document is cut to its first 1000 characters.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    formatted_docs = []
    for doc in search_docs:
        metadata = doc.metadata
        # A hard metadata["source"] lookup raises KeyError when the loader
        # does not provide that key, so fall back to other identifiers.
        # NOTE(review): confirm which metadata keys the installed
        # langchain_community ArxivLoader actually emits.
        source = (
            metadata.get("source")
            or metadata.get("entry_id")
            or metadata.get("Title", "")
        )
        page = metadata.get("page", "")
        # The opening tag must not be self-closing because a matching
        # </Document> follows the content.
        formatted_docs.append(
            f'<Document source="{source}" page="{page}">\n'
            f"{doc.page_content[:1000]}\n</Document>"
        )

    # Return a plain string so the result matches the declared ``-> str``
    # return type of the tool (a dict was returned before).
    return "\n\n---\n\n".join(formatted_docs)

@tool
def read_python_file(file_name: str) -> str:
    """Read a python file and return the content.
    Args:
        file_name: The name of the file to read.
    Returns:
        The content of the file.
    """
    # NOTE(review): the docstring above is left untouched on purpose; the
    # smolagents ``@tool`` decorator presumably derives the tool description
    # from it, so rewording it would change what the model sees.
    # Files are looked up relative to the shared download directory.
    source_path = os.path.join("data/question_files", file_name)
    with open(source_path, "r") as source_file:
        contents = source_file.read()
    return contents

@tool
def read_excel_file(file_name: str) -> str:
    """Read an excel file with xlsx extension and return the content.
    Args:
        file_name: The name of the file to handle.
    Returns:
        The content of the file.
    """
    # NOTE(review): the docstring above is left untouched on purpose; the
    # smolagents ``@tool`` decorator presumably derives the tool description
    # from it, so rewording it would change what the model sees.
    # Files are looked up relative to the shared download directory.
    workbook_path = os.path.join("data/question_files", file_name)
    # Load the workbook with pandas and render the frame as plain text.
    return pd.read_excel(workbook_path).to_string()

@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).
    
    Args:
        image_path: Path to the image file
        
    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so a missing OCR dependency yields an error message
        # for the agent instead of breaking the import of this module.
        import pytesseract
        from PIL import Image

        # Open the image in a context manager so the underlying file handle
        # is released even when OCR fails (it was never closed before).
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        result = f"Extracted text from image:\n\n{text}"
        print(result)
        return result
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        # Errors are reported as text so the agent can react to them.
        return f"Error extracting text from image: {str(e)}"

# --- Model configuration ---------------------------------------------------
# MODEL_ID is written as "<provider>/<model>". The part after the slash is
# passed to the OpenAI client here and reused later in the name of the
# answers file.
MODEL_ID = "openai/gpt-4.1-nano"

# Read the API key from the environment (load_dotenv() has already run).
API_KEY = os.getenv("OPENAI_API_KEY_AG")
if not API_KEY:
    # Fall back to the Colab secret store, but only when running in Colab.
    # The guarded local import keeps this block usable outside Colab.
    try:
        from google.colab import userdata as _colab_userdata
    except ImportError:
        _colab_userdata = None
    if _colab_userdata is not None:
        API_KEY = _colab_userdata.get("OPENAI_API_KEY_AG")

# Never print the key itself: stdout ends up in logs. Only report whether a
# key was found.
print(f"OPENAI_API_KEY_AG configured: {bool(API_KEY)}")

# Alternative setup: a local Ollama model served through LiteLLM.
# model = LiteLLMModel(
#     model_id="ollama_chat/qwen2.5-coder:7b",
#     api_base="http://127.0.0.1:11434",
#     num_ctx=8192,
# )
model = OpenAIServerModel(model_id=MODEL_ID.split("/")[-1], api_key=API_KEY)

# Load the custom prompt templates. They are parsed here, but passing them to
# the agent is currently disabled (see the commented keyword argument below).
with open("system_prompt.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Tools handed to the agent explicitly, in registration order.
_agent_tools = [
    WebSearchTool(),
    VisitWebpageTool(),
    WikipediaSearchTool(),
    arxiv_search,
    FinalAnswerTool(),
    extract_text_from_image,
    #read_python_file,
    #read_excel_file
]

# Modules that agent-generated code is additionally allowed to import.
_authorized_imports = [
    "pandas",
    "numpy",
    "requests",
    "os",
    "math",
    "sympy",
    "scipy",
    "markdownify",
    "unicodedata",
    "stat",
    "datetime",
    "random",
    "itertools",
    "statistics",
    "queue",
    "time",
    "collections",
    "re",
]

# Build the code-writing agent around the configured model.
agent = CodeAgent(
    model=model,
    tools=_agent_tools,
    planning_interval=3,
    max_steps=10,
    verbosity_level=-1,
    additional_authorized_imports=_authorized_imports,
    add_base_tools=True,
    #prompt_templates=prompt_templates,
)
import json

# --- Main run: answer every question, save the answers, submit them --------

# Shared prompt fragments. The wording is kept exactly as sent before so the
# model receives the same instructions.
_PROMPT_PREAMBLE = "You are a general AI assistant.You can use the provided tools and websearch for finding answers. "
_ANSWER_RULES = "Report your thoughts, and finish your answer. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
# Leading whitespace that preceded the question/file lines in the prompt;
# preserved so the prompt text stays unchanged.
_FIELD_INDENT = " " * 16


def _build_prompt(question_text, file_path=None):
    """Return the agent prompt for a question, optionally naming its file."""
    if file_path:
        return (
            f"{_PROMPT_PREAMBLE}I will ask you a question and provide you with a file_name. {_ANSWER_RULES}\n"
            f"{_FIELD_INDENT}question:{question_text}\n"
            f"{_FIELD_INDENT}file_path:{file_path}"
        )
    return (
        f"{_PROMPT_PREAMBLE}I will ask you a question. {_ANSWER_RULES}\n"
        f"{_FIELD_INDENT}Question:{question_text}"
    )


questions = fetch_questions()
# Stop early on an empty or malformed payload instead of failing inside the
# loop with an unrelated error.
if not isinstance(questions, list) or not questions:
    raise SystemExit("No questions fetched; nothing to answer or submit.")

answers = []
for question in questions:
    file_name = question.get("file_name")
    file_path = None
    if file_name:
        # Download the attachment so the agent can open it from disk.
        fetch_file(question["task_id"], file_name)
        file_path = os.path.join("data/question_files", file_name)

    answer = agent.run(_build_prompt(question["question"], file_path))
    print(f"Task ID: {question['task_id']} \nQuestion: {question['question']} \nAnswer: {answer}")
    print()

    # The agent may return an arbitrary object. Keep plain strings and
    # numbers as they are and stringify everything else so the answers can
    # be serialised to JSON.
    # NOTE(review): confirm which answer types the scoring API accepts.
    if not isinstance(answer, (str, int, float)):
        answer = str(answer)
    answers.append(
        {
            "task_id": question["task_id"],
            "submitted_answer": answer,
        }
    )

# Make sure the output directory exists; a failure here would otherwise
# discard the results of the whole run.
os.makedirs("data", exist_ok=True)
with open(f"data/answers_with_prompt_{MODEL_ID.split('/')[-1]}_with_file_content_handling.json", "w") as f:
    json.dump(answers, f, indent=2)

print("Submitting answers...")
# submit_answers reports failure by returning None, so only claim success
# when a response actually came back.
if submit_answers(answers) is None:
    print("Answer submission failed; see the error above.")
else:
    print("Answers submitted successfully")