Spaces:

GoReed
/

test_agent

Runtime error

File size: 6,194 Bytes

# agent.py

import os
import json
import pandas as pd
from smolagents import (
    CodeAgent, 
    LiteLLMModel, 
    DuckDuckGoSearchTool, 
    FinalAnswerTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    WebSearchTool,
    tool,
    OpenAIServerModel
)
from langchain_community.document_loaders import ArxivLoader
from dotenv import load_dotenv
import requests
import yaml

load_dotenv()

# Custom tools
@tool
def arxiv_search(query: str) -> str:
    """
    Search Arxiv for a query and return up to 3 documents.

    Args:
        query (str): The search query to run on Arxiv.

    Returns:
        str: Formatted Arxiv document summaries, or a short message when the search fails or finds nothing.
    """
    # Report failures as text (like the other tools here) so the agent can
    # react to them instead of the step aborting on an exception.
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    except Exception as e:
        return f"Error searching Arxiv: {e}"

    if not search_docs:
        return "No Arxiv results found."

    formatted = []
    for doc in search_docs:
        meta = doc.metadata
        # ArxivLoader's default metadata carries Published/Title/Authors/Summary
        # and may not include "source"; fall back instead of raising KeyError.
        source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
        formatted.append(
            f'<Document source="{source}" page="{meta.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        )
    return "\n\n---\n\n".join(formatted)

@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract.

    Args:
        image_path (str): Path to the image file.

    Returns:
        str: Extracted text or error message.
    """
    try:
        # Imported lazily so the module still loads when the OCR stack is absent.
        import pytesseract
        from PIL import Image

        # The context manager closes the underlying file handle once OCR is
        # done; a bare Image.open() would leave it open.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed."
    except Exception as e:
        # Tool boundary: hand the failure back to the agent as text.
        return f"Error extracting text: {e}"
@tool
def read_python_file(file_name: str) -> str:
    """
    Read a Python (.py) file and return its content.

    Args:
        file_name (str): The file name of the Python script to read, either a bare name inside data/question_files or a path to the file.

    Returns:
        str: The contents of the file as a string, or an error message if it cannot be read.
    """
    base_path = "data/question_files"
    path = os.path.join(base_path, file_name)
    # The agent prompt hands out the full "data/question_files/<name>" path, so
    # joining it with base_path again would point at a non-existent doubled
    # path. Fall back to the argument as given when only that exists.
    if not os.path.exists(path) and os.path.exists(file_name):
        path = file_name
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Report as text, like the other tools, so the agent can recover.
        return f"Error reading Python file: {e}"
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Retrieves the transcript for a given YouTube video.

    Args:
        video_id: The ID of the YouTube video.

    Returns:
        str: The transcript with one "[<start>s] <text>" line per segment, or an error message if it cannot be retrieved.
    """
    try:
        # Imported here because the module never imports it at the top level;
        # without this the call below always failed with a NameError.
        from youtube_transcript_api import YouTubeTranscriptApi
    except ImportError:
        return "Error: youtube-transcript-api is not installed."

    try:
        # NOTE(review): newer youtube-transcript-api releases appear to replace
        # this static call with an instance-based API — confirm against the
        # installed version.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Flatten the segment dicts into text so the return value matches the
        # declared str type.
        return "\n".join(
            f"[{float(segment.get('start', 0.0)):.1f}s] {segment['text']}"
            for segment in transcript
        )
    except Exception as e:
        return f"Error getting transcript: {e}"

@tool
def read_excel_file(file_name: str) -> str:
    """
    Read an Excel (.xlsx) file and return its tabular content as a string.

    Args:
        file_name (str): The Excel file to read, either a bare name inside data/question_files or a path to the file.

    Returns:
        str: The content of the Excel file in plain text format, or an error message if it cannot be read.
    """
    base_path = "data/question_files"
    path = os.path.join(base_path, file_name)
    # The agent prompt hands out the full "data/question_files/<name>" path, so
    # joining it with base_path again would point at a non-existent doubled
    # path. Fall back to the argument as given when only that exists.
    if not os.path.exists(path) and os.path.exists(file_name):
        path = file_name
    try:
        df = pd.read_excel(path)
    except Exception as e:
        # Tool boundary: missing file, missing Excel engine and corrupt
        # workbooks all come back to the agent as text.
        return f"Error reading Excel file: {e}"
    return df.to_string()


# Model and agent setup
# The key is read from the environment (load_dotenv() above may populate it
# from a .env file); it is None when OPENAI_API_KEY_AG is unset.
API_KEY = os.getenv("OPENAI_API_KEY_AG")
MODEL_ID = "openai/gpt-4.1-nano"

# The model is addressed by its bare name (no "openai/" prefix). Derive it from
# MODEL_ID so the name is defined in exactly one place.
model = OpenAIServerModel(model_id=MODEL_ID.split("/", 1)[-1], api_key=API_KEY)

agent = CodeAgent(
    model=model,
    tools=[
        WebSearchTool(),
        VisitWebpageTool(),
        WikipediaSearchTool(),
        arxiv_search,
        FinalAnswerTool(),
        extract_text_from_image,
        read_excel_file,
        read_python_file,
        get_youtube_transcript,
    ],
    planning_interval=3,
    max_steps=10,
    verbosity_level=-1,
    # NOTE(review): authorizing "os" (and "requests") lets agent-generated code
    # reach the filesystem, environment and network — confirm this is intended.
    additional_authorized_imports=[
        "pandas", "numpy", "requests", "os", "math", "sympy", "scipy",
        "markdownify", "unicodedata", "stat", "datetime", "random", "itertools",
        "statistics", "queue", "time", "collections", "re"
    ],
    # NOTE(review): presumably overlaps with the web search / visit-webpage /
    # final-answer tools listed explicitly above — verify it is still needed.
    add_base_tools=True
)

def fetch_questions(timeout: float = 30.0):
    """Download the question list from the scoring API.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload of the ``/questions`` endpoint, or an empty
        list when the request or the JSON decoding fails (the error is printed).
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    try:
        # requests never times out on its own; without an explicit timeout a
        # stalled server would block this call indefinitely.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: network/HTTP failures; ValueError: body is not JSON.
        print(f"Error fetching questions: {e}")
        return []

def fetch_file(task_id: str, file_name: str, timeout: float = 30.0):
    """Download the attachment of a task into ``data/question_files``.

    Args:
        task_id: Identifier used in the ``/files/{task_id}`` endpoint.
        file_name: Name to store the file under; only its final path
            component is used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the written file (``data/question_files/<name>``), or
        ``None`` when the download or the write fails (the error is printed).
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    # file_name originates from the remote question payload; keep only the last
    # component so a value such as "../../x" cannot escape the download folder.
    safe_name = os.path.basename(str(file_name))
    if safe_name in ("", ".", ".."):
        print(f"Error fetching file: invalid file name {file_name!r}")
        return None
    try:
        # Explicit timeout: requests would otherwise wait forever on a stalled
        # connection.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=timeout)
        response.raise_for_status()
        os.makedirs("data/question_files", exist_ok=True)
        path = f"data/question_files/{safe_name}"
        with open(path, "wb") as f:
            f.write(response.content)
        return path
    except (requests.RequestException, OSError) as e:
        print(f"Error fetching file: {e}")
        return None

def run_agent_on_question(q):
    """Run the agent on one question record and return its answer.

    Args:
        q: Mapping with a "question" entry; when it also has a truthy
            "file_name", the attachment is downloaded via ``fetch_file`` using
            its "task_id" and the resulting path is added to the prompt.

    Returns:
        The agent output: stripped when it is a string, the "final_answer"
        entry (or the stringified dict) when it is a dict, ``str(output)``
        otherwise.
    """
    # Download the attachment first (if any) so its path can be quoted in the
    # prompt; a failed download yields None, which is rendered as "None".
    attachment_lines = []
    if q.get("file_name"):
        downloaded = fetch_file(q["task_id"], q["file_name"])
        attachment_lines.append(f"file_path: {downloaded}")

    prompt = "\n".join([
        "You are a general AI assistant. Use tools and web search as needed.",
        f"Question: {q['question']}",
        *attachment_lines,
        "YOUR FINAL ANSWER should be a number OR few words OR comma-separated values. Follow instructions strictly.",
    ])

    result = agent.run(prompt)

    # Normalise whatever the agent returned into the answer value.
    if isinstance(result, str):
        return result.strip()
    if isinstance(result, dict):
        return result.get("final_answer", str(result))
    return str(result)

def submit_answers(
    answers,
    username: str = "GoReed",
    agent_code: str = "test_answers_agent_code",
    timeout: float = 60.0,
):
    """Post the collected answers to the scoring API and print the outcome.

    Args:
        answers: Answer payload placed under the "answers" key of the request.
        username: Account name sent under "username".
        agent_code: Value sent under "agent_code".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Success and failure are reported on stdout.
    """
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    request_payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    try:
        # Explicit timeout: requests would otherwise wait forever on a stalled
        # connection.
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=request_payload,
            timeout=timeout,
        )
        response.raise_for_status()
        print("✅ Submission success:", response.json())
    except requests.exceptions.HTTPError as http_err:
        print(f"❌ HTTP Error: {http_err}")
        # Read the body from the exception itself rather than relying on the
        # local variable having been bound before the failure.
        if http_err.response is not None:
            print("Response text:", http_err.response.text)
    except Exception as e:
        # Top-level boundary: report anything else (network, bad JSON) and
        # return instead of crashing the run.
        print(f"❌ Submission Error: {e}")