# Spaces: agents-course / Final_Assignment_Template (Running)
# File size: 14,112 Bytes

import os
import gradio as gr
import requests
import pandas as pd
import re
import json
import math
import unicodedata
from datetime import datetime

# --- LangGraph + LangChain imports ---
from langgraph.prebuilt import create_react_agent
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.messages import SystemMessage

# --- Constants ---
# Base URL of the scoring service; run_and_submit_all builds its
# /questions and /submit endpoints from this value.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# ─────────────────────────────────────────────
#  TOOLS
# ─────────────────────────────────────────────

@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo. Use for current events, facts, and general knowledge."""
    # A fresh DuckDuckGoSearchRun is created per call; any failure is handed
    # back as text so the caller always receives a string.
    try:
        return DuckDuckGoSearchRun().run(query)
    except Exception as exc:
        return f"Search error: {exc}"


@tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for encyclopedic knowledge, historical facts, biographies, science."""
    # Wrapper settings used for every lookup: two results, 3000 characters max.
    wrapper_options = {"top_k_results": 2, "doc_content_chars_max": 3000}
    try:
        return WikipediaAPIWrapper(**wrapper_options).run(query)
    except Exception as exc:
        # Failures are returned as text rather than raised.
        return f"Wikipedia error: {exc}"


@tool
def python_repl(code: str) -> str:
    """
    Execute Python code for math calculations, data processing, logic.
    Always print() the final result.
    Example: print(2 + 2)
    """
    # Runs `code` with exec() and returns whatever it printed to stdout.
    #
    # Returns:
    #   - the stripped stdout text on success,
    #   - a hint to use print() when nothing was printed,
    #   - "Code error: <ExceptionType>: <message>" on failure, preceded by any
    #     output printed before the failure.
    #
    # NOTE(review): exec() runs model-written code inside this process with the
    # real builtins available; this is not a sandbox.
    import contextlib
    import datetime
    import io
    import json
    import math
    import re
    import unicodedata

    # Names pre-loaded for the executed snippet. `datetime` here is the module
    # (imported locally above), not the class.
    namespace = {
        "math": math, "json": json, "re": re,
        "unicodedata": unicodedata, "datetime": datetime,
        "__builtins__": __builtins__,
    }
    captured = io.StringIO()
    try:
        # redirect_stdout restores sys.stdout on exit, even when exec() raises.
        with contextlib.redirect_stdout(captured):
            exec(code, namespace)
    except (Exception, SystemExit) as e:
        # SystemExit is caught too so that exit()/sys.exit() inside the snippet
        # is reported as an error instead of propagating out of the tool.
        partial = captured.getvalue().strip()
        # Include the exception type: str(KeyError('x')) alone is just "'x'".
        message = f"Code error: {type(e).__name__}: {e}"
        return f"{partial}\n{message}" if partial else message

    output = captured.getvalue().strip()
    return output if output else "Code executed (no output). Use print() to see results."


@tool
def read_file_from_url(url: str) -> str:
    """
    Download and read a file from a URL (txt, csv, json, py, etc.).
    Returns the file content as text.
    """
    # Only responses whose Content-Type mentions "text" or "json" are returned,
    # truncated to the first 5000 characters; anything else is reported as
    # binary. HTTP and network failures come back as an error string.
    try:
        reply = requests.get(url, timeout=15)
        reply.raise_for_status()
        mime = reply.headers.get("Content-Type", "")
        looks_textual = any(marker in mime for marker in ("text", "json"))
        if not looks_textual:
            return f"Binary file ({mime}), cannot read as text."
        return reply.text[:5000]
    except Exception as exc:
        return f"Error reading file: {exc}"


@tool
def get_task_file(task_id: str) -> str:
    """
    Fetch the file associated with a GAIA task by its task_id.
    Returns file content or description.
    """
    # Requests <DEFAULT_API_URL>/files/<task_id>.
    #
    # Returns:
    #   - the first 5000 characters for text/JSON responses,
    #   - a bracketed placeholder naming the content type for image, audio
    #     and other responses,
    #   - a "No file found" message on 404,
    #   - a "Could not fetch" message carrying the status code for any other
    #     non-200 response,
    #   - an "Error fetching task file" message when the request itself fails.
    try:
        # Use the module-level base URL so the endpoint is defined in one place.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
        status = response.status_code

        # presumably the service answers 404 when a task has no attachment -
        # confirm against the scoring API
        if status == 404:
            return f"No file found for task {task_id}"
        if status != 200:
            # Server errors and rate limits must not look like "no attachment".
            return f"Could not fetch file for task {task_id} (HTTP {status})"

        content_type = response.headers.get("Content-Type", "")
        if "text" in content_type or "json" in content_type:
            return response.text[:5000]
        if "image" in content_type:
            return f"[Image file attached to task {task_id} - content-type: {content_type}]"
        if "audio" in content_type:
            return f"[Audio file attached to task {task_id} - content-type: {content_type}]"
        return f"[File attached: {content_type}]"
    except Exception as e:
        return f"Error fetching task file: {e}"


@tool
def calculator(expression: str) -> str:
    """
    Evaluate a simple math expression safely.
    Examples: '2 + 2', '100 * 1.07 ** 5', 'math.sqrt(144)'
    """
    # The expression sees only the `math` module; builtins are replaced by an
    # empty mapping.
    # NOTE(review): eval() on model-supplied text - emptying __builtins__
    # narrows what is reachable but is not a real sandbox.
    eval_globals = {"math": math, "__builtins__": {}}
    try:
        return str(eval(expression, eval_globals))
    except Exception as exc:
        return f"Calculation error: {exc}. Try python_repl for complex code."


# ─────────────────────────────────────────────
#  SYSTEM PROMPT
# ─────────────────────────────────────────────

# Instruction text handed to create_react_agent (as state_modifier) in
# BasicAgent.__init__. It asks the model to finish with a
# "FINAL ANSWER: ..." line, which BasicAgent._extract_final_answer looks for.
SYSTEM_PROMPT = """You are a precise, expert AI assistant solving GAIA benchmark questions.

GAIA questions require careful reasoning and often multiple steps. Follow these rules:

## Answer Format (CRITICAL)
- Your FINAL answer must be the **bare minimum**: a number, a word, a name, a date, a short phrase.
- NO explanations, NO punctuation at the end, NO "The answer is...", NO sentences.
- Examples of correct final answers: `42`, `Marie Curie`, `Paris`, `1969`, `blue`, `$14.50`
- For lists, separate items with commas: `item1, item2, item3`

## Strategy
1. **Read carefully** – identify exactly what is being asked.
2. **Use tools** – search the web, Wikipedia, or run code to verify facts.
3. **Verify numbers** – always double-check calculations with the calculator or python_repl.
4. **Check for files** – if the question mentions an attachment or file, use get_task_file.
5. **Be specific** – GAIA answers are exact; approximate answers are wrong.

## Tool Usage
- Use `web_search` for recent events, facts, and general knowledge.
- Use `wikipedia_search` for biographies, history, science.
- Use `python_repl` for calculations, data manipulation, logic puzzles.
- Use `calculator` for quick arithmetic.
- Use `get_task_file` when a question refers to an attached file or document.

## Final Answer
Always end your response with:
FINAL ANSWER: <your answer here>
"""

# ─────────────────────────────────────────────
#  AGENT
# ─────────────────────────────────────────────

class BasicAgent:
    """ReAct agent that answers one question per call.

    Construction wires a Hugging Face hosted Llama 3.3 70B chat model to the
    tool functions defined in this module via ``create_react_agent``. Calling
    the instance with a question string returns the extracted short answer,
    or an ``"Error: ..."`` string when the agent run raises.
    """

    def __init__(self):
        """Create the chat model, register the tools and build the agent graph.

        Reads the ``HF_TOKEN`` environment variable and passes it to the
        Hugging Face endpoint as the API token.
        """
        print("Initializing LangGraph ReAct Agent with Llama 3.3 70B...")

        hf_token = os.getenv("HF_TOKEN")

        # NOTE(review): temperature is set together with do_sample=False;
        # confirm which of the two the endpoint actually honours.
        llm_endpoint = HuggingFaceEndpoint(
            repo_id="meta-llama/Llama-3.3-70B-Instruct",
            huggingfacehub_api_token=hf_token,
            task="text-generation",
            max_new_tokens=1024,
            temperature=0.1,
            do_sample=False,
        )
        llm = ChatHuggingFace(llm=llm_endpoint)

        tools = [
            web_search,
            wikipedia_search,
            python_repl,
            calculator,
            read_file_from_url,
            get_task_file,
        ]

        # NOTE(review): newer langgraph releases appear to take the system
        # prompt as `prompt=`; confirm against the pinned langgraph version.
        self.agent = create_react_agent(
            model=llm,
            tools=tools,
            state_modifier=SYSTEM_PROMPT,
        )

        print("Agent ready.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return the extracted answer.

        Exceptions raised while invoking the agent or extracting the answer
        are caught, logged, and returned as ``"Error: <message>"`` so that the
        caller always gets a string back.
        """
        print(f"\n[AGENT] Question: {question[:100]}...")
        try:
            result = self.agent.invoke({
                "messages": [("user", question)]
            })

            # The last message in the returned state is taken as the reply.
            last_message = result["messages"][-1].content
            print(f"[AGENT] Raw output: {last_message[:200]}...")

            # Reduce the reply to the bare answer.
            answer = self._extract_final_answer(last_message)
            print(f"[AGENT] Final answer: {answer}")
            return answer

        except Exception as e:
            print(f"[AGENT] Error: {e}")
            return f"Error: {e}"

    def _extract_final_answer(self, text: str) -> str:
        """Return the answer that follows the last ``FINAL ANSWER:`` marker.

        The marker is matched case-insensitively. When it occurs more than
        once, the last occurrence wins, so an earlier mention of "final
        answer:" inside the reasoning does not shadow the real final line.
        Markdown emphasis or code wrappers (``*`` and backticks) around the
        answer are removed. If no marker is present, the last non-empty line
        of ``text`` is returned (an empty string for empty input).
        """
        # One pattern is enough: re.IGNORECASE already covers every casing.
        candidates = re.findall(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", text, re.IGNORECASE)
        if candidates:
            raw = candidates[-1].strip()
            # "**FINAL ANSWER:** `42`" captures "** `42`"; peel the markup off.
            cleaned = raw.strip("*` \t")
            # Keep the raw text if stripping would leave nothing at all.
            return cleaned or raw

        # Fallback: last non-empty line of the reply.
        non_empty = [line.strip() for line in text.strip().split("\n") if line.strip()]
        return non_empty[-1] if non_empty else text.strip()


# ─────────────────────────────────────────────
#  GRADIO RUNNER
# ─────────────────────────────────────────────

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every fetched question with BasicAgent and submit the results.

    Returns a ``(status_text, table)`` pair. ``table`` is ``None`` when the
    run stops before any question is processed (no login, agent construction
    failure, question fetch failure); otherwise it is a DataFrame with one
    row per processed question.
    """
    # Nothing runs without a logged-in profile.
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Build the agent
    try:
        agent = BasicAgent()
    except Exception as exc:
        print(f"Error instantiating agent: {exc}")
        return f"Error initializing agent: {exc}", None

    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code: {agent_code}")

    # 2. Download the question list
    print(f"Fetching questions from: {questions_url}")
    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions_data = fetched.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as exc:
        return f"Error fetching questions: {exc}", None

    # 3. Answer each question
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    for entry in questions_data:
        task_id, question_text = entry.get("task_id"), entry.get("question")
        if question_text is None or not task_id:
            print(f"Skipping item with missing task_id or question: {entry}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            shown_answer = submitted_answer
        except Exception as exc:
            # Failed questions appear in the table but are not submitted.
            print(f"Error on task {task_id}: {exc}")
            shown_answer = f"AGENT ERROR: {exc}"
        results_log.append({
            "Task ID": task_id,
            "Question": question_text[:100],
            "Submitted Answer": shown_answer,
        })

    # The log is final from here on, so one table serves every return below.
    results_table = pd.DataFrame(results_log)

    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_table

    # 4. Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"Submitting {len(answers_payload)} answers...")

    try:
        posted = requests.post(submit_url, json=submission_data, timeout=120)
        posted.raise_for_status()
        result_data = posted.json()
    except requests.exceptions.HTTPError as http_err:
        failed = http_err.response
        error_detail = f"Server responded with status {failed.status_code}."
        try:
            # Prefer the JSON "detail" field; fall back to the raw body text.
            error_detail += f" Detail: {failed.json().get('detail', failed.text)}"
        except Exception:
            error_detail += f" Response: {failed.text[:500]}"
        return f"Submission Failed: {error_detail}", results_table
    except Exception as exc:
        return f"Submission Failed: {exc}", results_table

    correct = result_data.get('correct_count', '?')
    attempted = result_data.get('total_attempted', '?')
    status_lines = [
        "Submission Successful!",
        f"User: {result_data.get('username')}",
        f"Overall Score: {result_data.get('score', 'N/A')}% ({correct}/{attempted} correct)",
        f"Message: {result_data.get('message', 'No message received.')}",
    ]
    final_status = "\n".join(status_lines)
    print("Submission successful.")
    return final_status, results_table


# ─────────────────────────────────────────────
#  GRADIO UI
# ─────────────────────────────────────────────

# Page layout: title, instructions, login button, run button, then the two
# output widgets that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — LangGraph + Llama 3.3 70B")
    gr.Markdown("""
**Stack:** LangGraph ReAct · Llama 3.3 70B (HF Inference) · DuckDuckGo · Wikipedia · Python REPL

**Instructions:**
1. Log in with your HuggingFace account below.
2. Make sure `HF_TOKEN` is set as a Space secret (with access to Llama 3.3 70B).
3. Click **Run Evaluation & Submit All Answers**.

> ⚠️ The run can take several minutes — the agent reasons through each question step by step.
    """)

    gr.LoginButton()

    run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")

    # Receive the (status text, results table) pair returned by run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No inputs are listed here; the handler's gr.OAuthProfile parameter is
    # presumably supplied by Gradio from the login session - confirm.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Startup banner: report the Space environment variables that are set,
    # then start the Gradio app.
    rule = "-" * 30
    print(f"\n{rule} App Starting {rule}")
    if host := os.getenv("SPACE_HOST"):
        print(f"✅ SPACE_HOST: {host}")
    if repo := os.getenv("SPACE_ID"):
        print(f"✅ SPACE_ID: {repo}")
        print(f"   Repo: https://huggingface.co/spaces/{repo}/tree/main")
    print(f"{'-' * 60}\n")
    demo.launch(debug=True, share=False)