Spaces:

igorpavlov-mgr
/

GAIA-Test-HF-Agent-Course

Sleeping

App Files Community

igorpavlov-mgr committed on May 13, 2025

Commit

65fb8cb

verified ·

1 Parent(s): e921749

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -482

app.py CHANGED Viewed

@@ -1,488 +1,9 @@
-# app-24.py
-# Final GAIA-compliant agent integrating RobotPai best practices + our advanced logic
-import os
-import re
-import json
-import base64
-import requests
-import pdfplumber
-import fitz  # PyMuPDF
-import tempfile
 import gradio as gr
-import pandas as pd
-from pydub import AudioSegment
-import speech_recognition as sr
-from io import BytesIO
-from langchain_core.messages import HumanMessage
-from langgraph.graph import StateGraph, END
-from langgraph.prebuilt import ToolNode
-from langchain.tools import tool
-from langchain.agents import tool as lc_tool
-from langchain_core.runnables import Runnable
-from langchain.agents.output_parsers import ReActSingleInputOutputParser
-from langchain.agents.format_scratchpad import format_to_openai_functions
-from langchain.agents.agent import AgentExecutor
-from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.prompts import SystemMessagePromptTemplate
-from langchain_core.prompts.chat import HumanMessagePromptTemplate
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.runnables import RunnableLambda
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
-from langchain_community.chat_models import ChatOpenAI
-from langchain_core.language_models.chat_models import BaseChatModel
-# =========================
-# AGENT STATE SCHEMA
-# =========================
-from typing import TypedDict, Optional, List, Tuple
-class AgentState(TypedDict, total=False):
-    question: str
-    planner_output: Optional[str]
-    tool_call: Optional[str]
-    tool_result: Optional[str]
-    answer: Optional[str]
-    replan: Optional[bool]
-    replan_count: int
-    debug_trace: List[str]
-# =========================
-# ENVIRONMENT & LLM SETUP
-# =========================
-openai_api_key = os.getenv("OPENAI_API_KEY", "")
-model_name = os.getenv("OPENAI_MODEL", "gpt-4-turbo")
-llm = ChatOpenAI(
-    model=model_name,
-    temperature=0.0,
-    openai_api_key=openai_api_key,
-    max_tokens=512
-)
-# =========================
-# File Download Function
-# =========================
-def download_file_from_gaia(task_id: str, file_name: str) -> str:
-    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-    response = requests.get(url)
-    if response.status_code == 200:
-        dir_path = os.path.expanduser("~/gaia_files")
-        os.makedirs(dir_path, exist_ok=True)
-        file_path = os.path.join(dir_path, file_name)
-        with open(file_path, "wb") as f:
-            f.write(response.content)
-        return file_path
-    else:
-        return f"/tmp/fake_{file_name}"
-# =========================
-# TOOL REGISTRY SECTION
-# =========================
-@tool
-def Calculator(expression: str) -> str:
-    """Evaluate a basic math expression like 15 / 100 * 80"""
-    try:
-        result = eval(expression, {"__builtins__": {}}, {})
-        return str(result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-@tool
-def PythonExec(code: str) -> str:
-    """Evaluate basic Python code for logic and parsing. Avoid stateful ops."""
-    if not is_valid_python_code(code):
-        return "Invalid Python code."
-    try:
-        exec_globals = {}
-        exec(code, exec_globals)
-        return str(exec_globals.get("result", "Executed"))
-    except Exception as e:
-        return f"Error: {str(e)}"
-def is_valid_python_code(code: str) -> bool:
-    invalid_keywords = ["import", "open", "os", "sys", "socket", "subprocess"]
-    return not any(word in code for word in invalid_keywords)
-@tool
-def PDFReader(file_path: str) -> str:
-    """Extract up to 1000 characters of clean text from a PDF file."""
-    try:
-        text = ""
-        with pdfplumber.open(file_path) as pdf:
-            for page in pdf.pages:
-                text += page.extract_text() or ""
-                if len(text) > 1000:
-                    break
-        return text[:1000].strip()
-    except Exception:
-        try:
-            doc = fitz.open(file_path)
-            text = " ".join([page.get_text() for page in doc][:3])
-            return text[:1000].strip()
-        except Exception as e:
-            return f"Error: {str(e)}"
-@tool
-def ReadExcel(file_path: str) -> str:
-    """Return a summary of the Excel file content."""
-    try:
-        df = pd.read_excel(file_path)
-        preview = df.head().to_string()
-        return preview
-    except Exception as e:
-        return f"Error: {str(e)}"
-@tool
-def TranscribeAudio(file_path: str) -> str:
-    """Return the audio transcript (mp3 only)."""
-    try:
-        audio = AudioSegment.from_file(file_path)
-        audio.export("/tmp/tmp.wav", format="wav")
-        recognizer = sr.Recognizer()
-        with sr.AudioFile("/tmp/tmp.wav") as source:
-            audio_data = recognizer.record(source)
-        return recognizer.recognize_google(audio_data)
-    except Exception as e:
-        return f"Error: {str(e)}"
-@tool
-def YouTubeTranscript(url: str) -> str:
-    """Extract transcript text from a YouTube video (fallback simulation)."""
-    return f"Transcript of video {url} (not implemented)"
-@tool
-def DuckDuckGoSearch(query: str) -> str:
-    """Search the web using DuckDuckGo."""
-    try:
-        wrapper = DuckDuckGoSearchAPIWrapper()
-        results = wrapper.run(query)
-        return results
-    except Exception as e:
-        return f"Error: {str(e)}"
-# Tool registry list
-tools = [
-    Calculator,
-    PythonExec,
-    PDFReader,
-    ReadExcel,
-    TranscribeAudio,
-    YouTubeTranscript,
-    DuckDuckGoSearch,
-]
-# =========================
-# PLANNER NODE
-# =========================
-def is_valid_tool_call(output: str) -> bool:
-    """Check if the output is a valid tool call of the form ToolName[<input>]"""
-    return bool(re.match(r"^[A-Za-z_]+\[.*\]$", output.strip()))
-def planner_node(state: dict) -> dict:
-    question = state.get("question", "")
-    trace = state.get("debug_trace", [])
-    # Prompt with tool list and few-shot examples
-    prompt = (
-        "You are a ReAct-style planning agent. Choose the most suitable tool.\n"
-        "Respond using this format:\n"
-        "Thought: <reasoning>\nAction: ToolName[<input>]\n\n"
-        "Available tools:\n"
-        "- Calculator: Evaluate math expressions\n"
-        "- PythonExec: Run Python code\n"
-        "- PDFReader: Read content from PDF files\n"
-        "- ReadExcel: Parse Excel spreadsheets\n"
-        "- TranscribeAudio: Transcribe .mp3 audio\n"
-        "- YouTubeTranscript: Extract transcript from a video\n"
-        "- DuckDuckGoSearch: Search for web content\n\n"
-        "---\n"
-        "Question: What is 25% of 80?\n"
-        "Thought: I can calculate this with math.\n"
-        "Action: Calculator[25 / 100 * 80]\n\n"
-        "Question: What does the video say at https://youtube.com/watch?v=abc123?\n"
-        "Thought: I need the video transcript.\n"
-        "Action: YouTubeTranscript[https://youtube.com/watch?v=abc123]\n\n"
-        "Question: What is in the Excel file sales.xlsx?\n"
-        "Thought: I should read the Excel file.\n"
-        "Action: ReadExcel[/tmp/sales.xlsx]\n\n"
-        f"Question: {question}"
-    )
-    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
-    result = llm.invoke(prompt)
-    result_text = result.content.strip()
-    # Extract Thought and Action
-    thought_match = re.search(r"Thought: (.*?)\n", result_text, re.DOTALL)
-    action_match = re.search(r"Action: (.*?)$", result_text.strip())
-    thought = thought_match.group(1).strip() if thought_match else ""
-    action = action_match.group(1).strip() if action_match else "INVALID"
-    trace.append(f"[Planner] Thought: {thought}")
-    trace.append(f"[Planner] Raw Action: {action}")
-    if not is_valid_tool_call(action):
-        trace.append("[Planner] Invalid format detected — replanning may be required.")
-        return {**state, "tool_call": None, "replan": True, "debug_trace": trace}
-    return {**state, "tool_call": action, "debug_trace": trace, "replan": False}
-# =========================
-# TOOL NODE (ReAct-style)
-# =========================
-from langgraph.prebuilt import ToolExecutor
-tool_executor = ToolExecutor(tools)
-def tool_node(state: dict) -> dict:
-    tool_call = state.get("tool_call")
-    trace = state.get("debug_trace", [])
-    if not tool_call:
-        trace.append("[ToolNode] No tool call provided.")
-        return {**state, "tool_result": None, "debug_trace": trace}
-    try:
-        tool_name, tool_input = re.match(r"([A-Za-z_]+)\[(.*)\]", tool_call).groups()
-        tool_input = tool_input.strip()
-        result = tool_executor.invoke({"tool": tool_name, "tool_input": tool_input})
-        trace.append(f"[ToolNode] Tool used: {tool_name}")
-        trace.append(f"[ToolNode] Input: {tool_input[:250]}")
-        trace.append(f"[ToolNode] Observation: {str(result)[:250]}")
-        return {**state, "tool_result": str(result), "debug_trace": trace}
-    except Exception as e:
-        trace.append(f"[ToolNode] Error invoking tool: {str(e)}")
-        return {**state, "tool_result": None, "debug_trace": trace}
-# =========================
-# FINALIZER NODE
-# =========================
-def clean_final_answer(question: str, result: str, trace: list) -> str:
-    """Apply GAIA-safe formatting rules to tool output."""
-    answer = result.strip()
-    # First name trimming
-    if re.search(r"first name", question, re.IGNORECASE):
-        words = answer.split()
-        if len(words) > 1:
-            answer = words[0]
-            trace.append("[Finalizer] Heuristic: Trimmed to first name.")
-    # Quote simulation fallback (if output in quotes)
-    quote_match = re.findall(r'"([^"]{1,40})"', answer)
-    if quote_match:
-        answer = quote_match[0]
-        trace.append("[Finalizer] Heuristic: Quote selected as answer.")
-    # Year counting (e.g., for discography)
-    if re.search(r"how many .*\b(years|albums|times)\b", question, re.IGNORECASE):
-        years = re.findall(r"\b(19|20)\d{2}\b", answer)
-        if years:
-            answer = str(len(years))
-            trace.append("[Finalizer] Heuristic: Counted year mentions.")
-    # Defunct country parsing
-    if re.search(r"born in.*\b(USSR|Yugoslavia|Czechoslovakia)\b", question, re.IGNORECASE):
-        m = re.search(r"\b[A-Z][a-z]+\b", answer)
-        if m:
-            answer = m.group(0)
-            trace.append("[Finalizer] Heuristic: Extracted name from defunct country context.")
-    # Final trim and return
-    return answer.strip()
-def finalizer_node(state: dict) -> dict:
-    question = state.get("question", "")
-    tool_result = state.get("tool_result", "")
-    trace = state.get("debug_trace", [])
-    answer = clean_final_answer(question, tool_result, trace)
-    trace.append(f"[Finalizer] Final Answer: {answer}")
-    return {**state, "answer": answer, "debug_trace": trace}
-# =========================
-# BASIC AGENT CLASS
-# =========================
-class BasicAgent:
-    def __init__(self, graph):
-        self.graph = graph
-    def __call__(self, question: str) -> str:
-        state = {"question": question, "debug_trace": []}
-        result = self.graph.invoke(state)
-        return result.get("answer", "Error"), result.get("debug_trace", [])
-agent = BasicAgent(compiled_graph)
-# =========================
-# GRAPH DEFINITION
-# =========================
-def build_graph():
-    graph = StateGraph()
-    graph.add_node("planner", planner_node)
-    graph.add_node("tool", tool_node)
-    graph.add_node("finalizer", finalizer_node)
-    graph.set_entry_point("planner")
-    graph.add_edge("planner", "tool")
-    graph.add_edge("tool", "finalizer")
-    graph.set_finish_point("finalizer")
-    return graph.compile()
-print("✅ app.py loaded")
-try:
-    compiled_graph = build_graph()
-    print("✅ Graph compiled")
-    agent = BasicAgent(compiled_graph)
-    print("✅ Agent ready")
-except Exception as e:
-    import traceback
-    print("❌ Agent init failed:")
-    print(traceback.format_exc())
-# =========================
-# GAIA RUNNERS FOR SUBMISSION
-# =========================
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-def run_gaia_agent(question: str) -> str:
-    answer, _ = agent(question)
-    return answer or "Final Answer: [ERROR] Missing."
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    import pandas as pd
-    import requests
-    if not profile:
-        return "Please Login to Hugging Face with the button.", None
-    username = profile.username
-    space_id = os.getenv("SPACE_ID", "unknown-space-id")
-    questions_url = f"{DEFAULT_API_URL}/questions"
-    submit_url = f"{DEFAULT_API_URL}/submit"
-    try:
-        questions_data = requests.get(questions_url, timeout=15).json()
-    except Exception as e:
-        return f"Error fetching questions: {e}", None
-    results_log, answers_payload = [], []
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or not question_text:
-            continue
-        try:
-            submitted_answer = run_gaia_agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
-        "answers": answers_payload,
-    }
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60).json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {response.get('username')}\n"
-            f"Score: {response.get('score')}% "
-            f"({response.get('correct_count')}/{response.get('total_attempted')} correct)\n"
-            f"Message: {response.get('message', 'No message')}"
-        )
-        return final_status, pd.DataFrame(results_log)
-    except Exception as e:
-        return f"Submission failed: {e}", pd.DataFrame(results_log)
-# =========================
-# UI + GAIA SUBMISSION ENTRY POINT
-# =========================
-def debug_single_question(q):
-    try:
-        result = compiled_graph.invoke({"question": q})
-        trace = "\n".join(result.get("debug_trace", []))
-        answer = result["answer"]
-        # Format checks (debug only)
-        format_warnings = []
-        if "," in answer:
-            parts = [x.strip() for x in answer.split(",")]
-            if [p.lower() for p in parts] != sorted([p.lower() for p in parts]):
-                format_warnings.append("List is not alphabetically sorted.")
-        if len(answer.split()) == 2:
-            format_warnings.append("Full name detected; question may require first name only.")
-        if answer.lower().strip().startswith("final answer:"):
-            format_warnings.append("Do not include 'Final Answer:' prefix in result.")
-        if any(ord(c) > 127 for c in answer):
-            format_warnings.append("Non-ASCII characters found in result.")
-        if format_warnings:
-            trace += "\n\n⚠️ **Format Warning(s):**\n- " + "\n- ".join(format_warnings)
-        return answer, trace
-    except Exception as e:
-        import traceback
-        return "Error", traceback.format_exc()
-with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent with Debug & Submission UI")
-    # --- Debug UI ---
-    question_box = gr.Textbox(label='Enter a GAIA Question')
-    ask_button = gr.Button('Run Agent')
-    answer_output = gr.Textbox(label='Final Answer')
-    debug_output = gr.Textbox(label='Planner / Tool / Finalizer Trace', lines=20)
-    ask_button.click(fn=debug_single_question, inputs=question_box, outputs=[answer_output, debug_output])
-    # --- File Preview UI ---
-    task_id_box = gr.Textbox(label='GAIA Task ID (for File Download)')
-    file_name_box = gr.Textbox(label='File Name (e.g., doc.pdf)')
-    download_button = gr.Button("Download File and Get Base64")
-    base64_output = gr.Textbox(label="Base64 Download Link", lines=2)
-    def get_base64_file_link(task_id, file_name):
-        path = download_file_from_gaia(task_id, file_name)
-        if os.path.exists(path):
-            with open(path, "rb") as f:
-                encoded = base64.b64encode(f.read()).decode("utf-8")
-                link = f"data:application/octet-stream;base64,{encoded}"
-                return link
-        return "Error downloading file."
-    download_button.click(fn=get_base64_file_link, inputs=[task_id_box, file_name_box], outputs=base64_output)
-    # === GAIA Submission UI
-    gr.Markdown("## Submit GAIA Benchmark")
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     print("✅ Gradio demo launching...")
-    demo.launch()

 import gradio as gr
+print("✅ Minimal app.py reached")
+demo = gr.Interface(fn=lambda x: x.upper(), inputs="text", outputs="text")
 if __name__ == "__main__":
     print("✅ Gradio demo launching...")
+    demo.launch()