"""Gradio app that runs a LangGraph ReAct agent on the scoring API's questions.

The app fetches the question list from ``DEFAULT_API_URL``, answers each
question with ``BasicAgent`` (Llama 3.3 70B behind a set of search / code /
file tools), and posts the collected answers back to the ``/submit`` endpoint.
"""

import json
import math
import os
import re
import unicodedata
from datetime import datetime

import gradio as gr
import pandas as pd
import requests

# --- LangGraph + LangChain imports ---
from langgraph.prebuilt import create_react_agent
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.messages import SystemMessage

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Upper bound on how much downloaded text a tool hands back to the model.
_MAX_TEXT_CHARS = 5000

# Matches "FINAL ANSWER: <rest of line>" in any letter case.
_FINAL_ANSWER_RE = re.compile(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", re.IGNORECASE)


# ─────────────────────────────────────────────
# TOOLS
# ─────────────────────────────────────────────
# NOTE: each tool's docstring is kept verbatim because the `@tool` decorator
# exposes it to the model as the tool description. Maintainer notes therefore
# live in `#` comments rather than in the docstrings.

@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo. Use for current events, facts, and general knowledge."""
    try:
        search = DuckDuckGoSearchRun()
        return search.run(query)
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Search error: {e}"


@tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for encyclopedic knowledge, historical facts, biographies, science."""
    try:
        wiki = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=3000)
        return wiki.run(query)
    except Exception as e:
        return f"Wikipedia error: {e}"


@tool
def python_repl(code: str) -> str:
    """
    Execute Python code for math calculations, data processing, logic.
    Always print() the final result.
    Example: print(2 + 2)
    """
    # SECURITY: this exec()s model-generated code with full builtins in the
    # app's own process. It is not a sandbox; only run it where that is
    # acceptable.
    import contextlib
    import datetime
    import io
    import json
    import math
    import re
    import unicodedata

    captured = io.StringIO()
    namespace = {
        "math": math,
        "json": json,
        "re": re,
        "unicodedata": unicodedata,
        "datetime": datetime,
        "__builtins__": __builtins__,
    }
    try:
        # redirect_stdout restores sys.stdout even when the code raises.
        with contextlib.redirect_stdout(captured):
            exec(code, namespace)
    except SystemExit as e:
        # exit()/sys.exit() raise SystemExit, which is not an Exception
        # subclass; left uncaught it would abort the whole evaluation run.
        error = f"Code error: SystemExit({e.code!r}). Do not call exit() or sys.exit()."
    except Exception as e:
        error = f"Code error: {e}"
    else:
        output = captured.getvalue().strip()
        return output or "Code executed (no output). Use print() to see results."

    # Keep anything printed before the failure; it helps the model debug.
    partial = captured.getvalue().strip()
    return f"{partial}\n{error}" if partial else error


@tool
def read_file_from_url(url: str) -> str:
    """
    Download and read a file from a URL (txt, csv, json, py, etc.).
    Returns the file content as text.
    """
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        content_type = response.headers.get("Content-Type", "")
        if "text" in content_type or "json" in content_type:
            # Truncated so a large download does not flood the model context.
            return response.text[:_MAX_TEXT_CHARS]
        return f"Binary file ({content_type}), cannot read as text."
    except Exception as e:
        return f"Error reading file: {e}"


@tool
def get_task_file(task_id: str) -> str:
    """
    Fetch the file associated with a GAIA task by its task_id.
    Returns file content or description.
    """
    try:
        # Use the shared constant so the tool and the runner hit the same API.
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        response = requests.get(url, timeout=15)
        if response.status_code != 200:
            return f"No file found for task {task_id}"

        content_type = response.headers.get("Content-Type", "")
        if "text" in content_type or "json" in content_type:
            return response.text[:_MAX_TEXT_CHARS]
        if "image" in content_type:
            return f"[Image file attached to task {task_id} - content-type: {content_type}]"
        if "audio" in content_type:
            return f"[Audio file attached to task {task_id} - content-type: {content_type}]"
        return f"[File attached: {content_type}]"
    except Exception as e:
        return f"Error fetching task file: {e}"


@tool
def calculator(expression: str) -> str:
    """
    Evaluate a simple math expression safely.
    Examples: '2 + 2', '100 * 1.07 ** 5', 'math.sqrt(144)'
    """
    # SECURITY NOTE(review): eval() with emptied builtins is NOT a real
    # sandbox (object introspection can still escape it). Left as-is because
    # the expression comes from the agent, but do not expose this to
    # arbitrary user input.
    try:
        result = eval(expression, {"math": math, "__builtins__": {}})
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}. Try python_repl for complex code."


# ─────────────────────────────────────────────
# SYSTEM PROMPT
# ─────────────────────────────────────────────

SYSTEM_PROMPT = """You are a precise, expert AI assistant solving GAIA benchmark questions.

GAIA questions require careful reasoning and often multiple steps. Follow these rules:

## Answer Format (CRITICAL)
- Your FINAL answer must be the **bare minimum**: a number, a word, a name, a date, a short phrase.
- NO explanations, NO punctuation at the end, NO "The answer is...", NO sentences.
- Examples of correct final answers: `42`, `Marie Curie`, `Paris`, `1969`, `blue`, `$14.50`
- For lists, separate items with commas: `item1, item2, item3`

## Strategy
1. **Read carefully** – identify exactly what is being asked.
2. **Use tools** – search the web, Wikipedia, or run code to verify facts.
3. **Verify numbers** – always double-check calculations with the calculator or python_repl.
4. **Check for files** – if the question mentions an attachment or file, use get_task_file.
5. **Be specific** – GAIA answers are exact; approximate answers are wrong.

## Tool Usage
- Use `web_search` for recent events, facts, and general knowledge.
- Use `wikipedia_search` for biographies, history, science.
- Use `python_repl` for calculations, data manipulation, logic puzzles.
- Use `calculator` for quick arithmetic.
- Use `get_task_file` when a question refers to an attached file or document.

## Final Answer
Always end your response with:
FINAL ANSWER: """


# ─────────────────────────────────────────────
# AGENT
# ─────────────────────────────────────────────

class BasicAgent:
    """Callable wrapper around a LangGraph ReAct agent.

    Calling an instance with a question string returns the agent's final
    answer as a string. Failures are returned as an ``"Error: ..."`` string
    rather than raised.
    """

    def __init__(self):
        """Build the chat model and the ReAct agent with all tools attached."""
        print("Initializing LangGraph ReAct Agent with Llama 3.3 70B...")

        hf_token = os.getenv("HF_TOKEN")

        llm_endpoint = HuggingFaceEndpoint(
            repo_id="meta-llama/Llama-3.3-70B-Instruct",
            huggingfacehub_api_token=hf_token,
            task="text-generation",
            max_new_tokens=1024,
            temperature=0.1,
            do_sample=False,
        )
        llm = ChatHuggingFace(llm=llm_endpoint)

        tools = [
            web_search,
            wikipedia_search,
            python_repl,
            calculator,
            read_file_from_url,
            get_task_file,
        ]

        # NOTE(review): newer langgraph releases appear to take the system
        # prompt via `prompt=` instead of `state_modifier=` - confirm against
        # the langgraph version pinned for this Space before changing it.
        self.agent = create_react_agent(
            model=llm,
            tools=tools,
            state_modifier=SYSTEM_PROMPT,
        )
        print("Agent ready.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return the extracted answer.

        Any exception raised while invoking the agent is caught, logged, and
        returned as ``"Error: <message>"``.
        """
        print(f"\n[AGENT] Question: {question[:100]}...")
        try:
            result = self.agent.invoke({
                "messages": [("user", question)]
            })
            # The last message in the returned state is the final AI reply.
            last_message = result["messages"][-1].content
            print(f"[AGENT] Raw output: {last_message[:200]}...")

            answer = self._extract_final_answer(last_message)
            print(f"[AGENT] Final answer: {answer}")
            return answer

        except Exception as e:
            print(f"[AGENT] Error: {e}")
            return f"Error: {e}"

    def _extract_final_answer(self, text: str) -> str:
        """Return the answer that follows the last "FINAL ANSWER:" marker.

        The marker is matched case-insensitively. The prompt asks the model
        to *end* with the marker, so when it occurs more than once the last
        occurrence wins. If no marker is present, the last non-empty line of
        ``text`` is returned (or the stripped text when there are no lines).
        """
        answers = _FINAL_ANSWER_RE.findall(text)
        if answers:
            return answers[-1].strip()

        # Fallback: return last non-empty line
        lines = [line.strip() for line in text.strip().split("\n") if line.strip()]
        return lines[-1] if lines else text.strip()


# ─────────────────────────────────────────────
# GRADIO RUNNER
# ─────────────────────────────────────────────

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame of
        per-question answers, or ``None`` when the run stopped before any
        question was attempted.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Init Agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 3. Run Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100],
                "Submitted Answer": submitted_answer,
            })
        except Exception as e:
            # Failed tasks are shown in the table but not submitted.
            print(f"Error on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100],
                "Submitted Answer": f"AGENT ERROR: {e}",
            })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"Submitting {len(answers_payload)} answers...")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            # Prefer the structured "detail" field when the body is JSON.
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except Exception:
            error_detail += f" Response: {e.response.text[:500]}"
        return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)


# ─────────────────────────────────────────────
# GRADIO UI
# ─────────────────────────────────────────────

with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — LangGraph + Llama 3.3 70B")
    gr.Markdown("""
    **Stack:** LangGraph ReAct · Llama 3.3 70B (HF Inference) · DuckDuckGo · Wikipedia · Python REPL

    **Instructions:**
    1. Log in with your HuggingFace account below.
    2. Make sure `HF_TOKEN` is set as a Space secret (with access to Llama 3.3 70B).
    3. Click **Run Evaluation & Submit All Answers**.

    > ⚠️ The run can take several minutes — the agent reasons through each question step by step.
    """)

    gr.LoginButton()
    run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )


if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")
    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f"   Repo: https://huggingface.co/spaces/{space_id}/tree/main")
    print("-" * 60 + "\n")
    demo.launch(debug=True, share=False)