# hs-unit4-agent / app.py
# Author: HarsimranjitSingh -- "Update app.py" (commit fcac5e2, verified)
import os
import gradio as gr
import requests
import pandas as pd
import logging
# Configure logging for more detailed output
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)  # module-level logger shared by the whole app
# --- LangChain Imports ---
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.agents import AgentExecutor, create_react_agent
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import Tool
from langchain_core.messages import HumanMessage
# --- Constants ---
# Scoring service for the Hugging Face Agents course (Unit 4 / GAIA) benchmark.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Basic Agent Definition ---
class BasicAgent:
    """ReAct-style LangChain agent backed by a Hugging Face Inference endpoint.

    Attributes set by __init__ (all consulted by callers):
        llm            -- ChatHuggingFace instance, or None if LLM setup failed.
        tools          -- list of LangChain tools available to the agent.
        agent_prompt   -- the ReAct PromptTemplate.
        agent          -- the created ReAct agent (only when setup succeeded).
        agent_executor -- AgentExecutor, or None if LLM/agent setup failed.
    """

    # Single source of truth for the backing model (was duplicated in three places).
    MODEL_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"

    def __init__(self):
        logger.info("Initializing LangChain-based Agent...")
        self.llm = self._init_llm()
        self.tools = self._build_tools()
        self.agent_prompt = self._build_prompt()
        self.agent_executor = self._build_executor()

    def _init_llm(self):
        """Create and smoke-test the ChatHuggingFace LLM.

        Returns the LLM instance, or None on any failure (missing token,
        endpoint error, or a failed test inference).
        """
        hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
        if not hf_token:
            logger.error("HUGGINGFACEHUB_API_TOKEN not found in environment variables.")
            logger.error("Please add it as a secret in your Hugging Face Space settings.")
            return None
        try:
            # SECURITY: never log any part of the token -- it is a secret.
            logger.info("Attempting to initialize ChatHuggingFace with repo_id: %s", self.MODEL_REPO_ID)
            llm = ChatHuggingFace(
                llm=HuggingFaceEndpoint(
                    repo_id=self.MODEL_REPO_ID,
                    temperature=0.1,
                    huggingfacehub_api_token=hf_token,
                    task="text-generation",  # Mistral-Instruct often works well with this task
                    max_new_tokens=512,
                )
            )
            logger.info("ChatHuggingFace LLM instance created successfully for repo_id: %s", self.MODEL_REPO_ID)
            # Perform a small test inference to ensure the endpoint is actually usable
            # (a bad token or cold endpoint often only fails at invoke time).
            logger.info("Performing a small test inference with the initialized ChatHuggingFace LLM.")
            response = llm.invoke([HumanMessage(content="What is the capital of France?")])
            if not (response and response.content):
                logger.error("LLM test inference returned an empty or invalid response.")
                return None
            logger.info("LLM test inference successful. Response snippet: '%s...'", response.content[:50])
            logger.info("ChatHuggingFace LLM fully initialized and tested.")
            return llm
        except Exception:
            logger.exception("CRITICAL ERROR during LLM (ChatHuggingFace/HuggingFaceEndpoint) initialization")
            return None

    def _build_tools(self):
        """Tools the ReAct agent may call (currently web search only)."""
        return [
            DuckDuckGoSearchRun(
                name="web_search",
                description="A useful tool for searching the internet for information, especially for current events or factual queries. Use this when you need to find specific data or verify facts."
            )
        ]

    def _build_prompt(self):
        """ReAct prompt template instructing the model to answer concisely."""
        return PromptTemplate.from_template(
            """Answer the following question as concisely and directly as possible.
If the question requires factual information, use your tools to find it.
If the question requires a numerical answer, provide only the number.
If the question requires a specific string, provide only that string.
Do not include any conversational filler, explanations, or extra text unless explicitly asked.
You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question. Provide only the final answer without any additional text or explanations.
Question: {input}
Thought:{agent_scratchpad}
"""
        )

    def _build_executor(self):
        """Create the ReAct agent executor; returns None when the LLM is unavailable."""
        if not self.llm:
            logger.warning("Agent Executor not initialized because LLM failed to initialize.")
            return None
        try:
            self.agent = create_react_agent(self.llm, self.tools, self.agent_prompt)
            executor = AgentExecutor(
                agent=self.agent,
                tools=self.tools,
                verbose=True,
                handle_parsing_errors=True,  # recover from malformed LLM output instead of crashing
                max_iterations=7,            # hard cap on the Thought/Action loop
            )
            logger.info("LangChain Agent Executor initialized successfully.")
            return executor
        except Exception:
            logger.exception("ERROR creating agent or agent executor")
            return None

    def __call__(self, question: str) -> str:
        """Answer a single question.

        Returns the agent's final answer string, or an error-tagged string
        ("AGENT_INIT_ERROR..." / "AGENT_EXECUTION_ERROR...") on failure --
        callers submit whatever string comes back.
        """
        logger.info("\n--- Agent received question: %s...", question[:150])
        if not self.agent_executor:
            logger.error("Agent not configured. Returning placeholder answer.")
            return "AGENT_INIT_ERROR: Agent not configured due to prior LLM/agent setup failure. Check Space logs."
        try:
            response = self.agent_executor.invoke({"input": question})
            final_answer = str(response.get('output', "No answer generated by agent.")).strip()
            logger.info("--- Agent returning answer: '%s'", final_answer[:150])
            return final_answer
        except Exception as e:
            logger.exception("ERROR during agent execution for question: %s...", question[:50])
            return f"AGENT_EXECUTION_ERROR: {e}"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all benchmark questions, run the BasicAgent on each, and submit answers.

    Args:
        profile: OAuth profile injected by gradio's LoginButton; None when logged out.

    Returns:
        (status_message, results) -- results is a DataFrame of per-question answers,
        or None on early failures (not logged in, agent init failed, fetch failed).
    """
    space_id = os.getenv("SPACE_ID")  # used to link the submission to this Space's code

    if profile:
        username = f"{profile.username}"
        logger.info(f"User logged in: {username}")
    else:
        logger.warning("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent()
        if not agent.llm or not agent.agent_executor:
            return "Agent could not be initialized. HUGGINGFACEHUB_API_TOKEN might be missing or invalid, or there's an issue with the LLM/Agent setup. Check Space logs for details.", None
    except Exception as e:
        logger.exception(f"Critical error during agent instantiation: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    logger.info(agent_code)

    # 2. Fetch Questions
    logger.info(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            logger.warning("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        logger.info(f"Fetched {len(questions_data)} questions.")
    # BUGFIX: requests' JSONDecodeError is a subclass of RequestException, so it
    # must be caught FIRST -- otherwise this branch is unreachable and the
    # response-text diagnostics below would never be logged.
    except requests.exceptions.JSONDecodeError as e:
        logger.error(f"Error decoding JSON response from questions endpoint: {e}")
        logger.error(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        logger.exception(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the agent on every question, collecting both the submission payload
    #    and a human-readable log for the results table.
    results_log = []
    answers_payload = []
    logger.info(f"Running agent on {len(questions_data)} questions...")
    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        logger.info(f"\nProcessing question {i+1}/{len(questions_data)}: Task ID {task_id}")
        if not task_id or question_text is None:
            logger.warning(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing question must not abort the whole run; record the error instead.
            logger.exception(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        logger.warning("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    logger.info(status_update)

    # 5. Submit (most-specific exception types first)
    logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        logger.info("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        logger.error(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        logger.error(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        logger.error(status_message)
        return status_message, pd.DataFrame(results_log)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        logger.exception(status_message)
        return status_message, pd.DataFrame(results_log)
# --- Build Gradio Interface using Blocks ---
# --- Build Gradio Interface using Blocks ---
# The LoginButton supplies the gr.OAuthProfile argument to run_and_submit_all
# automatically (via its type annotation), so no explicit `inputs` are wired.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Agent Evaluation Runner for GAIA Benchmark")
    gr.Markdown(
        """
        **Instructions:**
        1. **Clone this space** to your account.
        2. **Add `HUGGINGFACEHUB_API_TOKEN` as a Space Secret** under the "Settings" tab of your duplicated Space. This is crucial for the agent to use the Hugging Face Inference API.
        3. Modify the `BasicAgent` class in `app.py` to refine its logic, add more tools (like a calculator or file reader), and optimize its prompting to improve GAIA scores.
        4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
        ---
        **Disclaimers:**
        The submission process can take time as the agent processes multiple questions. This setup provides a foundational framework; continuous improvement in agent logic and tool integration is key to higher GAIA scores. Consider strategies like answer caching or asynchronous processing for larger benchmarks.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only status box: shows progress messages and the final submission result.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
# Script entry point: log Space environment details, then launch the Gradio UI.
if __name__ == "__main__":
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
    # SPACE_HOST / SPACE_ID are injected by the Hugging Face Spaces runtime;
    # their absence usually means the app is running locally.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")
    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        logger.info(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    logger.info("-"*(60 + len(" App Starting ")) + "\n")
    logger.info("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)