# mycertification / app.py
# jebaponselvasingh
# changes in the domain structure
# d1dcd56
import os
import gradio as gr
import requests
import pandas as pd
import tempfile
import json
import logging
from typing import Optional
from dotenv import load_dotenv
load_dotenv()
from agent_enhanced import GAIAAgent, is_ollama_available, is_production
# Base URL of the scoring service; the helpers below append /questions,
# /random-question, /files/<task_id> and /submit to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Configure the root logger so INFO-level records (and above) are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
def _print_question_summary(questions: list) -> None:
    """Print each question's task ID, a 200-character preview and its file name.

    The listing is informational only, so malformed entries are rendered
    with placeholders instead of raising.
    """
    print("\n" + "="*80)
    print("ALL QUESTIONS WITH TASK IDs:")
    print("="*80)
    for i, q in enumerate(questions, 1):
        entry = q if isinstance(q, dict) else {}
        task_id = entry.get("task_id", "N/A")
        raw_question = entry.get("question", "N/A")
        # A null/non-string question must not break the listing.
        question_text = "N/A" if raw_question is None else str(raw_question)
        file_name = entry.get("file_name", "")
        print(f"\n[{i}] Task ID: {task_id}")
        print(f" Question: {question_text[:200]}{'...' if len(question_text) > 200 else ''}")
        if file_name:
            print(f" File: {file_name}")
    print("\n" + "="*80)
    print(f"Total questions: {len(questions)}")
    print("="*80 + "\n")


def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Fetch all questions from the GAIA API.

    Makes up to three attempts against ``{api_url}/questions``.

    Args:
        api_url: Base URL of the scoring service.

    Returns:
        The decoded JSON list of questions, or ``[]`` when every attempt
        failed (network/HTTP error, invalid JSON, or a non-list payload).
    """
    for attempt in range(3):
        try:
            response = requests.get(f"{api_url}/questions", timeout=30)
            response.raise_for_status()
            questions = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors on older requests versions.
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
            continue
        if not isinstance(questions, list):
            logger.warning(
                f"Attempt {attempt + 1} failed: expected a JSON list, "
                f"got {type(questions).__name__}"
            )
            continue
        # Display happens outside the retry guard: a formatting problem must
        # not be mistaken for a failed download and throw away good data.
        _print_question_summary(questions)
        return questions
    return []
def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch one random question from ``{api_url}/random-question``.

    Tries up to three times, logging a warning after each failure.
    Returns the decoded JSON body, or an empty dict if all attempts fail.
    """
    endpoint = f"{api_url}/random-question"
    attempt_no = 0
    while attempt_no < 3:
        attempt_no += 1
        try:
            reply = requests.get(endpoint, timeout=30)
            reply.raise_for_status()
            return reply.json()
        except Exception as err:
            logger.warning(f"Attempt {attempt_no} failed: {err}")
    return {}
def _safe_attachment_name(content_disposition: str, fallback: str) -> str:
    """Return a bare file name taken from a Content-Disposition header.

    The header comes from a remote server, so only the final path component
    is kept; ``fallback`` is used when the header carries no usable name.
    """
    from email.message import Message

    advertised = ""
    if content_disposition:
        # Let the stdlib parse quoting, extra parameters and RFC 2231 forms.
        header = Message()
        header["content-disposition"] = content_disposition
        advertised = header.get_filename() or ""
    for candidate in (advertised, fallback):
        # Normalise Windows separators, then drop any directory part so the
        # name can never escape the temp directory it is joined onto.
        name = os.path.basename(str(candidate).replace("\\", "/"))
        if name and name not in (".", ".."):
            return name
    return "task_file"


def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for a task into a fresh temporary directory.

    Args:
        task_id: Identifier appended to ``{api_url}/files/``.
        api_url: Base URL of the scoring service.

    Returns:
        Path of the downloaded file, or ``None`` when the task has no file
        (HTTP 404), the server answers with any other non-200 status, or the
        request/write fails. Each download gets its own ``mkdtemp`` directory;
        the caller is responsible for removing the file and that directory.
    """
    import shutil

    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
    except requests.RequestException as e:
        logger.error(f"File fetch failed: {e}")
        return None

    if response.status_code != 200:
        if response.status_code != 404:
            logger.warning(
                f"File fetch for task {task_id} returned HTTP {response.status_code}"
            )
        return None

    filename = _safe_attachment_name(
        response.headers.get('content-disposition', ''),
        fallback=f"task_{task_id}_file",
    )
    temp_dir = tempfile.mkdtemp()
    file_path = os.path.join(temp_dir, filename)
    try:
        with open(file_path, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        logger.error(f"File fetch failed: {e}")
        # Do not leave an orphaned temp directory behind on a failed write.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None
    logger.info(f"Downloaded: {file_path}")
    return file_path
def submit_answers(username: str, agent_code: str, answers: list, api_url: str = DEFAULT_API_URL) -> dict:
    """POST the answers to ``{api_url}/submit`` and return the decoded JSON reply.

    The request body carries ``username``, ``agent_code`` and ``answers``.
    Non-2xx responses raise through ``raise_for_status``.
    """
    submit_endpoint = f"{api_url}/submit"
    body = dict(username=username, agent_code=agent_code, answers=answers)
    reply = requests.post(submit_endpoint, json=body, timeout=60)
    reply.raise_for_status()
    return reply.json()
def get_env_status() -> str:
    """Return a one-line Markdown description of the active model backend.

    Checks, in order: production mode, a reachable Ollama, then the
    ``OPENAI_API_KEY`` environment variable; falls back to a warning.
    """
    if is_production():
        return "☁️ **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    if is_ollama_available():
        return "🏠 **Local Mode** - Using Ollama"
    has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
    if has_openai_key:
        return "☁️ **Local + OpenAI** - Using OpenAI GPT-4o"
    return "⚠️ **No Backend** - Set OPENAI_API_KEY or start Ollama"
def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent on every benchmark question.

    Args:
        progress: Gradio progress tracker, called with a fraction and a
            description as the run advances.

    Returns:
        A ``(results, answers)`` pair. ``results`` is a DataFrame with the
        columns Task ID / Question / Answer / Status, or a one-row frame with
        an ``Error`` column when the run could not be carried out. ``answers``
        is the list of ``{"task_id", "submitted_answer"}`` dicts for
        submission, or ``None`` on failure.
    """

    def error_result(message: str):
        # The first output feeds a gr.Dataframe, so hand it tabular data
        # instead of a bare string.
        # NOTE(review): Gradio documents a str Dataframe value as a CSV path - confirm.
        return pd.DataFrame({"Error": [message]}), None

    try:
        progress(0, desc="Initializing agent...")
        agent = GAIAAgent()
        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()
        if not questions:
            return error_result("Error: Failed to fetch questions.")
        total = len(questions)
        results = []
        answers_for_submission = []
        for i, q in enumerate(questions):
            # Report the work already finished, so the bar only reaches
            # 100% once the last question has actually been answered.
            progress(i / total, desc=f"Question {i+1}/{total}...")
            task_id = q.get("task_id", "")
            question_text = q.get("question", "")
            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)
            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"
            finally:
                # Best-effort removal of the downloaded file and its directory.
                if file_path and os.path.exists(file_path):
                    try:
                        os.remove(file_path)
                        os.rmdir(os.path.dirname(file_path))
                    except OSError as cleanup_error:
                        logger.debug(f"Cleanup of {file_path} failed: {cleanup_error}")
            # Judge the answer on its text so a non-string result cannot
            # abort the whole run inside .startswith().
            answer_text = "" if answer is None else str(answer)
            answered = (
                bool(answer_text)
                and not answer_text.startswith("Error:")
                and answer_text != "Unable to determine answer"
            )
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                "Status": "✓" if answered else "✗",
            })
            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer,
            })
        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission
    except Exception as e:
        logger.error(f"Error: {e}")
        return error_result(f"Error: {str(e)}")
def test_single_question():
    """Fetch one random question, run the agent on it and report the outcome.

    Returns:
        A ``(question, answer, task_id, status)`` tuple for the four output
        textboxes. On failure the first element carries an ``Error: ...``
        message and the remaining three are empty strings.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()
        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""
        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")
        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)
        try:
            answer = agent.run(question_text, task_id, file_path)
        finally:
            # Runs even when the agent raises, so the download never leaks.
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    os.rmdir(os.path.dirname(file_path))
                except OSError as cleanup_error:
                    logger.debug(f"Cleanup of {file_path} failed: {cleanup_error}")
        answer_text = "" if answer is None else str(answer)
        # Same notion of a usable answer as the full benchmark table.
        looks_valid = (
            bool(answer_text)
            and not answer_text.startswith("Error")
            and answer_text != "Unable to determine answer"
        )
        status = "✓ Valid" if looks_valid else "⚠️ Check answer"
        return question_text, answer, task_id, status
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""
def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Submit the collected answers to the leaderboard.

    Args:
        username: Hugging Face username to submit under.
        space_url: URL of the Space holding the agent code; ``/tree/main`` is
            appended when missing.
        answers_json: JSON text (or an already-decoded list) of answer dicts.

    Returns:
        A Markdown string: the formatted results on success, otherwise a
        message explaining what is missing or what went wrong.
    """
    username = (username or "").strip()
    space_url = (space_url or "").strip()
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."

    try:
        answers = json.loads(answers_json) if isinstance(answers_json, str) else answers_json
    except json.JSONDecodeError as e:
        return f"Error: Answers JSON is not valid JSON ({e})"
    if not isinstance(answers, list) or not answers:
        return "Error: Run the benchmark first."

    try:
        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"
        result = submit_answers(username, space_url, answers)
        logger.info(f"Submission response: {result}")
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)
        # NOTE(review): the course's scoring service appears to report `score`
        # as a percentage (0-100), not a 0-1 fraction - confirm against the
        # API. Values above 1 are therefore scaled down; values in [0, 1] are
        # kept as fractions so either convention renders correctly.
        score_fraction = score / 100 if score > 1 else score
        if score_fraction > 0.3:
            cert_msg = "🏆 **Congratulations!** Score above 30% - Certificate earned!"
        else:
            cert_msg = "❌ Need >30% for certificate."
        # Blank lines keep each item a separate Markdown paragraph.
        return "\n\n".join([
            "## Submission Results",
            f"**Score:** {score_fraction:.1%}",
            f"**Correct:** {correct}/{total}",
            cert_msg,
            "[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)",
        ]) + "\n"
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"
# ============ GRADIO APP ============
# Three tabs: try one random question, run the full benchmark, and submit
# the collected answers to the leaderboard.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🤖 GAIA Benchmark Agent
**Tools:** 🔍 Web Search | 📚 Wikipedia | 🐍 Python | 📄 Files | 🔢 Calculator | 🌐 Webpages | 👁️ Vision (OpenAI)
""")
    # Backend banner, computed once when the UI is built.
    env_status = gr.Markdown(get_env_status())
    with gr.Tabs():
        with gr.TabItem("🧪 Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            test_btn.click(test_single_question, outputs=[test_q, test_a, test_id, test_status])
        with gr.TabItem("🚀 Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            # Holds the answers list between the benchmark run and submission.
            answers_state = gr.State()
            run_btn.click(run_agent_on_questions, outputs=[results_df, answers_state])
        with gr.TabItem("📤 Submit"):
            gr.Markdown("### Submit to Leaderboard")
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()

            def format_answers(a):
                """Render the stored answers as pretty-printed JSON ('' when empty)."""
                return json.dumps(a, indent=2) if a else ""

            # Mirror the benchmark's answers into the editable JSON box.
            answers_state.change(format_answers, inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard, inputs=[username_in, space_url_in, answers_in], outputs=[submit_result])
    gr.Markdown("""
---
**Setup:**
- Local: `ollama serve` + `ollama pull qwen2.5:32b`
- Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
""")

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)