Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import tempfile | |
| import json | |
| import logging | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from agent_enhanced import GAIAAgent, is_ollama_available, is_production | |
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
def _print_questions(questions: list) -> None:
    """Pretty-print every question with its task ID to stdout (debug aid)."""
    print("\n" + "=" * 80)
    print("ALL QUESTIONS WITH TASK IDs:")
    print("=" * 80)
    for i, q in enumerate(questions, 1):
        task_id = q.get("task_id", "N/A")
        question_text = q.get("question", "N/A")
        file_name = q.get("file_name", "")
        print(f"\n[{i}] Task ID: {task_id}")
        # Truncate long questions to keep the listing readable.
        print(f" Question: {question_text[:200]}{'...' if len(question_text) > 200 else ''}")
        if file_name:
            print(f" File: {file_name}")
    print("\n" + "=" * 80)
    print(f"Total questions: {len(questions)}")
    print("=" * 80 + "\n")


def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Fetch all questions from the GAIA API.

    Retries up to 3 times on any request/JSON failure; returns an empty
    list when every attempt fails (callers treat [] as "fetch failed").
    """
    for attempt in range(3):
        try:
            response = requests.get(f"{api_url}/questions", timeout=30)
            response.raise_for_status()
            questions = response.json()
            _print_questions(questions)
            return questions
        except Exception as e:
            # Lazy %-args so the message is only built when emitted.
            logger.warning("Attempt %d failed: %s", attempt + 1, e)
    return []
def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch a single random question from the GAIA API.

    Retries up to 3 times; returns an empty dict when every attempt fails.
    """
    attempt = 0
    while attempt < 3:
        try:
            resp = requests.get(f"{api_url}/random-question", timeout=30)
            resp.raise_for_status()
            return resp.json()
        except Exception as exc:
            logger.warning(f"Attempt {attempt + 1} failed: {exc}")
        attempt += 1
    return {}
def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for *task_id* into a fresh temp directory.

    Returns the local file path on success, or None when the task has no
    file (HTTP 404) or the download fails for any reason.
    """
    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
        if response.status_code == 200:
            content_disposition = response.headers.get('content-disposition', '')
            filename = f"task_{task_id}_file"
            if 'filename=' in content_disposition:
                # Keep only the filename parameter value: drop any trailing
                # header parameters and surrounding quotes, then basename()
                # to guard against path traversal from an untrusted server.
                raw = content_disposition.split('filename=')[1].split(';')[0]
                candidate = os.path.basename(raw.strip().strip('"'))
                if candidate:
                    filename = candidate
            temp_dir = tempfile.mkdtemp()
            file_path = os.path.join(temp_dir, filename)
            with open(file_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"Downloaded: {file_path}")
            return file_path
        elif response.status_code == 404:
            # 404 means "this task has no attachment" -- not an error.
            return None
        else:
            # Previously this fell through silently; surface odd statuses.
            logger.warning(f"Unexpected status {response.status_code} fetching file for task {task_id}")
    except Exception as e:
        logger.error(f"File fetch failed: {e}")
    return None
def submit_answers(username: str, agent_code: str, answers: list, api_url: str = DEFAULT_API_URL) -> dict:
    """POST the collected answers to the scoring endpoint and return its JSON reply.

    Raises requests.HTTPError (via raise_for_status) on a non-2xx response.
    """
    body = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(f"{api_url}/submit", json=body, timeout=60)
    resp.raise_for_status()
    return resp.json()
def get_env_status() -> str:
    """Describe which LLM backend the current environment will use.

    Checks, in priority order: production (HF Spaces), a local Ollama
    server, an OPENAI_API_KEY env var, then falls back to "no backend".
    """
    if is_production():
        return "βοΈ **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    if is_ollama_available():
        return "π **Local Mode** - Using Ollama"
    if os.environ.get("OPENAI_API_KEY"):
        return "βοΈ **Local + OpenAI** - Using OpenAI GPT-4o"
    return "β οΈ **No Backend** - Set OPENAI_API_KEY or start Ollama"
def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent on every benchmark question.

    Returns a (results DataFrame, answers list) pair on success, or an
    ("Error: ..." string, None) pair on failure. The answers list is in
    the shape the /submit endpoint expects.
    """
    try:
        progress(0, desc="Initializing agent...")
        agent = GAIAAgent()
        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()
        if not questions:
            return "Error: Failed to fetch questions.", None
        total = len(questions)
        results = []
        answers_for_submission = []
        for i, q in enumerate(questions):
            progress((i + 1) / total, desc=f"Question {i+1}/{total}...")
            task_id = q.get("task_id", "")
            question_text = q.get("question", "")
            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)
            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                # One failing question must not abort the whole run.
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                "Status": "β" if answer and not answer.startswith("Error:") and answer != "Unable to determine answer" else "β",
            })
            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer,
            })
            # Best-effort cleanup of the downloaded file and its temp dir.
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    os.rmdir(os.path.dirname(file_path))
                except OSError:
                    # Cleanup failure must not interrupt the benchmark.
                    pass
        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", None
def test_single_question():
    """Fetch one random question, run the agent on it, and report the result.

    Returns (question, answer, task_id, status) strings for the Gradio UI;
    on failure the question slot carries the "Error: ..." message and the
    other slots are empty.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()
        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""
        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")
        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)
        answer = agent.run(question_text, task_id, file_path)
        # Best-effort cleanup of the downloaded file and its temp dir.
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
                os.rmdir(os.path.dirname(file_path))
            except OSError:
                # Cleanup failure should not mask the agent's answer.
                pass
        status = "β Valid" if answer and not answer.startswith("Error") else "β οΈ Check answer"
        return question_text, answer, task_id, status
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""
def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Submit stored answers to the course leaderboard and format the outcome."""
    # All three inputs are required; answers come from a prior benchmark run.
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."
    try:
        if isinstance(answers_json, str):
            answers = json.loads(answers_json)
        else:
            answers = answers_json
        if not isinstance(answers, list) or not answers:
            return "Error: Run the benchmark first."
        # The scoring API expects a link to the Space's source tree.
        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"
        result = submit_answers(username, space_url, answers)
        print(result)
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)
        # NOTE(review): score is rendered as a fraction ({score:.1%}) yet
        # compared against 0.3 -- confirm the API returns a 0..1 value.
        cert_msg = (
            "π **Congratulations!** Score above 30% - Certificate earned!"
            if score > 0.3
            else "β Need >30% for certificate."
        )
        return f"""
## Submission Results
**Score:** {score:.1%}
**Correct:** {correct}/{total}
{cert_msg}
[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)
"""
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"
# ============ GRADIO APP ============
# Three-tab UI: single-question smoke test, full benchmark run, and
# leaderboard submission. Handlers above are wired in declaration order.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# π€ GAIA Benchmark Agent
**Tools:** π Web Search | π Wikipedia | π Python | π Files | π’ Calculator | π Webpages | ποΈ Vision (OpenAI)
""")
    # Static banner showing which LLM backend was detected at startup.
    env_status = gr.Markdown(get_env_status())
    with gr.Tabs():
        with gr.TabItem("π§ͺ Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            # test_single_question returns (question, answer, task_id, status).
            test_btn.click(test_single_question, outputs=[test_q, test_a, test_id, test_status])
        with gr.TabItem("π Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            # Holds the answers list so the Submit tab can reuse it.
            answers_state = gr.State()
            run_btn.click(run_agent_on_questions, outputs=[results_df, answers_state])
        with gr.TabItem("π€ Submit"):
            gr.Markdown("### Submit to Leaderboard")
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()

            def format_answers(a):
                """Serialize the benchmark answers for display in the textbox."""
                return json.dumps(a, indent=2) if a else ""

            # Mirror benchmark results into the JSON textbox whenever the
            # shared state changes, so the user can inspect before submitting.
            answers_state.change(format_answers, inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard, inputs=[username_in, space_url_in, answers_in], outputs=[submit_result])
    gr.Markdown("""
---
**Setup:**
- Local: `ollama serve` + `ollama pull qwen2.5:32b`
- Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
""")
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)