Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / app.py

bhotta

Update app.py

d91971a verified 11 days ago

raw

history blame contribute delete

25.8 kB

	import os
	import re
	import json
	import base64
	import subprocess
	import tempfile
	import requests
	import pandas as pd
	import gradio as gr
	from huggingface_hub import InferenceClient
	import anthropic

	# Base URL of the scoring service. The agent and the submit flow append
	# /files/<task_id>, /questions and /submit to it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# ── helpers ───────────────────────────────────────────────────────────────────
	def _strip_html(html: str) -> str:
	from html.parser import HTMLParser

	class _P(HTMLParser):
	def __init__(self):
	super().__init__()
	self.parts = []
	self._skip = False
	self._skip_tags = {"script", "style", "nav", "footer", "head"}

	def handle_starttag(self, tag, attrs):
	if tag in self._skip_tags:
	self._skip = True

	def handle_endtag(self, tag):
	if tag in self._skip_tags:
	self._skip = False

	def handle_data(self, data):
	if not self._skip and data.strip():
	self.parts.append(data.strip())

	p = _P()
	p.feed(html)
	return " ".join(p.parts)


	# ── agent ─────────────────────────────────────────────────────────────────────

	class BasicAgent:
	def __init__(self):
	    """Create the API clients and record the model name and scoring endpoint."""
	    # Claude, reached through the Anthropic API, is the reasoning model.
	    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
	    self.anthropic_client = anthropic.Anthropic(api_key=anthropic_key)
	    self.model = "claude-sonnet-4-20250514"

	    # A Hugging Face client is kept only for Whisper speech recognition and
	    # exists only when a token was found in the environment.
	    self.hf_token = self._get_hf_token()
	    if self.hf_token:
	        self.hf_client = InferenceClient(token=self.hf_token)
	    else:
	        self.hf_client = None

	    self.api_url = DEFAULT_API_URL
	    print(f"✅ Agent initialised with model: {self.model}")

	def _get_hf_token(self):
	for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
	token = os.getenv(var, "").strip()
	if token:
	return token
	return None

	# ── raw file fetch ────────────────────────────────────────────────────────

	def _fetch_file(self, task_id: str):
	    """Download the attachment for *task_id* from the scoring API.

	    Returns:
	        ``(content_bytes, content_type)`` when the server answers 200 with a
	        non-empty body, otherwise ``(None, "")``. Request failures are
	        logged and reported as ``(None, "")`` rather than raised.
	    """
	    try:
	        r = requests.get(f"{self.api_url}/files/{task_id}", timeout=15)
	    except requests.RequestException as e:
	        # Still best-effort, but leave a trace: a timeout or outage is
	        # otherwise indistinguishable from "this task has no file".
	        print(f" File fetch failed for task {task_id}: {e}")
	        return None, ""
	    if r.status_code == 200 and r.content:
	        return r.content, r.headers.get("Content-Type", "")
	    return None, ""

	# ── tool implementations ──────────────────────────────────────────────────

	def tool_check_file(self, task_id: str) -> str:
	fb, ct = self._fetch_file(task_id)
	if not fb:
	return "NO_FILE"
	ct_clean = ct.split(";")[0].strip().lower()
	return (
	f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
	f"Use the right tool: image→analyse_image, python→run_python_file, "
	f"excel/xlsx→read_excel_file, audio→transcribe_audio, "
	f"text/pdf→read_text_file."
	)

	def tool_analyse_image(self, task_id: str, question: str) -> str:
	"""Analyse image using Claude's vision."""
	fb, ct = self._fetch_file(task_id)
	if not fb:
	return "No image found."
	ct_clean = ct.split(";")[0].strip().lower()
	if "image" not in ct_clean:
	return f"File is not an image (type={ct_clean})."
	b64 = base64.b64encode(fb).decode()

	# Map content type to Anthropic media type
	media_map = {
	"image/jpeg": "image/jpeg",
	"image/jpg": "image/jpeg",
	"image/png": "image/png",
	"image/gif": "image/gif",
	"image/webp": "image/webp",
	}
	media_type = media_map.get(ct_clean, "image/jpeg")

	try:
	response = self.anthropic_client.messages.create(
	model=self.model,
	max_tokens=800,
	messages=[{
	"role": "user",
	"content": [
	{
	"type": "image",
	"source": {
	"type": "base64",
	"media_type": media_type,
	"data": b64,
	},
	},
	{"type": "text", "text": question},
	],
	}],
	)
	return response.content[0].text
	except Exception as e:
	return f"Vision error: {e}"

	def tool_run_python_file(self, task_id: str) -> str:
	"""Download and execute Python file, return stdout."""
	fb, _ = self._fetch_file(task_id)
	if not fb:
	return "No file found."
	code = fb.decode("utf-8", errors="ignore")
	try:
	with tempfile.NamedTemporaryFile(
	suffix=".py", delete=False, mode="w"
	) as f:
	f.write(code)
	fname = f.name
	result = subprocess.run(
	["python3", fname],
	capture_output=True, text=True, timeout=30,
	)
	out = result.stdout.strip()
	err = result.stderr.strip()
	return f"STDOUT:\n{out}" if out else f"STDERR:\n{err}" if err else "No output."
	except Exception as e:
	return f"Execution error: {e}"

	def tool_read_excel_file(self, task_id: str, question: str) -> str:
	"""Load Excel/CSV and answer a question about it."""
	fb, ct = self._fetch_file(task_id)
	if not fb:
	return "No file found."
	try:
	import io
	ct_clean = ct.split(";")[0].strip().lower()
	df = (
	pd.read_csv(io.BytesIO(fb))
	if ("csv" in ct_clean or "text" in ct_clean)
	else pd.read_excel(io.BytesIO(fb))
	)
	preview = df.to_string(max_rows=80, max_cols=20)
	return (
	f"SPREADSHEET DATA:\n{preview}\n\n"
	f"Answer the following about this data: {question}"
	)
	except Exception as e:
	return f"Excel read error: {e}"

	def tool_transcribe_audio(self, task_id: str) -> str:
	"""Transcribe audio using HF Whisper (free ASR endpoint)."""
	fb, ct = self._fetch_file(task_id)
	if not fb:
	return "No file found."
	try:
	ct_clean = ct.split(";")[0].strip().lower()
	ext_map = {
	"audio/mpeg": ".mp3", "audio/mp3": ".mp3",
	"audio/wav": ".wav", "audio/x-wav": ".wav",
	"audio/ogg": ".ogg", "audio/flac": ".flac",
	"audio/m4a": ".m4a", "audio/mp4": ".mp4",
	}
	ext = ext_map.get(ct_clean, ".mp3")
	with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
	f.write(fb)
	fname = f.name

	if self.hf_client:
	asr_client = InferenceClient(
	model="openai/whisper-large-v3",
	token=self.hf_token,
	)
	with open(fname, "rb") as audio_f:
	result = asr_client.automatic_speech_recognition(audio_f)
	return result.text if hasattr(result, "text") else str(result)
	else:
	return "No HF token available for audio transcription."
	except Exception as e:
	return f"Transcription error: {e}"

	def tool_read_text_file(self, task_id: str) -> str:
	fb, ct = self._fetch_file(task_id)
	if not fb:
	return "No file found."
	try:
	ct_clean = ct.split(";")[0].strip().lower()
	if "pdf" in ct_clean:
	try:
	import pdfminer.high_level
	import io
	return pdfminer.high_level.extract_text(io.BytesIO(fb))[:6000]
	except ImportError:
	pass
	return fb.decode("utf-8", errors="ignore")[:6000]
	except Exception as e:
	return f"Read error: {e}"

	def tool_search_web(self, query: str) -> str:
	    """Search DuckDuckGo's HTML endpoint and return up to six result snippets.

	    Returns the snippets separated by blank lines, "No results." when none
	    were found, or "Search error: ..." when the request (including an HTTP
	    error status) or the parsing fails.
	    """
	    from html.parser import HTMLParser

	    class _SnippetParser(HTMLParser):
	        """Collect the full text of each element whose class contains ``result__snippet``."""

	        def __init__(self):
	            super().__init__()
	            self.results = []
	            self._tag = None    # tag name of the snippet element being read
	            self._depth = 0     # open elements with that same tag name
	            self._buf = []

	        def handle_starttag(self, tag, attrs):
	            if self._tag is None:
	                # "or ''" covers a valueless class attribute, parsed as None.
	                css = dict(attrs).get("class") or ""
	                if "result__snippet" in css:
	                    self._tag, self._depth, self._buf = tag, 1, []
	            elif tag == self._tag:
	                self._depth += 1

	        def handle_data(self, data):
	            if self._tag is not None:
	                self._buf.append(data)

	        def handle_endtag(self, tag):
	            # Only the close tag matching the snippet element ends it.
	            # Ending on the first close tag of any kind cut every snippet
	            # short at its first nested element such as <b>...</b>.
	            if tag != self._tag:
	                return
	            self._depth -= 1
	            if self._depth == 0:
	                text = "".join(self._buf).strip()
	                if text:
	                    self.results.append(text)
	                self._tag = None

	    try:
	        hdrs = {
	            "User-Agent": (
	                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	                "AppleWebKit/537.36 Chrome/124.0 Safari/537.36"
	            )
	        }
	        r = requests.get(
	            "https://html.duckduckgo.com/html/",
	            params={"q": query}, headers=hdrs, timeout=12,
	        )
	        # Report HTTP failures as errors instead of as an empty result set.
	        r.raise_for_status()
	        p = _SnippetParser()
	        p.feed(r.text)
	        p.close()
	        return "\n\n".join(p.results[:6]) or "No results."
	    except Exception as e:
	        return f"Search error: {e}"

	def tool_fetch_webpage(self, url: str) -> str:
	    """Download *url* and return up to 8000 characters of its visible text.

	    Any failure, including an HTTP error status, is returned as
	    "Fetch error: ...".
	    """
	    browser_headers = {"User-Agent": "Mozilla/5.0 Chrome/124.0"}
	    try:
	        page = requests.get(url, headers=browser_headers, timeout=18)
	        page.raise_for_status()
	        visible_text = _strip_html(page.text)
	    except Exception as e:
	        return f"Fetch error: {e}"
	    return visible_text[:8000]

	def tool_fetch_wikipedia(self, title: str) -> str:
	    """Return text for the English Wikipedia page *title*.

	    The REST ``page/summary`` endpoint is tried first and its ``extract``
	    field is returned on a 200 reply. On any other status the Action API
	    ``extracts`` query is used instead, with HTML stripped and the text
	    capped at 7000 characters. Returns "Not found." when neither yields
	    text and "Wikipedia error: ..." when a request or decode fails.
	    """
	    try:
	        # safe="" so that "/" inside a title (e.g. "AC/DC") is sent as %2F
	        # rather than being read as an extra path segment by the REST API.
	        slug = requests.utils.quote(title.replace(" ", "_"), safe="")
	        r = requests.get(
	            f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}",
	            timeout=12,
	        )
	        if r.status_code == 200:
	            return r.json().get("extract", "Not found.")
	        r2 = requests.get(
	            "https://en.wikipedia.org/w/api.php",
	            params={
	                "action": "query", "prop": "extracts",
	                "titles": title, "format": "json", "redirects": 1,
	            },
	            timeout=12,
	        )
	        pages = r2.json().get("query", {}).get("pages", {})
	        for page in pages.values():
	            text = _strip_html(page.get("extract", ""))
	            if text:
	                return text[:7000]
	    except Exception as e:
	        return f"Wikipedia error: {e}"
	    return "Not found."

	def tool_youtube_transcript(self, video_url: str) -> str:
	    """Return up to 6000 characters of the transcript of a YouTube video.

	    Accepts ``watch?v=ID``, ``youtu.be/ID``, ``/shorts/ID`` and ``/embed/ID``
	    URLs. Returns "Bad URL." when no video id can be found, a "BLOCKED: ..."
	    hint when the failure looks like YouTube refusing the caller's IP, and
	    "Transcript error: ..." for any other failure.
	    """
	    # Validate the URL before importing the optional dependency, so a
	    # malformed URL is reported as such even when the package is missing.
	    vid = re.search(
	        r"(?:v=|youtu\.be/|/shorts/|/embed/)([^&?#/\s]+)", video_url or ""
	    )
	    if not vid:
	        return "Bad URL."
	    try:
	        from youtube_transcript_api import YouTubeTranscriptApi
	        # NOTE(review): newer youtube-transcript-api releases appear to favour
	        # an instance-based fetch() call; confirm the pinned version still
	        # provides get_transcript.
	        entries = YouTubeTranscriptApi.get_transcript(vid.group(1))
	        return " ".join(e["text"] for e in entries)[:6000]
	    except Exception as e:
	        err = str(e)
	        # Match whole words only. A plain substring test for "ip" is true
	        # for any message containing "transcript" (including the package
	        # name itself), which labelled nearly every failure as BLOCKED.
	        looks_blocked = (
	            "blocked" in type(e).__name__.lower()
	            or re.search(r"\b(?:blocked|blocking|ip|cloud)\b", err, re.IGNORECASE)
	        )
	        if looks_blocked:
	            return (
	                "BLOCKED: YouTube blocks cloud IPs. "
	                "Use search_web to find transcript or description of this video."
	            )
	        return f"Transcript error: {err}"

	# ── Anthropic tool definitions ────────────────────────────────────────────

	# Tool definitions sent to the model as the `tools` argument of
	# messages.create in __call__. Each entry has a `name` (matched by
	# _dispatch), a `description` the model reads, and a JSON-schema
	# `input_schema` listing the arguments it must supply.
	TOOLS = [
	# -- tools that operate on the file attached to a task (keyed by task_id) --
	{
	"name": "check_file",
	"description": (
	"ALWAYS call this first. Checks if a file is attached to the task. "
	"Returns NO_FILE or the file type and which tool to use next."
	),
	"input_schema": {
	"type": "object",
	"properties": {"task_id": {"type": "string"}},
	"required": ["task_id"],
	},
	},
	{
	"name": "analyse_image",
	"description": (
	"Analyse an image file attached to the task using vision. "
	"Use for chess boards, diagrams, photos, screenshots."
	),
	"input_schema": {
	"type": "object",
	"properties": {
	"task_id": {"type": "string"},
	"question": {
	"type": "string",
	"description": "What to find or answer from the image.",
	},
	},
	"required": ["task_id", "question"],
	},
	},
	{
	"name": "run_python_file",
	"description": (
	"Execute the Python file attached to the task and return its output. "
	"The stdout IS the answer."
	),
	"input_schema": {
	"type": "object",
	"properties": {"task_id": {"type": "string"}},
	"required": ["task_id"],
	},
	},
	{
	"name": "read_excel_file",
	"description": "Read an Excel or CSV file and answer a question about its data.",
	"input_schema": {
	"type": "object",
	"properties": {
	"task_id": {"type": "string"},
	"question": {"type": "string"},
	},
	"required": ["task_id", "question"],
	},
	},
	{
	"name": "transcribe_audio",
	"description": (
	"Transcribe an audio file using Whisper. "
	"Use for voice memos, recordings, audio questions."
	),
	"input_schema": {
	"type": "object",
	"properties": {"task_id": {"type": "string"}},
	"required": ["task_id"],
	},
	},
	{
	"name": "read_text_file",
	"description": "Read a text or PDF file attached to the task.",
	"input_schema": {
	"type": "object",
	"properties": {"task_id": {"type": "string"}},
	"required": ["task_id"],
	},
	},
	# -- tools that fetch information from the web --
	{
	"name": "youtube_transcript",
	"description": (
	"Fetch YouTube video transcript. "
	"If cloud-blocked, use search_web instead."
	),
	"input_schema": {
	"type": "object",
	"properties": {"video_url": {"type": "string"}},
	"required": ["video_url"],
	},
	},
	{
	"name": "search_web",
	"description": "Search the web via DuckDuckGo. Returns top result snippets.",
	"input_schema": {
	"type": "object",
	"properties": {"query": {"type": "string"}},
	"required": ["query"],
	},
	},
	{
	"name": "fetch_webpage",
	"description": "Fetch and read the full text of any URL.",
	"input_schema": {
	"type": "object",
	"properties": {"url": {"type": "string"}},
	"required": ["url"],
	},
	},
	{
	"name": "fetch_wikipedia",
	"description": (
	"Fetch a Wikipedia article by exact title via REST API. "
	"Always prefer this over fetch_webpage for Wikipedia."
	),
	"input_schema": {
	"type": "object",
	"properties": {"title": {"type": "string"}},
	"required": ["title"],
	},
	},
	]

	def _dispatch(self, fn: str, args: dict, task_id: str, question: str) -> str:
	if fn == "check_file":
	return self.tool_check_file(args.get("task_id", task_id))
	if fn == "analyse_image":
	return self.tool_analyse_image(
	args.get("task_id", task_id), args.get("question", question))
	if fn == "run_python_file":
	return self.tool_run_python_file(args.get("task_id", task_id))
	if fn == "read_excel_file":
	return self.tool_read_excel_file(
	args.get("task_id", task_id), args.get("question", question))
	if fn == "transcribe_audio":
	return self.tool_transcribe_audio(args.get("task_id", task_id))
	if fn == "read_text_file":
	return self.tool_read_text_file(args.get("task_id", task_id))
	if fn == "youtube_transcript":
	return self.tool_youtube_transcript(args.get("video_url", ""))
	if fn == "search_web":
	return self.tool_search_web(args.get("query", ""))
	if fn == "fetch_webpage":
	return self.tool_fetch_webpage(args.get("url", ""))
	if fn == "fetch_wikipedia":
	return self.tool_fetch_wikipedia(args.get("title", ""))
	return "Unknown tool."

	# ── system prompt ─────────────────────────────────────────────────────────

	# System prompt passed as `system=` on every messages.create call in
	# __call__. It prescribes the order in which the tools declared in TOOLS
	# are to be used and the bare-value format of the final answer. The text
	# is sent to the model verbatim, so edits here change agent behaviour.
	SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
	MANDATORY WORKFLOW:
	STEP 1 — Call check_file(task_id) first for every task.
	• NO_FILE → go to STEP 2.
	• image file → call analyse_image(task_id, question).
	• python file → call run_python_file(task_id). Its output IS the answer.
	• excel/csv file → call read_excel_file(task_id, question).
	• audio file → call transcribe_audio(task_id), then answer from transcript.
	• text/pdf file → call read_text_file(task_id), then answer from content.
	NEVER return "NO_FILE" or tool status strings as your final answer.
	STEP 2 — Gather information.
	• YouTube URL → call youtube_transcript(url). If BLOCKED → search_web.
	• Wikipedia question → fetch_wikipedia("Exact Article Title").
	Discography → count ONLY solo studio albums (not collaborations/live/EP).
	• LibreTexts 1.E → fetch_webpage:
	https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
	• Sports stats → search_web then fetch_webpage for exact numbers.
	• Any other question → search_web, then fetch_webpage for details.
	STEP 3 — Try at least 2-3 different search queries before concluding.
	Never say "I was unable to find." Always use tools to find the answer.
	STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
	Numbers: just digits. Names: just the name. Lists: comma-separated."""

	# ── main call ─────────────────────────────────────────────────────────────

	def __call__(self, question: str, task_id: str = "") -> str:
	print(f"▶ Task {task_id[:8]}: {question[:80]}")

	messages = [
	{
	"role": "user",
	"content": f"task_id: {task_id}\n\nTask: {question}",
	},
	]

	bad_phrases = (
	"no_file", "file_exists", "i was unable", "i couldn't",
	"i can't access", "please provide", "you might want",
	"i'm unable", "i cannot", "i am unable",
	)

	for _round in range(10):
	try:
	resp = self.anthropic_client.messages.create(
	model=self.model,
	max_tokens=1500,
	system=self.SYSTEM,
	tools=self.TOOLS,
	messages=messages,
	)
	except Exception as e:
	print(f" Anthropic API error: {e}")
	return "Error."

	# Check stop reason
	stop_reason = resp.stop_reason

	# Collect text and tool use blocks
	tool_uses = [b for b in resp.content if b.type == "tool_use"]
	text_blocks = [b for b in resp.content if b.type == "text"]

	# Append assistant message
	messages.append({"role": "assistant", "content": resp.content})

	if stop_reason == "end_turn" or not tool_uses:
	# Final answer
	answer = text_blocks[0].text.strip() if text_blocks else ""
	if any(b in answer.lower() for b in bad_phrases):
	messages.append({
	"role": "user",
	"content": (
	"That is not acceptable. Use your tools to find the "
	"real answer. Return ONLY the final value."
	),
	})
	continue
	return answer

	# Execute tool calls and collect results
	tool_results = []
	for tb in tool_uses:
	fn = tb.name
	args = tb.input if isinstance(tb.input, dict) else {}
	result = self._dispatch(fn, args, task_id, question)
	print(f" {fn} → {str(result)[:80]}")
	tool_results.append({
	"type": "tool_result",
	"tool_use_id": tb.id,
	"content": result or "Empty result.",
	})

	messages.append({"role": "user", "content": tool_results})

	# Force final answer after max rounds
	try:
	messages.append({
	"role": "user",
	"content": "Final answer only — just the value, no explanation.",
	})
	resp = self.anthropic_client.messages.create(
	model=self.model,
	max_tokens=100,
	system=self.SYSTEM,
	messages=messages,
	)
	text_blocks = [b for b in resp.content if b.type == "text"]
	return text_blocks[0].text.strip() if text_blocks else "Error."
	except Exception:
	return "Error."


	# ── Gradio UI ─────────────────────────────────────────────────────────────────

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Run the agent on every question from the scoring API and submit the answers.

	    Args:
	        profile: OAuth profile of the logged-in user; a falsy value is
	            treated as "not logged in". (Presumably supplied by Gradio from
	            the login button, since the click handler wires no inputs.)

	    Returns:
	        ``(status_text, results)``. *results* is a DataFrame with one row
	        per attempted question, or None when the run stopped before any
	        question was attempted.
	    """
	    if not profile:
	        return "Please login to Hugging Face first.", None

	    username = profile.username
	    space_id = os.getenv("SPACE_ID", "")
	    api_url = DEFAULT_API_URL

	    try:
	        agent = BasicAgent()
	    except Exception as e:
	        return f"Init failed: {e}", None

	    try:
	        qs = requests.get(f"{api_url}/questions", timeout=15)
	        qs.raise_for_status()
	        questions_data = qs.json()
	    except Exception as e:
	        return f"Error fetching questions: {e}", None

	    if not questions_data:
	        return "Fetched questions list is empty.", None

	    results_log, answers_payload = [], []

	    for item in questions_data:
	        task_id = item.get("task_id", "")
	        question_text = item.get("question", "")
	        # An entry without an id or text cannot be answered or scored.
	        if not task_id or not question_text:
	            print(f" Skipping malformed question entry: {item}")
	            continue
	        try:
	            answer = agent(question_text, task_id=task_id)
	        except Exception as e:
	            answer = f"Error: {e}"
	        print(f" → {answer[:60]}")

	        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
	        results_log.append({
	            "Task ID": task_id,
	            "Question": question_text[:120],
	            "Answer": answer,
	        })

	    if not answers_payload:
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    try:
	        r = requests.post(
	            f"{api_url}/submit",
	            json={
	                "username": username.strip(),
	                "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
	                "answers": answers_payload,
	            },
	            timeout=60,
	        )
	        r.raise_for_status()
	        res = r.json()
	        status = (
	            f"✅ Submitted!\n"
	            f"Score: {res.get('score')}% "
	            f"({res.get('correct_count')}/{res.get('total_attempted')})\n"
	            f"Message: {res.get('message')}"
	        )
	    except Exception as e:
	        status = f"Submission failed: {e}"

	    return status, pd.DataFrame(results_log)


	# Build the page. Components appear in the order they are created inside
	# the Blocks context: title, model summary, login button, run button,
	# status box and results table.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	gr.Markdown("# 🤖 GAIA Agent — Claude Sonnet")
	gr.Markdown(
	f"LLM: `claude-sonnet-4-20250514` (Anthropic API) \n"
	"Vision: Claude native vision \n"
	"ASR: `openai/whisper-large-v3` (HF)"
	)
	gr.LoginButton()
	run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
	status_output = gr.Textbox(label="Status", lines=5)
	results_table = gr.DataFrame(label="Results")
	# No inputs are wired; the handler's two return values fill the status
	# box and the results table. NOTE(review): its `profile` argument is
	# presumably injected by Gradio from the login state — confirm.
	run_button.click(fn=run_and_submit_all,
	outputs=[status_output, results_table])

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	demo.launch()