"""
GAIA Agent v5 - with vision and audio transcription.
Target: 40%+ (8+/20)
"""
import os
import re
import io
import time
import base64
import traceback
import gradio as gr
import requests
import pandas as pd
from bs4 import BeautifulSoup
from typing import Optional, Tuple, List, Dict, Any
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
GROQ_AUDIO_API = "https://api.groq.com/openai/v1/audio/transcriptions"
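# Groq's endpoints are OpenAI-compatible, so the payloads below follow the OpenAI chat/transcription schemas.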
# Models to try in order of preference
GROQ_MODELS = [
"llama-3.3-70b-versatile",
"llama-3.1-70b-versatile",
"mixtral-8x7b-32768",
]
GROQ_VISION_MODEL = "llama-3.2-90b-vision-preview"
GROQ_AUDIO_MODEL = "whisper-large-v3"
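# Vision and audio use dedicated models; GROQ_MODELS is the fallback chain for text reasoning.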
# ==========================================
# VISION & AUDIO TOOLS
# ==========================================
def transcribe_audio(audio_bytes: bytes, groq_key: str, filename: str = "audio.mp3") -> str:
"""Transcribe audio using Groq Whisper API."""
if not groq_key or not audio_bytes:
return ""
try:
print(f" 🎀 Transcribing audio ({len(audio_bytes)/1024:.1f} KB)...")
files = {
'file': (filename, audio_bytes, 'audio/mpeg'),
'model': (None, GROQ_AUDIO_MODEL),
}
resp = requests.post(
GROQ_AUDIO_API,
headers={"Authorization": f"Bearer {groq_key}"},
files=files,
timeout=60,
)
if resp.status_code == 200:
result = resp.json()
text = result.get("text", "")
print(f" βœ… Transcribed: {text[:100]}...")
return text
else:
print(f" ⚠️ Audio transcription failed: {resp.status_code} - {resp.text[:200]}")
return ""
except Exception as e:
print(f" ⚠️ Audio transcription error: {e}")
return ""
def analyze_image(image_bytes: bytes, question: str, groq_key: str) -> str:
"""Analyze image using Groq Vision API."""
if not groq_key or not image_bytes:
return ""
try:
print(f" πŸ–ΌοΈ Analyzing image ({len(image_bytes)/1024:.1f} KB)...")
# Convert to base64
image_b64 = base64.b64encode(image_bytes).decode('utf-8')
# Detect image type
if image_bytes[:8] == b'\x89PNG\r\n\x1a\n':
mime_type = "image/png"
elif image_bytes[:2] == b'\xff\xd8':
mime_type = "image/jpeg"
elif image_bytes[:6] in (b'GIF87a', b'GIF89a'):
mime_type = "image/gif"
else:
mime_type = "image/png" # default
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": f"Look at this image and answer the question precisely. Give ONLY the answer, no explanation.\n\nQuestion: {question}"
},
{
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{image_b64}"
}
}
]
}
]
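        # The image goes inline as a base64 data URL, the OpenAI-style format vision endpoints accept for image_url parts.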
resp = requests.post(
GROQ_API,
headers={
"Authorization": f"Bearer {groq_key}",
"Content-Type": "application/json"
},
json={
"model": GROQ_VISION_MODEL,
"messages": messages,
"temperature": 0.1,
"max_tokens": 300,
},
timeout=60,
)
if resp.status_code == 200:
result = resp.json()
answer = result.get("choices", [{}])[0].get("message", {}).get("content", "")
print(f" βœ… Vision response: {answer[:100]}...")
return answer
else:
print(f" ⚠️ Vision failed: {resp.status_code} - {resp.text[:200]}")
return ""
except Exception as e:
print(f" ⚠️ Vision error: {e}")
return ""
# ==========================================
# TOOLS
# ==========================================
def fetch_webpage(url: str, timeout: int = 15) -> str:
"""Fetch and extract text from a webpage."""
try:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
}
resp = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# Remove unwanted elements
for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe", "form"]):
el.extract()
# Try to get main content first
main_content = soup.find("main") or soup.find("article") or soup.find("div", {"class": re.compile(r"content|main|article", re.I)})
if main_content:
text = main_content.get_text("\n", strip=True)
else:
text = soup.get_text("\n", strip=True)
lines = [l.strip() for l in text.splitlines() if l.strip() and len(l.strip()) > 2]
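        # Cap the extracted text so it fits comfortably in the LLM context.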
return "\n".join(lines)[:10000]
except Exception as e:
print(f" ⚠️ Webpage fetch error: {e}")
return ""
def fetch_youtube_transcript(url: str) -> str:
"""Fetch YouTube video transcript with multiple fallback methods."""
try:
from youtube_transcript_api import YouTubeTranscriptApi
# Extract video ID
patterns = [
r"(?:v=|/v/|youtu\.be/|embed/|shorts/)([a-zA-Z0-9_-]{11})",
r"^([a-zA-Z0-9_-]{11})$"
]
vid = None
for pattern in patterns:
match = re.search(pattern, url)
if match:
vid = match.group(1)
break
        if not vid:
            print(f"  ⚠️ Could not extract video ID from: {url}")
            return ""
        print(f"  📺 Video ID: {vid}")
# Create API instance (new API style)
ytt_api = YouTubeTranscriptApi()
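        # fetch() and list() are instance methods in youtube-transcript-api 1.x, replacing the older static helpers.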
# Try multiple language options
lang_options = [
("en",),
("en", "en-US", "en-GB"),
("it", "it-IT"),
("en", "it", "fr", "de", "es", "pt"),
]
for langs in lang_options:
try:
transcript = ytt_api.fetch(vid, languages=langs)
# transcript is a FetchedTranscript object, iterate to get snippets
text = " ".join([snippet.text for snippet in transcript])
if text:
print(f" βœ“ Got transcript ({len(text)} chars, langs: {langs})")
return text[:8000]
except Exception as e:
continue
# Try listing all transcripts and fetching any available
try:
transcript_list = ytt_api.list(vid)
# Try manually created first
for t in transcript_list:
if not t.is_generated:
try:
fetched = t.fetch()
text = " ".join([snippet.text for snippet in fetched])
if text:
print(f" βœ“ Got manual transcript ({len(text)} chars)")
return text[:8000]
except:
pass
# Then auto-generated
for t in transcript_list:
if t.is_generated:
try:
fetched = t.fetch()
text = " ".join([snippet.text for snippet in fetched])
if text:
print(f" βœ“ Got auto transcript ({len(text)} chars)")
return text[:8000]
except:
pass
# Try translated
for t in transcript_list:
try:
translated = t.translate('en')
fetched = translated.fetch()
text = " ".join([snippet.text for snippet in fetched])
if text:
print(f" βœ“ Got translated transcript ({len(text)} chars)")
return text[:8000]
except:
pass
except Exception as e:
print(f" ⚠️ Transcript list error: {e}")
return ""
except ImportError:
print(" ⚠️ youtube_transcript_api not installed")
return ""
except Exception as e:
print(f" ⚠️ YouTube error: {e}")
return ""
def fetch_task_file(task_id: str) -> Tuple[str, str, Optional[bytes]]:
"""Fetch and parse attached file for a task. Returns (content_str, file_type, raw_bytes_for_media)."""
try:
url = f"{DEFAULT_API_URL}/files/{task_id}"
resp = requests.get(url, timeout=30)
if resp.status_code == 404:
return "", "none", None
if resp.status_code != 200:
print(f" ⚠️ File fetch failed: {resp.status_code}")
return "", "none", None
ct = resp.headers.get("Content-Type", "").lower()
cd = resp.headers.get("Content-Disposition", "")
# Extract filename
filename = ""
if "filename=" in cd:
filename = cd.split("filename=")[-1].strip('" ')
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
print(f" πŸ“Ž File: {filename or 'unknown'}, type: {ct[:50]}")
# Text/Code files
if any(t in ct for t in ["text/", "json", "javascript", "python"]) or ext in ["txt", "csv", "json", "py", "md", "js", "html"]:
text = resp.text
# CSV parsing
if ext == "csv" or "csv" in ct:
try:
df = pd.read_csv(io.StringIO(text))
summary = f"CSV file with {len(df)} rows and columns: {list(df.columns)}\n"
summary += f"Data:\n{df.to_string()}"
return summary[:8000], "csv", None
except Exception as e:
print(f" ⚠️ CSV parse error: {e}")
# Python code
if ext == "py":
return f"Python code:\n```python\n{text[:6000]}\n```", "python", None
return text[:8000], "text", None
# Excel files
if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
try:
df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
summary = f"Excel file with {len(df)} rows and columns: {list(df.columns)}\n"
summary += f"Data:\n{df.to_string()}"
return summary[:8000], "excel", None
except Exception as e:
print(f" ⚠️ Excel parse error: {e}")
try:
df = pd.read_excel(io.BytesIO(resp.content))
summary = f"Excel file with {len(df)} rows and columns: {list(df.columns)}\n"
summary += f"Data:\n{df.to_string()}"
return summary[:8000], "excel", None
except:
return "Excel file (could not parse)", "excel", None
# PDF files
if "pdf" in ct or ext == "pdf":
try:
import PyPDF2
reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
text_parts = []
for i, page in enumerate(reader.pages):
page_text = page.extract_text() or ""
if page_text:
text_parts.append(f"--- Page {i+1} ---\n{page_text}")
text = "\n".join(text_parts)
return text[:8000] if text else "PDF (no extractable text)", "pdf", None
except ImportError:
print(" ⚠️ PyPDF2 not installed")
return "PDF file (PyPDF2 not available)", "pdf", None
except Exception as e:
print(f" ⚠️ PDF parse error: {e}")
return "PDF file (parse error)", "pdf", None
# Audio files - return raw bytes for transcription
if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg", "flac"]:
size_kb = len(resp.content) / 1024
print(f" 🎡 Audio file detected ({size_kb:.1f} KB) - will transcribe")
return f"Audio file ({ext or 'unknown'}, {size_kb:.1f} KB)", "audio", resp.content
# Image files - return raw bytes for vision analysis
if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp", "bmp"]:
size_kb = len(resp.content) / 1024
print(f" πŸ–ΌοΈ Image file detected ({size_kb:.1f} KB) - will analyze")
return f"Image file ({ext or 'unknown'}, {size_kb:.1f} KB)", "image", resp.content
# Try to decode as text
try:
text = resp.content.decode("utf-8")
return text[:8000], "text", None
        except UnicodeDecodeError:
            try:
                text = resp.content.decode("latin-1")
                return text[:8000], "text", None
            except Exception:
return f"Binary file ({ct or 'unknown type'}, {len(resp.content)} bytes)", "binary", None
except requests.exceptions.Timeout:
print(" ⚠️ File fetch timeout")
return "", "none", None
except Exception as e:
print(f" ⚠️ File fetch error: {e}")
return "", "none", None
def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
"""Search the web and return results."""
results = []
    # Try the ddgs package first (duckduckgo-search was renamed to ddgs); fall back to the old package below
try:
from ddgs import DDGS
ddgs = DDGS()
for r in ddgs.text(query, max_results=max_results):
results.append({
"title": r.get("title", ""),
"body": r.get("body", ""),
"href": r.get("href", "")
})
if results:
print(f" πŸ” ddgs found {len(results)} results")
return results
except ImportError:
pass
except Exception as e:
print(f" ⚠️ ddgs error: {e}")
# Fallback: try duckduckgo-search package
try:
from duckduckgo_search import DDGS
with DDGS() as ddgs:
for r in ddgs.text(query, max_results=max_results):
results.append({
"title": r.get("title", ""),
"body": r.get("body", ""),
"href": r.get("href", "")
})
if results:
print(f" πŸ” DDG found {len(results)} results")
return results
except ImportError:
print(" ⚠️ duckduckgo-search not installed")
except Exception as e:
print(f" ⚠️ DDG error: {e}")
return results
def search_wikipedia(query: str) -> str:
"""Search Wikipedia and return article content."""
try:
headers = {
"User-Agent": "GAIAAgent/1.0 (https://huggingface.co/spaces; contact@example.com)"
}
# Search for article
search_url = "https://en.wikipedia.org/w/api.php"
params = {
"action": "query",
"list": "search",
"srsearch": query,
"format": "json",
"srlimit": 3
}
resp = requests.get(search_url, params=params, headers=headers, timeout=10)
if resp.status_code != 200:
print(f" ⚠️ Wikipedia search HTTP {resp.status_code}")
return ""
data = resp.json()
results = data.get("query", {}).get("search", [])
if not results:
return ""
# Get the first article
title = results[0]["title"]
# Fetch article content using REST API
encoded_title = requests.utils.quote(title.replace(' ', '_'))
content_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
resp = requests.get(content_url, headers=headers, timeout=10)
if resp.status_code == 200:
article = resp.json()
extract = article.get("extract", "")
if extract:
print(f" πŸ“– Wikipedia: {title}")
return f"Wikipedia - {title}:\n{extract}"
return ""
except requests.exceptions.Timeout:
print(f" ⚠️ Wikipedia timeout")
return ""
except Exception as e:
print(f" ⚠️ Wikipedia error: {e}")
return ""
# ==========================================
# GROQ LLM
# ==========================================
def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1, model: Optional[str] = None) -> str:
"""Send request to Groq API with retries and model fallback."""
if not groq_key:
print(" ❌ GROQ_API_KEY is empty!")
return ""
# Use specified model or try all models in order
models_to_try = [model] if model else GROQ_MODELS
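    # Up to two attempts per model; a 404 (unknown model) skips ahead to the next model in the list.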
for model_name in models_to_try:
for attempt in range(2): # 2 attempts per model
try:
resp = requests.post(
GROQ_API,
headers={
"Authorization": f"Bearer {groq_key}",
"Content-Type": "application/json"
},
json={
"model": model_name,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
},
timeout=60,
)
if resp.status_code == 200:
result = resp.json()
content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
if content:
print(f" πŸ“ [{model_name}] Response: {content[:80]}...")
return content.strip()
else:
print(f" ⚠️ [{model_name}] Empty content")
elif resp.status_code == 429:
wait_time = 10 * (attempt + 1)
print(f" ⏳ [{model_name}] Rate limited, waiting {wait_time}s...")
time.sleep(wait_time)
elif resp.status_code == 401:
print(f" ❌ Groq API key invalid!")
return ""
elif resp.status_code == 404:
print(f" ⚠️ Model {model_name} not found, trying next...")
break # Try next model
else:
print(f" ⚠️ [{model_name}] HTTP {resp.status_code}: {resp.text[:200]}")
time.sleep(3)
except requests.exceptions.Timeout:
print(f" ⚠️ [{model_name}] Timeout (attempt {attempt + 1}/2)")
time.sleep(5)
except Exception as e:
print(f" ⚠️ [{model_name}] Error: {type(e).__name__}: {e}")
time.sleep(3)
print(" ❌ All Groq attempts failed")
return ""
# ==========================================
# TEXT PROCESSING
# ==========================================
def preprocess_question(question: str) -> str:
"""Handle reversed or scrambled text."""
stripped = question.strip()
# Check for reversed text
reversed_text = stripped[::-1]
# Keywords that indicate proper English text
keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
"when", "the", "is", "are", "was", "were", "has", "have", "this",
"that", "from", "with", "about", "question", "video", "image",
"write", "opposite", "sentence", "if", "you", "understand"]
orig_score = sum(1 for w in keywords if w in stripped.lower())
rev_score = sum(1 for w in keywords if w in reversed_text.lower())
print(f" πŸ“Š Text analysis: orig_keywords={orig_score}, rev_keywords={rev_score}")
# If reversed text has more keywords, use it
if rev_score > orig_score + 1 and len(stripped) > 20:
print(f" πŸ”„ Detected reversed text!")
print(f" πŸ“ Reversed: {reversed_text[:100]}...")
return reversed_text
# Also check if text starts with punctuation (common in reversed text)
if stripped and stripped[0] in '.!?,;:' and rev_score >= orig_score:
print(f" πŸ”„ Text starts with punctuation, trying reversed")
print(f" πŸ“ Reversed: {reversed_text[:100]}...")
return reversed_text
return stripped
def clean_answer(raw: str) -> str:
"""Extract and clean the final answer from LLM response."""
if not raw:
return ""
answer = raw.strip()
# Take first non-empty line
for line in answer.split("\n"):
line = line.strip()
if line and not line.startswith("#"):
answer = line
break
# Remove common prefixes (case-insensitive)
prefixes = [
"the answer is:", "the answer is", "answer:", "answer is:",
"final answer:", "final answer is:", "the final answer is:",
"the correct answer is:", "the correct answer is",
"result:", "the result is:",
"based on my analysis,", "based on my analysis",
"based on the", "according to",
"sure,", "here is", "here's", "i found that"
]
# Apply prefix removal iteratively
changed = True
max_iterations = 10
iterations = 0
while changed and iterations < max_iterations:
changed = False
iterations += 1
answer_lower = answer.lower()
for prefix in prefixes:
if answer_lower.startswith(prefix):
answer = answer[len(prefix):].strip()
changed = True
break
    # Remove trailing punctuation (period, comma, etc.) but preserve decimal
    # numbers like "3.14": stripping stops at the first trailing digit.
    answer = answer.rstrip('.,:;! ')
# Clean up formatting
answer = answer.replace("**", "").strip('"\'`')
return answer.strip()
def is_valid_answer(answer: str) -> bool:
"""Check if an answer is valid (not a refusal or error)."""
if not answer or len(answer.strip()) < 1:
return False
# If answer is too long, it's probably not a direct answer
if len(answer) > 150:
print(f" ⚠️ Answer too long ({len(answer)} chars), likely not a direct answer")
return False
# Check for refusal phrases at the START of the answer
refusal_starts = [
"no image", "no information", "no transcript", "no data",
"i do not", "i don't", "i cannot", "i can't", "i am not able",
"unable to", "cannot determine", "not able to",
"without access", "i'm not sure", "i am unable",
"there is no", "there's no", "no file", "no video"
]
answer_lower = answer.lower().strip()
for phrase in refusal_starts:
if answer_lower.startswith(phrase):
print(f" ⚠️ Answer starts with refusal: '{phrase}'")
return False
invalid_phrases = [
"i don't know", "i dont know", "i do not know",
"n/a", "error",
"i cannot", "i can't", "i cant",
"not available", "no answer", "unable to",
"i'm not sure", "im not sure", "i am not sure",
"no image", "cannot determine", "insufficient information",
"not provided", "cannot access", "i'm unable", "i am unable",
"not able to", "i am not able", "however,", "based on typical",
"without access", "no transcript", "no information"
]
return not any(phrase in answer_lower for phrase in invalid_phrases)
# ==========================================
# MAIN SOLVER
# ==========================================
SYSTEM_PROMPT = """Answer the question with ONLY the final answer. No explanation.
Format:
- Numbers: just the number (e.g., 5)
- Names: just the name (e.g., John Smith)
- Words: just the word (e.g., right)
- Lists: comma-separated (e.g., a, b, c)
IMPORTANT:
- If counting items from a list or table, count carefully and give the exact number
- If asked for opposite of a word, give that opposite word
- Always give your best answer, never refuse"""
def is_simple_question(question: str) -> bool:
"""Check if question is simple enough to answer without web search."""
q_lower = question.lower()
# Simple questions about opposites, basic facts, math
simple_patterns = [
"opposite of", "antonym of", "what is the opposite",
"write the opposite", "2+2", "2 + 2",
]
return any(p in q_lower for p in simple_patterns) and len(question) < 200
def solve_question(question: str, task_id: str, groq_key: str) -> str:
"""Main function to solve a GAIA question."""
print(f"\n[Q]: {question[:150]}{'...' if len(question) > 150 else ''}")
# Preprocess the question
processed_q = preprocess_question(question)
context_parts = []
# Check if it's a simple question that doesn't need web search
if is_simple_question(processed_q):
print(" ⚑ Simple question detected, answering directly")
answer_raw = ask_groq([
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": f"Answer this directly: {processed_q}"}
], groq_key, max_tokens=50, temperature=0.0)
answer = clean_answer(answer_raw) if answer_raw else ""
if answer and is_valid_answer(answer):
print(f" βœ… Direct answer: {answer}")
return answer
# 1. Check for attached files
file_content, file_type, file_bytes = fetch_task_file(task_id)
if file_content and file_type != "none":
# Handle images with Vision API
if file_type == "image" and file_bytes:
print(f" πŸ–ΌοΈ Analyzing image with Vision API...")
vision_answer = analyze_image(file_bytes, processed_q, groq_key)
if vision_answer and is_valid_answer(clean_answer(vision_answer)):
# If vision gives a good answer, use it directly
answer = clean_answer(vision_answer)
print(f" βœ… Vision answer: {answer}")
return answer
elif vision_answer:
# Add vision analysis to context
context_parts.append(f"[IMAGE ANALYSIS]:\n{vision_answer}")
# Handle audio with Transcription API
elif file_type == "audio" and file_bytes:
print(f" 🎡 Transcribing audio with Whisper...")
transcript = transcribe_audio(file_bytes, groq_key)
if transcript:
context_parts.append(f"[AUDIO TRANSCRIPTION]:\n{transcript}")
print(f" βœ… Got audio transcript ({len(transcript)} chars)")
else:
context_parts.append(f"[NOTE: Audio file attached but transcription failed.]")
# Normal files
else:
context_parts.append(f"[ATTACHED FILE - {file_type.upper()}]:\n{file_content}")
print(f" πŸ“ Got {file_type} file ({len(file_content)} chars)")
# 2. Process YouTube URLs
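    # Match watch/shorts/youtu.be links; trailing punctuation is trimmed from each URL inside the loop.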
yt_urls = re.findall(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)[^\s\)\]]+', processed_q)
for yt_url in yt_urls[:2]: # Limit to 2 videos
clean_url = yt_url.rstrip('.,;:')
print(f" 🎬 Fetching transcript: {clean_url}")
transcript = fetch_youtube_transcript(clean_url)
if transcript:
context_parts.append(f"[YOUTUBE VIDEO TRANSCRIPT]:\n{transcript}")
else:
# Try to search for information about this video
vid_match = re.search(r'(?:v=|youtu\.be/)([a-zA-Z0-9_-]{11})', clean_url)
if vid_match:
vid_id = vid_match.group(1)
print(f" πŸ” No transcript, searching for video info: {vid_id}")
video_results = web_search(f"youtube {vid_id} video content summary", max_results=3)
if video_results:
snippets = "\n".join([f"β€’ {r.get('title', '')}: {r.get('body', '')}" for r in video_results])
context_parts.append(f"[YOUTUBE VIDEO INFO (no transcript available)]:\nVideo URL: {clean_url}\nSearch results about this video:\n{snippets}")
else:
context_parts.append(f"[YOUTUBE VIDEO]: {clean_url} - No transcript or info available.")
else:
context_parts.append(f"[YOUTUBE VIDEO]: {clean_url} - Could not process.")
# 3. Process other URLs
other_urls = re.findall(r'https?://[^\s\)\]]+', processed_q)
other_urls = [u.rstrip('.,;:') for u in other_urls
if "youtube.com" not in u and "youtu.be" not in u]
for url in other_urls[:2]: # Limit to 2 URLs
print(f" 🌐 Fetching page: {url[:60]}...")
page_content = fetch_webpage(url)
if page_content:
context_parts.append(f"[WEBPAGE: {url}]:\n{page_content}")
# 4. Web search for additional context
# Skip search if we have good file data (Excel/CSV with actual data)
should_search = True
if file_type in ["excel", "csv"] and len(file_content) > 500:
should_search = False # We have data to analyze
print(" ⏭️ Skipping search - using file data")
if should_search and not yt_urls:
# Generate search query
        search_query = processed_q[:200]
        # Try to extract key search terms with the LLM
        search_terms = ask_groq([
            {"role": "system", "content": "Extract the key search terms from this question. Output ONLY the search query (3-8 words), nothing else."},
            {"role": "user", "content": processed_q[:400]}
        ], groq_key, max_tokens=30, temperature=0.0)
        if search_terms and 3 < len(search_terms) < 100:
            search_query = search_terms
print(f" πŸ” Searching: '{search_query[:50]}'")
# Try web search
results = web_search(search_query, max_results=5)
if results:
# Add search snippets - these are often the most useful
snippets = "\n".join([f"β€’ {r.get('title', '')}: {r.get('body', '')}" for r in results])
context_parts.append(f"[SEARCH RESULTS]:\n{snippets}")
# Fetch Wikipedia page if in results (most reliable)
wiki_fetched = False
for r in results:
href = r.get("href", "")
if "wikipedia.org" in href and not wiki_fetched:
page = fetch_webpage(href)
if page and len(page) > 500:
context_parts.append(f"[WIKIPEDIA PAGE]:\n{page[:6000]}")
wiki_fetched = True
print(f" πŸ“– Fetched Wikipedia: {href[:50]}")
break
# If no Wikipedia, fetch first non-wiki result
if not wiki_fetched:
for r in results[:2]:
href = r.get("href", "")
if href and "youtube" not in href:
page = fetch_webpage(href)
if page and len(page) > 300:
context_parts.append(f"[WEB PAGE]:\n{page[:4000]}")
print(f" 🌐 Fetched: {href[:50]}")
break
# Also try direct Wikipedia search
wiki_content = search_wikipedia(search_query)
if wiki_content and "[WIKIPEDIA PAGE]" not in str(context_parts):
context_parts.append(f"[WIKIPEDIA]:\n{wiki_content}")
# 5. Build context and query LLM
context = "\n\n".join(context_parts) if context_parts else ""
# Truncate context if too long
if len(context) > 12000:
context = context[:12000] + "\n[...truncated]"
# Check if this is a counting/analysis question
is_counting_q = any(w in processed_q.lower() for w in ['how many', 'count', 'number of', 'total'])
is_list_q = any(w in processed_q.lower() for w in ['list', 'name all', 'what are'])
# First attempt with context - use 2-step for complex questions
if context and (is_counting_q or is_list_q):
# Step 1: Extract relevant data
extract_prompt = f"""From this context, extract ONLY the specific information needed to answer the question.
Context: {context[:8000]}
Question: {processed_q}
List the relevant facts (be brief):"""
extracted = ask_groq([
{"role": "user", "content": extract_prompt}
], groq_key, max_tokens=500, temperature=0.0)
if extracted:
print(f" πŸ“‹ Extracted: {extracted[:150]}...")
# Step 2: Answer based on extracted info
answer_raw = ask_groq([
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": f"Based on these facts:\n{extracted}\n\nQuestion: {processed_q}\n\nFinal answer (just the answer, nothing else):"}
], groq_key, max_tokens=100, temperature=0.0)
else:
answer_raw = ""
elif context:
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": f"Context:\n{context}\n\nQuestion: {processed_q}\n\nAnswer:"}
]
answer_raw = ask_groq(messages, groq_key, max_tokens=100, temperature=0.1)
else:
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": f"Question: {processed_q}\n\nAnswer:"}
]
answer_raw = ask_groq(messages, groq_key, max_tokens=100, temperature=0.1)
answer = clean_answer(answer_raw) if answer_raw else ""
print(f" πŸ“€ Raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")
# If answer isn't valid, try again with more forceful prompt
if not is_valid_answer(answer):
print(f" ⚠️ First attempt invalid: '{answer}', retrying...")
# More forceful prompt
retry_messages = [
{"role": "system", "content": "Give ONLY the answer. One word or number if possible."},
{"role": "user", "content": f"{processed_q}"}
]
answer_raw = ask_groq(retry_messages, groq_key, max_tokens=50, temperature=0.2)
answer = clean_answer(answer_raw) if answer_raw else ""
print(f" πŸ“€ Retry: '{answer}'")
# If still not valid, try one more time with knowledge-based approach
if not is_valid_answer(answer):
print(f" ⚠️ Second attempt invalid: '{answer}', trying knowledge-based...")
retry_messages = [
{"role": "system", "content": "Give ONLY the answer, nothing else. Best guess if unsure."},
{"role": "user", "content": processed_q}
]
answer_raw = ask_groq(retry_messages, groq_key, max_tokens=50, temperature=0.5)
answer = clean_answer(answer_raw) if answer_raw else ""
print(f" πŸ“€ Third try raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")
# If still no valid answer but we have some text, extract first meaningful chunk
if not answer or len(answer.strip()) == 0 or not is_valid_answer(answer):
if answer_raw and len(answer_raw.strip()) > 0:
# Try to extract just the answer part
lines = answer_raw.strip().split('\n')
for line in lines:
line = line.strip()
if line and len(line) < 100 and not any(x in line.lower() for x in ['cannot', "don't know", 'unable', 'no image']):
answer = clean_answer(line)
print(f" πŸ”„ Extracted from response: '{answer}'")
break
# Absolute final fallback
if not answer or len(answer.strip()) == 0:
answer = "unknown"
print(f" ❌ No answer found, defaulting to 'unknown'")
print(f" βœ… Final Answer: {answer}")
return answer
# ==========================================
# GRADIO INTERFACE
# ==========================================
def run_and_submit_all(profile: gr.OAuthProfile | None):
"""Run the agent on all questions and submit answers."""
space_id = os.getenv("SPACE_ID", "")
    if not profile:
        return "Please log in with Hugging Face to continue.", None
username = profile.username
groq_key = os.getenv("GROQ_API_KEY", "")
    if not groq_key:
        return "❌ GROQ_API_KEY is not set! Add the key in the Space settings.", None
print(f"\n{'='*60}")
print(f"πŸ‘€ User: {username}")
print(f"πŸ€– Agent: GAIA Agent v5")
print(f"πŸ”‘ API Key: {groq_key[:8]}...{groq_key[-4:]}")
print(f"{'='*60}")
# Test Groq API connectivity first
print("\nπŸ” Testing Groq API connectivity...")
test_response = ask_groq(
[{"role": "user", "content": "Say 'OK' and nothing else."}],
groq_key, max_tokens=10, temperature=0.0
)
if not test_response:
return "❌ Groq API test failed! Check your API key and try again.", None
print(f"βœ… Groq API test passed: '{test_response}'")
# Fetch questions
try:
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
resp.raise_for_status()
questions = resp.json()
except Exception as e:
return f"❌ Errore nel recupero delle domande: {e}", None
print(f"\nπŸ“‹ {len(questions)} domande da processare\n")
results = []
answers = []
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
for i, item in enumerate(questions):
task_id = item.get("task_id", "")
q = item.get("question")
if not task_id or q is None:
print(f"[{i+1}] Skipping invalid item")
continue
print(f"\n{'─'*60}")
print(f"[{i+1}/{len(questions)}] Task: {task_id[:20]}...")
try:
answer = solve_question(q, task_id, groq_key)
except Exception as e:
print(f" πŸ’₯ Exception: {e}")
traceback.print_exc()
answer = "I don't know"
answers.append({
"task_id": task_id,
"submitted_answer": answer
})
results.append({
"Task ID": task_id[:20] + "...",
"Question": q[:80] + ("..." if len(q) > 80 else ""),
"Answer": answer
})
# Rate limit protection - increase delay between questions
time.sleep(2.5)
if not answers:
return "❌ Nessuna risposta generata.", pd.DataFrame(results)
# Submit answers
print(f"\n{'='*60}")
print(f"πŸ“€ Submitting {len(answers)} answers...")
try:
submit_resp = requests.post(
f"{DEFAULT_API_URL}/submit",
json={
"username": username,
"agent_code": agent_code,
"answers": answers
},
timeout=60,
)
submit_resp.raise_for_status()
result = submit_resp.json()
score = result.get('score', 'N/A')
correct = result.get('correct_count', '?')
total = result.get('total_attempted', '?')
message = result.get('message', '')
status = f"""βœ… Completato!
πŸ‘€ {result.get('username')}
πŸ† {score}% ({correct}/{total})
πŸ“ {message}"""
print(f"\n{status}")
return status, pd.DataFrame(results)
except Exception as e:
error_msg = f"❌ Errore nell'invio: {e}"
print(error_msg)
return error_msg, pd.DataFrame(results)
def create_demo():
"""Build and return the Gradio interface."""
with gr.Blocks(title="GAIA Agent v5") as demo:
gr.Markdown("""# πŸš€ GAIA Agent v5
**Full-featured agent with Vision & Audio!**
- 🧠 Groq Llama 3.3 70B for reasoning
- πŸ‘οΈ Llama 3.2 Vision for image analysis
- 🎀 Whisper for audio transcription
- πŸ” Smart web search + Wikipedia
- πŸ“Ί YouTube transcript extraction
- πŸ“ File parsing (CSV, Excel, PDF, Python)
""")
gr.LoginButton()
        run_button = gr.Button("🔥 Run Evaluation", variant="primary", size="lg")
status_output = gr.Textbox(
label="Risultato",
lines=6,
interactive=False
)
results_table = gr.DataFrame(
label="Risposte",
wrap=True
)
run_button.click(
fn=run_and_submit_all,
outputs=[status_output, results_table]
)
return demo
if __name__ == "__main__":
demo = create_demo()
demo.queue(default_concurrency_limit=1).launch(debug=True, share=False)