# Final_Assignment_Template
#
# Sleeping
#
# File size: 43,212 Bytes

"""
GAIA Agent v5 — With Vision and Audio Transcription!
Target: 40%+ (8+/20)
"""
import os
import re
import io
import time
import base64
import traceback
import gradio as gr
import requests
import pandas as pd
from bs4 import BeautifulSoup
from typing import Optional, Tuple, List, Dict, Any

# Base URL of the scoring service; task attachments are downloaded from
# f"{DEFAULT_API_URL}/files/{task_id}".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Groq chat-completions endpoint, used for both text and vision requests.
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
# Groq audio-transcription endpoint.
GROQ_AUDIO_API = "https://api.groq.com/openai/v1/audio/transcriptions"

# Models to try in order of preference
GROQ_MODELS = [
    "llama-3.3-70b-versatile",
    "llama-3.1-70b-versatile", 
    "mixtral-8x7b-32768",
]

# Model name sent with image-analysis requests.
GROQ_VISION_MODEL = "llama-3.2-90b-vision-preview"
# Model name sent with audio-transcription requests.
GROQ_AUDIO_MODEL = "whisper-large-v3"

# ==========================================
# VISION & AUDIO TOOLS
# ==========================================

def transcribe_audio(audio_bytes: bytes, groq_key: str, filename: str = "audio.mp3") -> str:
    """Send audio to the Groq transcription endpoint and return the text.

    Args:
        audio_bytes: Raw bytes of the audio file.
        groq_key: Groq API key used as the bearer token.
        filename: Name reported for the uploaded file part.

    Returns:
        The transcribed text, or "" when either required argument is empty,
        the API answers with a non-200 status, or any exception occurs
        (failures are printed, never raised).
    """
    if not (groq_key and audio_bytes):
        return ""

    try:
        size_kb = len(audio_bytes) / 1024
        print(f"    🎤 Transcribing audio ({size_kb:.1f} KB)...")

        # Multipart form: the audio itself plus the model name as a plain field.
        multipart = {
            'file': (filename, audio_bytes, 'audio/mpeg'),
            'model': (None, GROQ_AUDIO_MODEL),
        }
        auth = {"Authorization": f"Bearer {groq_key}"}

        response = requests.post(GROQ_AUDIO_API, headers=auth, files=multipart, timeout=60)

        if response.status_code != 200:
            print(f"    ⚠️ Audio transcription failed: {response.status_code} - {response.text[:200]}")
            return ""

        transcript = response.json().get("text", "")
        print(f"    ✅ Transcribed: {transcript[:100]}...")
        return transcript
    except Exception as e:
        print(f"    ⚠️ Audio transcription error: {e}")
        return ""


def analyze_image(image_bytes: bytes, question: str, groq_key: str) -> str:
    """Ask the Groq vision model a question about an image.

    The image is embedded in the request as a base64 ``data:`` URL. Its MIME
    type is sniffed from the leading magic bytes (PNG, JPEG, GIF, WebP, BMP);
    unrecognised data is labelled ``image/png``.

    Args:
        image_bytes: Raw bytes of the image file.
        question: Question the model should answer about the image.
        groq_key: Groq API key used as the bearer token.

    Returns:
        The model's reply text, or "" when a required argument is empty, the
        API answers with a non-200 status, the reply carries no content, or
        any exception occurs (failures are printed, never raised).
    """
    if not groq_key or not image_bytes:
        return ""

    try:
        print(f"    🖼️ Analyzing image ({len(image_bytes)/1024:.1f} KB)...")

        # Convert to base64
        image_b64 = base64.b64encode(image_bytes).decode('utf-8')

        # Detect image type from the file signature
        header = image_bytes[:12]
        if header.startswith(b'\x89PNG\r\n\x1a\n'):
            mime_type = "image/png"
        elif header.startswith(b'\xff\xd8'):
            mime_type = "image/jpeg"
        elif header.startswith((b'GIF87a', b'GIF89a')):
            mime_type = "image/gif"
        elif header[:4] == b'RIFF' and header[8:12] == b'WEBP':
            # WebP is a RIFF container whose form type at offset 8 is "WEBP".
            mime_type = "image/webp"
        elif header.startswith(b'BM'):
            mime_type = "image/bmp"
        else:
            mime_type = "image/png"  # default

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Look at this image and answer the question precisely. Give ONLY the answer, no explanation.\n\nQuestion: {question}"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_b64}"
                        }
                    }
                ]
            }
        ]

        resp = requests.post(
            GROQ_API,
            headers={
                "Authorization": f"Bearer {groq_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": GROQ_VISION_MODEL,
                "messages": messages,
                "temperature": 0.1,
                "max_tokens": 300,
            },
            timeout=60,
        )

        if resp.status_code == 200:
            result = resp.json()
            # Tolerate a missing/empty "choices" list and a null "content".
            choices = result.get("choices") or [{}]
            answer = choices[0].get("message", {}).get("content") or ""
            print(f"    ✅ Vision response: {answer[:100]}...")
            return answer
        else:
            print(f"    ⚠️ Vision failed: {resp.status_code} - {resp.text[:200]}")
            return ""
    except Exception as e:
        print(f"    ⚠️ Vision error: {e}")
        return ""


# ==========================================
# TOOLS
# ==========================================

def fetch_webpage(url: str, timeout: int = 15) -> str:
    """Download a page and return its visible text.

    Boilerplate elements (scripts, styles, navigation, forms, ...) are removed
    first. Text is taken from the first ``<main>``, ``<article>`` or
    content-like ``<div>`` when one exists, otherwise from the whole document.

    Args:
        url: Address of the page to download.
        timeout: Request timeout in seconds.

    Returns:
        Up to 10000 characters of newline-separated text lines (lines of two
        characters or fewer are dropped), or "" on any error.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }
    noise_tags = ["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe", "form"]

    try:
        response = requests.get(url, headers=browser_headers, timeout=timeout, allow_redirects=True)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Strip elements that never carry article text.
        for tag in soup(noise_tags):
            tag.extract()

        # Prefer a dedicated content container over the full document.
        container = (
            soup.find("main")
            or soup.find("article")
            or soup.find("div", {"class": re.compile(r"content|main|article", re.I)})
        )
        root = container if container else soup
        raw_text = root.get_text("\n", strip=True)

        kept_lines = []
        for raw_line in raw_text.splitlines():
            line = raw_line.strip()
            if len(line) > 2:
                kept_lines.append(line)
        return "\n".join(kept_lines)[:10000]
    except Exception as e:
        print(f"    ⚠️ Webpage fetch error: {e}")
        return ""


def fetch_youtube_transcript(url: str) -> str:
    """Fetch a YouTube video's transcript, trying several fallbacks in order.

    Order of attempts:
      1. Direct fetch for a series of preferred language tuples.
      2. Every listed manually-created transcript.
      3. Every listed auto-generated transcript.
      4. Every listed transcript translated to English.

    Args:
        url: A YouTube URL (watch, youtu.be, embed, shorts) or a bare
            11-character video ID.

    Returns:
        Up to 8000 characters of transcript text, or "" when no video ID can
        be extracted, ``youtube_transcript_api`` is not installed, or no
        transcript could be retrieved.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        # Extract video ID
        patterns = [
            r"(?:v=|/v/|youtu\.be/|embed/|shorts/)([a-zA-Z0-9_-]{11})",
            r"^([a-zA-Z0-9_-]{11})$"
        ]
        vid = None
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                vid = match.group(1)
                break

        if not vid:
            print(f"    ⚠️ Could not extract video ID from: {url}")
            return ""

        print(f"    📺 Video ID: {vid}")

        # Create API instance (new API style)
        ytt_api = YouTubeTranscriptApi()

        def _snippet_text(fetched) -> str:
            # A fetched transcript is iterated to obtain its snippets.
            return " ".join(snippet.text for snippet in fetched)

        # Try multiple language options
        lang_options = [
            ("en",),
            ("en", "en-US", "en-GB"),
            ("it", "it-IT"),
            ("en", "it", "fr", "de", "es", "pt"),
        ]

        for langs in lang_options:
            try:
                text = _snippet_text(ytt_api.fetch(vid, languages=langs))
            except Exception:
                # Best effort: this language set is unavailable, try the next.
                continue
            if text:
                print(f"    ✓ Got transcript ({len(text)} chars, langs: {langs})")
                return text[:8000]

        # Try listing all transcripts and fetching any available
        try:
            transcripts = list(ytt_api.list(vid))

            # Manual first, then auto-generated, then English translations.
            attempts = (
                [("manual", t.fetch) for t in transcripts if not t.is_generated]
                + [("auto", t.fetch) for t in transcripts if t.is_generated]
                + [("translated", lambda t=t: t.translate('en').fetch()) for t in transcripts]
            )
            for label, fetch in attempts:
                try:
                    text = _snippet_text(fetch())
                except Exception:
                    # Catch Exception (not a bare except) so Ctrl-C and
                    # SystemExit still propagate.
                    continue
                if text:
                    print(f"    ✓ Got {label} transcript ({len(text)} chars)")
                    return text[:8000]
        except Exception as e:
            print(f"    ⚠️ Transcript list error: {e}")

        return ""
    except ImportError:
        print("    ⚠️ youtube_transcript_api not installed")
        return ""
    except Exception as e:
        print(f"    ⚠️ YouTube error: {e}")
        return ""


def _summarize_table(df, label: str) -> str:
    """Render *df* as a '<label> file with N rows ...' summary (max 8000 chars)."""
    summary = f"{label} file with {len(df)} rows and columns: {list(df.columns)}\n"
    summary += f"Data:\n{df.to_string()}"
    return summary[:8000]


def fetch_task_file(task_id: str) -> Tuple[str, str, Optional[bytes]]:
    """Download and parse the file attached to a task.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    classified by its Content-Type header and filename extension.

    Args:
        task_id: Identifier of the task whose attachment should be fetched.

    Returns:
        A ``(content, file_type, raw_bytes)`` tuple:
          * ``content`` - text extracted from the file (capped at 8000
            characters) or a short description for media/binary files.
          * ``file_type`` - one of "csv", "python", "text", "excel", "pdf",
            "audio", "image", "binary", or "none" when there is no file or
            the download failed.
          * ``raw_bytes`` - the raw payload for "audio" and "image" files
            (for transcription / vision analysis), otherwise ``None``.
    """
    try:
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        resp = requests.get(url, timeout=30)

        if resp.status_code == 404:
            return "", "none", None
        if resp.status_code != 200:
            print(f"    ⚠️ File fetch failed: {resp.status_code}")
            return "", "none", None

        ct = resp.headers.get("Content-Type", "").lower()
        cd = resp.headers.get("Content-Disposition", "")

        # Extract filename; cut at ';' so trailing header parameters are not
        # mistaken for part of the name.
        filename = ""
        if "filename=" in cd:
            filename = cd.split("filename=")[-1].split(";")[0].strip('" ')
        ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

        print(f"    📎 File: {filename or 'unknown'}, type: {ct[:50]}")

        # Text/Code files
        if any(t in ct for t in ["text/", "json", "javascript", "python"]) or ext in ["txt", "csv", "json", "py", "md", "js", "html"]:
            text = resp.text

            # CSV parsing (falls through to plain text when parsing fails)
            if ext == "csv" or "csv" in ct:
                try:
                    df = pd.read_csv(io.StringIO(text))
                    return _summarize_table(df, "CSV"), "csv", None
                except Exception as e:
                    print(f"    ⚠️ CSV parse error: {e}")

            # Python code
            if ext == "py":
                return f"Python code:\n```python\n{text[:6000]}\n```", "python", None

            return text[:8000], "text", None

        # Excel files: try openpyxl explicitly, then pandas' default engine.
        if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
            for engine_kwargs in ({"engine": "openpyxl"}, {}):
                try:
                    df = pd.read_excel(io.BytesIO(resp.content), **engine_kwargs)
                    return _summarize_table(df, "Excel"), "excel", None
                except Exception as e:
                    print(f"    ⚠️ Excel parse error: {e}")
            return "Excel file (could not parse)", "excel", None

        # PDF files
        if "pdf" in ct or ext == "pdf":
            try:
                import PyPDF2
                reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
                text_parts = []
                for i, page in enumerate(reader.pages):
                    page_text = page.extract_text() or ""
                    if page_text:
                        text_parts.append(f"--- Page {i+1} ---\n{page_text}")
                text = "\n".join(text_parts)
                return text[:8000] if text else "PDF (no extractable text)", "pdf", None
            except ImportError:
                print("    ⚠️ PyPDF2 not installed")
                return "PDF file (PyPDF2 not available)", "pdf", None
            except Exception as e:
                print(f"    ⚠️ PDF parse error: {e}")
                return "PDF file (parse error)", "pdf", None

        # Audio files - return raw bytes for transcription
        if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg", "flac"]:
            size_kb = len(resp.content) / 1024
            print(f"    🎵 Audio file detected ({size_kb:.1f} KB) - will transcribe")
            return f"Audio file ({ext or 'unknown'}, {size_kb:.1f} KB)", "audio", resp.content

        # Image files - return raw bytes for vision analysis
        if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp", "bmp"]:
            size_kb = len(resp.content) / 1024
            print(f"    🖼️ Image file detected ({size_kb:.1f} KB) - will analyze")
            return f"Image file ({ext or 'unknown'}, {size_kb:.1f} KB)", "image", resp.content

        # Unknown type: try to decode as text
        raw = resp.content
        try:
            return raw.decode("utf-8")[:8000], "text", None
        except UnicodeDecodeError:
            pass

        # latin-1 maps every possible byte, so decoding can never fail and
        # cannot be used to detect binary data. Treat payloads containing NUL
        # bytes as binary instead of returning them as garbled text.
        if b"\x00" not in raw:
            return raw.decode("latin-1")[:8000], "text", None
        return f"Binary file ({ct or 'unknown type'}, {len(raw)} bytes)", "binary", None

    except requests.exceptions.Timeout:
        print("    ⚠️ File fetch timeout")
        return "", "none", None
    except Exception as e:
        print(f"    ⚠️ File fetch error: {e}")
        return "", "none", None


def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """Search the web via DuckDuckGo and return normalized results.

    The ``ddgs`` package is tried first, then the older ``duckduckgo_search``
    package. Each backend collects into its own list, so hits from a backend
    that failed part-way are never mixed with the fallback's hits (which
    would produce duplicates and more than ``max_results`` entries).

    Args:
        query: Search query text.
        max_results: Maximum number of results requested from a backend.

    Returns:
        A list of ``{"title", "body", "href"}`` dicts. Results of the first
        backend that completes with hits are returned; otherwise whatever
        partial hits were gathered (fallback backend first), or ``[]``.
    """
    def _collect(hits, into: List[Dict[str, str]]) -> None:
        # Append as we go so partial hits survive a mid-iteration failure.
        for r in hits:
            into.append({
                "title": r.get("title", ""),
                "body": r.get("body", ""),
                "href": r.get("href", ""),
            })

    # Try ddgs package (new name)
    primary: List[Dict[str, str]] = []
    try:
        from ddgs import DDGS
        _collect(DDGS().text(query, max_results=max_results), primary)
        if primary:
            print(f"    🔍 ddgs found {len(primary)} results")
            return primary
    except ImportError:
        pass
    except Exception as e:
        print(f"    ⚠️ ddgs error: {e}")

    # Fallback: try duckduckgo-search package
    secondary: List[Dict[str, str]] = []
    try:
        from duckduckgo_search import DDGS
        with DDGS() as ddgs:
            _collect(ddgs.text(query, max_results=max_results), secondary)
        if secondary:
            print(f"    🔍 DDG found {len(secondary)} results")
            return secondary
    except ImportError:
        print("    ⚠️ duckduckgo-search not installed")
    except Exception as e:
        print(f"    ⚠️ DDG error: {e}")

    # Both backends failed or found nothing: keep any partial hits.
    return secondary or primary


def search_wikipedia(query: str) -> str:
    """Search English Wikipedia and return the top article's summary.

    The MediaWiki search API is queried first, then the REST
    ``page/summary`` endpoint is called for the first hit.

    Args:
        query: Free-text search query.

    Returns:
        ``"Wikipedia - <title>:\\n<extract>"`` for the first search hit, or ""
        when the search fails, finds nothing, the summary has no extract, or
        any error occurs (failures are printed, never raised).
    """
    try:
        headers = {
            "User-Agent": "GAIAAgent/1.0 (https://huggingface.co/spaces; contact@example.com)"
        }

        # Search for article
        search_url = "https://en.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 3
        }
        resp = requests.get(search_url, params=params, headers=headers, timeout=10)

        if resp.status_code != 200:
            print(f"    ⚠️ Wikipedia search HTTP {resp.status_code}")
            return ""

        data = resp.json()

        results = data.get("query", {}).get("search", [])
        if not results:
            return ""

        # Get the first article
        title = results[0]["title"]

        # Fetch article content using REST API. safe="" makes quote() encode
        # "/" as %2F; otherwise a title such as "AC/DC" would be split into
        # two path segments and the lookup would miss the article.
        encoded_title = requests.utils.quote(title.replace(' ', '_'), safe="")
        content_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
        resp = requests.get(content_url, headers=headers, timeout=10)

        if resp.status_code == 200:
            article = resp.json()
            extract = article.get("extract", "")
            if extract:
                print(f"    📖 Wikipedia: {title}")
                return f"Wikipedia - {title}:\n{extract}"

        return ""
    except requests.exceptions.Timeout:
        print(f"    ⚠️ Wikipedia timeout")
        return ""
    except Exception as e:
        print(f"    ⚠️ Wikipedia error: {e}")
        return ""


# ==========================================
# GROQ LLM
# ==========================================

def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1, model: str = None) -> str:
    """Query the Groq chat-completions API, retrying and falling back across models.

    Each candidate model gets two attempts. A 401 aborts immediately, a 404
    skips to the next model, a 429 waits 10s/20s before the next attempt, and
    other failures pause briefly before retrying.

    Args:
        messages: Chat messages in the API's message format.
        groq_key: Groq API key used as the bearer token.
        max_tokens: Completion token limit sent with the request.
        temperature: Sampling temperature sent with the request.
        model: Specific model to use; when falsy, ``GROQ_MODELS`` is tried in order.

    Returns:
        The stripped reply text, or "" if the key is empty, the key is
        rejected, or every attempt fails.
    """
    if not groq_key:
        print("    ❌ GROQ_API_KEY is empty!")
        return ""

    candidates = GROQ_MODELS if not model else [model]
    auth_headers = {
        "Authorization": f"Bearer {groq_key}",
        "Content-Type": "application/json"
    }

    for model_name in candidates:
        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        for attempt in range(2):  # 2 attempts per model
            try:
                resp = requests.post(GROQ_API, headers=auth_headers, json=payload, timeout=60)
                status = resp.status_code

                if status == 401:
                    print(f"    ❌ Groq API key invalid!")
                    return ""
                if status == 404:
                    print(f"    ⚠️ Model {model_name} not found, trying next...")
                    break  # Try next model
                if status == 429:
                    wait_time = 10 * (attempt + 1)
                    print(f"    ⏳ [{model_name}] Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
                if status != 200:
                    print(f"    ⚠️ [{model_name}] HTTP {status}: {resp.text[:200]}")
                    time.sleep(3)
                    continue

                content = resp.json().get("choices", [{}])[0].get("message", {}).get("content", "")
                if not content:
                    print(f"    ⚠️ [{model_name}] Empty content")
                    continue

                print(f"    📝 [{model_name}] Response: {content[:80]}...")
                return content.strip()
            except requests.exceptions.Timeout:
                print(f"    ⚠️ [{model_name}] Timeout (attempt {attempt + 1}/2)")
                time.sleep(5)
            except Exception as e:
                print(f"    ⚠️ [{model_name}] Error: {type(e).__name__}: {e}")
                time.sleep(3)

    print("    ❌ All Groq attempts failed")
    return ""


# ==========================================
# TEXT PROCESSING
# ==========================================

def preprocess_question(question: str) -> str:
    """Return the question in readable order, un-reversing it when needed.

    The trimmed text and its character-reversed form are each scored by how
    many common English keywords they contain (substring match on the
    lower-cased text). The reversed form is returned when it scores clearly
    higher, or when the text starts with punctuation and the reversed form
    scores at least as well; otherwise the trimmed original is returned.
    """
    text = question.strip()
    flipped = text[::-1]

    # Keywords that indicate proper English text
    markers = (
        "answer", "what", "who", "how", "find", "list", "which", "where",
        "when", "the", "is", "are", "was", "were", "has", "have", "this",
        "that", "from", "with", "about", "question", "video", "image",
        "write", "opposite", "sentence", "if", "you", "understand",
    )

    def _hits(candidate: str) -> int:
        lowered = candidate.lower()
        return sum(marker in lowered for marker in markers)

    forward_hits = _hits(text)
    backward_hits = _hits(flipped)

    print(f"    📊 Text analysis: orig_keywords={forward_hits}, rev_keywords={backward_hits}")

    # Reversed reading wins by a clear margin on a reasonably long text.
    if len(text) > 20 and backward_hits > forward_hits + 1:
        print(f"    🔄 Detected reversed text!")
        print(f"    📝 Reversed: {flipped[:100]}...")
        return flipped

    # Leading punctuation is typical of a reversed sentence.
    if text.startswith(tuple('.!?,;:')) and backward_hits >= forward_hits:
        print(f"    🔄 Text starts with punctuation, trying reversed")
        print(f"    📝 Reversed: {flipped[:100]}...")
        return flipped

    return text


def clean_answer(raw: str) -> str:
    """Extract a bare final answer from an LLM response.

    Steps:
      1. Keep the first non-empty line that does not start with "#".
      2. Remove markdown bold markers (``**``).
      3. Repeatedly strip wrapping quotes/backticks, one leading boilerplate
         prefix (e.g. "The answer is:") and trailing ``.,:;!`` punctuation
         until nothing changes (at most 10 passes).

    Wrappers are stripped inside the loop rather than once at the end, so
    that quotes or bold markers cannot shield a prefix or a trailing period:
    ``"Paris."`` and ``**Answer:** Paris`` both yield ``Paris``. Decimal
    numbers such as ``3.14`` are untouched because only trailing punctuation
    is removed.

    Args:
        raw: Raw model output.

    Returns:
        The cleaned answer, or "" when *raw* is empty.
    """
    if not raw:
        return ""

    answer = raw.strip()

    # Take first non-empty line
    for line in answer.split("\n"):
        line = line.strip()
        if line and not line.startswith("#"):
            answer = line
            break

    # Bold markers may wrap the prefix or the answer, so drop them up front.
    answer = answer.replace("**", "").strip()

    # Common prefixes (matched case-insensitively); longer variants come
    # first so the trailing colon is consumed together with the phrase.
    prefixes = (
        "the answer is:", "the answer is", "answer:", "answer is:",
        "final answer:", "final answer is:", "the final answer is:",
        "the correct answer is:", "the correct answer is",
        "result:", "the result is:",
        "based on my analysis,", "based on my analysis",
        "based on the", "according to",
        "sure,", "here is", "here's", "i found that",
    )

    max_iterations = 10
    for _ in range(max_iterations):
        previous = answer

        # Wrapping quotes / backticks
        answer = answer.strip('"\'`').strip()

        # At most one prefix per pass; the loop handles stacked prefixes.
        lowered = answer.lower()
        for prefix in prefixes:
            if lowered.startswith(prefix):
                answer = answer[len(prefix):].strip()
                break

        # Trailing punctuation (period, comma, etc.)
        while answer and answer[-1] in '.,:;!':
            answer = answer[:-1].strip()

        if answer == previous:
            break

    return answer


def is_valid_answer(answer: str) -> bool:
    """Tell whether *answer* looks like a real answer rather than a refusal.

    An answer is rejected when it is empty or whitespace-only, longer than
    150 characters, starts with a known refusal phrase, or contains any
    known "don't know" / error phrase (case-insensitive substring match).
    """
    if not answer or not answer.strip():
        return False

    # Very long replies are explanations, not direct answers.
    if len(answer) > 150:
        print(f"    ⚠️ Answer too long ({len(answer)} chars), likely not a direct answer")
        return False

    lowered = answer.lower().strip()

    # Refusal phrases that disqualify the answer when it begins with them.
    refusal_starts = (
        "no image", "no information", "no transcript", "no data",
        "i do not", "i don't", "i cannot", "i can't", "i am not able",
        "unable to", "cannot determine", "not able to",
        "without access", "i'm not sure", "i am unable",
        "there is no", "there's no", "no file", "no video",
    )
    opening = next((p for p in refusal_starts if lowered.startswith(p)), None)
    if opening is not None:
        phrase = opening
        print(f"    ⚠️ Answer starts with refusal: '{phrase}'")
        return False

    # Phrases that disqualify the answer wherever they appear.
    invalid_phrases = (
        "i don't know", "i dont know", "i do not know",
        "n/a", "error",
        "i cannot", "i can't", "i cant",
        "not available", "no answer", "unable to",
        "i'm not sure", "im not sure", "i am not sure",
        "no image", "cannot determine", "insufficient information",
        "not provided", "cannot access", "i'm unable", "i am unable",
        "not able to", "i am not able", "however,", "based on typical",
        "without access", "no transcript", "no information",
    )
    for marker in invalid_phrases:
        if marker in lowered:
            return False
    return True


# ==========================================
# MAIN SOLVER
# ==========================================

# System message sent to the chat model: it demands a bare final answer in a
# fixed format (number / name / word / comma-separated list) with no refusals.
SYSTEM_PROMPT = """Answer the question with ONLY the final answer. No explanation.

Format:
- Numbers: just the number (e.g., 5)
- Names: just the name (e.g., John Smith) 
- Words: just the word (e.g., right)
- Lists: comma-separated (e.g., a, b, c)

IMPORTANT: 
- If counting items from a list or table, count carefully and give the exact number
- If asked for opposite of a word, give that opposite word
- Always give your best answer, never refuse"""


def is_simple_question(question: str) -> bool:
    """Tell whether a question can be answered directly, without web search.

    A question qualifies when it is shorter than 200 characters and contains
    one of a few trigger phrases (opposites/antonyms, "2+2"), matched
    case-insensitively.
    """
    if len(question) >= 200:
        return False

    lowered = question.lower()
    # Simple questions about opposites, basic facts, math
    triggers = (
        "opposite of", "antonym of", "what is the opposite",
        "write the opposite", "2+2", "2 + 2",
    )
    for trigger in triggers:
        if trigger in lowered:
            return True
    return False


def solve_question(question: str, task_id: str, groq_key: str) -> str:
    """Solve a single GAIA question and return the final answer text.

    The function gathers context from several sources (attached task file,
    YouTube transcripts, URLs mentioned in the question, web search and
    Wikipedia), asks the LLM for an answer, and retries with progressively
    blunter prompts while the answer is rejected by ``is_valid_answer``.

    Args:
        question: Raw question text.
        task_id: Task identifier, used to fetch an attached file.
        groq_key: Groq API key passed through to the LLM/vision/audio helpers.

    Returns:
        The cleaned answer, or ``"unknown"`` when no answer text could be
        produced at all.
    """
    print(f"\n[Q]: {question[:150]}{'...' if len(question) > 150 else ''}")

    # Preprocess the question
    processed_q = preprocess_question(question)
    context_parts = []

    # Check if it's a simple question that doesn't need web search
    if is_simple_question(processed_q):
        print("    ⚡ Simple question detected, answering directly")
        answer_raw = ask_groq([
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Answer this directly: {processed_q}"}
        ], groq_key, max_tokens=50, temperature=0.0)
        answer = clean_answer(answer_raw) if answer_raw else ""
        if answer and is_valid_answer(answer):
            print(f"    ✅ Direct answer: {answer}")
            return answer
        # Otherwise fall through to the full pipeline below.

    # 1. Check for attached files
    file_content, file_type, file_bytes = fetch_task_file(task_id)
    if file_content and file_type != "none":
        # Handle images with Vision API
        if file_type == "image" and file_bytes:
            print(f"    🖼️ Analyzing image with Vision API...")
            vision_answer = analyze_image(file_bytes, processed_q, groq_key)
            if vision_answer and is_valid_answer(clean_answer(vision_answer)):
                # If vision gives a good answer, use it directly
                answer = clean_answer(vision_answer)
                print(f"    ✅ Vision answer: {answer}")
                return answer
            elif vision_answer:
                # Add vision analysis to context
                context_parts.append(f"[IMAGE ANALYSIS]:\n{vision_answer}")

        # Handle audio with Transcription API
        elif file_type == "audio" and file_bytes:
            print(f"    🎵 Transcribing audio with Whisper...")
            transcript = transcribe_audio(file_bytes, groq_key)
            if transcript:
                context_parts.append(f"[AUDIO TRANSCRIPTION]:\n{transcript}")
                print(f"    ✅ Got audio transcript ({len(transcript)} chars)")
            else:
                context_parts.append(f"[NOTE: Audio file attached but transcription failed.]")

        # Normal files
        else:
            context_parts.append(f"[ATTACHED FILE - {file_type.upper()}]:\n{file_content}")
            print(f"    📁 Got {file_type} file ({len(file_content)} chars)")

    # 2. Process YouTube URLs
    yt_urls = re.findall(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)[^\s\)\]]+', processed_q)
    for yt_url in yt_urls[:2]:  # Limit to 2 videos
        clean_url = yt_url.rstrip('.,;:')
        print(f"    🎬 Fetching transcript: {clean_url}")
        transcript = fetch_youtube_transcript(clean_url)
        if transcript:
            context_parts.append(f"[YOUTUBE VIDEO TRANSCRIPT]:\n{transcript}")
        else:
            # Try to search for information about this video.
            # The id pattern accepts every URL shape matched above,
            # including /shorts/ links.
            vid_match = re.search(r'(?:v=|youtu\.be/|shorts/)([a-zA-Z0-9_-]{11})', clean_url)
            if vid_match:
                vid_id = vid_match.group(1)
                print(f"    🔍 No transcript, searching for video info: {vid_id}")
                video_results = web_search(f"youtube {vid_id} video content summary", max_results=3)
                if video_results:
                    snippets = "\n".join([f"• {r.get('title', '')}: {r.get('body', '')}" for r in video_results])
                    context_parts.append(f"[YOUTUBE VIDEO INFO (no transcript available)]:\nVideo URL: {clean_url}\nSearch results about this video:\n{snippets}")
                else:
                    context_parts.append(f"[YOUTUBE VIDEO]: {clean_url} - No transcript or info available.")
            else:
                context_parts.append(f"[YOUTUBE VIDEO]: {clean_url} - Could not process.")

    # 3. Process other URLs
    other_urls = re.findall(r'https?://[^\s\)\]]+', processed_q)
    other_urls = [u.rstrip('.,;:') for u in other_urls
                  if "youtube.com" not in u and "youtu.be" not in u]

    for url in other_urls[:2]:  # Limit to 2 URLs
        print(f"    🌐 Fetching page: {url[:60]}...")
        page_content = fetch_webpage(url)
        if page_content:
            context_parts.append(f"[WEBPAGE: {url}]:\n{page_content}")

    # 4. Web search for additional context
    # Skip search if we have good file data (Excel/CSV with actual data).
    # `file_content or ""` guards against a None payload for a typed file.
    should_search = True
    if file_type in ["excel", "csv"] and len(file_content or "") > 500:
        should_search = False  # We have data to analyze
        print("    ⏭️ Skipping search - using file data")

    if should_search and not yt_urls:
        # Default query: the (truncated) question itself
        search_query = processed_q[:200]

        # Try to extract key terms for search
        query_prompt = ask_groq([
            {"role": "system", "content": "Extract the key search terms from this question. Output ONLY the search query (3-8 words), nothing else."},
            {"role": "user", "content": processed_q[:400]}
        ], groq_key, max_tokens=30, temperature=0.0)

        # Only trust the LLM-generated query when its length is plausible
        if query_prompt and 3 < len(query_prompt) < 100:
            search_query = query_prompt

        print(f"    🔍 Searching: '{search_query[:50]}'")

        # Try web search
        results = web_search(search_query, max_results=5)

        # Tracks whether a full Wikipedia page is already part of the context
        wiki_fetched = False

        if results:
            # Add search snippets - these are often the most useful
            snippets = "\n".join([f"• {r.get('title', '')}: {r.get('body', '')}" for r in results])
            context_parts.append(f"[SEARCH RESULTS]:\n{snippets}")

            # Fetch Wikipedia page if in results (most reliable)
            for r in results:
                href = r.get("href", "")
                if "wikipedia.org" in href:
                    page = fetch_webpage(href)
                    if page and len(page) > 500:
                        context_parts.append(f"[WIKIPEDIA PAGE]:\n{page[:6000]}")
                        wiki_fetched = True
                        print(f"    📖 Fetched Wikipedia: {href[:50]}")
                        break

            # If no Wikipedia, fetch first non-wiki result
            if not wiki_fetched:
                for r in results[:2]:
                    href = r.get("href", "")
                    if href and "youtube" not in href:
                        page = fetch_webpage(href)
                        if page and len(page) > 300:
                            context_parts.append(f"[WEB PAGE]:\n{page[:4000]}")
                            print(f"    🌐 Fetched: {href[:50]}")
                            break

        # Also try direct Wikipedia search, but only when no Wikipedia page
        # was fetched above (its result would be discarded otherwise).
        if not wiki_fetched:
            wiki_content = search_wikipedia(search_query)
            if wiki_content:
                context_parts.append(f"[WIKIPEDIA]:\n{wiki_content}")

    # 5. Build context and query LLM
    context = "\n\n".join(context_parts) if context_parts else ""

    # Truncate context if too long
    if len(context) > 12000:
        context = context[:12000] + "\n[...truncated]"

    # Check if this is a counting/analysis question.
    # NOTE(review): these are plain substring tests, so e.g. "country" also
    # matches 'count' and "playlist" matches 'list' — confirm this is intended.
    q_lower = processed_q.lower()
    is_counting_q = any(w in q_lower for w in ['how many', 'count', 'number of', 'total'])
    is_list_q = any(w in q_lower for w in ['list', 'name all', 'what are'])

    # First attempt with context - use 2-step for complex questions
    if context and (is_counting_q or is_list_q):
        # Step 1: Extract relevant data.
        # The prompt is assembled from explicit pieces so its exact bytes do
        # not depend on invisible whitespace in the source file.
        extract_prompt = (
            "From this context, extract ONLY the specific information needed to answer the question.\n"
            "        \n"
            f"Context: {context[:8000]}\n"
            "\n"
            f"Question: {processed_q}\n"
            "\n"
            "List the relevant facts (be brief):"
        )

        extracted = ask_groq([
            {"role": "user", "content": extract_prompt}
        ], groq_key, max_tokens=500, temperature=0.0)

        if extracted:
            print(f"    📋 Extracted: {extracted[:150]}...")
            # Step 2: Answer based on extracted info
            answer_raw = ask_groq([
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": f"Based on these facts:\n{extracted}\n\nQuestion: {processed_q}\n\nFinal answer (just the answer, nothing else):"}
            ], groq_key, max_tokens=100, temperature=0.0)
        else:
            # Extraction failed; the retries below will run without context
            answer_raw = ""
    elif context:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {processed_q}\n\nAnswer:"}
        ]
        answer_raw = ask_groq(messages, groq_key, max_tokens=100, temperature=0.1)
    else:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Question: {processed_q}\n\nAnswer:"}
        ]
        answer_raw = ask_groq(messages, groq_key, max_tokens=100, temperature=0.1)

    answer = clean_answer(answer_raw) if answer_raw else ""

    print(f"    📤 Raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")

    # If answer isn't valid, try again with more forceful prompt
    if not is_valid_answer(answer):
        print(f"    ⚠️ First attempt invalid: '{answer}', retrying...")

        # More forceful prompt (question only, no gathered context)
        retry_messages = [
            {"role": "system", "content": "Give ONLY the answer. One word or number if possible."},
            {"role": "user", "content": f"{processed_q}"}
        ]
        answer_raw = ask_groq(retry_messages, groq_key, max_tokens=50, temperature=0.2)
        answer = clean_answer(answer_raw) if answer_raw else ""
        print(f"    📤 Retry: '{answer}'")

    # If still not valid, try one more time with knowledge-based approach
    if not is_valid_answer(answer):
        print(f"    ⚠️ Second attempt invalid: '{answer}', trying knowledge-based...")

        retry_messages = [
            {"role": "system", "content": "Give ONLY the answer, nothing else. Best guess if unsure."},
            {"role": "user", "content": processed_q}
        ]
        answer_raw = ask_groq(retry_messages, groq_key, max_tokens=50, temperature=0.5)
        answer = clean_answer(answer_raw) if answer_raw else ""
        print(f"    📤 Third try raw: '{answer_raw[:100] if answer_raw else '[empty]'}' -> Clean: '{answer}'")

    # If still no valid answer but we have some text, extract first meaningful chunk
    if not answer or not answer.strip() or not is_valid_answer(answer):
        if answer_raw and answer_raw.strip():
            # Take the first short line that does not look like a refusal
            for line in answer_raw.strip().split('\n'):
                line = line.strip()
                if line and len(line) < 100 and not any(x in line.lower() for x in ['cannot', "don't know", 'unable', 'no image']):
                    answer = clean_answer(line)
                    print(f"    🔄 Extracted from response: '{answer}'")
                    break

    # Absolute final fallback
    if not answer or not answer.strip():
        answer = "unknown"
        print(f"    ❌ No answer found, defaulting to 'unknown'")

    print(f"    ✅ Final Answer: {answer}")
    return answer


# ==========================================
# GRADIO INTERFACE
# ==========================================

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per processed question, or None when the run
        aborted before any question was processed.
    """
    space_id = os.getenv("SPACE_ID", "")

    if not profile:
        return "Effettua il login con Hugging Face per continuare.", None

    username = profile.username
    groq_key = os.getenv("GROQ_API_KEY", "")

    if not groq_key:
        return "❌ GROQ_API_KEY non configurata! Aggiungi la chiave nelle impostazioni dello Space.", None

    print(f"\n{'='*60}")
    print(f"👤 User: {username}")
    print(f"🤖 Agent: GAIA Agent v5")
    # Never echo any part of the secret into the logs.
    print("🔑 API Key: configured")
    print(f"{'='*60}")

    # Test Groq API connectivity first
    print("\n🔍 Testing Groq API connectivity...")
    test_response = ask_groq(
        [{"role": "user", "content": "Say 'OK' and nothing else."}],
        groq_key, max_tokens=10, temperature=0.0
    )
    if not test_response:
        return "❌ Groq API test failed! Check your API key and try again.", None
    print(f"✅ Groq API test passed: '{test_response}'")

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Errore nel recupero delle domande: {e}", None

    # The loop below expects a JSON array of objects; reject anything else
    # up front instead of crashing halfway through.
    if not isinstance(questions, list):
        return (
            f"❌ Errore nel recupero delle domande: formato inatteso ({type(questions).__name__})",
            None,
        )

    print(f"\n📋 {len(questions)} domande da processare\n")

    results = []
    answers = []
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    for i, item in enumerate(questions):
        if not isinstance(item, dict):
            print(f"[{i+1}] Skipping invalid item")
            continue

        task_id = item.get("task_id", "")
        q = item.get("question")

        if not task_id or q is None:
            print(f"[{i+1}] Skipping invalid item")
            continue

        print(f"\n{'─'*60}")
        print(f"[{i+1}/{len(questions)}] Task: {task_id[:20]}...")

        # A failure on one question must not abort the whole run.
        try:
            answer = solve_question(q, task_id, groq_key)
        except Exception as e:
            print(f"    💥 Exception: {e}")
            traceback.print_exc()
            answer = "I don't know"

        answers.append({
            "task_id": task_id,
            "submitted_answer": answer
        })
        results.append({
            "Task ID": task_id[:20] + "...",
            "Question": q[:80] + ("..." if len(q) > 80 else ""),
            "Answer": answer
        })

        # Rate limit protection - increase delay between questions
        time.sleep(2.5)

    if not answers:
        return "❌ Nessuna risposta generata.", pd.DataFrame(results)

    # Submit answers
    print(f"\n{'='*60}")
    print(f"📤 Submitting {len(answers)} answers...")

    try:
        submit_resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={
                "username": username,
                "agent_code": agent_code,
                "answers": answers
            },
            timeout=60,
        )
        submit_resp.raise_for_status()
        result = submit_resp.json()

        score = result.get('score', 'N/A')
        correct = result.get('correct_count', '?')
        total = result.get('total_attempted', '?')
        message = result.get('message', '')

        status = f"""✅ Completato!
👤 {result.get('username')}
🏆 {score}% ({correct}/{total})
📝 {message}"""

        print(f"\n{status}")
        return status, pd.DataFrame(results)

    except Exception as e:
        error_msg = f"❌ Errore nell'invio: {e}"
        print(error_msg)
        return error_msg, pd.DataFrame(results)


def create_demo():
    """Assemble the Gradio Blocks UI for the agent and return it unlaunched."""
    # Header text, assembled piecewise so every line break is explicit.
    intro_markdown = (
        "# 🚀 GAIA Agent v5\n"
        "        \n"
        "**Full-featured agent with Vision & Audio!**\n"
        "- 🧠 Groq Llama 3.3 70B for reasoning\n"
        "- 👁️ Llama 3.2 Vision for image analysis\n"
        "- 🎤 Whisper for audio transcription\n"
        "- 🔍 Smart web search + Wikipedia\n"
        "- 📺 YouTube transcript extraction\n"
        "- 📁 File parsing (CSV, Excel, PDF, Python)\n"
    )

    with gr.Blocks(title="GAIA Agent v5") as app:
        # Components are created in the order they appear on the page.
        gr.Markdown(intro_markdown)
        gr.LoginButton()

        start_btn = gr.Button("🔥 Avvia Valutazione", variant="primary", size="lg")
        status_box = gr.Textbox(label="Risultato", lines=6, interactive=False)
        answers_grid = gr.DataFrame(label="Risposte", wrap=True)

        # The click handler fills the status box and the answers table.
        start_btn.click(
            fn=run_and_submit_all,
            outputs=[status_box, answers_grid],
        )

    return app


if __name__ == "__main__":
    # Script entry point: build the UI, enable the request queue with a
    # default concurrency limit of 1, then start the server.
    demo = create_demo()
    demo.queue(default_concurrency_limit=1)
    demo.launch(debug=True, share=False)