Final_Assignment_Template

Sleeping

App Files Files Community

emanuelediluzio committed on Apr 8

Commit

af83907

verified ·

1 Parent(s): 0812c3f

Update app.py

Browse files

Files changed (1) hide show

app.py +650 -231

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-GAIA Agent v3 — smarter search, richer context, 2-step reasoning.
-Target: 6/20 (30%)
 """
 import os
 import re
@@ -11,376 +11,795 @@ import gradio as gr
 import requests
 import pandas as pd
 from bs4 import BeautifulSoup
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
 # ==========================================
 # TOOLS
 # ==========================================
-def fetch_webpage(url: str) -> str:
     try:
-        resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=12)
         resp.raise_for_status()
         soup = BeautifulSoup(resp.text, "html.parser")
-        for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
             el.extract()
-        lines = [l.strip() for l in soup.get_text("\n", strip=True).splitlines() if l.strip()]
-        return "\n".join(lines)[:8000]
     except Exception as e:
         return ""
 def fetch_youtube_transcript(url: str) -> str:
     try:
         from youtube_transcript_api import YouTubeTranscriptApi
-        match = re.search(r"(?:v=|youtu\.be/|embed/)([^&\n?#]+)", url)
-        vid = match.group(1) if match else url.strip()
-        # Try multiple approaches
-        for langs in [["en"], ["en", "it", "fr", "de", "es"]]:
             try:
-                entries = YouTubeTranscriptApi.get_transcript(vid, languages=langs)
-                return " ".join([e["text"] for e in entries])[:6000]
-            except Exception:
                 continue
-        # Last resort: any available transcript
         try:
-            tl = YouTubeTranscriptApi.list_transcripts(vid)
-            for t in tl:
-                entries = t.fetch()
-                return " ".join([e["text"] for e in entries])[:6000]
-        except Exception:
-            pass
         return ""
     except Exception as e:
         return ""
-def fetch_task_file(task_id: str) -> tuple:
-    """Returns (content_str, file_type)"""
     try:
-        resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=20)
         if resp.status_code != 200:
             return "", "none"
-        ct = resp.headers.get("Content-Type", "")
         cd = resp.headers.get("Content-Disposition", "")
-        filename = cd.split("filename=")[-1].strip('" ') if "filename=" in cd else ""
         ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
-        if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py", "md"]:
             if ext == "csv" or "csv" in ct:
                 try:
-                    df = pd.read_csv(io.StringIO(resp.text))
-                    return f"CSV data ({len(df)} rows, columns: {list(df.columns)}):\n{df.to_string()}"[:6000], "csv"
-                except Exception:
-                    pass
-            return resp.text[:6000], "text"
         if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
             try:
                 df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
-                return f"Excel data ({len(df)} rows, columns: {list(df.columns)}):\n{df.to_string()}"[:6000], "excel"
-            except Exception:
-                return "Excel file (parse error)", "excel"
         if "pdf" in ct or ext == "pdf":
             try:
                 import PyPDF2
                 reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
-                text = "\n".join([p.extract_text() or "" for p in reader.pages])
-                return text[:6000], "pdf"
-            except Exception:
-                return "PDF (parse error)", "pdf"
-        if "audio" in ct or ext in ["mp3", "wav", "m4a"]:
-            return f"Audio file ({ext or ct}, {len(resp.content)} bytes)", "audio"
-        if "image" in ct or ext in ["png", "jpg", "jpeg", "gif"]:
-            return f"Image file ({ext or ct})", "image"
         try:
-            return resp.content.decode("utf-8")[:6000], "text"
-        except Exception:
-            return f"Binary file ({ct})", "binary"
-    except Exception:
         return "", "none"
-def web_search(query: str, max_results: int = 5) -> list:
-    """Returns list of {title, body, href}"""
     try:
         from ddgs import DDGS
         with DDGS() as ddgs:
-            return list(ddgs.text(query, max_results=max_results))
-    except Exception:
-        return []
 # ==========================================
-# GROQ
 # ==========================================
-def ask_groq(messages: list, groq_key: str, max_tokens: int = 300) -> str:
     for attempt in range(3):
         try:
             resp = requests.post(
                 GROQ_API,
-                headers={"Authorization": f"Bearer {groq_key}", "Content-Type": "application/json"},
                 json={
                     "model": "llama-3.3-70b-versatile",
                     "messages": messages,
-                    "temperature": 0.1,
                     "max_tokens": max_tokens,
                 },
-                timeout=25,
             )
             if resp.status_code == 200:
                 return resp.json()["choices"][0]["message"]["content"].strip()
             elif resp.status_code == 429:
-                time.sleep(5 * (attempt + 1))
             else:
-                print(f"    Groq {resp.status_code}")
-                return ""
-        except Exception as e:
-            print(f"    Groq err: {e}")
             time.sleep(3)
     return ""
 # ==========================================
-# PREPROCESSING
 # ==========================================
 def preprocess_question(question: str) -> str:
     stripped = question.strip()
-    rev = stripped[::-1]
-    kw = ["answer", "what", "who", "how", "find", "list", "which", "where", "when", "the", "is", "are"]
-    if sum(1 for w in kw if w in rev.lower()) > sum(1 for w in kw if w in stripped.lower()) + 1 and len(stripped) > 20:
-        print(f"  🔄 Reversed text detected")
-        return rev
-    return question
 def clean_answer(raw: str) -> str:
-    answer = str(raw).strip()
     for line in answer.split("\n"):
         line = line.strip()
-        if line:
             answer = line
             break
-    for prefix in ["the answer is:", "the answer is", "final answer:", "final answer is",
-                    "answer:", "answer is", "the result is", "result:", "the correct answer is",
-                    "based on my analysis,", "based on the", "according to", "sure,", "here is"]:
-        if answer.lower().startswith(prefix):
-            answer = answer[len(prefix):].strip()
-            break
-    if answer.endswith(".") and not re.search(r"\d\.$", answer):
-        answer = answer[:-1].strip()
-    answer = answer.replace("**", "").strip('"').strip("'").strip("`").strip()
-    return answer
-def is_valid(answer: str) -> bool:
     if not answer or len(answer.strip()) < 1:
         return False
-    bad = {"i don't know", "unknown", "n/a", "none", "error", "i cannot", "i can't",
-           "not available", "no answer", "unable to", "i'm not sure", "no image"}
-    return not any(b in answer.lower() for b in bad)
 # ==========================================
-# SOLVE
 # ==========================================
-SYSTEM = """You are solving GAIA benchmark questions. You receive context and must give the EXACT answer.
-RULES (MANDATORY):
-- Output ONLY the final answer. NOTHING else.
-- No "The answer is", no explanation, no reasoning.
-- Numbers: just the number (e.g. 3 or 12.50)
-- Names: just the name (e.g. Marie Curie)
-- Lists: comma-separated (e.g. apple, banana, cherry)
-- No period at the end
-- If you need to compute something from the data, do the math and give the result
-- For reversed/scrambled text: the question has already been unscrambled for you"""
-def solve_question(question: str, task_id: str, groq_key: str) -> str:
-    print(f"\n[Q]: {question[:130]}")
-    processed = preprocess_question(question)
     context_parts = []
-    # 1. Fetch attached file
     file_content, file_type = fetch_task_file(task_id)
     if file_content and file_type != "none":
-        context_parts.append(f"[ATTACHED FILE ({file_type})]:\n{file_content}")
-        print(f"  📁 {file_type}: {len(file_content)}ch")
-    # 2. YouTube?
-    yt_urls = re.findall(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[^\s\)]+', processed)
-    for yt_url in yt_urls:
-        transcript = fetch_youtube_transcript(yt_url)
         if transcript:
-            context_parts.append(f"[YOUTUBE TRANSCRIPT]:\n{transcript}")
-            print(f"  🎬 Transcript: {len(transcript)}ch")
         else:
-            context_parts.append(f"[YOUTUBE]: Could not fetch transcript for {yt_url}")
-            print(f"  🎬 Transcript FAILED")
-    # 3. Other URLs?
-    other_urls = re.findall(r'https?://[^\s\)]+', processed)
-    other_urls = [u for u in other_urls if "youtube.com" not in u and "youtu.be" not in u]
-    for url in other_urls[:2]:
-        page = fetch_webpage(url)
-        if page:
-            context_parts.append(f"[WEBPAGE {url}]:\n{page}")
-            print(f"  🌐 Page: {len(page)}ch")
-    # 4. Web search — use Groq to generate a good search query first
-    if not yt_urls and file_type not in ["excel", "csv"]:
-        # Ask Groq for optimal search query
-        sq_resp = ask_groq([
-            {"role": "system", "content": "Generate a concise web search query (max 8 words) to answer this question. Output ONLY the search query, nothing else."},
-            {"role": "user", "content": processed[:500]},
-        ], groq_key, max_tokens=30)
-        search_query = sq_resp if sq_resp and len(sq_resp) < 100 else processed[:150]
-        print(f"  🔍 Search: '{search_query}'")
         results = web_search(search_query, max_results=5)
         if results:
             # Add search snippets
-            snippets = "\n".join([f"- {r.get('title','')}: {r.get('body','')}" for r in results])
-            context_parts.append(f"[SEARCH RESULTS for '{search_query}']:\n{snippets}")
-            # Fetch top 2 result pages for richer context
-            fetched = 0
             for r in results:
-                if fetched >= 2:
                     break
                 href = r.get("href", "")
-                if href and "youtube.com" not in href:
                     page = fetch_webpage(href)
-                    if page and len(page) > 200:
-                        context_parts.append(f"[PAGE: {r.get('title','')}]:\n{page[:4000]}")
-                        fetched += 1
-                        print(f"  📄 Fetched: {r.get('title','')[:50]}")
-        # If no results or question is very specific, try a second search
-        if len(results) < 2:
-            results2 = web_search(processed[:100], max_results=3)
-            if results2:
-                snippets2 = "\n".join([f"- {r.get('title','')}: {r.get('body','')}" for r in results2])
-                context_parts.append(f"[SEARCH 2]:\n{snippets2}")
-    # 5. Build final context and ask Groq
-    context = "\n\n".join(context_parts) if context_parts else "No context available. Answer from your knowledge."
-    # Truncate context to avoid token limits
-    if len(context) > 12000:
-        context = context[:12000] + "\n[...truncated]"
-    answer_raw = ask_groq([
-        {"role": "system", "content": SYSTEM},
-        {"role": "user", "content": f"CONTEXT:\n{context}\n\nQUESTION: {processed}"},
-    ], groq_key, max_tokens=300)
-    answer = clean_answer(answer_raw) if answer_raw else ""
-    # If not valid, try once more with just the question (knowledge-based)
-    if not is_valid(answer):
-        print(f"  ⚠️ Invalid: '{answer}', retrying without context...")
-        answer_raw = ask_groq([
-            {"role": "system", "content": SYSTEM},
-            {"role": "user", "content": f"QUESTION: {processed}"},
-        ], groq_key, max_tokens=300)
-        answer = clean_answer(answer_raw) if answer_raw else "I don't know"
-    if not is_valid(answer):
         answer = "I don't know"
-    print(f"  ✅ {answer}")
     return answer
 # ==========================================
-# RUNNER
 # ==========================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
     if not profile:
-        return "Fai il Login con Hugging Face.", None
     username = profile.username
     groq_key = os.getenv("GROQ_API_KEY", "")
     if not groq_key:
-        return "❌ GROQ_API_KEY mancante!", None
-    print(f"\n{'='*50}\n👤 {username}\n{'='*50}")
     try:
-        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
         resp.raise_for_status()
         questions = resp.json()
     except Exception as e:
-        return f"Errore: {e}", None
-    print(f"📋 {len(questions)} domande\n")
     results = []
     answers = []
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     for i, item in enumerate(questions):
         task_id = item.get("task_id", "")
         q = item.get("question")
         if not task_id or q is None:
             continue
-        print(f"\n[{i+1}/{len(questions)}] {'═'*40}")
         try:
-            ans = solve_question(q, task_id, groq_key)
         except Exception as e:
-            ans = "I don't know"
-            print(f"  💥 {e}")
-        answers.append({"task_id": task_id, "submitted_answer": ans})
-        results.append({"Task ID": task_id, "Question": q[:100], "Answer": ans})
-        time.sleep(1.5)  # rate limit buffer
     if not answers:
-        return "Nessuna risposta.", pd.DataFrame(results)
-    print(f"\n📤 Submitting {len(answers)} answers...")
     try:
-        resp = requests.post(
             f"{DEFAULT_API_URL}/submit",
-            json={"username": username, "agent_code": agent_code, "answers": answers},
             timeout=60,
         )
-        resp.raise_for_status()
-        r = resp.json()
-        status = (
-            f"✅ Completato!\n👤 {r.get('username')}\n"
-            f"🏆 {r.get('score', 'N/A')}% ({r.get('correct_count', '?')}/{r.get('total_attempted', '?')})\n"
-            f"📝 {r.get('message', '')}"
-        )
         print(f"\n{status}")
         return status, pd.DataFrame(results)
     except Exception as e:
-        return f"❌ Invio fallito: {e}", pd.DataFrame(results)
-with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 GAIA Agent v3\nGroq Llama 3.3 70B — smart search + page fetch")
-    gr.LoginButton()
-    run_button = gr.Button("🔥 Avvia Valutazione", variant="primary")
-    status_output = gr.Textbox(label="Risultato", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Risposte", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
-    demo.queue(default_concurrency_limit=1).launch(debug=True, share=False)

 """
+GAIA Agent v4 — Enhanced version with better error handling and tools
+Target: 30%+ (6+/20)
 """
 import os
 import re
 import requests
 import pandas as pd
 from bs4 import BeautifulSoup
+from typing import Optional, Tuple, List, Dict, Any
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
 # ==========================================
 # TOOLS
 # ==========================================
def fetch_webpage(url: str, timeout: int = 15, max_chars: int = 10000) -> str:
    """Fetch a web page and return its visible text, one fragment per line.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the HTTP request.
        max_chars: Maximum number of characters of text to return.

    Returns:
        The extracted text truncated to ``max_chars``, or ``""`` when the
        request or the parsing fails (best effort: errors are logged, never
        raised).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }
    try:
        resp = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
        resp.raise_for_status()

        # Hand BeautifulSoup the raw bytes so it can honour the page's own
        # <meta charset>; resp.text falls back to ISO-8859-1 whenever the
        # Content-Type header carries no charset, which garbles UTF-8 pages.
        soup = BeautifulSoup(resp.content, "html.parser")

        # Drop boilerplate and non-content elements.
        for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe", "form"]):
            el.extract()

        # Prefer the main content container when the page exposes one.
        container = (
            soup.find("main")
            or soup.find("article")
            or soup.find("div", {"class": re.compile(r"content|main|article", re.I)})
        )
        text = (container or soup).get_text("\n", strip=True)

        # Keep every line that carries a letter or digit. Filtering on length
        # would discard short table cells such as "5" or "12".
        kept = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if any(ch.isalnum() for ch in line):
                kept.append(line)
        return "\n".join(kept)[:max_chars]
    except Exception as e:
        # Top-level best-effort boundary: callers treat "" as "no page".
        print(f"    ⚠️ Webpage fetch error: {e}")
        return ""
 def fetch_youtube_transcript(url: str) -> str:
+    """Fetch YouTube video transcript with multiple fallback methods."""
     try:
         from youtube_transcript_api import YouTubeTranscriptApi
+        # Extract video ID
+        patterns = [
+            r"(?:v=|/v/|youtu\.be/|embed/|shorts/)([a-zA-Z0-9_-]{11})",
+            r"^([a-zA-Z0-9_-]{11})$"
+        ]
+        vid = None
+        for pattern in patterns:
+            match = re.search(pattern, url)
+            if match:
+                vid = match.group(1)
+                break
+        if not vid:
+            print(f"    ⚠️ Could not extract video ID from: {url}")
+            return ""
+        print(f"    📺 Video ID: {vid}")
+        # Create API instance (new API style)
+        ytt_api = YouTubeTranscriptApi()
+        # Try multiple language options
+        lang_options = [
+            ("en",),
+            ("en", "en-US", "en-GB"),
+            ("it", "it-IT"),
+            ("en", "it", "fr", "de", "es", "pt"),
+        ]
+        for langs in lang_options:
             try:
+                transcript = ytt_api.fetch(vid, languages=langs)
+                # transcript is a FetchedTranscript object, iterate to get snippets
+                text = " ".join([snippet.text for snippet in transcript])
+                if text:
+                    print(f"    ✓ Got transcript ({len(text)} chars, langs: {langs})")
+                    return text[:8000]
+            except Exception as e:
                 continue
+        # Try listing all transcripts and fetching any available
         try:
+            transcript_list = ytt_api.list(vid)
+            # Try manually created first
+            for t in transcript_list:
+                if not t.is_generated:
+                    try:
+                        fetched = t.fetch()
+                        text = " ".join([snippet.text for snippet in fetched])
+                        if text:
+                            print(f"    ✓ Got manual transcript ({len(text)} chars)")
+                            return text[:8000]
+                    except:
+                        pass
+            # Then auto-generated
+            for t in transcript_list:
+                if t.is_generated:
+                    try:
+                        fetched = t.fetch()
+                        text = " ".join([snippet.text for snippet in fetched])
+                        if text:
+                            print(f"    ✓ Got auto transcript ({len(text)} chars)")
+                            return text[:8000]
+                    except:
+                        pass
+            # Try translated
+            for t in transcript_list:
+                try:
+                    translated = t.translate('en')
+                    fetched = translated.fetch()
+                    text = " ".join([snippet.text for snippet in fetched])
+                    if text:
+                        print(f"    ✓ Got translated transcript ({len(text)} chars)")
+                        return text[:8000]
+                except:
+                    pass
+        except Exception as e:
+            print(f"    ⚠️ Transcript list error: {e}")
+        return ""
+    except ImportError:
+        print("    ⚠️ youtube_transcript_api not installed")
         return ""
     except Exception as e:
+        print(f"    ⚠️ YouTube error: {e}")
         return ""
_TASK_FILE_MAX_CHARS = 8000


def _filename_from_content_disposition(header_value: str) -> str:
    """Return the filename named in a Content-Disposition header, or ""."""
    match = re.search(
        r"filename\*?=\s*(?:[\w-]+'[\w-]*')?\"?([^\";]+)", header_value, re.I
    )
    return match.group(1).strip() if match else ""


def _table_summary(label: str, df) -> str:
    """Render a DataFrame as a header line plus its full string dump."""
    summary = f"{label} file with {len(df)} rows and columns: {list(df.columns)}\n"
    summary += f"Data:\n{df.to_string()}"
    return summary[:_TASK_FILE_MAX_CHARS]


def fetch_task_file(task_id: str) -> Tuple[str, str]:
    """Download and parse the file attached to a task.

    Args:
        task_id: Identifier used to build the ``/files/{task_id}`` URL.

    Returns:
        ``(content, file_type)`` where ``file_type`` is one of ``"csv"``,
        ``"python"``, ``"text"``, ``"excel"``, ``"pdf"``, ``"audio"``,
        ``"image"``, ``"binary"`` or ``"none"``. ``("", "none")`` means no
        file could be retrieved; errors are logged, never raised.
    """
    try:
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        resp = requests.get(url, timeout=30)
        if resp.status_code == 404:
            # No attachment for this task: expected, so stay quiet.
            return "", "none"
        if resp.status_code != 200:
            print(f"    ⚠️ File fetch failed: {resp.status_code}")
            return "", "none"

        ct = resp.headers.get("Content-Type", "").lower()
        cd = resp.headers.get("Content-Disposition", "")
        filename = _filename_from_content_disposition(cd)
        ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
        print(f"    📎 File: {filename or 'unknown'}, type: {ct[:50]}")

        # Text/Code files
        if any(t in ct for t in ["text/", "json", "javascript", "python"]) or ext in ["txt", "csv", "json", "py", "md", "js", "html"]:
            text = resp.text
            if ext == "csv" or "csv" in ct:
                try:
                    return _table_summary("CSV", pd.read_csv(io.StringIO(text))), "csv"
                except Exception as e:
                    # Fall through and hand the raw text to the caller.
                    print(f"    ⚠️ CSV parse error: {e}")
            if ext == "py":
                return f"Python code:\n```python\n{text[:6000]}\n```", "python"
            return text[:_TASK_FILE_MAX_CHARS], "text"

        # Excel files
        if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
            try:
                df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
                return _table_summary("Excel", df), "excel"
            except Exception as e:
                print(f"    ⚠️ Excel parse error: {e}")
                # Retry letting pandas pick the engine.
                try:
                    df = pd.read_excel(io.BytesIO(resp.content))
                    return _table_summary("Excel", df), "excel"
                except Exception as retry_error:
                    print(f"    ⚠️ Excel parse error: {retry_error}")
                    return "Excel file (could not parse)", "excel"

        # PDF files
        if "pdf" in ct or ext == "pdf":
            try:
                import PyPDF2
                reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
                text_parts = []
                for i, page in enumerate(reader.pages):
                    page_text = page.extract_text() or ""
                    if page_text:
                        text_parts.append(f"--- Page {i+1} ---\n{page_text}")
                text = "\n".join(text_parts)
                return text[:_TASK_FILE_MAX_CHARS] if text else "PDF (no extractable text)", "pdf"
            except ImportError:
                print("    ⚠️ PyPDF2 not installed")
                return "PDF file (PyPDF2 not available)", "pdf"
            except Exception as e:
                print(f"    ⚠️ PDF parse error: {e}")
                return "PDF file (parse error)", "pdf"

        # Audio files
        if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg", "flac"]:
            size_kb = len(resp.content) / 1024
            return f"Audio file ({ext or 'unknown'}, {size_kb:.1f} KB). Cannot transcribe audio directly.", "audio"

        # Image files
        if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp", "bmp"]:
            size_kb = len(resp.content) / 1024
            return f"Image file ({ext or 'unknown'}, {size_kb:.1f} KB). Cannot analyze images directly.", "image"

        # Unknown type: try to read it as text.
        raw = resp.content
        try:
            return raw.decode("utf-8")[:_TASK_FILE_MAX_CHARS], "text"
        except UnicodeDecodeError:
            pass
        # latin-1 maps every byte to a character and therefore never fails,
        # so it cannot be used to detect binary data. NUL bytes are the cue
        # that this is not text.
        if b"\x00" in raw:
            return f"Binary file ({ct or 'unknown type'}, {len(raw)} bytes)", "binary"
        return raw.decode("latin-1")[:_TASK_FILE_MAX_CHARS], "text"

    except requests.exceptions.Timeout:
        print("    ⚠️ File fetch timeout")
        return "", "none"
    except Exception as e:
        print(f"    ⚠️ File fetch error: {e}")
        return "", "none"
def _normalize_search_hits(raw_hits, limit: int) -> List[Dict[str, str]]:
    """Reduce backend results to ``title``/``body``/``href`` string dicts."""
    hits: List[Dict[str, str]] = []
    for r in raw_hits or []:
        hits.append({
            "title": r.get("title") or "",
            "body": r.get("body") or "",
            "href": r.get("href") or "",
        })
        if len(hits) >= limit:
            break
    return hits


def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """Search the web via DuckDuckGo.

    Tries the ``ddgs`` package first and falls back to the older
    ``duckduckgo_search`` package. Each backend collects into its own list so
    a failure in one can never leak partial results into the other.

    Args:
        query: Search string; blank queries return no results.
        max_results: Upper bound on the number of results.

    Returns:
        A list of dicts with ``title``, ``body`` and ``href`` keys; empty when
        both backends fail or nothing is found.
    """
    if not query or not query.strip() or max_results <= 0:
        return []

    # Try ddgs package (new name)
    try:
        from ddgs import DDGS
        hits = _normalize_search_hits(DDGS().text(query, max_results=max_results), max_results)
        if hits:
            print(f"    🔍 ddgs found {len(hits)} results")
            return hits
    except ImportError:
        pass
    except Exception as e:
        print(f"    ⚠️ ddgs error: {e}")

    # Fallback: try duckduckgo-search package
    try:
        from duckduckgo_search import DDGS
        with DDGS() as client:
            hits = _normalize_search_hits(client.text(query, max_results=max_results), max_results)
        if hits:
            print(f"    🔍 DDG found {len(hits)} results")
            return hits
    except ImportError:
        print("    ⚠️ duckduckgo-search not installed")
    except Exception as e:
        print(f"    ⚠️ DDG error: {e}")

    return []
def search_wikipedia(query: str) -> str:
    """Search English Wikipedia and return the summary of the best match.

    The top search hits are tried in order until one yields a non-empty
    summary extract.

    Args:
        query: Free-text search string; blank queries return ``""``.

    Returns:
        ``"Wikipedia - <title>:\\n<extract>"`` for the first hit with an
        extract, or ``""`` when nothing is found or a request fails.
    """
    if not query or not query.strip():
        return ""
    try:
        headers = {
            "User-Agent": "GAIAAgent/1.0 (https://huggingface.co/spaces; contact@example.com)"
        }
        # Search for article
        search_url = "https://en.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 3
        }
        resp = requests.get(search_url, params=params, headers=headers, timeout=10)
        if resp.status_code != 200:
            print(f"    ⚠️ Wikipedia search HTTP {resp.status_code}")
            return ""
        results = resp.json().get("query", {}).get("search", [])

        for hit in results:
            title = hit.get("title", "")
            if not title:
                continue
            # safe="" also escapes "/", which would otherwise split titles
            # such as "AC/DC" into extra path segments of the REST URL.
            encoded_title = requests.utils.quote(title.replace(' ', '_'), safe="")
            content_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
            resp = requests.get(content_url, headers=headers, timeout=10)
            if resp.status_code != 200:
                continue
            extract = resp.json().get("extract", "")
            if extract:
                print(f"    📖 Wikipedia: {title}")
                return f"Wikipedia - {title}:\n{extract}"
        return ""
    except requests.exceptions.Timeout:
        print(f"    ⚠️ Wikipedia timeout")
        return ""
    except Exception as e:
        print(f"    ⚠️ Wikipedia error: {e}")
        return ""
 # ==========================================
+# GROQ LLM
 # ==========================================
# HTTP statuses for which repeating the identical request cannot succeed.
_GROQ_NON_RETRYABLE_STATUSES = frozenset({400, 401, 403, 404, 413, 422})
_GROQ_MAX_ATTEMPTS = 3


def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1) -> str:
    """Send a chat-completion request to the Groq API, retrying transient errors.

    Args:
        messages: Chat messages in OpenAI format (``role``/``content`` dicts).
        groq_key: Groq API key used as the bearer token.
        max_tokens: Completion token limit.
        temperature: Sampling temperature.

    Returns:
        The stripped content of the first choice, or ``""`` when the inputs
        are empty, the request is rejected, or all attempts fail.
    """
    if not messages or not groq_key:
        # Nothing meaningful can be sent; skip the network round trips.
        return ""

    for attempt in range(_GROQ_MAX_ATTEMPTS):
        is_last_attempt = attempt == _GROQ_MAX_ATTEMPTS - 1
        delay = 0
        try:
            resp = requests.post(
                GROQ_API,
                headers={
                    "Authorization": f"Bearer {groq_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "llama-3.3-70b-versatile",
                    "messages": messages,
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                },
                timeout=30,
            )
            if resp.status_code == 200:
                return resp.json()["choices"][0]["message"]["content"].strip()
            if resp.status_code == 429:
                # Linear back-off: 5s, 10s, ...
                delay = 5 * (attempt + 1)
                if not is_last_attempt:
                    print(f"    ⏳ Rate limited, waiting {delay}s...")
                else:
                    print("    ⏳ Rate limited, giving up")
            else:
                print(f"    ⚠️ Groq API error: {resp.status_code} - {resp.text[:200]}")
                if resp.status_code in _GROQ_NON_RETRYABLE_STATUSES:
                    # Bad key / bad request: retrying only burns time.
                    return ""
                delay = 2
        except requests.exceptions.Timeout:
            print(f"    ⚠️ Groq timeout (attempt {attempt + 1})")
            delay = 3
        except Exception as e:
            print(f"    ⚠️ Groq error: {e}")
            delay = 2

        # Sleeping after the final attempt would only delay the failure.
        if not is_last_attempt and delay:
            time.sleep(delay)
    return ""
 # ==========================================
+# TEXT PROCESSING
 # ==========================================
 def preprocess_question(question: str) -> str:
+    """Handle reversed or scrambled text."""
     stripped = question.strip()
+    # Check for reversed text
+    reversed_text = stripped[::-1]
+    # Keywords that indicate proper English text
+    keywords = ["answer", "what", "who", "how", "find", "list", "which", "where",
+                "when", "the", "is", "are", "was", "were", "has", "have", "this",
+                "that", "from", "with", "about", "question", "video", "image"]
+    orig_score = sum(1 for w in keywords if w in stripped.lower())
+    rev_score = sum(1 for w in keywords if w in reversed_text.lower())
+    # If reversed text has significantly more keywords, use it
+    if rev_score > orig_score + 2 and len(stripped) > 30:
+        print(f"    🔄 Detected reversed text, using reversed version")
+        return reversed_text
+    return stripped
 def clean_answer(raw: str) -> str:
+    """Extract and clean the final answer from LLM response."""
+    if not raw:
+        return ""
+    answer = raw.strip()
+    # Take first non-empty line
     for line in answer.split("\n"):
         line = line.strip()
+        if line and not line.startswith("#"):
             answer = line
             break
+    # Remove common prefixes (case-insensitive)
+    prefixes = [
+        "the answer is:", "the answer is", "answer:", "answer is:",
+        "final answer:", "final answer is:", "the final answer is:",
+        "the correct answer is:", "the correct answer is",
+        "result:", "the result is:",
+        "based on my analysis,", "based on my analysis",
+        "based on the", "according to",
+        "sure,", "here is", "here's", "i found that"
+    ]
+    # Apply prefix removal iteratively
+    changed = True
+    max_iterations = 10
+    iterations = 0
+    while changed and iterations < max_iterations:
+        changed = False
+        iterations += 1
+        answer_lower = answer.lower()
+        for prefix in prefixes:
+            if answer_lower.startswith(prefix):
+                answer = answer[len(prefix):].strip()
+                changed = True
+                break
+    # Remove trailing punctuation (period, comma, etc.)
+    # But preserve decimal numbers like "3.14"
+    while answer and answer[-1] in '.,:;!':
+        char = answer[-1]
+        if char == '.' and len(answer) >= 2:
+            # Check if this is a decimal number (has digits on both sides of a period)
+            before = answer[:-1]
+            # If there's already a period in the string AND it's followed by digits, it's a decimal
+            if '.' in before:
+                # e.g., "3.14." - the inner period is decimal, outer is punctuation
+                answer = answer[:-1].strip()
+            elif before and before[-1].isdigit():
+                # Could be end of integer "42." or a decimal "3.14"
+                # Check if there are non-digit chars (indicating it's just "42." not "3.14")
+                # A pure decimal would be all digits and one period
+                test_str = before.lstrip('-')  # Handle negative numbers
+                if test_str.isdigit():
+                    # It's just an integer with a period, remove the period
+                    answer = answer[:-1].strip()
+                else:
+                    # Might have letters or other chars, remove period
+                    answer = answer[:-1].strip()
+            else:
+                answer = answer[:-1].strip()
+        else:
+            answer = answer[:-1].strip()
+    # Clean up formatting
+    answer = answer.replace("**", "").strip('"\'`')
+    return answer.strip()
def is_valid_answer(answer: str) -> bool:
    """Return True when ``answer`` looks like a real answer, not a refusal.

    An answer is rejected when it is empty/whitespace-only or when it contains
    one of the known refusal/error markers as a whole word or phrase.
    Matching is case-insensitive and uses word boundaries, so legitimate
    answers that merely *contain* a marker as part of a longer word
    (e.g. "Terror", "Cannoneer", "nonetheless") are accepted.

    Args:
        answer: Cleaned answer text; may be empty or ``None``.

    Returns:
        ``False`` for empty answers and refusals, ``True`` otherwise.
    """
    if not answer or not answer.strip():
        return False

    invalid_phrases = (
        "i don't know", "i dont know", "i do not know",
        "unknown", "n/a", "none", "error",
        "i cannot", "i can't", "i cant",
        "not available", "no answer", "unable to",
        "i'm not sure", "im not sure", "i am not sure",
        "no image", "cannot determine", "insufficient information",
        "not provided", "cannot access", "i'm unable", "i am unable",
    )

    # Normalize typographic apostrophes so "don’t" matches "don't".
    normalized = answer.lower().replace("\u2019", "'")

    # (?<!\w) / (?!\w) act as word boundaries that also work for markers
    # containing non-word characters such as "n/a" or "i can't".
    alternatives = "|".join(re.escape(phrase) for phrase in invalid_phrases)
    refusal = re.search(rf"(?<!\w)(?:{alternatives})(?!\w)", normalized)
    return refusal is None
 # ==========================================
+# MAIN SOLVER
 # ==========================================
# System prompt sent as the "system" message of every answer request built in
# solve_question (the first attempt and the knowledge-only retry). It tells
# the model to emit only the bare final answer, which clean_answer then
# post-processes.
+SYSTEM_PROMPT = """You are an expert AI assistant solving GAIA benchmark questions.
+CRITICAL RULES - Follow these EXACTLY:
+1. Output ONLY the final answer - no explanations, no reasoning, no "The answer is"
+2. Numbers: output just the number (e.g., "42" or "3.14")
+3. Names: output just the name (e.g., "Marie Curie" or "Paris")
+4. Lists: use comma-separated format (e.g., "apple, banana, cherry")
+5. Dates: use the format requested or standard format
+6. Do NOT add a period at the end
+7. If data is provided (CSV, Excel, etc.), analyze it carefully and compute any needed calculations
+8. For math/counting questions, show your work internally but output only the final number
+SPECIAL CASES:
+- For reversed/scrambled questions: the question has been corrected for you
+- For video questions without transcript: answer based on any description provided
+- For image questions: answer based on any text description of the image
+- When asked about specific facts, be precise and concise"""
def _collect_url_context(question: str) -> tuple[list[str], bool]:
    """Fetch context for links embedded in the question.

    Returns:
        A pair ``(sections, has_youtube)``: the context sections gathered
        from at most two YouTube links and at most two other links, and a
        flag telling whether the question contained any YouTube link.
    """
    sections = []

    yt_pattern = r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)[^\s\)\]]+'
    # Strip sentence punctuation glued to the link, then drop duplicates
    # (order-preserving) so the same video is not fetched twice.
    yt_urls = list(dict.fromkeys(u.rstrip('.,;:') for u in re.findall(yt_pattern, question)))
    for clean_url in yt_urls[:2]:  # Limit to 2 videos
        print(f"    🎬 Fetching transcript: {clean_url}")
        transcript = fetch_youtube_transcript(clean_url)
        if transcript:
            sections.append(f"[YOUTUBE VIDEO TRANSCRIPT]:\n{transcript}")
        else:
            sections.append(f"[YOUTUBE VIDEO]: {clean_url} - Could not fetch transcript. Answer based on your knowledge if possible.")

    other_urls = list(dict.fromkeys(
        u.rstrip('.,;:')
        for u in re.findall(r'https?://[^\s\)\]]+', question)
        if "youtube.com" not in u and "youtu.be" not in u
    ))
    for url in other_urls[:2]:  # Limit to 2 URLs
        print(f"    🌐 Fetching page: {url[:60]}...")
        page_content = fetch_webpage(url)
        if page_content:
            sections.append(f"[WEBPAGE: {url}]:\n{page_content}")

    return sections, bool(yt_urls)


def _collect_search_context(question: str, groq_key: str) -> list[str]:
    """Run a web search (plus Wikipedia lookup) and return context sections."""
    sections = []

    # Default query is the head of the question; prefer a short LLM-extracted
    # query when one comes back with a plausible length.
    search_query = question[:200]
    suggested = ask_groq([
        {"role": "system", "content": "Extract the key search terms from this question. Output ONLY the search query (3-8 words), nothing else."},
        {"role": "user", "content": question[:400]}
    ], groq_key, max_tokens=30, temperature=0.0)
    # Models often wrap the query in quotes; presumably that would be passed
    # to the search backend verbatim, so strip them (TODO confirm against
    # web_search).
    suggested = (suggested or "").strip().strip("\"'").strip()
    if 3 < len(suggested) < 100:
        search_query = suggested
    print(f"    🔍 Searching: '{search_query[:50]}'")

    results = web_search(search_query, max_results=5)
    if results:
        snippets = "\n".join([f"• {r.get('title', '')}: {r.get('body', '')}" for r in results])
        sections.append(f"[WEB SEARCH RESULTS]:\n{snippets}")

        # Fetch the first two result pages that yield a usable amount of text.
        fetched_count = 0
        for r in results:
            if fetched_count >= 2:
                break
            href = r.get("href", "")
            if href and "youtube.com" not in href and "youtu.be" not in href:
                page = fetch_webpage(href)
                if page and len(page) > 300:
                    sections.append(f"[PAGE - {r.get('title', 'Unknown')}]:\n{page[:4000]}")
                    fetched_count += 1

    # Wikipedia is queried regardless of whether the web search returned hits.
    wiki_content = search_wikipedia(search_query)
    if wiki_content:
        sections.append(f"[WIKIPEDIA]:\n{wiki_content}")

    return sections


def solve_question(question: str, task_id: str, groq_key: str) -> str:
    """Answer one GAIA question and return the cleaned final answer.

    Steps: preprocess the question, collect context (attached task file,
    linked YouTube transcripts and web pages, web/Wikipedia search), ask the
    model with that context, and retry once without context when the first
    answer is not valid.

    Args:
        question: Question text as received from the scoring API.
        task_id: Task identifier, used to fetch an attached file.
        groq_key: API key forwarded to ``ask_groq``.

    Returns:
        The cleaned answer, or ``"I don't know"`` when both attempts fail
        validation.
    """
    print(f"\n[Q]: {question[:150]}{'...' if len(question) > 150 else ''}")
    processed_q = preprocess_question(question)
    context_parts = []

    # 1. Attached file
    file_content, file_type = fetch_task_file(task_id)
    if file_content and file_type != "none":
        context_parts.append(f"[ATTACHED FILE - {file_type.upper()}]:\n{file_content}")
        print(f"    📁 Got {file_type} file ({len(file_content)} chars)")

    # 2-3. Links in the question (YouTube transcripts, other pages)
    url_sections, has_youtube = _collect_url_context(processed_q)
    context_parts.extend(url_sections)

    # 4. Web search, skipped when a sizeable Excel/CSV file is attached or
    #    when the question links to a YouTube video.
    has_tabular_data = file_type in ("excel", "csv") and len(file_content or "") > 500
    if has_tabular_data:
        print("    ⏭️ Skipping search - using file data")
    elif not has_youtube:
        context_parts.extend(_collect_search_context(processed_q, groq_key))

    # 5. Build the (possibly truncated) context and query the model.
    context = "\n\n".join(context_parts)
    if len(context) > 14000:
        context = context[:14000] + "\n[...truncated for length]"

    user_prompt = f"QUESTION: {processed_q}\n\nProvide ONLY the final answer:"
    if context:
        user_prompt = f"CONTEXT:\n{context}\n\n---\n{user_prompt}"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    answer = clean_answer(ask_groq(messages, groq_key, max_tokens=400, temperature=0.1))

    # Second chance: drop the context and rely on the model's own knowledge.
    if not is_valid_answer(answer):
        print(f"    ⚠️ First attempt invalid: '{answer}', retrying...")
        retry_messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Using your knowledge, answer this question with ONLY the final answer:\n\n{processed_q}"}
        ]
        answer = clean_answer(ask_groq(retry_messages, groq_key, max_tokens=400, temperature=0.2))

    if not is_valid_answer(answer):
        answer = "I don't know"
    print(f"    ✅ Answer: {answer}")
    return answer
 # ==========================================
+# GRADIO INTERFACE
 # ==========================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Run the agent on all questions and submit answers."""
+    space_id = os.getenv("SPACE_ID", "")
     if not profile:
+        return "Effettua il login con Hugging Face per continuare.", None
     username = profile.username
     groq_key = os.getenv("GROQ_API_KEY", "")
     if not groq_key:
+        return "❌ GROQ_API_KEY non configurata! Aggiungi la chiave nelle impostazioni dello Space.", None
+    print(f"\n{'='*60}")
+    print(f"👤 User: {username}")
+    print(f"🤖 Agent: GAIA Agent v4")
+    print(f"{'='*60}")
+    # Fetch questions
     try:
+        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
         resp.raise_for_status()
         questions = resp.json()
     except Exception as e:
+        return f"❌ Errore nel recupero delle domande: {e}", None
+    print(f"\n📋 {len(questions)} domande da processare\n")
     results = []
     answers = []
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
     for i, item in enumerate(questions):
         task_id = item.get("task_id", "")
         q = item.get("question")
         if not task_id or q is None:
+            print(f"[{i+1}] Skipping invalid item")
             continue
+        print(f"\n{'─'*60}")
+        print(f"[{i+1}/{len(questions)}] Task: {task_id[:20]}...")
         try:
+            answer = solve_question(q, task_id, groq_key)
         except Exception as e:
+            print(f"    💥 Exception: {e}")
+            traceback.print_exc()
+            answer = "I don't know"
+        answers.append({
+            "task_id": task_id,
+            "submitted_answer": answer
+        })
+        results.append({
+            "Task ID": task_id[:20] + "...",
+            "Question": q[:80] + ("..." if len(q) > 80 else ""),
+            "Answer": answer
+        })
+        # Rate limit protection
+        time.sleep(1.5)
     if not answers:
+        return "❌ Nessuna risposta generata.", pd.DataFrame(results)
+    # Submit answers
+    print(f"\n{'='*60}")
+    print(f"📤 Submitting {len(answers)} answers...")
     try:
+        submit_resp = requests.post(
             f"{DEFAULT_API_URL}/submit",
+            json={
+                "username": username,
+                "agent_code": agent_code,
+                "answers": answers
+            },
             timeout=60,
         )
+        submit_resp.raise_for_status()
+        result = submit_resp.json()
+        score = result.get('score', 'N/A')
+        correct = result.get('correct_count', '?')
+        total = result.get('total_attempted', '?')
+        message = result.get('message', '')
+        status = f"""✅ Completato!
+👤 {result.get('username')}
+🏆 {score}% ({correct}/{total})
+📝 {message}"""
         print(f"\n{status}")
         return status, pd.DataFrame(results)
     except Exception as e:
+        error_msg = f"❌ Errore nell'invio: {e}"
+        print(error_msg)
+        return error_msg, pd.DataFrame(results)
def create_demo():
    """Assemble the Gradio Blocks UI and return it.

    The layout contains an intro text, a login button, a run button, a
    read-only status box and a results table. Clicking the run button calls
    ``run_and_submit_all`` and routes its two return values to the status box
    and the table.
    """
    intro_md = """# 🚀 GAIA Agent v4
**Enhanced agent with better tools and reasoning**
- Groq Llama 3.3 70B
- Smart web search + Wikipedia
- YouTube transcript extraction
- File parsing (CSV, Excel, PDF, Python)
"""
    with gr.Blocks(title="GAIA Agent v4") as app:
        gr.Markdown(intro_md)
        gr.LoginButton()
        start_btn = gr.Button("🔥 Avvia Valutazione", variant="primary", size="lg")
        status_box = gr.Textbox(label="Risultato", lines=6, interactive=False)
        answers_grid = gr.DataFrame(label="Risposte", wrap=True)
        start_btn.click(fn=run_and_submit_all, outputs=[status_box, answers_grid])
    return app
 if __name__ == "__main__":
+    demo = create_demo()
+    demo.queue(default_concurrency_limit=1).launch(debug=True, share=False)