Final_Assignment_Template

Sleeping

App Files Files Community

mohdadrian committed on Jan 17

Commit

35bdbb2

verified ·

1 Parent(s): 5cf7143

Update app.py

Browse files

Files changed (1) hide show

app.py +156 -175

app.py CHANGED Viewed

@@ -36,37 +36,55 @@ def web_search(query: str) -> str:
         for r in results:
             output.append(f"- {r.get('title','')}: {r.get('body','')}")
         return "\n".join(output)
-    except:
         return ""
-def get_task_file(task_id: str) -> dict:
-    """Get file associated with a GAIA task"""
     try:
         url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-        response = requests.get(url, timeout=20)
         if response.status_code == 404:
             return {"has_file": False}
         content_type = response.headers.get('content-type', '').lower()
         disposition = response.headers.get('content-disposition', '')
-        filename = ""
         if 'filename=' in disposition:
-            filename = disposition.split('filename=')[-1].strip('"\'')
-        result = {"has_file": True, "filename": filename, "type": content_type}
         # Python files
-        if filename.endswith('.py'):
             result["content"] = response.text
             result["file_type"] = "python"
             return result
-        # Text/CSV/JSON files
         if 'text' in content_type or filename.endswith(('.txt', '.md', '.csv', '.json')):
-            result["content"] = response.text[:8000]
             result["file_type"] = "text"
             return result
@@ -75,72 +93,86 @@ def get_task_file(task_id: str) -> dict:
             try:
                 from io import BytesIO
                 df = pd.read_excel(BytesIO(response.content))
-                result["content"] = df.to_string()
                 result["dataframe"] = df
                 result["file_type"] = "excel"
                 return result
             except Exception as e:
-                result["content"] = f"Excel error: {e}"
                 result["file_type"] = "excel"
                 return result
         # Images
         if 'image' in content_type or filename.endswith(('.png', '.jpg', '.jpeg', '.gif')):
             result["file_type"] = "image"
-            result["content"] = "[Cannot process image]"
             return result
-        # MP3/Audio
-        if 'audio' in content_type or filename.endswith(('.mp3', '.wav')):
-            result["file_type"] = "audio"
-            result["content"] = "[Cannot process audio]"
             return result
-        result["content"] = response.text[:5000] if len(response.content) < 50000 else "[Large binary file]"
-        result["file_type"] = "other"
-        return result
     except Exception as e:
         return {"has_file": False, "error": str(e)}
-def execute_python_code(code: str) -> str:
-    """Execute Python code and capture ALL output"""
     try:
         import io
         import sys
-        # Create string buffer for stdout
         old_stdout = sys.stdout
-        sys.stdout = buffer = io.StringIO()
-        # Execute the code
-        exec_globals = {
-            '__builtins__': __builtins__,
-            'print': print,
-        }
-        exec(code, exec_globals)
-        # Get output
-        output = buffer.getvalue()
         sys.stdout = old_stdout
-        return output.strip() if output.strip() else "Code executed, no print output"
     except Exception as e:
-        return f"Execution error: {e}"
-def reverse_string(text: str) -> str:
-    """Reverse a string"""
     return text[::-1]
-def is_reversed_text(text: str) -> bool:
-    """Check if text appears to be reversed"""
-    # Common reversed English patterns
-    indicators = ['.rewsna', 'eht sa', 'tfel', 'drow eht', 'etisoppo', 'siht']
-    text_lower = text.lower()
-    return any(ind in text_lower for ind in indicators)
 # ============================================
@@ -151,155 +183,115 @@ class BasicAgent:
     def __init__(self):
         print("Initializing agent...")
         self.client = get_groq_client()
-        print("✅ Ready!")
-    def ask_llm(self, prompt: str, max_retries: int = 3) -> str:
-        """Query the LLM with retry logic"""
-        for attempt in range(max_retries):
             try:
                 response = self.client.chat.completions.create(
                     model="llama-3.3-70b-versatile",
                     messages=[{"role": "user", "content": prompt}],
-                    temperature=0,
                     max_tokens=200,
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
                 if "rate" in str(e).lower() or "429" in str(e):
                     wait = (attempt + 1) * 20
-                    print(f"    ⏳ Rate limit, waiting {wait}s...")
                     time.sleep(wait)
                 else:
-                    print(f"    ❌ LLM error: {e}")
-                    return "unknown"
-        return "unknown"
-    def clean_answer(self, answer: str) -> str:
-        """Clean up the answer to exact match format"""
         if not answer:
-            return "unknown"
-        # Remove common prefixes
-        prefixes = [
-            "Answer:", "The answer is:", "The answer is", "A:", "**Answer:**",
-            "Final answer:", "Final Answer:", "Based on the", "According to",
-            "The result is:", "The result is", "The output is:", "The output is",
-        ]
-        for p in prefixes:
-            if answer.startswith(p):
-                answer = answer[len(p):].strip()
-            elif answer.lower().startswith(p.lower()):
                 answer = answer[len(p):].strip()
-        # Remove markdown formatting
         answer = answer.replace("**", "").replace("```", "").strip()
-        # Remove surrounding quotes
-        if (answer.startswith('"') and answer.endswith('"')) or \
-           (answer.startswith("'") and answer.endswith("'")):
-            answer = answer[1:-1]
-        # Remove trailing period for short answers
         if answer.endswith('.') and len(answer.split()) <= 5:
             answer = answer[:-1]
-        # Filter out non-answers
-        bad_phrases = ["I'm unable", "I cannot", "I don't have", "I couldn't", "unfortunately"]
-        if any(bp.lower() in answer.lower() for bp in bad_phrases):
-            return "unknown"
         return answer.strip()
     def __call__(self, question: str, task_id: str = None) -> str:
-        """Process a question and return the answer"""
-        # === STEP 1: Handle reversed text ===
-        if is_reversed_text(question):
-            decoded = reverse_string(question)
-            print(f"    [Reversed text detected, decoded]")
-            question = decoded
-        # === STEP 2: Get associated file ===
-        file_info = get_task_file(task_id) if task_id else {"has_file": False}
-        file_context = ""
         if file_info.get("has_file"):
-            file_type = file_info.get("file_type", "")
-            filename = file_info.get("filename", "")
-            print(f"    [File: {filename} ({file_type})]")
-            if file_type == "python":
-                # Execute Python code and get output
-                code = file_info.get("content", "")
-                output = execute_python_code(code)
-                print(f"    [Python output: {output[:50]}...]")
-                file_context = f"Python code output:\n{output}"
-            elif file_type == "excel":
-                df = file_info.get("dataframe")
-                if df is not None:
-                    # Provide summary and data
-                    file_context = f"Excel file ({len(df)} rows):\n{file_info.get('content', '')[:3000]}"
-                else:
-                    file_context = f"Excel content:\n{file_info.get('content', '')[:3000]}"
-            elif file_type == "text":
-                file_context = f"File content:\n{file_info.get('content', '')[:4000]}"
-            elif file_type in ["image", "audio"]:
-                file_context = f"[This task has a {file_type} file which cannot be processed]"
-        # === STEP 3: Web search if needed ===
-        search_context = ""
-        needs_search = not file_info.get("has_file") or file_info.get("file_type") in ["image", "audio"]
-        if needs_search:
-            search_triggers = [
-                "who is", "who was", "who did", "who nominated", "who played",
-                "what is", "what was", "what are",
-                "how many", "how much",
-                "where ", "when ",
-                "surname", "first name", "name of",
-                "album", "studio album", "mercedes sosa",
-                "actor", "actress", "movie", "film",
-                "wikipedia", "article",
-                "athlete", "pitcher", "yankee", "player",
-                "country", "competition", "malko",
-                "veterinarian", "equine"
-            ]
-            if any(t in question.lower() for t in search_triggers):
-                # Create focused search query
-                search_query = question[:120]
-                results = web_search(search_query)
-                if results:
-                    search_context = f"Search results:\n{results[:2500]}"
-                    print(f"    [Web search done]")
-        # === STEP 4: Build prompt and ask LLM ===
-        context = ""
-        if file_context:
-            context += f"\n\n{file_context}"
-        if search_context:
-            context += f"\n\n{search_context}"
-        prompt = f"""Answer this question. Give ONLY the final answer - no explanation.
-RULES:
-- Just the answer (number, name, word, or short phrase)
-- No "The answer is" or similar prefixes
-- If it's a number, just the number
-- If it's a name, just the name
-- If it's a list, comma-separated items
-- Be precise - this is graded by exact match
-{context}
 Question: {question}
 Answer:"""
-        answer = self.ask_llm(prompt)
-        return self.clean_answer(answer)
 # ============================================
@@ -327,8 +319,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     try:
         questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
-        print(f"📋 {len(questions)} questions")
-        print(f"⏱️ Est. time: {len(questions) * DELAY_BETWEEN_QUESTIONS // 60} min\n")
     except Exception as e:
         return f"❌ Fetch failed: {e}", None
@@ -340,10 +331,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         task_id = q.get("task_id")
         question = q.get("question", "")
-        print(f"\n[{i+1}/{len(questions)}] {question[:60]}...")
         answer = agent(question, task_id)
-        print(f"    ✓ Answer: {answer}")
         answers.append({"task_id": task_id, "submitted_answer": answer})
         results.append({"#": i+1, "Question": question[:50]+"...", "Answer": answer})
@@ -352,7 +344,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             time.sleep(DELAY_BETWEEN_QUESTIONS)
     total = time.time() - start
-    print(f"\n⏱️ Total: {total/60:.1f} min")
     try:
         result = requests.post(
@@ -371,7 +363,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         status = f"✅ Done in {total/60:.1f} min\n\n"
         status += f"🎯 Score: {score}% ({correct}/{total_q})\n\n"
-        status += "🎉 PASSED! 30%+ achieved!" if score >= 30 else f"📈 Need {30-score}% more to pass"
         return status, pd.DataFrame(results)
     except Exception as e:
@@ -384,26 +376,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 GAIA Agent - Unit 4")
-    gr.Markdown("""
-    **Model:** Llama 3.3 70B via Groq
-    **Features:**
-    - ✅ Python code execution
-    - ✅ Excel file analysis
-    - ✅ Reversed text detection
-    - ✅ Web search
-    ⏱️ ~5 minutes runtime
-    """)
     gr.LoginButton()
-    run_btn = gr.Button("🚀 Run Evaluation", variant="primary", size="lg")
-    status = gr.Textbox(label="Status", lines=6)
     table = gr.DataFrame(label="Results")
     run_btn.click(run_and_submit_all, outputs=[status, table])
 if __name__ == "__main__":
-    print("🎯 GAIA Agent Starting")
     print(f"GROQ_API_KEY: {'✅' if os.environ.get('GROQ_API_KEY') else '❌'}")
     demo.launch()

         for r in results:
             output.append(f"- {r.get('title','')}: {r.get('body','')}")
         return "\n".join(output)
+    except Exception as e:
+        print(f"    Search error: {e}")
         return ""
+def fetch_task_file(task_id: str) -> dict:
+    """Fetch file from GAIA API"""
+    if not task_id:
+        return {"has_file": False}
     try:
         url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+        print(f"    Fetching file: {url}")
+        response = requests.get(url, timeout=30)
+        print(f"    Response status: {response.status_code}")
         if response.status_code == 404:
+            print(f"    No file for this task")
+            return {"has_file": False}
+        if response.status_code != 200:
+            print(f"    Error status: {response.status_code}")
             return {"has_file": False}
         content_type = response.headers.get('content-type', '').lower()
         disposition = response.headers.get('content-disposition', '')
+        # Extract filename
+        filename = "unknown"
         if 'filename=' in disposition:
+            filename = disposition.split('filename=')[-1].strip('"\'').strip()
+        elif 'filename*=' in disposition:
+            filename = disposition.split('filename*=')[-1].strip('"\'').strip()
+        print(f"    File: {filename}, Type: {content_type}")
+        result = {"has_file": True, "filename": filename, "content_type": content_type}
         # Python files
+        if filename.endswith('.py') or 'python' in content_type:
             result["content"] = response.text
             result["file_type"] = "python"
+            print(f"    Python file, {len(response.text)} chars")
             return result
+        # Text files
         if 'text' in content_type or filename.endswith(('.txt', '.md', '.csv', '.json')):
+            result["content"] = response.text
             result["file_type"] = "text"
             return result
             try:
                 from io import BytesIO
                 df = pd.read_excel(BytesIO(response.content))
+                result["content"] = df.to_csv(index=False)
                 result["dataframe"] = df
                 result["file_type"] = "excel"
+                print(f"    Excel file, {len(df)} rows")
                 return result
             except Exception as e:
+                print(f"    Excel parse error: {e}")
+                result["content"] = f"Excel file (error: {e})"
                 result["file_type"] = "excel"
                 return result
         # Images
         if 'image' in content_type or filename.endswith(('.png', '.jpg', '.jpeg', '.gif')):
             result["file_type"] = "image"
+            result["content"] = "[IMAGE - cannot process]"
             return result
+        # Audio/Video
+        if 'audio' in content_type or 'video' in content_type:
+            result["file_type"] = "media"
+            result["content"] = "[MEDIA - cannot process]"
             return result
+        # Try as text
+        try:
+            result["content"] = response.text[:8000]
+            result["file_type"] = "text"
+            return result
+        except:
+            result["content"] = "[Binary file]"
+            result["file_type"] = "binary"
+            return result
     except Exception as e:
+        print(f"    File fetch error: {e}")
         return {"has_file": False, "error": str(e)}
+def run_python_code(code: str) -> str:
+    """Execute Python code and return output"""
     try:
         import io
         import sys
         old_stdout = sys.stdout
+        old_stderr = sys.stderr
+        sys.stdout = stdout_buffer = io.StringIO()
+        sys.stderr = stderr_buffer = io.StringIO()
+        try:
+            exec(code, {"__builtins__": __builtins__})
+        except Exception as e:
+            sys.stdout = old_stdout
+            sys.stderr = old_stderr
+            return f"Execution error: {e}"
         sys.stdout = old_stdout
+        sys.stderr = old_stderr
+        output = stdout_buffer.getvalue()
+        errors = stderr_buffer.getvalue()
+        if output:
+            return output.strip()
+        if errors:
+            return f"Stderr: {errors.strip()}"
+        return "No output"
     except Exception as e:
+        return f"Error: {e}"
+def reverse_text(text: str) -> str:
     return text[::-1]
+def is_reversed(text: str) -> bool:
+    """Check if text is reversed English"""
+    patterns = ['.rewsna', 'eht sa', 'tfel', 'drow eht', 'etisoppo', 'tahW', 'erehW']
+    return any(p in text for p in patterns)
 # ============================================
     def __init__(self):
         print("Initializing agent...")
         self.client = get_groq_client()
+        print("✅ Agent ready!")
+    def ask(self, prompt: str) -> str:
+        """Ask LLM with retries"""
+        for attempt in range(3):
             try:
                 response = self.client.chat.completions.create(
                     model="llama-3.3-70b-versatile",
                     messages=[{"role": "user", "content": prompt}],
+                    temperature=0.1,
                     max_tokens=200,
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
                 if "rate" in str(e).lower() or "429" in str(e):
                     wait = (attempt + 1) * 20
+                    print(f"    Rate limit, waiting {wait}s...")
                     time.sleep(wait)
                 else:
+                    print(f"    LLM error: {e}")
+                    return ""
+        return ""
+    def clean(self, answer: str) -> str:
+        """Clean answer for exact match"""
         if not answer:
+            return ""
+        # Remove prefixes
+        for p in ["Answer:", "The answer is:", "The answer is", "A:", "Final answer:", "**"]:
+            if answer.lower().startswith(p.lower()):
                 answer = answer[len(p):].strip()
+        # Clean formatting
         answer = answer.replace("**", "").replace("```", "").strip()
+        answer = answer.strip('"\'')
+        # Remove trailing punctuation for short answers
         if answer.endswith('.') and len(answer.split()) <= 5:
             answer = answer[:-1]
         return answer.strip()
     def __call__(self, question: str, task_id: str = None) -> str:
+        context_parts = []
+        # === Handle reversed text ===
+        if is_reversed(question):
+            question = reverse_text(question)
+            print(f"    [Decoded reversed: {question[:50]}...]")
+        # === Fetch file ===
+        file_info = fetch_task_file(task_id)
         if file_info.get("has_file"):
+            ftype = file_info.get("file_type", "")
+            content = file_info.get("content", "")
+            if ftype == "python" and content:
+                print(f"    [Executing Python...]")
+                output = run_python_code(content)
+                print(f"    [Output: {output[:100]}]")
+                context_parts.append(f"Python code output:\n{output}")
+            elif ftype == "excel":
+                context_parts.append(f"Excel data:\n{content[:4000]}")
+            elif ftype == "text":
+                context_parts.append(f"File content:\n{content[:4000]}")
+            elif ftype in ["image", "media"]:
+                context_parts.append("[This task has an image/media file that cannot be processed]")
+        # === Web search ===
+        do_search = not file_info.get("has_file") or file_info.get("file_type") in ["image", "media"]
+        if do_search:
+            results = web_search(question[:100])
+            if results:
+                context_parts.append(f"Web search:\n{results[:2500]}")
+                print(f"    [Search done]")
+        # === Ask LLM ===
+        context = "\n\n".join(context_parts)
+        prompt = f"""Answer this question with ONLY the answer. No explanation.
+Rules:
+- Give just the answer (number, name, or short phrase)
+- No "The answer is" prefix
+- Be precise - exact match grading
+- If unsure, give your best guess
+{f"Context:{chr(10)}{context}" if context else ""}
 Question: {question}
 Answer:"""
+        answer = self.ask(prompt)
+        answer = self.clean(answer)
+        # Don't return empty
+        if not answer or "unable" in answer.lower() or "cannot" in answer.lower():
+            # Try simpler prompt
+            simple = self.ask(f"Answer in 1-3 words: {question}")
+            answer = self.clean(simple) or "unknown"
+        return answer
 # ============================================
     try:
         questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
+        print(f"📋 {len(questions)} questions\n")
     except Exception as e:
         return f"❌ Fetch failed: {e}", None
         task_id = q.get("task_id")
         question = q.get("question", "")
+        print(f"\n[{i+1}/{len(questions)}] Task: {task_id}")
+        print(f"Q: {question[:70]}...")
         answer = agent(question, task_id)
+        print(f"A: {answer}")
         answers.append({"task_id": task_id, "submitted_answer": answer})
         results.append({"#": i+1, "Question": question[:50]+"...", "Answer": answer})
             time.sleep(DELAY_BETWEEN_QUESTIONS)
     total = time.time() - start
+    print(f"\n⏱️ {total/60:.1f} min total")
     try:
         result = requests.post(
         status = f"✅ Done in {total/60:.1f} min\n\n"
         status += f"🎯 Score: {score}% ({correct}/{total_q})\n\n"
+        status += "🎉 PASSED!" if score >= 30 else f"Need {30-score}% more"
         return status, pd.DataFrame(results)
     except Exception as e:
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 GAIA Agent - Unit 4")
+    gr.Markdown("**Llama 3.3 70B** via Groq | ~5 min runtime")
     gr.LoginButton()
+    run_btn = gr.Button("🚀 Run", variant="primary", size="lg")
+    status = gr.Textbox(label="Status", lines=5)
     table = gr.DataFrame(label="Results")
     run_btn.click(run_and_submit_all, outputs=[status, table])
 if __name__ == "__main__":
     print(f"GROQ_API_KEY: {'✅' if os.environ.get('GROQ_API_KEY') else '❌'}")
     demo.launch()