Final_Assignment_Template

Sleeping

App Files Files Community

mohdadrian commited on Jan 17

Commit

5cf7143

verified ·

1 Parent(s): fbdc7b4

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -113

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from duckduckgo_search import DDGS
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-DELAY_BETWEEN_QUESTIONS = 15  # 15 seconds to avoid rate limits on 70B model
 # ============================================
 # GROQ CLIENT
@@ -26,6 +26,7 @@ def get_groq_client():
 # ============================================
 def web_search(query: str) -> str:
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=5))
@@ -33,18 +34,17 @@ def web_search(query: str) -> str:
             return ""
         output = []
         for r in results:
-            output.append(f"Title: {r.get('title','')}")
-            output.append(f"Snippet: {r.get('body','')}")
-            output.append("---")
         return "\n".join(output)
     except:
         return ""
 def get_task_file(task_id: str) -> dict:
     try:
         url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-        response = requests.get(url, timeout=15)
         if response.status_code == 404:
             return {"has_file": False}
@@ -58,66 +58,89 @@ def get_task_file(task_id: str) -> dict:
         result = {"has_file": True, "filename": filename, "type": content_type}
-        # Python files - return code
         if filename.endswith('.py'):
             result["content"] = response.text
-            result["is_code"] = True
             return result
-        # Text files
         if 'text' in content_type or filename.endswith(('.txt', '.md', '.csv', '.json')):
-            result["content"] = response.text[:6000]
             return result
-        # Excel
         if 'excel' in content_type or 'spreadsheet' in content_type or filename.endswith(('.xlsx', '.xls')):
             try:
                 from io import BytesIO
                 df = pd.read_excel(BytesIO(response.content))
                 result["content"] = df.to_string()
-                result["is_excel"] = True
                 return result
             except Exception as e:
-                result["content"] = f"[Excel parse error: {e}]"
                 return result
-        # Image - can't process
-        if 'image' in content_type:
-            result["content"] = "[IMAGE FILE]"
-            result["is_image"] = True
             return result
-        result["content"] = f"[File: {content_type}]"
         return result
     except Exception as e:
         return {"has_file": False, "error": str(e)}
-def reverse_string(text: str) -> str:
-    return text[::-1]
-def is_reversed(text: str) -> bool:
-    indicators = ['.rewsna', 'eht sa', 'tfel', 'drow eht', 'etisoppo']
-    return any(x in text.lower() for x in indicators)
-def execute_python(code: str) -> str:
-    """Safely execute Python code and return output"""
     try:
         import io
         import sys
-        from contextlib import redirect_stdout
-        # Capture stdout
-        f = io.StringIO()
-        with redirect_stdout(f):
-            exec(code, {"__builtins__": __builtins__})
-        output = f.getvalue()
-        return output.strip() if output else "No output"
     except Exception as e:
-        return f"Error: {e}"
 # ============================================
@@ -126,18 +149,19 @@ def execute_python(code: str) -> str:
 class BasicAgent:
     def __init__(self):
-        print("Initializing Groq agent (70B model)...")
         self.client = get_groq_client()
         print("✅ Ready!")
-    def ask(self, prompt: str, max_retries: int = 3) -> str:
         for attempt in range(max_retries):
             try:
                 response = self.client.chat.completions.create(
-                    model="llama-3.3-70b-versatile",  # Smart model
                     messages=[{"role": "user", "content": prompt}],
                     temperature=0,
-                    max_tokens=150,
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
@@ -146,98 +170,135 @@ class BasicAgent:
                     print(f"    ⏳ Rate limit, waiting {wait}s...")
                     time.sleep(wait)
                 else:
-                    return f"Error: {e}"
         return "unknown"
     def clean_answer(self, answer: str) -> str:
         # Remove common prefixes
         prefixes = [
-            "Answer:", "The answer is:", "The answer is", "A:",
-            "Final answer:", "Final answer", "Based on",
-            "I found that", "The result is", "**", "```"
         ]
         for p in prefixes:
-            if answer.lower().startswith(p.lower()):
                 answer = answer[len(p):].strip()
-        # Remove markdown and quotes
         answer = answer.replace("**", "").replace("```", "").strip()
-        answer = answer.strip('"\'')
-        # If answer is too long or contains "I'm unable", return unknown
-        if "I'm unable" in answer or "I cannot" in answer or "I don't" in answer:
-            return "unknown"
         # Remove trailing period for short answers
         if answer.endswith('.') and len(answer.split()) <= 5:
             answer = answer[:-1]
         return answer.strip()
     def __call__(self, question: str, task_id: str = None) -> str:
-        original_question = question
-        context_parts = []
-        # 1. Handle reversed text
-        if is_reversed(question):
-            question = reverse_string(question)
-            context_parts.append(f"[Original was reversed. Decoded: {question}]")
-        # 2. Check for file
-        file_info = {"has_file": False}
-        if task_id:
-            file_info = get_task_file(task_id)
-            if file_info.get("has_file"):
-                if file_info.get("is_code"):
-                    # Execute Python code
-                    code = file_info.get("content", "")
-                    output = execute_python(code)
-                    context_parts.append(f"Python code output: {output}")
-                elif file_info.get("is_excel"):
-                    context_parts.append(f"Excel data:\n{file_info.get('content', '')[:3000]}")
-                elif file_info.get("is_image"):
-                    context_parts.append("[This task has an image file which cannot be processed]")
                 else:
-                    context_parts.append(f"File content:\n{file_info.get('content', '')[:3000]}")
-        # 3. Web search if needed (and no useful file)
-        if not file_info.get("has_file") or file_info.get("is_image"):
             search_triggers = [
-                "who ", "what ", "when ", "where ", "how many", "how much",
-                "album", "actor", "movie", "wikipedia", "surname", "name",
-                "athlete", "pitcher", "yankee", "country", "competition",
-                "nominated", "published", "article", "mercedes", "sosa"
             ]
             if any(t in question.lower() for t in search_triggers):
-                search_results = web_search(question)
-                if search_results:
-                    context_parts.append(f"Search results:\n{search_results[:2500]}")
-        # 4. Build prompt
-        context = "\n\n".join(context_parts) if context_parts else ""
-        prompt = f"""You must answer this question with ONLY the final answer.
 RULES:
-- Give ONLY the answer (a word, number, name, or short phrase)
-- NO explanations, NO "I think", NO "Based on"
-- If asked for a number, give just the number
-- If asked for a name, give just the name
-- If it's a list, give comma-separated items
-- NEVER say "I'm unable to" or "I cannot" - give your best guess
-{f"CONTEXT:{chr(10)}{context}" if context else ""}
-QUESTION: {question}
-YOUR ANSWER (just the answer):"""
-        answer = self.ask(prompt)
         return self.clean_answer(answer)
@@ -255,7 +316,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not os.environ.get("GROQ_API_KEY"):
         return "❌ Add GROQ_API_KEY to secrets!", None
-    print(f"\n{'='*40}\nUser: {username}\n{'='*40}")
     try:
         agent = BasicAgent()
@@ -265,7 +328,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     try:
         questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
         print(f"📋 {len(questions)} questions")
-        print(f"⏱️ Expected time: ~{len(questions) * DELAY_BETWEEN_QUESTIONS // 60} minutes\n")
     except Exception as e:
         return f"❌ Fetch failed: {e}", None
@@ -277,21 +340,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         task_id = q.get("task_id")
         question = q.get("question", "")
-        print(f"[{i+1}/{len(questions)}] {question[:50]}...")
         answer = agent(question, task_id)
-        print(f"    → {answer[:50]}")
         answers.append({"task_id": task_id, "submitted_answer": answer})
-        results.append({"#": i+1, "Q": question[:40]+"...", "A": answer[:50]})
-        # Delay to avoid rate limits
         if i < len(questions) - 1:
-            print(f"    ⏳ Waiting {DELAY_BETWEEN_QUESTIONS}s...")
             time.sleep(DELAY_BETWEEN_QUESTIONS)
     total = time.time() - start
-    print(f"\n⏱️ Total: {total:.0f}s ({total/60:.1f} min)")
     try:
         result = requests.post(
@@ -308,8 +369,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         correct = result.get('correct_count', 0)
         total_q = result.get('total_attempted', 0)
-        status = f"✅ Done in {total/60:.1f} min\n\n🎯 {score}% ({correct}/{total_q})\n\n"
-        status += "🎉 PASSED!" if score >= 30 else f"Need {30-score}% more"
         return status, pd.DataFrame(results)
     except Exception as e:
@@ -323,18 +385,25 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 GAIA Agent - Unit 4")
     gr.Markdown("""
-    **Groq + Llama 3.3 70B** (smart model)
-    ⏱️ Takes ~5 minutes (15s delay between questions to avoid rate limits)
     """)
     gr.LoginButton()
-    run_btn = gr.Button("🚀 Run", variant="primary", size="lg")
-    status = gr.Textbox(label="Status", lines=5)
     table = gr.DataFrame(label="Results")
     run_btn.click(run_and_submit_all, outputs=[status, table])
 if __name__ == "__main__":
     print(f"GROQ_API_KEY: {'✅' if os.environ.get('GROQ_API_KEY') else '❌'}")
     demo.launch()

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+DELAY_BETWEEN_QUESTIONS = 15
 # ============================================
 # GROQ CLIENT
 # ============================================
 def web_search(query: str) -> str:
+    """Search with DuckDuckGo"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=5))
             return ""
         output = []
         for r in results:
+            output.append(f"- {r.get('title','')}: {r.get('body','')}")
         return "\n".join(output)
     except:
         return ""
 def get_task_file(task_id: str) -> dict:
+    """Get file associated with a GAIA task"""
     try:
         url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+        response = requests.get(url, timeout=20)
         if response.status_code == 404:
             return {"has_file": False}
         result = {"has_file": True, "filename": filename, "type": content_type}
+        # Python files
         if filename.endswith('.py'):
             result["content"] = response.text
+            result["file_type"] = "python"
             return result
+        # Text/CSV/JSON files
         if 'text' in content_type or filename.endswith(('.txt', '.md', '.csv', '.json')):
+            result["content"] = response.text[:8000]
+            result["file_type"] = "text"
             return result
+        # Excel files
         if 'excel' in content_type or 'spreadsheet' in content_type or filename.endswith(('.xlsx', '.xls')):
             try:
                 from io import BytesIO
                 df = pd.read_excel(BytesIO(response.content))
                 result["content"] = df.to_string()
+                result["dataframe"] = df
+                result["file_type"] = "excel"
                 return result
             except Exception as e:
+                result["content"] = f"Excel error: {e}"
+                result["file_type"] = "excel"
                 return result
+        # Images
+        if 'image' in content_type or filename.endswith(('.png', '.jpg', '.jpeg', '.gif')):
+            result["file_type"] = "image"
+            result["content"] = "[Cannot process image]"
             return result
+        # MP3/Audio
+        if 'audio' in content_type or filename.endswith(('.mp3', '.wav')):
+            result["file_type"] = "audio"
+            result["content"] = "[Cannot process audio]"
+            return result
+        result["content"] = response.text[:5000] if len(response.content) < 50000 else "[Large binary file]"
+        result["file_type"] = "other"
         return result
     except Exception as e:
         return {"has_file": False, "error": str(e)}
+def execute_python_code(code: str) -> str:
+    """Execute Python code and capture ALL output"""
     try:
         import io
         import sys
+        # Create string buffer for stdout
+        old_stdout = sys.stdout
+        sys.stdout = buffer = io.StringIO()
+        # Execute the code
+        exec_globals = {
+            '__builtins__': __builtins__,
+            'print': print,
+        }
+        exec(code, exec_globals)
+        # Get output
+        output = buffer.getvalue()
+        sys.stdout = old_stdout
+        return output.strip() if output.strip() else "Code executed, no print output"
     except Exception as e:
+        return f"Execution error: {e}"
+def reverse_string(text: str) -> str:
+    """Reverse a string"""
+    return text[::-1]
+def is_reversed_text(text: str) -> bool:
+    """Check if text appears to be reversed"""
+    # Common reversed English patterns
+    indicators = ['.rewsna', 'eht sa', 'tfel', 'drow eht', 'etisoppo', 'siht']
+    text_lower = text.lower()
+    return any(ind in text_lower for ind in indicators)
 # ============================================
 class BasicAgent:
     def __init__(self):
+        print("Initializing agent...")
         self.client = get_groq_client()
         print("✅ Ready!")
+    def ask_llm(self, prompt: str, max_retries: int = 3) -> str:
+        """Query the LLM with retry logic"""
         for attempt in range(max_retries):
             try:
                 response = self.client.chat.completions.create(
+                    model="llama-3.3-70b-versatile",
                     messages=[{"role": "user", "content": prompt}],
                     temperature=0,
+                    max_tokens=200,
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
                     print(f"    ⏳ Rate limit, waiting {wait}s...")
                     time.sleep(wait)
                 else:
+                    print(f"    ❌ LLM error: {e}")
+                    return "unknown"
         return "unknown"
     def clean_answer(self, answer: str) -> str:
+        """Clean up the answer to exact match format"""
+        if not answer:
+            return "unknown"
         # Remove common prefixes
         prefixes = [
+            "Answer:", "The answer is:", "The answer is", "A:", "**Answer:**",
+            "Final answer:", "Final Answer:", "Based on the", "According to",
+            "The result is:", "The result is", "The output is:", "The output is",
         ]
         for p in prefixes:
+            if answer.startswith(p):
+                answer = answer[len(p):].strip()
+            elif answer.lower().startswith(p.lower()):
                 answer = answer[len(p):].strip()
+        # Remove markdown formatting
         answer = answer.replace("**", "").replace("```", "").strip()
+        # Remove surrounding quotes
+        if (answer.startswith('"') and answer.endswith('"')) or \
+           (answer.startswith("'") and answer.endswith("'")):
+            answer = answer[1:-1]
         # Remove trailing period for short answers
         if answer.endswith('.') and len(answer.split()) <= 5:
             answer = answer[:-1]
+        # Filter out non-answers
+        bad_phrases = ["I'm unable", "I cannot", "I don't have", "I couldn't", "unfortunately"]
+        if any(bp.lower() in answer.lower() for bp in bad_phrases):
+            return "unknown"
         return answer.strip()
     def __call__(self, question: str, task_id: str = None) -> str:
+        """Process a question and return the answer"""
+        # === STEP 1: Handle reversed text ===
+        if is_reversed_text(question):
+            decoded = reverse_string(question)
+            print(f"    [Reversed text detected, decoded]")
+            question = decoded
+        # === STEP 2: Get associated file ===
+        file_info = get_task_file(task_id) if task_id else {"has_file": False}
+        file_context = ""
+        if file_info.get("has_file"):
+            file_type = file_info.get("file_type", "")
+            filename = file_info.get("filename", "")
+            print(f"    [File: {filename} ({file_type})]")
+            if file_type == "python":
+                # Execute Python code and get output
+                code = file_info.get("content", "")
+                output = execute_python_code(code)
+                print(f"    [Python output: {output[:50]}...]")
+                file_context = f"Python code output:\n{output}"
+            elif file_type == "excel":
+                df = file_info.get("dataframe")
+                if df is not None:
+                    # Provide summary and data
+                    file_context = f"Excel file ({len(df)} rows):\n{file_info.get('content', '')[:3000]}"
                 else:
+                    file_context = f"Excel content:\n{file_info.get('content', '')[:3000]}"
+            elif file_type == "text":
+                file_context = f"File content:\n{file_info.get('content', '')[:4000]}"
+            elif file_type in ["image", "audio"]:
+                file_context = f"[This task has a {file_type} file which cannot be processed]"
+        # === STEP 3: Web search if needed ===
+        search_context = ""
+        needs_search = not file_info.get("has_file") or file_info.get("file_type") in ["image", "audio"]
+        if needs_search:
             search_triggers = [
+                "who is", "who was", "who did", "who nominated", "who played",
+                "what is", "what was", "what are",
+                "how many", "how much",
+                "where ", "when ",
+                "surname", "first name", "name of",
+                "album", "studio album", "mercedes sosa",
+                "actor", "actress", "movie", "film",
+                "wikipedia", "article",
+                "athlete", "pitcher", "yankee", "player",
+                "country", "competition", "malko",
+                "veterinarian", "equine"
             ]
             if any(t in question.lower() for t in search_triggers):
+                # Create focused search query
+                search_query = question[:120]
+                results = web_search(search_query)
+                if results:
+                    search_context = f"Search results:\n{results[:2500]}"
+                    print(f"    [Web search done]")
+        # === STEP 4: Build prompt and ask LLM ===
+        context = ""
+        if file_context:
+            context += f"\n\n{file_context}"
+        if search_context:
+            context += f"\n\n{search_context}"
+        prompt = f"""Answer this question. Give ONLY the final answer - no explanation.
 RULES:
+- Just the answer (number, name, word, or short phrase)
+- No "The answer is" or similar prefixes
+- If it's a number, just the number
+- If it's a name, just the name
+- If it's a list, comma-separated items
+- Be precise - this is graded by exact match
+{context}
+Question: {question}
+Answer:"""
+        answer = self.ask_llm(prompt)
         return self.clean_answer(answer)
     if not os.environ.get("GROQ_API_KEY"):
         return "❌ Add GROQ_API_KEY to secrets!", None
+    print(f"\n{'='*50}")
+    print(f"User: {username}")
+    print(f"{'='*50}")
     try:
         agent = BasicAgent()
     try:
         questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
         print(f"📋 {len(questions)} questions")
+        print(f"⏱️ Est. time: {len(questions) * DELAY_BETWEEN_QUESTIONS // 60} min\n")
     except Exception as e:
         return f"❌ Fetch failed: {e}", None
         task_id = q.get("task_id")
         question = q.get("question", "")
+        print(f"\n[{i+1}/{len(questions)}] {question[:60]}...")
         answer = agent(question, task_id)
+        print(f"    ✓ Answer: {answer}")
         answers.append({"task_id": task_id, "submitted_answer": answer})
+        results.append({"#": i+1, "Question": question[:50]+"...", "Answer": answer})
         if i < len(questions) - 1:
             time.sleep(DELAY_BETWEEN_QUESTIONS)
     total = time.time() - start
+    print(f"\n⏱️ Total: {total/60:.1f} min")
     try:
         result = requests.post(
         correct = result.get('correct_count', 0)
         total_q = result.get('total_attempted', 0)
+        status = f"✅ Done in {total/60:.1f} min\n\n"
+        status += f"🎯 Score: {score}% ({correct}/{total_q})\n\n"
+        status += "🎉 PASSED! 30%+ achieved!" if score >= 30 else f"📈 Need {30-score}% more to pass"
         return status, pd.DataFrame(results)
     except Exception as e:
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 GAIA Agent - Unit 4")
     gr.Markdown("""
+    **Model:** Llama 3.3 70B via Groq
+    **Features:**
+    - ✅ Python code execution
+    - ✅ Excel file analysis
+    - ✅ Reversed text detection
+    - ✅ Web search
+    ⏱️ ~5 minutes runtime
     """)
     gr.LoginButton()
+    run_btn = gr.Button("🚀 Run Evaluation", variant="primary", size="lg")
+    status = gr.Textbox(label="Status", lines=6)
     table = gr.DataFrame(label="Results")
     run_btn.click(run_and_submit_all, outputs=[status, table])
 if __name__ == "__main__":
+    print("🎯 GAIA Agent Starting")
     print(f"GROQ_API_KEY: {'✅' if os.environ.get('GROQ_API_KEY') else '❌'}")
     demo.launch()