Vinsmart06 committed on
Commit
cd66b27
Β·
verified Β·
1 Parent(s): 8afb125

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -118
app.py CHANGED
@@ -64,164 +64,228 @@ def youtube_captions(self, url):
64
  from openai import OpenAI
65
 
66
class BasicAgent:
    """GAIA benchmark agent: routes each question through an LLM tool loop.

    Heavy resources (the OpenAI client and the Whisper model) are created
    once here and reused for every question.
    """

    def __init__(self):
        print("🚀 Super GAIA Agent initialized")
        self.client = OpenAI()
        # Load Whisper a single time; reloading per question would dominate runtime.
        self.audio_model = whisper.load_model("base")

    def __call__(self, question, file_url=None):
        """Callable facade: delegate straight to the reasoning loop."""
        return self.agent_loop(question, file_url)
74
- def read_audio(self, file_url):
75
- try:
76
- r = requests.get(file_url, timeout=20)
77
- with open("temp_audio.mp3", "wb") as f: f.write(r.content)
78
- result = self.audio_model.transcribe("temp_audio.mp3")
79
- return result
80
- except Exception as e:
81
- return f"Audio error: {str(e)}"
82
- def download_file(self, url):
83
- if not url or not url.startswith("http"):
84
- return None
85
- try:
86
- r = requests.get(url, timeout=20)
87
- file_name = url.split("/")[-1] or "temp_file"
88
- with open(file_name, "wb") as f:
89
- f.write(r.content)
90
- return file_name
91
- except Exception as e:
92
- print(f"Download error: {e}")
93
- return None
94
 
95
- # --- Robust Wikipedia Tool ---
 
 
 
96
  def wiki_search(self, query):
97
  try:
98
  query = query.strip(' ".,')
99
- # Step 1: find the best matching title
100
- search = requests.get(
101
  "https://en.wikipedia.org/w/api.php",
102
  params={"action": "query", "list": "search", "srsearch": query,
103
- "format": "json", "srlimit": 1},
104
  timeout=10
105
  ).json()
106
- if not search.get("query", {}).get("search"):
107
- return f"No Wikipedia results for '{query}'"
108
- title = search["query"]["search"][0]["title"]
109
-
110
- # Step 2: fetch the full summary via REST API
111
  summary = requests.get(
112
- f"https://en.wikipedia.org/api/rest_v1/page/summary/{title.replace(' ', '_')}",
113
  timeout=10
114
  ).json()
115
- return f"WIKI: {title}\n{summary.get('extract', 'No extract found.')}"
 
 
 
116
  except Exception as e:
117
- return f"Wiki error: {str(e)}"
118
-
119
-
120
-
121
- def execute_tool(self, tool, input_data, file_url):
122
- input_data = input_data.strip(' ".,')
123
- # If agent provides no URL or says 'none', use the system-provided file_url
124
- target = file_url if (not input_data or "http" not in input_data) else input_data
125
-
126
  try:
127
- if tool == "wiki_search": return self.wiki_search(input_data)
128
-
129
- if tool in ["read_image", "read_excel", "read_audio"]:
130
- if not target: return "Error: No file URL available for this task."
131
- r = requests.get(target, timeout=20)
132
- ext = target.split('.')[-1].lower() if '.' in target else 'tmp'
133
- with open(f"temp.{ext}", "wb") as f: f.write(r.content)
134
-
135
- if tool == "read_image": return f"IMAGE_CONTENT: {pytesseract.image_to_string(Image.open(f'temp.{ext}'))}"
136
- if tool == "read_excel": return f"EXCEL_DATA: {pd.read_excel(f'temp.{ext}').to_string()[:3000]}"
137
- if tool == "read_audio": return f"TRANSCRIPT: {self.audio_model.transcribe(f'temp.{ext}')}"
138
-
139
- if tool == "scrape_page":
140
- soup = BeautifulSoup(requests.get(input_data, timeout=10).text, "html.parser")
141
- return f"PAGE_TEXT: {soup.get_text()[:4000]}"
142
  except Exception as e:
143
- return f"Tool error: {str(e)}"
144
- return f"Unknown tool: {tool}"
145
 
146
-
 
 
 
 
 
 
 
 
 
 
147
 
148
- # 2. Handle web/search tools
 
 
 
 
 
 
 
 
 
 
149
 
150
- if tool == "youtube_captions":
151
- return self.youtube_captions(input_data)
 
 
 
 
 
 
 
 
 
 
 
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def agent_loop(self, question, file_url):
155
- # Pre-load file content if available
156
  pre_context = ""
157
  if file_url:
158
- ext = file_url.split('.')[-1].lower()
159
- if ext in ['mp3', 'wav', 'ogg', 'm4a']:
160
- result = self.execute_tool("read_audio", file_url, file_url)
161
- pre_context = f"\nFILE TRANSCRIPTION: {result}"
162
- elif ext in ['xlsx', 'xls']:
163
- result = self.execute_tool("read_excel", file_url, file_url)
164
- pre_context = f"\nEXCEL DATA: {result}"
165
- elif ext in ['png', 'jpg', 'jpeg']:
166
- result = self.execute_tool("read_image", file_url, file_url)
167
- pre_context = f"\nIMAGE TEXT: {result}"
168
  elif ext == 'py':
169
- r = requests.get(file_url, timeout=10)
170
- pre_context = f"\nPYTHON CODE:\n{r.text[:3000]}"
171
-
 
 
172
  memory = pre_context # seed memory with file content
173
- for step in range(10):
174
- prompt = f"""You are a precise GAIA solver.
175
- FILE_URL (use this for file tools if available): {file_url if file_url else 'None'}
176
-
177
- AVAILABLE TOOLS: wiki_search, read_image, read_audio, read_excel, scrape_page
178
-
179
- RULES:
180
- - If FILE_URL is not None, use it as INPUT when calling read_image, read_audio, or read_excel
181
- - NEVER use INPUT: none β€” always use the FILE_URL as INPUT for file tools
182
- - For web questions, use wiki_search or scrape_page
183
- - Decode reversed text before answering
184
- - Botanical vegetables only: exclude all items containing seeds (tomatoes, peppers, zucchini, corn, beans, peas)
185
- - Botanical herbs like basil are NOT vegetables
186
- - For YouTube video questions, use scrape_page with the YouTube URL
187
- - For questions about papers or articles, use scrape_page on the URL
188
- - For any factual lookup not in a file, use wiki_search first, then scrape_page
189
- Question: {question}
190
- History: {memory}
191
-
192
- Respond EXACTLY:
193
- TOOL: tool_name
194
- INPUT: tool_input
195
- OR
196
- FINAL: your_precise_answer"""
197
 
198
  response = self.client.chat.completions.create(
199
- model="gpt-4o",
200
- #model ="gpt-5.2-chat-latest",
201
  temperature=0,
202
- messages=[{"role": "system", "content": "You are a scientific agent. Always use tools to verify facts before answering."},
203
- {"role": "user", "content": prompt}]
 
 
204
  )
205
-
206
  resp = response.choices[0].message.content.strip()
207
- print(f"Step {step}: {resp}")
208
 
 
209
  if "FINAL:" in resp:
210
  return resp.split("FINAL:")[-1].strip()
211
 
212
- # Improved regex to handle tool names regardless of trailing punctuation
213
  t_match = re.search(r"TOOL:\s*(\w+)", resp, re.I)
214
- i_match = re.search(r"INPUT:\s*(.*)", resp, re.I)
215
-
216
  if t_match and i_match:
217
- t_name = t_match.group(1).lower().strip()
218
- t_input = i_match.group(1).strip()
219
- result = self.execute_tool(t_name, t_input, file_url)
220
- memory += f"\nStep {step} - {t_name} output: {result[:1200]}"
 
 
 
 
 
 
 
 
221
  else:
222
- memory += f"\nStep {step} - Thought: {resp}"
223
-
224
- return "No answer found."
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  def run_and_submit_all( profile: gr.OAuthProfile | None):
227
  """
 
64
  from openai import OpenAI
65
 
66
class BasicAgent:
    """Super GAIA agent: an LLM-driven tool loop with pre-loaded file context."""

    def __init__(self):
        print("🚀 Super GAIA Agent initialized")
        self.client = OpenAI()
        self.audio_model = whisper.load_model("base")  # loaded once, reused per call

    def __call__(self, question, file_url=None):
        # Callable facade over the main reasoning loop.
        return self.agent_loop(question, file_url)
75
+ # ── TOOL: Wikipedia ──────────────────────────────────────────────
76
  def wiki_search(self, query):
77
  try:
78
  query = query.strip(' ".,')
79
+ # 1. Find best matching title
80
+ r = requests.get(
81
  "https://en.wikipedia.org/w/api.php",
82
  params={"action": "query", "list": "search", "srsearch": query,
83
+ "format": "json", "srlimit": 3},
84
  timeout=10
85
  ).json()
86
+ results = r.get("query", {}).get("search", [])
87
+ if not results:
88
+ return f"No Wikipedia results for: {query}"
89
+ title = results[0]["title"]
90
+ # 2. Get full extract via REST
91
  summary = requests.get(
92
+ f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}",
93
  timeout=10
94
  ).json()
95
+ extract = summary.get("extract", "")
96
+ if not extract:
97
+ return f"No extract for: {title}"
98
+ return f"WIKI [{title}]: {extract[:3000]}"
99
  except Exception as e:
100
+ return f"Wiki error: {e}"
101
+
102
+ # ── TOOL: Scrape web page ─────────────────────────────────────────
103
+ def scrape_page(self, url):
104
+ url = url.strip(' "')
105
+ # Block YouTube β€” it never returns useful content via scraping
106
+ if "youtube.com" in url or "youtu.be" in url:
107
+ return "YouTube pages cannot be scraped. Use yt-dlp captions instead or search for video transcript online."
 
108
  try:
109
+ headers = {"User-Agent": "Mozilla/5.0"}
110
+ resp = requests.get(url, timeout=15, headers=headers)
111
+ soup = BeautifulSoup(resp.text, "html.parser")
112
+ # Remove nav/footer/script noise
113
+ for tag in soup(["script", "style", "nav", "footer", "header"]):
114
+ tag.decompose()
115
+ text = soup.get_text(separator=" ", strip=True)
116
+ return f"PAGE [{url[:60]}]: {text[:4000]}"
 
 
 
 
 
 
 
117
  except Exception as e:
118
+ return f"Scrape error: {e}"
 
119
 
120
+ # ── TOOL: Read audio via Whisper ──────────────────────────────────
121
+ def read_audio(self, url):
122
+ try:
123
+ url = url.strip(' "')
124
+ r = requests.get(url, timeout=30)
125
+ with open("temp_audio_file.mp3", "wb") as f:
126
+ f.write(r.content)
127
+ result = self.audio_model.transcribe("temp_audio_file.mp3")
128
+ return f"TRANSCRIPT: {result['text']}"
129
+ except Exception as e:
130
+ return f"Audio error: {e}"
131
 
132
+ # ── TOOL: Read Excel ──────────────────────────────────────────────
133
+ def read_excel(self, url):
134
+ try:
135
+ url = url.strip(' "')
136
+ r = requests.get(url, timeout=20)
137
+ with open("temp_file.xlsx", "wb") as f:
138
+ f.write(r.content)
139
+ df = pd.read_excel("temp_file.xlsx")
140
+ return f"EXCEL_DATA:\n{df.to_string()[:4000]}"
141
+ except Exception as e:
142
+ return f"Excel error: {e}"
143
 
144
+ # ── TOOL: Read image via OCR ──────────────────────────────────────
145
+ def read_image(self, url):
146
+ try:
147
+ url = url.strip(' "')
148
+ r = requests.get(url, timeout=20)
149
+ ext = url.split('.')[-1].lower() or 'png'
150
+ fname = f"temp_img.{ext}"
151
+ with open(fname, "wb") as f:
152
+ f.write(r.content)
153
+ img = Image.open(fname)
154
+ text = pytesseract.image_to_string(img)
155
+ return f"IMAGE_TEXT: {text[:3000]}" if text.strip() else "IMAGE_TEXT: (no text detected by OCR)"
156
+ except Exception as e:
157
+ return f"Image error: {e}"
158
 
159
+ # ── TOOL: Execute Python code ─────────────────────────────────────
160
+ def run_python(self, url):
161
+ try:
162
+ url = url.strip(' "')
163
+ r = requests.get(url, timeout=15)
164
+ code = r.text
165
+ # Safe exec with captured stdout
166
+ import io, contextlib
167
+ stdout = io.StringIO()
168
+ with contextlib.redirect_stdout(stdout):
169
+ exec(code, {})
170
+ output = stdout.getvalue()
171
+ return f"PYTHON_OUTPUT: {output[:2000]}" if output else "PYTHON_OUTPUT: (no print output)"
172
+ except Exception as e:
173
+ return f"Python exec error: {e}"
174
 
175
+ # ── Route tool calls ──────────────────────────────────────────────
176
+ def execute_tool(self, tool, input_data, file_url):
177
+ # Use file_url as fallback when input_data has no URL
178
+ target = input_data.strip(' "')
179
+ if not target.startswith("http") and file_url:
180
+ target = file_url
181
+
182
+ if tool == "wiki_search":
183
+ return self.wiki_search(input_data)
184
+ elif tool == "scrape_page":
185
+ return self.scrape_page(target)
186
+ elif tool == "read_audio":
187
+ return self.read_audio(target)
188
+ elif tool == "read_excel":
189
+ return self.read_excel(target)
190
+ elif tool == "read_image":
191
+ return self.read_image(target)
192
+ elif tool == "run_python":
193
+ return self.run_python(target)
194
+ else:
195
+ return f"Unknown tool: {tool}"
196
+
197
+ # ── Main agent loop ───────────────────────────────────────────────
198
  def agent_loop(self, question, file_url):
199
+ # ── PRE-LOAD: handle file-based questions before the loop ──
200
  pre_context = ""
201
  if file_url:
202
+ ext = file_url.split('.')[-1].lower().split('?')[0]
203
+ print(f" [Pre-load] detected file ext={ext}, url={file_url}")
204
+ if ext in ['mp3', 'wav', 'ogg', 'm4a', 'flac']:
205
+ pre_context = self.read_audio(file_url)
206
+ elif ext in ['xlsx', 'xls', 'csv']:
207
+ pre_context = self.read_excel(file_url)
208
+ elif ext in ['png', 'jpg', 'jpeg', 'gif', 'webp']:
209
+ pre_context = self.read_image(file_url)
 
 
210
  elif ext == 'py':
211
+ try:
212
+ pre_context = "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
213
+ except:
214
+ pass
215
+
216
  memory = pre_context # seed memory with file content
217
+
218
+ system_prompt = """You are a precise GAIA benchmark solver.
219
+ Rules:
220
+ - Always output exactly: TOOL: tool_name\\nINPUT: tool_input OR FINAL: answer
221
+ - Never repeat a failed tool call with the same input β€” change the query or try a different tool
222
+ - For math/logic questions: reason step by step then output FINAL
223
+ - Botanical rule: vegetables are plant parts that are NOT fruits. Seeds inside = botanical fruit (tomato, pepper, corn, zucchini, green beans, peas, cucumber). Roots/stems/leaves = vegetable (carrot, celery, lettuce, broccoli, sweet potato). Basil = herb, not vegetable.
224
+ - For reversed text: decode it completely before answering"""
225
+
226
+ for step in range(10):
227
+ # Build prompt with all context
228
+ prompt = f"""FILE_URL: {file_url if file_url else 'None'}
229
+
230
+ QUESTION: {question}
231
+
232
+ ACCUMULATED KNOWLEDGE:
233
+ {memory if memory else '(none yet)'}
234
+
235
+ AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python
236
+
237
+ What is your next action? Output TOOL+INPUT or FINAL:"""
 
 
 
238
 
239
  response = self.client.chat.completions.create(
240
+ model="gpt-4o", # upgraded from gpt-4o-mini
 
241
  temperature=0,
242
+ messages=[
243
+ {"role": "system", "content": system_prompt},
244
+ {"role": "user", "content": prompt}
245
+ ]
246
  )
247
+
248
  resp = response.choices[0].message.content.strip()
249
+ print(f" Step {step}: {resp[:120]}")
250
 
251
+ # ── Check for final answer ──
252
  if "FINAL:" in resp:
253
  return resp.split("FINAL:")[-1].strip()
254
 
255
+ # ── Parse tool call ──
256
  t_match = re.search(r"TOOL:\s*(\w+)", resp, re.I)
257
+ i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)
258
+
259
  if t_match and i_match:
260
+ tool_name = t_match.group(1).lower().strip()
261
+ tool_input = i_match.group(1).strip().split('\n')[0] # first line only
262
+
263
+ result = self.execute_tool(tool_name, tool_input, file_url)
264
+ print(f" [{tool_name}] β†’ {result[:100]}")
265
+
266
+ # Only add useful results to memory (skip empty/error loops)
267
+ if len(result) > 30 and "error" not in result.lower()[:20]:
268
+ memory += f"\n\n[Step {step} - {tool_name}({tool_input[:50]})]\n{result[:1500]}"
269
+ else:
270
+ # Tool failed β€” tell the model so it tries something different
271
+ memory += f"\n\n[Step {step} - {tool_name} FAILED: {result[:200]}. Try a different approach.]"
272
  else:
273
+ # Model gave a thought without a tool call β€” add to memory as reasoning
274
+ memory += f"\n\n[Step {step} - Reasoning]: {resp[:300]}"
275
+
276
+ # Fallback: ask the model to give best answer from what it has
277
+ fallback = self.client.chat.completions.create(
278
+ model="gpt-4o",
279
+ temperature=0,
280
+ messages=[
281
+ {"role": "system", "content": system_prompt},
282
+ {"role": "user", "content": f"Based on everything gathered, give your best FINAL answer.\nQUESTION: {question}\nKNOWLEDGE:\n{memory}"}
283
+ ]
284
+ )
285
+ resp = fallback.choices[0].message.content.strip()
286
+ if "FINAL:" in resp:
287
+ return resp.split("FINAL:")[-1].strip()
288
+ return resp
289
 
290
  def run_and_submit_all( profile: gr.OAuthProfile | None):
291
  """