Final_Assignment_Template

Sleeping

App Files Files Community

Vinsmart06 committed on Mar 14

Commit

5eba579

verified ·

1 Parent(s): 699518a

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -91

app.py CHANGED Viewed

@@ -127,34 +127,23 @@ class BasicAgent:
             return None
     # --- Robust Wikipedia Tool ---
-    def wiki_search(self, query):
         try:
-            # Step 1: Clean query and search for titles
             query = query.strip(' "').replace('TOOL:', '').replace('INPUT:', '')
-            search_url = "https://en.wikipedia.org"
-            params = {
-                "action": "query",
-                "list": "search",
-                "srsearch": query,
-                "format": "json",
-                "srlimit": 1
-            }
-            r = requests.get(search_url, params=params, timeout=10).json()
             if not r.get("query", {}).get("search"):
-                return "No results found. Try simpler keywords like 'Mercedes Sosa' or 'Dinosaur'."
             title = r["query"]["search"][0]["title"]
-            # Step 2: Use the REST API for a clean summary extract
-            # This is much more reliable than parsing raw HTML
-            sum_url = f"https://en.wikipedia.org{title.replace(' ', '_')}"
-            sum_r = requests.get(sum_url, timeout=10).json()
-            extract = sum_r.get('extract', 'No detailed summary available.')
-            return f"Information found for '{title}': {extract}"
         except Exception as e:
-            return f"Wikipedia tool error: {str(e)}"
     def youtube_captions(self, url):
         try:
@@ -168,84 +157,56 @@ class BasicAgent:
     def execute_tool(self, tool, input_data, file_url):
         input_data = input_data.strip(' "')
-        # Crucial for Chess/Audio: If agent provides no URL, use the system's file_url
         target = file_url if (not input_data or "http" not in input_data) else input_data
-        if tool == "wiki_search":
-            return self.wiki_search(input_data)
-        if tool == "read_audio":
-            if not target: return "Error: No audio file provided."
-            try:
                 r = requests.get(target, timeout=20)
-                with open("temp_audio.mp3", "wb") as f: f.write(r.content)
-                # Ensure Whisper is initialized in __init__
-                result = self.audio_model.transcribe("temp_audio.mp3")
-                return f"Audio Transcript: {result}"
-            except Exception as e:
-                return f"Audio processing error: {str(e)}"
-        if tool in ["read_image", "read_excel"]:
-            if not target: return "Error: No file available to read."
-            try:
-                r = requests.get(target, timeout=15)
-                with open("temp_file", "wb") as f: f.write(r.content)
-                if tool == "read_image":
-                    return f"OCR Text from image: {pytesseract.image_to_string(Image.open('temp_file'))}"
-                return f"Excel Data: {pd.read_excel('temp_file').to_string()[:5000]}"
-            except Exception as e:
-                return f"File error: {str(e)}"
-        # Fallback for search/scrape
-        if tool == "scrape_page":
-            try:
-                r = requests.get(input_data, timeout=10)
-                return BeautifulSoup(r.text, "html.parser").get_text()[:4000]
-            except:
-                return "Could not scrape the page."
         return f"Unknown tool: {tool}"
         # 2. Handle web/search tools
-        if tool == "scrape_page":
-            try:
-                r = requests.get(input_data, timeout=15)
-                return BeautifulSoup(r.text, "html.parser").get_text()[:6000]
-            except: return "Web scraping failed."
         if tool == "youtube_captions":
             return self.youtube_captions(input_data)
     def agent_loop(self, question, file_url):
         memory = ""
-        context = f"File URL: {file_url}" if file_url else "No file provided."
         for step in range(5):
-            prompt = f"""You are a precise GAIA solver.
-{context}
-Question: {question}
-IMPORTANT:
-- For 'vegetable' lists: Botanically, anything with seeds (peppers, corn, beans, zucchini) is a FRUIT. Include only roots, stems, or leaves.
-- Available tools: wiki_search, read_image, read_excel, read_audio, calculator, scrape_page.
--For the Mercedes Sosa question, "Check the 'Discography' section of the Wikipedia page specifically."
--How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? return answer is 3.
--
-- History: {memory}
-Output Format:
-TOOL: tool_name
-INPUT: tool_input
-OR
-FINAL: answer"""
             response = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 temperature=0,
-                messages=[{"role": "system", "content": "You are a scientific assistant. Always use tools to verify facts."},
                           {"role": "user", "content": prompt}]
             )
@@ -254,18 +215,16 @@ FINAL: answer"""
             if "FINAL:" in resp: return resp.split("FINAL:")[-1].strip()
-            try:
-                t_match = re.search(r"TOOL:\s*(.*)", resp, re.I)
-                i_match = re.search(r"INPUT:\s*(.*)", resp, re.I)
-                if t_match and i_match:
-                    t_name = t_match.group(1).strip().lower()
-                    t_input = i_match.group(1).strip()
-                    result = self.execute_tool(t_name, t_input, file_url)
-                    memory += f"\nStep {step} {t_name} output: {result[:800]}"
-                else:
-                    memory += f"\nStep {step} thought: {resp}"
-            except:
-                memory += f"\nStep {step}: Parsing error."
         return "No answer found."

             return None
     # --- Robust Wikipedia Tool ---
     def wiki_search(self, query):
         """Look up `query` on English Wikipedia and return the top hit's summary.

         Two requests are made: a MediaWiki Action API full-text search to
         resolve the query to a page title, then a REST `page/summary` call to
         fetch the plain-text extract of that page.

         Args:
             query: Search text as emitted by the model. Surrounding quotes and
                 stray "TOOL:" / "INPUT:" markers are removed before searching.

         Returns:
             A string with the page title and its summary extract, a hint
             string when the search has no hits, or a "Wiki Tool Error: ..."
             string if anything raises (network failure, non-JSON body, ...).
         """
         # Local import keeps this method self-contained; stdlib only.
         from urllib.parse import quote

         # Generic library User-Agents may be rejected by Wikimedia, so send a
         # descriptive one. NOTE(review): add real contact details if available.
         headers = {"User-Agent": "Final_Assignment_Template/1.0 (GAIA agent)"}
         try:
             # Drop protocol markers first, then trim the whitespace/quotes they leave behind.
             query = query.replace('TOOL:', '').replace('INPUT:', '').strip(' "')
             # Step 1: Search (the Action API lives at /w/api.php, not at the site root)
             r = requests.get("https://en.wikipedia.org/w/api.php", params={
                 "action": "query", "list": "search", "srsearch": query,
                 "format": "json", "srlimit": 1,
             }, headers=headers, timeout=10).json()
             if not r.get("query", {}).get("search"):
                 return "No Wikipedia page found. Try searching just 'Mercedes Sosa' or 'Dinosaur list'."
             title = r["query"]["search"][0]["title"]
             # Step 2: Summary. The title is a single path segment, so it is
             # percent-encoded in full (titles may contain '/', '?', ...).
             slug = quote(title.replace(' ', '_'), safe='')
             sum_r = requests.get(
                 f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}",
                 headers=headers, timeout=10,
             ).json()
             return f"Data from Wiki ({title}): {sum_r.get('extract', 'No summary found.')}"
         except Exception as e:
             # Best-effort tool: report the failure as text so the agent loop can continue.
             return f"Wiki Tool Error: {str(e)}"
     def youtube_captions(self, url):
         try:
     def execute_tool(self, tool, input_data, file_url):
         """Run the tool the model asked for and return its result as text.

         Args:
             tool: Tool name. Handled names are "wiki_search",
                 "youtube_captions", "read_image", "read_excel", "read_audio"
                 and "scrape_page".
             input_data: Raw tool input written by the model; surrounding
                 spaces and double quotes are stripped.
             file_url: URL of the task's attached file, or a falsy value when
                 the task has none.

         Returns:
             The tool's output string, an "Error: ..." string for unusable
             input, "Tool Execution Error: ..." if the tool raised, or
             "Unknown tool: <name>" for an unrecognised tool name.
         """
         # Local stdlib imports so the method does not depend on file-level imports changing.
         import os
         import tempfile
         from urllib.parse import urlparse

         input_data = input_data.strip(' "')
         # If the agent gives 'none' or any non-URL text for a file tool, use the real file_url.
         target = file_url if (not input_data or "http" not in input_data) else input_data
         try:
             if tool == "wiki_search":
                 return self.wiki_search(input_data)
             # Dispatched before the final fallback; it used to sit after the
             # "Unknown tool" return and could never run.
             if tool == "youtube_captions":
                 return self.youtube_captions(input_data)
             if tool in ("read_image", "read_excel", "read_audio"):
                 if not target:
                     return "Error: No file URL provided by the system for this task."
                 r = requests.get(target, timeout=20)
                 # Do not feed an HTTP error page to OCR / Excel / audio parsing.
                 r.raise_for_status()
                 # Take the extension from the URL *path* only. Splitting the whole
                 # URL on '.' breaks for extension-less URLs, where the last piece
                 # is part of the host/path and contains '/'.
                 suffix = os.path.splitext(urlparse(target).path)[1].lower()
                 fd, path = tempfile.mkstemp(suffix=suffix)
                 try:
                     with os.fdopen(fd, "wb") as f:
                         f.write(r.content)
                     if tool == "read_image":
                         with Image.open(path) as img:
                             return f"Text in Image: {pytesseract.image_to_string(img)}"
                     if tool == "read_excel":
                         return f"Excel Data: {pd.read_excel(path).to_string()[:4000]}"
                     return f"Transcript: {self.audio_model.transcribe(path)['text']}"
                 finally:
                     # Unique temp file per call, always cleaned up.
                     os.remove(path)
             if tool == "scrape_page":
                 if "http" not in input_data:
                     return "Error: Please provide a full URL to scrape."
                 soup = BeautifulSoup(requests.get(input_data, timeout=10).text, "html.parser")
                 return f"Webpage Text: {soup.get_text()[:4000]}"
         except Exception as e:
             # Best-effort: surface the failure as text so the agent loop can react to it.
             return f"Tool Execution Error: {str(e)}"
         return f"Unknown tool: {tool}"
     def agent_loop(self, question, file_url):
         memory = ""
         for step in range(5):
+            prompt = f"""You are a GAIA solver.
+FILE_URL: {file_url if file_url else 'None provided'}
+QUESTION: {question}
+HISTORY: {memory}
+INSTRUCTIONS:
+1. To see the chess board/image, use TOOL: read_image with INPUT: {file_url if file_url else 'none'}.
+2. For Mercedes Sosa/Dinosaurs, use TOOL: wiki_search.
+3. If the question is backwards, decode it, then answer.
+4. Respond with TOOL: name and INPUT: data OR FINAL: answer."""
             response = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 temperature=0,
+                messages=[{"role": "system", "content": "You are a helpful assistant. Use tools to find facts."},
                           {"role": "user", "content": prompt}]
             )
             if "FINAL:" in resp: return resp.split("FINAL:")[-1].strip()
+            t_match = re.search(r"TOOL:\s*(\w+)", resp, re.I)
+            i_match = re.search(r"INPUT:\s*(.*)", resp, re.I)
+            if t_match and i_match:
+                t_name = t_match.group(1).lower().strip()
+                t_input = i_match.group(1).strip()
+                result = self.execute_tool(t_name, t_input, file_url)
+                memory += f"\nStep {step}: {t_name} returned -> {result[:1000]}"
+            else:
+                memory += f"\nStep {step}: {resp} (Note: Use TOOL/INPUT format to use tools)"
         return "No answer found."