Final_Assignment_3

Sleeping

App Files Files Community

MohamedAliAmiraa commited on Aug 4, 2025

Commit

2a3a0a8

verified ·

1 Parent(s): d10e815

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -98

app.py CHANGED Viewed

@@ -22,10 +22,10 @@ except ImportError:
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Agent Definition: The 'Orchestrator' Strategy ---
 class BasicAgent:
     def __init__(self):
-        print("Initializing Orchestrator Agent...")
         try:
             self.llm = AzureChatOpenAI(
                 azure_endpoint="https://dsap.openai.azure.com/",
@@ -36,147 +36,168 @@ class BasicAgent:
             )
         except KeyError:
             raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
         print("Agent initialized.")
     # --- Tool Definitions ---
-    def search_and_browse(self, query: str) -> str:
-        """Searches the web with DuckDuckGo and browses the top results."""
-        print(f"Tool: search_and_browse, Query: {query}")
-        context = ""
         try:
             with DDGS() as ddgs:
-                results = [r for r in ddgs.text(query, max_results=3)]
-                if not results: return f"No results found for '{query}'."
-                for result in results:
-                    try:
-                        url = result['href']
-                        response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
-                        soup = BeautifulSoup(response.content, 'html.parser')
-                        text = ' '.join(soup.get_text().split())
-                        context += f"Source URL: {url}\nContent: {text[:1500]}\n\n"
-                    except Exception as e:
-                        context += f"Could not browse {url}: {e}\n\n"
-            return context
-        except Exception as e:
-            return f"Error during search: {e}"
-    def analyze_file(self, file_url: str) -> str:
-        """Downloads a file from a URL and extracts its content as text."""
-        print(f"Tool: analyze_file, URL: {file_url}")
         try:
-            response = requests.get(file_url)
-            response.raise_for_status()
-            if file_url.endswith('.xlsx'):
-                df = pd.read_excel(io.BytesIO(response.content))
-                return f"Excel file content:\n{df.to_string()}"
-            elif file_url.endswith('.py'):
-                return f"Python file content:\n{response.text}"
-            elif file_url.endswith(('.mp3', '.wav')):
-                # Audio processing is complex. For this final version, we will state the limitation clearly.
-                return "Limitation: Audio file detected. I cannot transcribe audio to determine its content. Please describe the audio if possible."
-            else: # Images, etc.
-                return "Limitation: This file type (e.g., image) cannot be analyzed. Please describe the content of the file."
-        except Exception as e:
-            return f"Error analyzing file: {e}"
-    def process_youtube(self, question: str) -> str:
-        """Extracts transcript from a YouTube URL in the question and returns it."""
-        print(f"Tool: process_youtube")
-        url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
-        if not url_match: return "No YouTube URL found."
         try:
-            video_id = url_match.group(1)
-            # This is the correct, static method call for the library
-            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-            return "YouTube Transcript: " + " ".join([item['text'] for item in transcript_list])
-        except Exception as e:
-            return f"Error processing YouTube transcript: {e}"
-    # --- Main Orchestrator Logic ---
-    def __call__(self, task: Dict[str, Any]) -> str:
-        question = task.get("question")
-        print(f"\n--- New Task ---\nQuestion: {question[:150]}...")
-        context = ""
-        # 1. Check for a file URL first
-        file_url = task.get("files", [None])[0]
-        if file_url:
-            context = self.analyze_file(file_url)
-        # 2. Check for a YouTube URL in the question text
-        elif "youtube.com" in question or "youtu.be" in question:
-            context = self.process_youtube(question)
-        # 3. Default to web search for everything else
-        else:
-            context = self.search_and_browse(query=question)
-        # 4. Final step: Use the gathered context to generate an answer
-        final_prompt = f"Based ONLY on the following context, provide a direct and concise answer to the user's question. Do not use any other information.\n\nContext:\n{context}\n\nQuestion:\n{question}"
         try:
-            final_answer = self.llm.invoke(final_prompt).content
-            print(f"Final Answer: {final_answer}")
-            return final_answer
-        except Exception as e:
-            return f"Error during final answer generation: {e}"
 # --- Your Original, Correct Submission and Gradio Code ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
-    if profile and profile.username:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
     try: agent = BasicAgent()
-    except Exception as e: return f"Error initializing agent: {e}\n\n{traceback.format_exc()}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
     try:
-        response = requests.get(questions_url, timeout=20)
         response.raise_for_status()
         questions_data = response.json()
     except Exception as e: return f"Error fetching questions: {e}", None
     results_log, answers_payload = [], []
     for item in questions_data:
-        task_id, question_text = item.get("task_id"), item.get("question")
-        if not task_id or question_text is None: continue
         try:
             submitted_answer = agent(item)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {traceback.format_exc()}"})
-    if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
-        final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
-                        f"Overall Score: {result_data.get('score', 'N/A')}% "
-                        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)")
         return final_status, pd.DataFrame(results_log)
-    except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
-# This is your original, correct interface structure that works.
 with gr.Blocks() as demo:
     gr.Markdown("# Agent Evaluation Runner")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    # The click event with NO 'inputs' argument.
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Agent Definition: A True ReAct Agent ---
 class BasicAgent:
     def __init__(self):
+        print("Initializing ReAct Agent...")
         try:
             self.llm = AzureChatOpenAI(
                 azure_endpoint="https://dsap.openai.azure.com/",
             )
         except KeyError:
             raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
+        self.tools = {
+            "search": self.search,
+            "browse": self.browse,
+            "python": self.python,
+            "youtube_transcript": self.youtube_transcript,
+        }
+        self.system_prompt = self._create_system_prompt()
         print("Agent initialized.")
+    def _create_system_prompt(self) -> str:
+        """Creates the master prompt that guides the ReAct agent."""
+        tool_docs = "\n".join([f"- {name}: {inspect.getdoc(func)}" for name, func in self.tools.items()])
+        return f"""
+You are a helpful assistant that answers questions by thinking step-by-step and using the tools provided.
+You have access to the following tools:
+{tool_docs}
+Follow this process:
+1.  **Thought:** Analyze the user's question and decide what to do.
+2.  **Action:** Choose ONE tool from the list: {", ".join(self.tools.keys())}.
+3.  **Action Input:** Provide the input for the chosen tool.
+4.  **Observation:** After you use a tool, you will see its output.
+5.  Repeat this Thought/Action/Action Input/Observation cycle until you are certain you have the final answer.
+6.  **Thought:** Conclude that you have the final answer.
+7.  **Final Answer:** Provide the final answer to the user.
+**Important Guidelines:**
+- For web-based questions, `search` first to get URLs, then `browse` the most relevant URL.
+- If a question provides a file URL, you MUST use the `python` tool to download and analyze it. Example: `requests.get(url).content`.
+- If you can answer the question directly without tools, do so immediately with a "Final Answer".
+Begin!
+"""
     # --- Tool Definitions ---
+    def search(self, query: str) -> str:
+        """Searches the web with DuckDuckGo to find relevant URLs."""
+        print(f"Tool: search, Query: {query}")
         try:
             with DDGS() as ddgs:
+                results = [r for r in ddgs.text(query, max_results=5)]
+            return str(results) if results else "No results found."
+        except Exception as e: return f"Error during search: {e}"
+    def browse(self, url: str) -> str:
+        """Gets the full, clean text content of a single webpage."""
+        print(f"Tool: browse, URL: {url}")
         try:
+            response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
+            soup = BeautifulSoup(response.content, 'html.parser')
+            return ' '.join(soup.get_text().split())[:4000] # Limit context size
+        except Exception as e: return f"Error browsing {url}: {e}"
+    def python(self, code: str) -> str:
+        """Executes Python code to analyze data or files. ALWAYS use this for file URLs."""
+        print(f"Tool: python, Code: {code}")
+        code = code.strip().strip("`").replace("python\n", "").strip()
+        buffer = io.StringIO()
         try:
+            safe_globals = {'pd': pd, 'np': np, 'requests': requests, 'io': io, 'librosa': librosa, 'sf': sf, 'openpyxl': openpyxl}
+            with redirect_stdout(buffer):
+                exec(code, safe_globals)
+            return f"Execution successful. Output:\n{buffer.getvalue()}"
+        except Exception as e: return f"Execution failed. Error:\n{traceback.format_exc()}"
+    def youtube_transcript(self, url: str) -> str:
+        """Fetches the transcript of a YouTube video."""
+        print(f"Tool: youtube_transcript, URL: {url}")
         try:
+            video_id = re.search(r"(?<=v=)[\w-]+", url).group(0)
+            return " ".join([item['text'] for item in YouTubeTranscriptApi.get_transcript(video_id)])
+        except Exception as e: return f"Error processing YouTube transcript: {e}"
+    # --- Main ReAct Loop ---
+    def __call__(self, task: Dict[str, Any]) -> str:
+        question = task.get("question", "")
+        if task.get("files"):
+            question += f"\nFile available at: {task['files'][0]}"
+        prompt = f"{self.system_prompt}\nQuestion: {question}\nThought:"
+        history = ""
+        for i in range(8): # Max 8 steps
+            print(f"--- Step {i+1} ---")
+            full_prompt = prompt + history
+            llm_response = self.llm.invoke(full_prompt).content.strip()
+            history += f"{llm_response}"
+            final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.DOTALL)
+            if final_answer_match:
+                print(f"Final Answer Found: {final_answer_match.group(1)}")
+                return final_answer_match.group(1).strip()
+            action_match = re.search(r"Action:\s*(\w+)\s*Action Input:\s*(.*)", llm_response, re.DOTALL)
+            if action_match:
+                tool_name = action_match.group(1).strip()
+                tool_input = action_match.group(2).strip(' \n"`')
+                if tool_name in self.tools:
+                    try:
+                        tool_result = self.tools[tool_name](tool_input)
+                    except Exception as e:
+                        tool_result = f"Error calling tool {tool_name}: {e}"
+                else:
+                    tool_result = f"Error: Unknown tool '{tool_name}'."
+                history += f"\nObservation: {tool_result}\nThought:"
+            else:
+                # If the agent doesn't format an action, just return its last thought.
+                return llm_response
+        return "Agent could not reach a final answer after multiple steps."
 # --- Your Original, Correct Submission and Gradio Code ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+    if not (profile and profile.username):
         return "Please Login to Hugging Face with the button.", None
+    username = profile.username
+    print(f"User logged in: {username}")
     try: agent = BasicAgent()
+    except Exception as e: return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
     try:
+        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
         response.raise_for_status()
         questions_data = response.json()
     except Exception as e: return f"Error fetching questions: {e}", None
     results_log, answers_payload = [], []
     for item in questions_data:
+        if not (task_id := item.get("task_id")): continue
         try:
             submitted_answer = agent(item)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         except Exception as e:
+            submitted_answer = f"AGENT ERROR: {traceback.format_exc()}"
+        results_log.append({"Task ID": task_id, "Question": item.get("question"), "Submitted Answer": submitted_answer})
+    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
     try:
+        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
+        final_status = (f"Submission Successful! Score: {result_data.get('score', 'N/A')}%")
         return final_status, pd.DataFrame(results_log)
+    except Exception as e:
+        return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
     gr.Markdown("# Agent Evaluation Runner")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # The correct call with NO 'inputs' argument.
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":