Final_Assignment_3

Sleeping

App Files Files Community

MohamedAliAmiraa committed on Aug 4, 2025

Commit

d10e815

verified ·

1 Parent(s): 20ff901

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -76

app.py CHANGED Viewed

@@ -22,87 +22,109 @@ except ImportError:
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Agent Definition: The 'Router' Strategy ---
 class BasicAgent:
     def __init__(self):
-        print("Initializing Agent...")
         try:
             self.llm = AzureChatOpenAI(
                 azure_endpoint="https://dsap.openai.azure.com/",
                 api_key=os.environ["AZURE_API_KEY"],
                 azure_deployment="GPT4o-INTERNSHIP",
                 api_version="2024-08-01-preview",
-                temperature=0.0, max_retries=3,
             )
         except KeyError:
             raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
         print("Agent initialized.")
-    def __call__(self, task: Dict[str, Any]) -> str:
-        question = task.get("question")
-        print(f"\n--- New Task ---\nQuestion: {question[:100]}...")
-        # STRATEGY 1: Handle YouTube URLs directly
-        if "youtube.com" in question or "youtu.be" in question:
-            url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
-            if url_match:
-                try:
-                    video_id = url_match.group(1)
-                    transcript = " ".join([item['text'] for item in YouTubeTranscriptApi.get_transcript(video_id)])
-                    prompt = f"Based on the following transcript, please answer the question.\n\nTranscript:\n{transcript[:4000]}\n\nQuestion:\n{question}"
-                    return self.llm.invoke(prompt).content
-                except Exception as e: return f"Error processing YouTube video: {e}"
-        # STRATEGY 2: Handle File Attachments directly
-        file_url = task.get("files", [None])[0]
-        if file_url:
-            print(f"File detected: {file_url}")
-            code_to_run = ""
             if file_url.endswith('.xlsx'):
-                code_to_run = f"import pandas as pd; df = pd.read_excel('{file_url}'); print(df.to_string())"
-            elif file_url.endswith(('.mp3', '.wav')):
-                 # Inform the LLM that audio processing is complex and ask for confirmation
-                 return "This question requires analyzing an audio file. This can be time-consuming and complex. Please confirm if I should proceed with downloading and analyzing the audio."
             elif file_url.endswith('.py'):
-                code_to_run = f"import requests; r = requests.get('{file_url}'); print(r.text)"
-            else: # For images or other file types
-                return "I cannot directly analyze images or this file type. Please describe the content of the file if possible."
-            # Execute the generated code for Excel or Python files
-            buffer = io.StringIO()
-            try:
-                with redirect_stdout(buffer):
-                    exec(code_to_run, {'pd': pd, 'requests': requests, 'io': io})
-                file_content = buffer.getvalue()
-                prompt = f"The content of the file has been extracted as follows:\n\n{file_content}\n\nPlease use this content to answer the original question.\n\nQuestion:\n{question}"
-                return self.llm.invoke(prompt).content
-            except Exception as e:
-                return f"Failed to execute Python code for file analysis. Error: {e}"
-        # STRATEGY 3: Default to Web Search for all other questions
         try:
-            with DDGS() as ddgs:
-                results = [r for r in ddgs.text(f"{question}", max_results=3)]
-                if not results: return "Could not find information on the web."
-                context = ""
-                for result in results:
-                    context += f"Title: {result['title']}\nURL: {result['href']}\nSnippet: {result['body']}\n\n"
-                prompt = f"Based on the following search results, please provide a direct and concise answer to the question.\n\nSearch Results:\n{context}\n\nQuestion:\n{question}"
-                return self.llm.invoke(prompt).content
         except Exception as e:
-            return f"An error occurred during web search: {e}"
-# --- Your Original Submission and Gradio Code ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
-    """
     space_id = os.getenv("SPACE_ID")
-    if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
@@ -112,7 +134,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
     try: agent = BasicAgent()
-    except Exception as e: return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
@@ -131,7 +153,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
@@ -147,28 +169,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return final_status, pd.DataFrame(results_log)
     except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1. Please clone this space, then modify the code...
-        2. Log in to your Hugging Face account using the button below...
-        3. Click 'Run Evaluation & Submit All Answers'...
-        """
-    )
-    # This is your original, correct interface structure
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    # The click event with NO 'inputs' argument. This is the correct way.
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Agent Definition: The 'Orchestrator' Strategy ---
 class BasicAgent:
+    """Agent that gathers context with one tool, then asks the LLM for the answer.
+    Exactly one context source is used per task, in priority order: an
+    attached file, a YouTube transcript, or a web search. Tool methods report
+    failures in their returned text instead of raising.
+    """
     def __init__(self):
+        """Create the Azure OpenAI chat client used to produce final answers.
+        Raises:
+            KeyError: if the AZURE_API_KEY environment variable is not set.
+        """
+        print("Initializing Orchestrator Agent...")
         try:
             self.llm = AzureChatOpenAI(
                 azure_endpoint="https://dsap.openai.azure.com/",
                 api_key=os.environ["AZURE_API_KEY"],
                 azure_deployment="GPT4o-INTERNSHIP",
                 api_version="2024-08-01-preview",
+                temperature=0.0, max_retries=2,
             )
         except KeyError:
             raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
         print("Agent initialized.")
+    # --- Tool Definitions ---
+    def search_and_browse(self, query: str) -> str:
+        """Search DuckDuckGo for `query` and return text scraped from the top hits.
+        Up to three result pages are fetched; the first 1500 characters of each
+        page's whitespace-normalised text are kept. Per-page and search-level
+        errors are described in the returned string.
+        """
+        print(f"Tool: search_and_browse, Query: {query}")
+        sections = []
+        try:
+            with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=3))
+                if not results: return f"No results found for '{query}'."
+                for result in results:
+                    # Bind the URL before the try block so the error message
+                    # below can never use an unbound or stale `url`.
+                    url = result.get('href')
+                    if not url:
+                        continue
+                    try:
+                        response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
+                        soup = BeautifulSoup(response.content, 'html.parser')
+                        text = ' '.join(soup.get_text().split())
+                        sections.append(f"Source URL: {url}\nContent: {text[:1500]}\n\n")
+                    except Exception as e:
+                        sections.append(f"Could not browse {url}: {e}\n\n")
+            # Every hit lacked a URL: report that instead of an empty context.
+            return "".join(sections) or f"No results found for '{query}'."
+        except Exception as e:
+            return f"Error during search: {e}"
+    def analyze_file(self, file_url: str) -> str:
+        """Download `file_url` and return its content, or a limitation notice, as text.
+        Excel files are rendered with DataFrame.to_string(), Python files are
+        returned verbatim, and audio or other types yield a fixed limitation
+        message. Download and parse errors are described in the returned string.
+        """
+        # Local import: the file's top-level import block is not visible here.
+        from urllib.parse import urlparse
+        print(f"Tool: analyze_file, URL: {file_url}")
+        try:
+            # Timeout so an unresponsive host cannot stall the whole run.
+            response = requests.get(file_url, timeout=30)
+            response.raise_for_status()
+            # Compare extensions case-insensitively against both the raw URL and
+            # its path, so "DATA.XLSX" and "data.xlsx?token=..." are recognised.
+            names = (file_url.lower(), urlparse(file_url).path.lower())
+            if any(name.endswith('.xlsx') for name in names):
+                df = pd.read_excel(io.BytesIO(response.content))
+                return f"Excel file content:\n{df.to_string()}"
+            elif any(name.endswith('.py') for name in names):
+                return f"Python file content:\n{response.text}"
+            elif any(name.endswith(('.mp3', '.wav')) for name in names):
+                # Audio transcription is not implemented; state the limitation clearly.
+                return "Limitation: Audio file detected. I cannot transcribe audio to determine its content. Please describe the audio if possible."
+            else: # Images, etc.
+                return "Limitation: This file type (e.g., image) cannot be analyzed. Please describe the content of the file."
+        except Exception as e:
+            return f"Error analyzing file: {e}"
+    def process_youtube(self, question: str) -> str:
+        """Return the transcript of the first YouTube video linked in `question`.
+        Recognises youtube.com/watch?v=<id> and youtu.be/<id> links. Returns
+        "No YouTube URL found." when there is no such link, and an error
+        description when the transcript cannot be fetched.
+        """
+        print("Tool: process_youtube")
+        url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
+        if not url_match: return "No YouTube URL found."
         try:
+            video_id = url_match.group(1)
+            # Older youtube-transcript-api releases expose a static
+            # get_transcript(); newer ones replaced it with an instance-level
+            # fetch() that yields snippet objects. Support both.
+            if hasattr(YouTubeTranscriptApi, "get_transcript"):
+                segments = [item['text'] for item in YouTubeTranscriptApi.get_transcript(video_id)]
+            else:
+                segments = [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]
+            return "YouTube Transcript: " + " ".join(segments)
         except Exception as e:
+            return f"Error processing YouTube transcript: {e}"
+    # --- Main Orchestrator Logic ---
+    def __call__(self, task: Dict[str, Any]) -> str:
+        """Answer one task: gather context with a single tool, then query the LLM.
+        `task` is read for "question" (text) and an optional "files" entry whose
+        first element is treated as a file URL. Returns the LLM's answer, or a
+        short error/notice string when no answer could be produced.
+        """
+        question = str(task.get("question") or "")
+        if not question.strip():
+            return "No question provided."
+        print(f"\n--- New Task ---\nQuestion: {question[:150]}...")
+        # 1. Check for a file URL first. "files" may be missing, None, empty,
+        #    or a single URL string, so normalise before indexing.
+        files = task.get("files") or []
+        if isinstance(files, str):
+            files = [files]
+        file_url = files[0] if files else None
+        if file_url:
+            context = self.analyze_file(file_url)
+        # 2. Check for a YouTube URL in the question text
+        elif "youtube.com" in question or "youtu.be" in question:
+            context = self.process_youtube(question)
+        # 3. Default to web search for everything else
+        else:
+            context = self.search_and_browse(query=question)
+        # 4. Final step: Use the gathered context to generate an answer
+        final_prompt = f"Based ONLY on the following context, provide a direct and concise answer to the user's question. Do not use any other information.\n\nContext:\n{context}\n\nQuestion:\n{question}"
+        try:
+            final_answer = self.llm.invoke(final_prompt).content
+            print(f"Final Answer: {final_answer}")
+            return final_answer
+        except Exception as e:
+            return f"Error during final answer generation: {e}"
+# --- Your Original, Correct Submission and Gradio Code ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+    if profile and profile.username:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
     api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
     try: agent = BasicAgent()
+    except Exception as e: return f"Error initializing agent: {e}\n\n{traceback.format_exc()}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {traceback.format_exc()}"})
     if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
         return final_status, pd.DataFrame(results_log)
     except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
+# Gradio UI: title, login button, run button, status box, and results table.
 with gr.Blocks() as demo:
+    gr.Markdown("# Agent Evaluation Runner")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # click() is given no 'inputs'; presumably Gradio supplies the gr.OAuthProfile argument from the login state (TODO confirm).
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)