Final_Assignment_Template

Sleeping

App Files Files Community

sergiosampayob committed on Apr 26, 2025

Commit

8a5e4d4

1 Parent(s): 4e38b79

agent update: new custom tools and imports

Browse files

Files changed (2) hide show

app.py +197 -5
requirements.txt +8 -1

app.py CHANGED Viewed

@@ -1,9 +1,18 @@
 import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
-from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, OpenAIServerModel, LiteLLMModel
 # (Keep Constants as is)
 # --- Constants ---
@@ -11,13 +20,196 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         self.agent = CodeAgent(
-            tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
             model=OpenAIServerModel(model_id="gpt-4o"),
             add_base_tools=True,
         )
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
@@ -98,7 +290,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)

+import requests
 import os
 import gradio as gr
 import pandas as pd
+from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, OpenAIServerModel, Tool
+from youtube_transcript_api import YouTubeTranscriptApi
+import whisper
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
+from bs4 import BeautifulSoup
+import wikipediaapi
+import cv2
+import numpy as np
 # (Keep Constants as is)
 # --- Constants ---
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+class ImageLoaderTool(Tool):
+    name = "image_loader"
+    description = (
+        "Loads an image from a given URL using cv2 and returns it as a numpy array. "
+        "Input: URL of the image."
+        "Output: Image as a numpy array."
+        "Note: This tool requires the 'cv2' library to be installed."
+    )
+    inputs = {
+        "image_url": {"type": "string", "description": "URL of the image."},
+    }
+    output_type = "numpy.ndarray"
+    def forward(self, image_url: str) -> str:
+        if not image_url.startswith("http"):
+            raise ValueError(f"Invalid URL: {image_url}")
+        try:
+            response = requests.get(image_url)
+            image = cv2.imdecode(np.frombuffer(response.content, np.uint8), cv2.IMREAD_COLOR)
+            return image
+        except Exception as e:
+            raise ValueError(f"Error loading image: {e}")
+class SpeechToTextTool(Tool):
+    name = "speech_to_text"
+    description = (
+        "Converts an audio file to text. "
+    )
+    inputs = {
+        "audio_file_path": {"type": "string", "description": "Path to the audio file."},
+    }
+    output_type = "string"
+    def __init__(self):
+        super().__init__()
+        self.model = whisper.load_model("base")
+    def forward(self, audio_file_path: str) -> str:
+        if not os.path.exists(audio_file_path):
+            raise ValueError(f"Audio file not found: {audio_file_path}")
+        result = self.model.transcribe(audio_file_path)
+        return result.get("text", "")
+class YoutubeSubtitlesTranscriptTool(Tool):
+    name = "youtube_subtitles_transcript"
+    description = (
+        "Fetches the transcript of a YouTube video. "
+        "Input: YouTube video URL."
+        "Output: Transcript text."
+    )
+    inputs = {
+        "video_url": {"type": "string", "description": "YouTube video URL."},
+    }
+    output_type = "string"
+    def forward(self, video_url: str) -> str:
+        if not video_url.startswith("https://www.youtube.com/watch?v="):
+            raise ValueError(f"Invalid YouTube URL: {video_url}")
+        video_id = video_url.split("v=")[-1]
+        try:
+            transcript = YouTubeTranscriptApi.get_transcript(video_id)
+            transcript_text = " ".join([entry["text"] for entry in transcript])
+            return transcript_text
+        except Exception as transcript_error:
+            print(f"Transcript not available: {transcript_error}")
+            try:
+                # Fallback: Download audio for processing
+                youtube_audio_transcript_tool = YoutubeAudioTranscriptTool()
+                transcript_text = youtube_audio_transcript_tool.forward(video_url)
+                print("Audio downloaded successfully.")
+                return transcript_text  # Assuming the tool returns some text representation
+            except Exception as e:
+                raise ValueError(f"Error downloading audio or converting to text: {e}")
+class YoutubeAudioTranscriptTool(Tool):
+    name = "youtube_audio_transcript"
+    description = (
+        "Downloads the audio from a YouTube video and converts it to text. "
+        "Input: YouTube video URL."
+    )
+    inputs = {
+        "video_url": {"type": "string", "description": "YouTube video URL."},
+    }
+    output_type = "string"
+    def forward(self, video_url: str) -> str:
+        if not video_url.startswith("https://www.youtube.com/watch?v="):
+            raise ValueError(f"Invalid YouTube URL: {video_url}")
+        try:
+            yt = YouTube(video_url, on_progress_callback=on_progress)
+            audio_stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
+            audio_file_path = audio_stream.download(filename_prefix="audio_")
+            speech_to_text_tool = SpeechToTextTool()
+            transcript = speech_to_text_tool.forward(audio_file_path)
+            os.remove(audio_file_path)  # Clean up the downloaded file
+            return transcript
+        except Exception as e:
+            raise ValueError(f"Error downloading audio or converting to text: {e}")
+class WikipediaSearchTool(Tool):
+    name = "wikipedia_search"
+    description = (
+        "Searches Wikipedia for a given query and returns the summary of the first result."
+        "Input: Search query."
+        "Output: Wikipedia article."
+    )
+    inputs = {
+        "query": {"type": "string", "description": "Search query."},
+    }
+    output_type = "string"
+    def forward(self, query: str) -> str:
+        wiki_wiki = wikipediaapi.Wikipedia(
+            user_agent='wikipedia_agent',
+            language='en',
+            extract_format=wikipediaapi.ExtractFormat.WIKI
+        )
+        p_wiki = wiki_wiki.page(query)
+        if not p_wiki.exists():
+            raise ValueError(f"No Wikipedia page found for query: {query}")
+        print(p_wiki.text)
+        return p_wiki.text
+class ParseURLTool(Tool):
+    name = "parse_url"
+    description = (
+        "Parses a URL and returns the text content of the webpage."
+        "Input: URL."
+        "Output: Text content of the webpage."
+    )
+    inputs = {
+        "url": {"type": "string", "description": "URL to parse."},
+    }
+    output_type = "string"
+    def forward(self, url: str) -> str:
+        if not url:
+            raise ValueError("URL cannot be empty.")
+        # Fetch the HTML content
+        response = requests.get(url)
+        # Retrieve the HTML content
+        html = response.text
+        # Create a BesutifulSoup Object
+        soup = BeautifulSoup(html, 'html.parser')
+        # Select all <p> tags
+        paragraphs = soup.select("p")
+        webpage_text_list = []
+        for para in paragraphs:
+            # Get the text content of each <p> tag
+            text = para.text
+            webpage_text_list.append(text)
+        webpage_text = ",".join(webpage_text_list)
+        print(f"Webpage text:\n {webpage_text}")
+        return webpage_text
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         self.agent = CodeAgent(
             model=OpenAIServerModel(model_id="gpt-4o"),
+            tools=[
+                DuckDuckGoSearchTool(),
+                VisitWebpageTool(),
+                WikipediaSearchTool(),
+                YoutubeSubtitlesTranscriptTool(),
+                YoutubeAudioTranscriptTool(),
+                SpeechToTextTool(),
+                ParseURLTool(),
+                ],
             add_base_tools=True,
+            additional_authorized_imports=[
+                "re",
+                "requests",
+                "bs4",
+                "urllib",
+                "pytubefix",
+                "pytubefix.cli",
+                "youtube_transcript_api",
+                "wikipediaapi",
+                "whisper",
+                "pandas",
+                "cv2",
+                "numpy",
+            ],
         )
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": "", "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)

requirements.txt CHANGED Viewed

@@ -1,3 +1,10 @@
 gradio
 requests
-smolagents[all]

 gradio
 requests
+smolagents[all]
+openai-whisper
+wikipedia-api
+youtube-transcript-api
+pytubefix
+opencv-python
+numpy
+pandas