sergiosampayob committed on
Commit
8a5e4d4
·
1 Parent(s): 4e38b79

agent update: new custom tools and imports

Browse files
Files changed (2) hide show
  1. app.py +197 -5
  2. requirements.txt +8 -1
app.py CHANGED
@@ -1,9 +1,18 @@
 
1
  import os
 
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
6
- from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, OpenAIServerModel, LiteLLMModel
 
 
 
 
 
 
 
 
 
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
@@ -11,13 +20,196 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Basic Agent Definition ---
13
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  class BasicAgent:
15
  def __init__(self):
16
  print("BasicAgent initialized.")
17
  self.agent = CodeAgent(
18
- tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
19
  model=OpenAIServerModel(model_id="gpt-4o"),
 
 
 
 
 
 
 
 
 
20
  add_base_tools=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  )
22
  def __call__(self, question: str) -> str:
23
  print(f"Agent received question (first 50 chars): {question[:50]}...")
@@ -98,7 +290,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
98
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
99
 
100
  # 4. Prepare Submission
101
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
102
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
103
  print(status_update)
104
 
 
1
+ import requests
2
  import os
3
+
4
  import gradio as gr
 
 
5
  import pandas as pd
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, OpenAIServerModel, Tool
7
+ from youtube_transcript_api import YouTubeTranscriptApi
8
+ import whisper
9
+ from pytubefix import YouTube
10
+ from pytubefix.cli import on_progress
11
+ from bs4 import BeautifulSoup
12
+ import wikipediaapi
13
+ import cv2
14
+ import numpy as np
15
+
16
 
17
  # (Keep Constants as is)
18
  # --- Constants ---
 
20
 
21
  # --- Basic Agent Definition ---
22
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
23
class ImageLoaderTool(Tool):
    """Tool that downloads an image over HTTP(S) and decodes it with OpenCV."""

    name = "image_loader"
    # Adjacent literals are concatenated, so each sentence carries its own
    # trailing space; otherwise the sentences run together in the prompt.
    description = (
        "Loads an image from a given URL using cv2 and returns it as a numpy array. "
        "Input: URL of the image. "
        "Output: Image as a numpy array. "
        "Note: This tool requires the 'cv2' library to be installed."
    )
    inputs = {
        "image_url": {"type": "string", "description": "URL of the image."},
    }
    # smolagents only accepts its own type names here; "numpy.ndarray" is not
    # one of them and makes tool validation fail, so use the generic "array".
    output_type = "array"

    def forward(self, image_url: str) -> np.ndarray:
        """Download ``image_url`` and return the decoded image.

        Args:
            image_url: URL of the image; must start with ``http``.

        Returns:
            The image decoded by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

        Raises:
            ValueError: If the URL does not start with ``http``, the download
                fails, or the payload cannot be decoded as an image.
        """
        if not image_url.startswith("http"):
            raise ValueError(f"Invalid URL: {image_url}")
        try:
            # A timeout keeps an unresponsive host from hanging the agent.
            response = requests.get(image_url, timeout=30)
            # Fail on 4xx/5xx instead of trying to decode an error page.
            response.raise_for_status()
            buffer = np.frombuffer(response.content, np.uint8)
            image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        except Exception as e:
            raise ValueError(f"Error loading image: {e}") from e
        # cv2.imdecode signals undecodable data by returning None, not raising.
        if image is None:
            raise ValueError(
                f"Error loading image: could not decode image data from {image_url}"
            )
        return image
44
+
45
+
46
class SpeechToTextTool(Tool):
    """Tool that transcribes a local audio file using a Whisper model."""

    name = "speech_to_text"
    description = "Converts an audio file to text. "
    inputs = {
        "audio_file_path": {
            "type": "string",
            "description": "Path to the audio file.",
        },
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # The "base" Whisper model is loaded once per tool instance.
        self.model = whisper.load_model("base")

    def forward(self, audio_file_path: str) -> str:
        """Transcribe the audio file at ``audio_file_path``.

        Returns the ``"text"`` entry of the transcription result, or an empty
        string when that entry is absent.

        Raises:
            ValueError: If ``audio_file_path`` does not exist.
        """
        if os.path.exists(audio_file_path):
            transcription = self.model.transcribe(audio_file_path)
            return transcription.get("text", "")
        raise ValueError(f"Audio file not found: {audio_file_path}")
65
+
66
+
67
class YoutubeSubtitlesTranscriptTool(Tool):
    """Tool that returns a YouTube video's subtitles, falling back to audio transcription."""

    name = "youtube_subtitles_transcript"
    # Each sentence carries a trailing space so the concatenated description
    # does not run sentences together.
    description = (
        "Fetches the transcript of a YouTube video. "
        "Input: YouTube video URL. "
        "Output: Transcript text."
    )
    inputs = {
        "video_url": {"type": "string", "description": "YouTube video URL."},
    }
    output_type = "string"

    def forward(self, video_url: str) -> str:
        """Return the transcript text for ``video_url``.

        The subtitle transcript is tried first; if that fails for any reason,
        the audio is downloaded and transcribed via
        ``YoutubeAudioTranscriptTool``.

        Args:
            video_url: URL starting with ``https://www.youtube.com/watch?v=``.

        Raises:
            ValueError: If the URL is not a watch URL, carries no video id, or
                both the subtitle lookup and the audio fallback fail.
        """
        # Local import keeps this block self-contained (stdlib only).
        from urllib.parse import parse_qs, urlparse

        if not video_url.startswith("https://www.youtube.com/watch?v="):
            raise ValueError(f"Invalid YouTube URL: {video_url}")

        # Parse the query string instead of splitting on "v=": a plain split
        # keeps trailing parameters such as "&t=30s" or "&list=..." in the id.
        video_id = parse_qs(urlparse(video_url).query).get("v", [""])[0]
        if not video_id:
            raise ValueError(f"Invalid YouTube URL: {video_url}")

        try:
            # NOTE(review): newer youtube-transcript-api releases may no longer
            # expose the static get_transcript helper -- confirm against the
            # installed version; a failure here silently triggers the fallback.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            return " ".join(entry["text"] for entry in transcript)
        except Exception as transcript_error:
            # Best-effort: any subtitle failure falls through to the audio path.
            print(f"Transcript not available: {transcript_error}")

        try:
            # Fallback: download the audio and transcribe it.
            transcript_text = YoutubeAudioTranscriptTool().forward(video_url)
            print("Audio downloaded successfully.")
            return transcript_text
        except Exception as e:
            raise ValueError(
                f"Error downloading audio or converting to text: {e}"
            ) from e
97
+
98
+
99
class YoutubeAudioTranscriptTool(Tool):
    """Tool that downloads a YouTube video's media and transcribes it to text."""

    name = "youtube_audio_transcript"
    description = (
        "Downloads the audio from a YouTube video and converts it to text. "
        "Input: YouTube video URL."
    )
    inputs = {
        "video_url": {"type": "string", "description": "YouTube video URL."},
    }
    output_type = "string"

    def forward(self, video_url: str) -> str:
        """Download the media for ``video_url`` and return its transcription.

        Args:
            video_url: URL starting with ``https://www.youtube.com/watch?v=``.

        Returns:
            The text produced by ``SpeechToTextTool`` for the downloaded file.

        Raises:
            ValueError: If the URL is not a watch URL, no matching stream is
                available, or the download or transcription fails.
        """
        if not video_url.startswith("https://www.youtube.com/watch?v="):
            raise ValueError(f"Invalid YouTube URL: {video_url}")

        audio_file_path = None
        try:
            yt = YouTube(video_url, on_progress_callback=on_progress)
            audio_stream = yt.streams.filter(
                progressive=True, file_extension='mp4'
            ).first()
            # .first() yields None when nothing matches; report that clearly
            # instead of failing with an AttributeError on .download().
            if audio_stream is None:
                raise ValueError("no progressive mp4 stream available")
            audio_file_path = audio_stream.download(filename_prefix="audio_")
            # NOTE(review): a new SpeechToTextTool is built on every call,
            # which reloads the Whisper model each time.
            return SpeechToTextTool().forward(audio_file_path)
        except Exception as e:
            raise ValueError(
                f"Error downloading audio or converting to text: {e}"
            ) from e
        finally:
            # Always remove the downloaded file, including when transcription
            # raised, so failed runs do not leave files behind.
            if audio_file_path and os.path.exists(audio_file_path):
                os.remove(audio_file_path)
123
+
124
+
125
class WikipediaSearchTool(Tool):
    """Tool that returns the text of the English Wikipedia page titled by the query."""

    name = "wikipedia_search"
    # The description now matches what forward() does: it looks up the page
    # whose title is the query and returns the page text, not a summary of a
    # search result. Each sentence carries a trailing space so the
    # concatenated description does not run sentences together.
    description = (
        "Looks up the English Wikipedia page whose title matches the given query "
        "and returns the full text of that page. "
        "Input: Search query (page title). "
        "Output: Wikipedia article text."
    )
    inputs = {
        "query": {"type": "string", "description": "Search query."},
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        """Return the text of the Wikipedia page titled ``query``.

        Raises:
            ValueError: If no page exists for ``query``.
        """
        wiki_wiki = wikipediaapi.Wikipedia(
            user_agent='wikipedia_agent',
            language='en',
            extract_format=wikipediaapi.ExtractFormat.WIKI,
        )
        p_wiki = wiki_wiki.page(query)
        if not p_wiki.exists():
            raise ValueError(f"No Wikipedia page found for query: {query}")
        article_text = p_wiki.text
        # Kept from the original: echo the article to stdout for debugging.
        print(article_text)
        return article_text
148
+
149
+
150
class ParseURLTool(Tool):
    """Tool that fetches a webpage and returns the text of its ``<p>`` elements."""

    name = "parse_url"
    # Each sentence carries a trailing space so the concatenated description
    # does not run sentences together.
    description = (
        "Parses a URL and returns the text content of the webpage. "
        "Input: URL. "
        "Output: Text content of the webpage."
    )
    inputs = {
        "url": {"type": "string", "description": "URL to parse."},
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return the text of all ``<p>`` tags joined by commas.

        Args:
            url: Address of the page to fetch; must be non-empty.

        Raises:
            ValueError: If ``url`` is empty.
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
        """
        if not url:
            raise ValueError("URL cannot be empty.")
        # A timeout keeps an unresponsive host from hanging the agent.
        response = requests.get(url, timeout=30)
        # Surface 4xx/5xx responses instead of parsing an error page as content.
        response.raise_for_status()
        # Create a BeautifulSoup object from the returned HTML.
        soup = BeautifulSoup(response.text, 'html.parser')
        # Collect the text content of every <p> tag.
        webpage_text = ",".join(para.text for para in soup.select("p"))
        print(f"Webpage text:\n {webpage_text}")
        return webpage_text
182
+
183
+
184
  class BasicAgent:
185
  def __init__(self):
186
  print("BasicAgent initialized.")
187
  self.agent = CodeAgent(
 
188
  model=OpenAIServerModel(model_id="gpt-4o"),
189
+ tools=[
190
+ DuckDuckGoSearchTool(),
191
+ VisitWebpageTool(),
192
+ WikipediaSearchTool(),
193
+ YoutubeSubtitlesTranscriptTool(),
194
+ YoutubeAudioTranscriptTool(),
195
+ SpeechToTextTool(),
196
+ ParseURLTool(),
197
+ ],
198
  add_base_tools=True,
199
+ additional_authorized_imports=[
200
+ "re",
201
+ "requests",
202
+ "bs4",
203
+ "urllib",
204
+ "pytubefix",
205
+ "pytubefix.cli",
206
+ "youtube_transcript_api",
207
+ "wikipediaapi",
208
+ "whisper",
209
+ "pandas",
210
+ "cv2",
211
+ "numpy",
212
+ ],
213
  )
214
  def __call__(self, question: str) -> str:
215
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
290
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
291
 
292
  # 4. Prepare Submission
293
+ submission_data = {"username": username.strip(), "agent_code": "", "answers": answers_payload}
294
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
295
  print(status_update)
296
 
requirements.txt CHANGED
@@ -1,3 +1,10 @@
1
  gradio
2
  requests
3
- smolagents[all]
 
 
 
 
 
 
 
 
1
  gradio
2
  requests
3
+ smolagents[all]
4
+ openai-whisper
5
+ wikipedia-api
6
+ youtube-transcript-api
7
+ pytubefix
8
+ opencv-python
9
+ numpy
10
+ pandas