Update app.py
Browse files
app.py
CHANGED
|
@@ -75,25 +75,6 @@ def read_webpage(url: str) -> str:
|
|
| 75 |
return text[:15000]
|
| 76 |
except Exception as e:
|
| 77 |
return f"ERROR: could not read page. {type(e).__name__}: {e}."
|
| 78 |
-
|
| 79 |
-
@tool
|
| 80 |
-
def download_video_audio(url: str) -> str:
|
| 81 |
-
"""
|
| 82 |
-
Download audio from a video URL and return local file path.
|
| 83 |
-
"""
|
| 84 |
-
unique_id = str(uuid.uuid4())[:8]
|
| 85 |
-
output = f"video_{unique_id}.%(ext)s"
|
| 86 |
-
|
| 87 |
-
ydl_opts = {
|
| 88 |
-
"format": "bestaudio/best",
|
| 89 |
-
"outtmpl": output,
|
| 90 |
-
"quiet": True,
|
| 91 |
-
"noplaylist": True,
|
| 92 |
-
}
|
| 93 |
-
|
| 94 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 95 |
-
info = ydl.extract_info(url, download=True)
|
| 96 |
-
return ydl.prepare_filename(info)
|
| 97 |
|
| 98 |
|
| 99 |
# Shared dictionary to act as the agent's "RAM"
|
|
@@ -152,22 +133,9 @@ class BasicAgent:
|
|
| 152 |
return text
|
| 153 |
except Exception as e:
|
| 154 |
return "Could not transcribe audio. " + f"{type(e).__name__}: {e}."
|
| 155 |
-
|
| 156 |
-
@tool
|
| 157 |
-
def video_to_text(video_url: str) -> str:
|
| 158 |
-
"""Download video audio and return transcript."""
|
| 159 |
-
audio_path = download_video_audio.invoke(video_url)
|
| 160 |
-
try:
|
| 161 |
-
transcript = transcribe_audio.invoke(audio_path)
|
| 162 |
-
return transcript
|
| 163 |
-
except Exception as e:
|
| 164 |
-
return f"Cannot access the video at {video_url} due to network restrictions."
|
| 165 |
-
finally:
|
| 166 |
-
# Clean up the file
|
| 167 |
-
if os.path.exists(audio_path):
|
| 168 |
-
os.remove(audio_path)
|
| 169 |
|
| 170 |
-
self.tools = [web_search_tool, list_local_files, read_local_file, read_webpage,
|
| 171 |
self.chat_with_tools = self.chat.bind_tools(self.tools)
|
| 172 |
|
| 173 |
# Build Graph
|
|
@@ -202,12 +170,17 @@ class BasicAgent:
|
|
| 202 |
|
| 203 |
Instructions for web search:
|
| 204 |
1) DON'T invent URLs! If you are not explicitly given the URL, use the web_search tool to find it first, then use read_webpage to get the information from the URL.
|
|
|
|
| 205 |
Instructions for manipulating files:
|
| 206 |
-
1) If you are given a filename,
|
| 207 |
-
2)
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
Providing the answer:
|
| 213 |
1) YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
|
@@ -239,6 +212,7 @@ class BasicAgent:
|
|
| 239 |
- If it's a number, output ONLY the digits (no commas, no units like $ or % unless specified).
|
| 240 |
- If it's a string, output ONLY the string (no articles, no abbreviations).
|
| 241 |
- If it's a list, output ONLY a comma-separated list.
|
|
|
|
| 242 |
|
| 243 |
Text to extract from:
|
| 244 |
---
|
|
|
|
| 75 |
return text[:15000]
|
| 76 |
except Exception as e:
|
| 77 |
return f"ERROR: could not read page. {type(e).__name__}: {e}."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
# Shared dictionary to act as the agent's "RAM"
|
|
|
|
| 133 |
return text
|
| 134 |
except Exception as e:
|
| 135 |
return "Could not transcribe audio. " + f"{type(e).__name__}: {e}."
|
| 136 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
self.tools = [web_search_tool, list_local_files, read_local_file, read_webpage, transcribe_audio, load_excel_to_memory, python_exec]
|
| 139 |
self.chat_with_tools = self.chat.bind_tools(self.tools)
|
| 140 |
|
| 141 |
# Build Graph
|
|
|
|
| 170 |
|
| 171 |
Instructions for web search:
|
| 172 |
1) DON'T invent URLs! If you are not explicitly given the URL, use the web_search tool to find it first, then use read_webpage to get the information from the URL.
|
| 173 |
+
|
| 174 |
Instructions for manipulating files:
|
| 175 |
+
1) If you are given a filename, generate the file_path the following way: filename + "." + extension
|
| 176 |
+
2) If you are not given a filename, but the user mentions that the file is attached or provided, use list_local_files tool to find the file. If necessary, use read_local_file tool to read its content.
|
| 177 |
+
3) If unsure, you can guess the most likely file based on its filename and extension.
|
| 178 |
+
4) Don't try to search local files on the web.
|
| 179 |
+
|
| 180 |
+
Instructions for audio transcription:
|
| 181 |
+
1) If you are asked to extract information from video or audio, find the corresponding file in the folder and use transcribe_audio tool.
|
| 182 |
+
2) Video files are already downloaded as audio, DON'T try to download them from the web, even if you are given a URL. Find corresponding audio files in local directory instead.
|
| 183 |
+
3) In your answer, provide full information from the transcription, don't summarize or shorten it. Don't remove meaning-forming adjectives.
|
| 184 |
|
| 185 |
Providing the answer:
|
| 186 |
1) YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
|
|
|
| 212 |
- If it's a number, output ONLY the digits (no commas, no units like $ or % unless specified).
|
| 213 |
- If it's a string, output ONLY the string (no articles, no abbreviations).
|
| 214 |
- If it's a list, output ONLY a comma-separated list.
|
| 215 |
+
- If text already satisfies conditions above, output the same text.
|
| 216 |
|
| 217 |
Text to extract from:
|
| 218 |
---
|