Spaces:
Runtime error
Runtime error
add transcribe_audio_file tool
Browse files
app.py
CHANGED
|
@@ -13,6 +13,8 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
| 13 |
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
|
| 14 |
from urllib.parse import urlparse, parse_qs
|
| 15 |
import json
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
# (Keep Constants as is)
|
|
@@ -20,6 +22,57 @@ import json
|
|
| 20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@tool
|
| 24 |
def get_youtube_transcript(video_url: str) -> str:
|
| 25 |
"""
|
|
@@ -87,7 +140,7 @@ class BasicAgent:
|
|
| 87 |
model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")
|
| 88 |
|
| 89 |
self.code_agent = ToolCallingAgent(
|
| 90 |
-
tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(),
|
| 91 |
get_youtube_transcript,
|
| 92 |
FinalAnswerTool()],
|
| 93 |
model=model,
|
|
|
|
| 13 |
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
|
| 14 |
from urllib.parse import urlparse, parse_qs
|
| 15 |
import json
|
| 16 |
+
import whisper
|
| 17 |
+
|
| 18 |
|
| 19 |
|
| 20 |
# (Keep Constants as is)
|
|
|
|
| 22 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 23 |
|
| 24 |
|
| 25 |
+
@tool
def transcribe_audio_file(file_path: str) -> str:
    """
    Transcribes a local MP3 audio file using Whisper.

    Args:
        file_path: Full path to the .mp3 audio file.

    Returns:
        A JSON-formatted string containing either the transcript or an error message.

        {
            "success": true,
            "transcript": [
                {"start": 0.0, "end": 5.2, "text": "Hello and welcome"},
                ...
            ]
        }

        OR

        {
            "success": false,
            "error": "Reason why transcription failed"
        }
    """
    try:
        # Validate the path before loading the model, so bad input fails fast.
        if not os.path.exists(file_path):
            return json.dumps({"success": False, "error": "File does not exist."})

        if not file_path.lower().endswith(".mp3"):
            return json.dumps({"success": False, "error": "Invalid file type. Only MP3 files are supported."})

        model = whisper.load_model("base")  # You can use 'tiny', 'base', 'small', 'medium', or 'large'
        result = model.transcribe(file_path, verbose=False, word_timestamps=False)

        # Keep only the timing and (whitespace-trimmed) text of each segment.
        transcript_data = [
            {
                "start": segment["start"],
                "end": segment["end"],
                "text": segment["text"].strip(),
            }
            for segment in result["segments"]
        ]

        return json.dumps({"success": True, "transcript": transcript_data})

    except Exception as e:
        # Any failure is reported in the JSON payload instead of being raised,
        # so the function always returns a string as its signature promises.
        return json.dumps({"success": False, "error": str(e)})
|
| 74 |
+
|
| 75 |
+
|
| 76 |
@tool
|
| 77 |
def get_youtube_transcript(video_url: str) -> str:
|
| 78 |
"""
|
|
|
|
| 140 |
model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")
|
| 141 |
|
| 142 |
self.code_agent = ToolCallingAgent(
|
| 143 |
+
tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(), transcribe_audio_file,
|
| 144 |
get_youtube_transcript,
|
| 145 |
FinalAnswerTool()],
|
| 146 |
model=model,
|