Klass777 committed on
Commit
ff57f72
·
verified ·
1 Parent(s): e0d5650

add transcribe_audio_file tool

Browse files
Files changed (1) hide show
  1. app.py +54 -1
app.py CHANGED
@@ -13,6 +13,8 @@ from youtube_transcript_api import YouTubeTranscriptApi
13
  from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
14
  from urllib.parse import urlparse, parse_qs
15
  import json
 
 
16
 
17
 
18
  # (Keep Constants as is)
@@ -20,6 +22,57 @@ import json
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @tool
24
  def get_youtube_transcript(video_url: str) -> str:
25
  """
@@ -87,7 +140,7 @@ class BasicAgent:
87
  model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")
88
 
89
  self.code_agent = ToolCallingAgent(
90
- tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(), SpeechToTextTool(),
91
  get_youtube_transcript,
92
  FinalAnswerTool()],
93
  model=model,
 
13
  from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
14
  from urllib.parse import urlparse, parse_qs
15
  import json
16
+ import whisper
17
+
18
 
19
 
20
  # (Keep Constants as is)
 
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
 
24
 
25
@tool
def transcribe_audio_file(file_path: str) -> str:
    """
    Transcribes a local MP3 audio file using Whisper.

    The tool never raises: every failure (missing file, wrong extension, or
    any exception raised while loading the model or transcribing) is reported
    to the caller as a JSON error payload.

    Args:
        file_path: Full path to the .mp3 audio file.

    Returns:
        A JSON-formatted string containing either the transcript or an error message.

        {
            "success": true,
            "transcript": [
                {"start": 0.0, "end": 5.2, "text": "Hello and welcome"},
                ...
            ]
        }

        OR

        {
            "success": false,
            "error": "Reason why transcription failed"
        }
    """
    try:
        # Validate the input before paying the cost of loading the model.
        if not os.path.exists(file_path):
            return json.dumps({"success": False, "error": "File does not exist."})

        # The extension check is case-insensitive, so ".MP3" is accepted too.
        if not file_path.lower().endswith(".mp3"):
            return json.dumps({"success": False, "error": "Invalid file type. Only MP3 files are supported."})

        # NOTE(review): the model is loaded on every call; consider caching it
        # at module level if this tool is invoked repeatedly.
        model = whisper.load_model("base")  # You can use 'tiny', 'base', 'small', 'medium', or 'large'
        result = model.transcribe(file_path, verbose=False, word_timestamps=False)

        # Keep only the timing and the whitespace-trimmed text of each segment.
        transcript_data = [
            {
                "start": segment["start"],
                "end": segment["end"],
                "text": segment["text"].strip(),
            }
            for segment in result["segments"]
        ]

        return json.dumps({"success": True, "transcript": transcript_data})

    except Exception as e:
        # Tool boundary: surface the failure as data instead of crashing the agent run.
        # (The original line ended with a stray "}" which made the module a SyntaxError.)
        return json.dumps({"success": False, "error": str(e)})
74
+
75
+
76
  @tool
77
  def get_youtube_transcript(video_url: str) -> str:
78
  """
 
140
  model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")
141
 
142
  self.code_agent = ToolCallingAgent(
143
+ tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(), transcribe_audio_file,
144
  get_youtube_transcript,
145
  FinalAnswerTool()],
146
  model=model,