Fnu Mahnoor committed on
Commit
77a418a
·
1 Parent(s): 67b11dc

fix youtube video url

Browse files
Files changed (1) hide show
  1. src/handlers/transcription_handler.py +107 -38
src/handlers/transcription_handler.py CHANGED
@@ -29,52 +29,121 @@ def transcribe_file(file_path):
29
  logging.info(f"File transcription result: {transcript[:100]}...")
30
  return transcript
31
 
32
- def transcribe_video_url(url):
33
- logging.info(f"transcribe_video_url called with: {url}")
34
- if not url:
35
- return "No URL provided"
36
 
37
- # Define a clean output path
38
- # Using a fixed name in a temp directory prevents 'ffprobe' access errors
39
- output_dir = "temp_downloads"
40
- os.makedirs(output_dir, exist_ok=True)
41
- temp_audio_path = os.path.join(output_dir, "youtube_audio.wav")
42
 
43
- ffmpeg_bin = get_ffmpeg_path()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- ydl_opts = {
46
- 'format': 'bestaudio/best',
47
- # FIX: yt-dlp needs the template to NOT include the extension if using ExtractAudio
48
- 'outtmpl': os.path.join(output_dir, 'youtube_audio'),
49
- 'ffmpeg_location': ffmpeg_bin, # Explicitly tell yt-dlp where ffmpeg is
50
- 'postprocessors': [{
51
- 'key': 'FFmpegExtractAudio',
52
- 'preferredcodec': 'wav',
53
- 'preferredquality': '192',
54
- }],
55
- 'quiet': False, # Set to False temporarily to debug if it fails again
56
- }
57
 
58
  try:
59
- logging.info("Downloading and extracting audio")
60
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
61
- ydl.download([url])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # After post-processing, the file will be 'youtube_audio.wav'
64
- final_wav_path = temp_audio_path
65
 
66
- if not os.path.exists(final_wav_path):
67
- raise FileNotFoundError(f"FFmpeg failed to create {final_wav_path}")
68
 
69
- logging.info("Audio downloaded, transcribing")
70
- transcript = transcribe_file(final_wav_path)
71
 
72
- # Clean up
73
- if os.path.exists(final_wav_path):
74
- os.remove(final_wav_path)
75
 
76
- return transcript
77
 
78
- except Exception as e:
79
- logging.error(f"Error processing video: {str(e)}")
80
- return f"Error processing video: {str(e)}"
 
29
  logging.info(f"File transcription result: {transcript[:100]}...")
30
  return transcript
31
 
 
 
 
 
32
 
 
 
 
 
 
33
 
34
+ import os
35
+ import logging
36
+ import yt_dlp
37
+ import tempfile
38
+
39
+ def transcribe_audio(source=None, is_url=False):
40
+ """
41
+ Transcribe audio either from a file upload or from a YouTube URL.
42
+
43
+ Parameters:
44
+ source (str): File path if `is_url=False` or YouTube URL if `is_url=True`.
45
+ is_url (bool): True if source is a YouTube URL, False if it's a local file.
46
+
47
+ Returns:
48
+ str: Transcription text or error message.
49
+ """
50
+ logging.info(f"transcribe_audio called with source: {source}, is_url={is_url}")
51
 
52
+ if not source:
53
+ return "No source provided"
 
 
 
 
 
 
 
 
 
 
54
 
55
  try:
56
+ if is_url:
57
+ # YouTube download mode
58
+ ffmpeg_bin = get_ffmpeg_path()
59
+
60
+ with tempfile.TemporaryDirectory() as temp_dir:
61
+ temp_audio_path = os.path.join(temp_dir, "youtube_audio.wav")
62
+
63
+ ydl_opts = {
64
+ 'format': 'bestaudio/best',
65
+ 'outtmpl': os.path.join(temp_dir, 'youtube_audio'),
66
+ 'ffmpeg_location': ffmpeg_bin,
67
+ 'postprocessors': [{
68
+ 'key': 'FFmpegExtractAudio',
69
+ 'preferredcodec': 'wav',
70
+ 'preferredquality': '192',
71
+ }],
72
+ 'quiet': False,
73
+ }
74
+
75
+ logging.info("Downloading and extracting audio from YouTube")
76
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
77
+ ydl.download([source])
78
+
79
+ if not os.path.exists(temp_audio_path):
80
+ raise FileNotFoundError(f"FFmpeg failed to create {temp_audio_path}")
81
+
82
+ logging.info("Audio downloaded, transcribing")
83
+ transcript = transcribe_file(temp_audio_path)
84
+ return transcript
85
+
86
+ else:
87
+ # Local file mode (works on Spaces and locally)
88
+ if not os.path.exists(source):
89
+ return f"File does not exist: {source}"
90
+
91
+ logging.info("Transcribing local audio file")
92
+ transcript = transcribe_file(source)
93
+ return transcript
94
+
95
+ except Exception as e:
96
+ logging.error(f"Error processing audio: {str(e)}")
97
+ return f"Error processing audio: {str(e)}"
98
+
99
+
100
+
101
+ # def transcribe_video_url(url):
102
+ # logging.info(f"transcribe_video_url called with: {url}")
103
+ # if not url:
104
+ # return "No URL provided"
105
+
106
+ # # Define a clean output path
107
+ # # Using a fixed name in a temp directory prevents 'ffprobe' access errors
108
+ # output_dir = "temp_downloads"
109
+ # os.makedirs(output_dir, exist_ok=True)
110
+ # temp_audio_path = os.path.join(output_dir, "youtube_audio.wav")
111
+
112
+ # ffmpeg_bin = get_ffmpeg_path()
113
+
114
+ # ydl_opts = {
115
+ # 'format': 'bestaudio/best',
116
+ # # FIX: yt-dlp needs the template to NOT include the extension if using ExtractAudio
117
+ # 'outtmpl': os.path.join(output_dir, 'youtube_audio'),
118
+ # 'ffmpeg_location': ffmpeg_bin, # Explicitly tell yt-dlp where ffmpeg is
119
+ # 'postprocessors': [{
120
+ # 'key': 'FFmpegExtractAudio',
121
+ # 'preferredcodec': 'wav',
122
+ # 'preferredquality': '192',
123
+ # }],
124
+ # 'quiet': False, # Set to False temporarily to debug if it fails again
125
+ # }
126
+
127
+ # try:
128
+ # logging.info("Downloading and extracting audio")
129
+ # with yt_dlp.YoutubeDL(ydl_opts) as ydl:
130
+ # ydl.download([url])
131
 
132
+ # # After post-processing, the file will be 'youtube_audio.wav'
133
+ # final_wav_path = temp_audio_path
134
 
135
+ # if not os.path.exists(final_wav_path):
136
+ # raise FileNotFoundError(f"FFmpeg failed to create {final_wav_path}")
137
 
138
+ # logging.info("Audio downloaded, transcribing")
139
+ # transcript = transcribe_file(final_wav_path)
140
 
141
+ # # Clean up
142
+ # if os.path.exists(final_wav_path):
143
+ # os.remove(final_wav_path)
144
 
145
+ # return transcript
146
 
147
+ # except Exception as e:
148
+ # logging.error(f"Error processing video: {str(e)}")
149
+ # return f"Error processing video: {str(e)}"