Spaces:
Sleeping
Sleeping
| import os | |
| import subprocess | |
| import tempfile | |
| import yt_dlp | |
| import torch | |
| from transformers import pipeline | |
| from logging_config import logger, log_buffer | |
# Run inference on the GPU when CUDA is present; otherwise stay on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def convert_audio_to_wav(input_file: str, output_file: str) -> str:
    """Re-encode ``input_file`` as a 16 kHz mono WAV at ``output_file``.

    Shells out to ffmpeg (which must be on PATH), overwriting any existing
    output file. Returns the output path so calls can be chained.
    Raises ``subprocess.CalledProcessError`` if ffmpeg exits non-zero.
    """
    logger.info(f"Converting {input_file} to WAV: {output_file}")
    # -y: overwrite without prompting; -ar 16000 / -ac 1: Whisper-friendly
    # 16 kHz mono output.
    ffmpeg_args = ["ffmpeg", "-y", "-i", input_file]
    ffmpeg_args += ["-ar", "16000", "-ac", "1", output_file]
    subprocess.run(ffmpeg_args, check=True)
    return output_file
def fallback_whisper_transcription(youtube_url: str):
    """Download a YouTube video's audio and transcribe it with Whisper.

    Generator (not a plain function): yields ``(transcript, logs)`` tuples so
    a UI can stream progress. Intermediate yields carry an empty transcript
    with the accumulated log text; the final yield carries the full
    transcription. On any failure the error message is yielded as the
    transcript instead of raising, so callers never see an exception.
    """
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create temp dir
            logger.info("")
            logger.info(f"Created temporary directory: {tmpdir}")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Download best audio
            logger.info("Downloading best audio via yt-dlp...")
            logger.info("")
            yield "", log_buffer.getvalue()
            download_path = os.path.join(tmpdir, "audio.%(ext)s")
            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': download_path,
                'quiet': True,
                'postprocessors': []
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])
            logger.info("Audio downloaded. Locating the audio file in the temp folder...")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Confirm the download exists. Restrict the search to the
            # "audio.*" name requested via outtmpl instead of taking an
            # arbitrary os.listdir() entry (listdir order is unspecified).
            downloaded_files = [
                name for name in os.listdir(tmpdir) if name.startswith("audio.")
            ]
            if not downloaded_files:
                raise RuntimeError("No audio file was downloaded via yt-dlp.")
            audio_file_path = os.path.join(tmpdir, downloaded_files[0])
            logger.info(f"Found audio file: {audio_file_path}")
            logger.info("Video has downloaded!")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Convert to WAV. Fix: the original wrote to "audio.wav", which
            # collides with the input path whenever yt-dlp already delivered a
            # .wav file — ffmpeg cannot read and write the same file. A
            # distinct output name avoids the collision in every case.
            wav_file_path = os.path.join(tmpdir, "converted.wav")
            convert_audio_to_wav(audio_file_path, wav_file_path)
            logger.info("Audio converted to WAV successfully.")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Run Whisper on the normalized WAV.
            logger.info("Running Whisper ASR pipeline on the WAV file...")
            logger.info("")
            yield "", log_buffer.getvalue()
            asr_pipeline = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-small",
                return_timestamps=True,  # required for inputs >30s with whisper
                device=device,
                # NOTE(review): "<|en|>" is the raw language-token form; newer
                # transformers versions also accept plain "en" — confirm against
                # the pinned transformers release.
                generate_kwargs={"task": "transcribe", "language": "<|en|>"}
            )
            result = asr_pipeline(inputs=wav_file_path)
            transcription = result["text"]
            logger.info("Whisper transcription completed successfully.")
            logger.info("")
            yield transcription, log_buffer.getvalue()
    except Exception as e:
        # Surface the error through the same (transcript, logs) channel the
        # UI already consumes rather than letting the generator raise.
        err_msg = f"Error in fallback transcription: {str(e)}"
        logger.error(err_msg)
        yield err_msg, log_buffer.getvalue()