import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator

# Use an NVIDIA GPU when available, otherwise fall back to the CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")

# Ensure the transcripts directory exists
os.makedirs(TRANSCRIPTS_FOLDER, exist_ok=True)
def transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an audio file with Whisper and translate it into English if required.

    :param audio_path: Path to the audio file (any format ffmpeg can decode)
    :param selected_language: Language code of the speech (e.g. 'nl', 'en')
    :param model_type: Whisper model size (default is 'base')
    :return: Timestamped translation, or a completion message for English audio
    """
    if audio_path is None:
        return "No audio received."

    try:
        # Load the Whisper model selected by the user
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper accepts a file path directly and decodes it via ffmpeg
    result = model.transcribe(audio_path, language=selected_language)

    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, "transcript.txt")
    with open(transcript_file, "w", encoding="utf-8") as text_file:
        for segment in result["segments"]:
            start_time = segment["start"]
            end_time = segment["end"]
            text = segment["text"].strip()
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            # Translate non-English segments into English
            if selected_language != "en":
                text_en = GoogleTranslator(source="auto", target="en").translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

    return "\n".join(translated_text) if translated_text else "Transcription completed."
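# Note: openai-whisper exposes no streaming API, which is why the function above
# works on a complete file. A rough approximation of live transcription is to
# split the audio into fixed windows, transcribe each, and offset the timestamps.
# A minimal sketch, assuming 16 kHz mono float32 input; transcribe_in_chunks is
# a hypothetical helper, not part of any library:
def transcribe_in_chunks(audio, model, chunk_seconds=30):
    """Transcribe a long numpy array window by window."""
    import numpy as np

    sr = whisper.audio.SAMPLE_RATE  # 16000 Hz, the rate Whisper expects
    step = chunk_seconds * sr
    lines = []
    for offset in range(0, len(audio), step):
        chunk = audio[offset:offset + step].astype(np.float32)
        result = model.transcribe(chunk)
        base = offset / sr  # shift chunk-relative timestamps to the full timeline
        for seg in result["segments"]:
            lines.append(f"[{base + seg['start']:.2f} - {base + seg['end']:.2f}] {seg['text'].strip()}")
    return "\n".join(lines)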
# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),  # pre-recorded audio; Whisper reads the file via ffmpeg
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base"),
    ],
    outputs="text",
    title="Transcription and Translation",
)
if __name__ == "__main__":
    # Launch the Gradio interface
    interface.launch()
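# Quick check without the UI (assumption: "sample.wav" is a hypothetical local
# test file; any format ffmpeg can decode works):
#   print(transcribe_and_translate("sample.wav", "nl", "tiny"))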