# HuggingFace Spaces app: audio transcription (Whisper) + optional translation.
# NOTE(review): this Space was previously failing at startup with a runtime error.
import gradio as gr
import torch
import numpy as np
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from googletrans import Translator
from pydub import AudioSegment
import io

# Load the Whisper processor and model exactly once at import time so every
# request served by the app reuses the same in-memory weights.
MODEL_NAME = "openai/whisper-large-v2"
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
def transcribe_and_translate(audio, target_language=None):
    """Transcribe an audio clip with Whisper and optionally translate the text.

    Args:
        audio: Raw audio bytes, a file path, or a file-like object — anything
            pydub's ``AudioSegment.from_file`` can open. (Gradio hands the
            callback a file path when the component uses ``type="filepath"``.)
        target_language: Optional googletrans language code (e.g. "es").
            Falsy/empty means "no translation".

    Returns:
        Tuple ``(transcription, translation)``; ``translation`` is ``None``
        when no target language was requested. On any failure, returns
        ``(error message, None)`` instead of raising.
    """
    try:
        # Accept raw bytes as well as a path / file object; the previous
        # version assumed bytes and crashed on Gradio's filepath input.
        if isinstance(audio, (bytes, bytearray)):
            audio = io.BytesIO(audio)
        audio_segment = AudioSegment.from_file(audio)

        # Whisper expects mono 16 kHz audio; pin 16-bit samples so the
        # int16 -> float normalization below is always correct.
        audio_segment = (
            audio_segment.set_channels(1).set_frame_rate(16000).set_sample_width(2)
        )

        # Read the PCM samples directly. The old code exported to WAV and
        # re-parsed the raw bytes with np.frombuffer, which wrongly included
        # the 44-byte RIFF header as if it were sample data.
        samples = (
            np.array(audio_segment.get_array_of_samples()).astype(np.float32)
            / 32768.0
        )

        # The Whisper processor wants a 1-D float array, not a torch tensor.
        inputs = processor(samples, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            # generate() returns token ids (not logits).
            predicted_ids = model.generate(inputs["input_features"])
        transcription = processor.batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]

        if target_language:
            translator = Translator()
            translation = translator.translate(transcription, dest=target_language)
            return transcription, translation.text
        return transcription, None
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return f"An error occurred: {e}", None
# Gradio interface
def process_audio(audio, target_language):
    """Gradio click handler: run the pipeline and format both output boxes."""
    text, translated = transcribe_and_translate(audio, target_language)
    # Fall back to a friendly placeholder when no translation was produced.
    if not translated:
        return text, "No translation requested."
    return text, translated
# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription and Translation")
    # type="filepath" hands the callback a path on disk, which
    # AudioSegment.from_file opens directly. The previous type="file" is not
    # a valid gr.Audio type and, combined with the removed `source=` kwarg,
    # crashed the app on startup under Gradio 4.
    # NOTE(review): `sources=` is the Gradio 4 spelling; on a pinned Gradio
    # 3.x use `source="microphone"` instead — confirm the Space's version.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Record or Upload Audio",
    )
    target_language = gr.Textbox(
        label="Target Language (e.g., 'es' for Spanish)",
        placeholder="Leave blank for no translation",
    )
    transcription_output = gr.Textbox(label="Transcription")
    translation_output = gr.Textbox(label="Translation")
    submit_button = gr.Button("Transcribe and Translate")
    submit_button.click(
        process_audio,
        inputs=[audio_input, target_language],
        outputs=[transcription_output, translation_output],
    )

# Launch the Gradio interface
demo.launch()