Spaces:
Runtime error
Runtime error
| from transformers import pipeline | |
| import gradio as gr | |
| from pytube import YouTube | |
| from datasets import Dataset, Audio | |
| import os | |
| from moviepy.editor import AudioFileClip | |
# Speech recognition: the fine-tuned Swedish Whisper checkpoint. No task is
# passed here, so the pipeline type is left for the library to resolve from
# the checkpoint. Change to "your-username/the-name-you-picked" to use your
# own model.
pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")

# Text continuation: Swedish GPT, prompted with the transcript from pipe1.
pipe2 = pipeline(
    task="text-generation",
    model="birgermoell/swedish-gpt",
)
def transcribe(audio):
    """Transcribe an audio file and continue the transcript with Swedish GPT.

    Args:
        audio: Path of the audio file to transcribe. Falsy (``None`` or an
            empty string) when the form is submitted without any audio.

    Returns:
        A ``(text, generated_text)`` tuple: the recognized speech and the
        generator's continuation of it. Both are empty strings when no audio
        was provided; ``generated_text`` is empty when nothing was recognized.
    """
    # Nothing was uploaded/recorded: return blanks instead of crashing inside
    # the speech-recognition pipeline.
    if not audio:
        return "", ""

    text = pipe1(audio)["text"]

    # An empty prompt gives the generator nothing to continue from.
    if not text.strip():
        return text, ""

    # `max_new_tokens` budgets the continuation only. The previous
    # `max_length=50` also counted the prompt, so an ordinary transcript could
    # use up the whole budget before any text was generated. Only one sequence
    # is requested because only the first was ever used.
    generated = pipe2(text, max_new_tokens=50)
    return text, generated[0]["generated_text"]
def youtube_link(url):
    """Download the audio track of a YouTube video as an mp4 file.

    Args:
        url: Address of the YouTube video.

    Returns:
        Path of the downloaded mp4 audio file, as reported by pytube.

    Raises:
        ValueError: If the video offers no audio-only mp4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension="mp4")

    # `first()` yields None when the filter matched nothing; fail with a clear
    # message rather than an AttributeError on `.download()`.
    stream = streams.first()
    if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")

    return stream.download()
def convert_to_wav(path):
    """Convert an audio file to ``audio.wav`` in the working directory.

    Args:
        path: Path of the source audio file (the downloaded mp4).

    Returns:
        The path of the written WAV file, always ``"audio.wav"``.
    """
    output_path = "audio.wav"
    clip = AudioFileClip(path)
    try:
        # The last two seconds are dropped, as in the original code --
        # presumably to avoid read errors at the tail of the download
        # (TODO confirm). Clips of two seconds or less are written whole,
        # since trimming them would leave a non-positive duration.
        if clip.duration is not None and clip.duration > 2:
            trimmed = clip.subclip(0, -2)
        else:
            trimmed = clip
        trimmed.write_audiofile(output_path)
    finally:
        # Release the underlying reader even if the export fails.
        clip.close()
    return output_path
def youtube_transcribe(url):
    """Transcribe the speech in a YouTube video.

    The audio is downloaded, converted to WAV, loaded through a one-row
    dataset whose audio column is cast to 16 kHz, and passed to the
    speech-recognition pipeline.

    Args:
        url: Address of the YouTube video. Blank or ``None`` when the form
            is submitted empty.

    Returns:
        The recognized text, or an empty string when no URL was given.
    """
    # Nothing to download: return a blank result instead of handing an empty
    # URL to the downloader.
    if not url or not url.strip():
        return ""

    path = youtube_link(url.strip())
    try:
        path_wav = convert_to_wav(path)
    finally:
        # The mp4 is only an intermediate; remove it so downloads do not pile
        # up on disk across requests.
        if os.path.exists(path):
            os.remove(path)

    audio_dataset = Dataset.from_dict({"audio": [path_wav]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    text = pipe1(audio_dataset["audio"])
    return text[0]["text"]
# Gradio UI with three tabs: file upload, microphone recording and YouTube URL.
# The first two tabs show the transcript plus a generated continuation; the
# YouTube tab shows the transcript only.
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown(
        "Realtime demo for Swedish speech recognition using a fine-tuned "
        "Whisper small model & text generation with Swedish GPT."
    )

    # --- Tab 1: audio file from disk ---
    with gr.TabItem("Upload from disk"):
        upload_file = gr.Audio(
            source="upload",
            type="filepath",
            label="Upload from disk",
        )
        upload_button = gr.Button("Submit for recognition")
        upload_outputs = [
            gr.Textbox(label="Recognized speech from uploaded file"),
            gr.Textbox(label="Swedish-gpt generated speech from uploaded file"),
        ]

    # --- Tab 2: microphone recording ---
    with gr.TabItem("Record from microphone"):
        record_file = gr.Audio(
            source="microphone",
            type="filepath",
            label="Record from microphone",
        )
        record_button = gr.Button("Submit for recognition")
        record_outputs = [
            gr.Textbox(label="Recognized speech from recordings"),
            gr.Textbox(label="Swedish-gpt generated speech from recordings"),
        ]

    # --- Tab 3: YouTube URL ---
    with gr.TabItem("Transcribe from Youtube URL"):
        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
        youtube_button = gr.Button("Submit for recognition")
        youtube_outputs = [
            gr.Textbox(label="Recognized speech from URL"),
        ]

    # Wire each button to its handler. Both audio tabs share `transcribe`,
    # which fills two text boxes; the YouTube tab fills one.
    upload_button.click(
        fn=transcribe,
        inputs=upload_file,
        outputs=upload_outputs,
    )
    record_button.click(
        fn=transcribe,
        inputs=record_file,
        outputs=record_outputs,
    )
    youtube_button.click(
        fn=youtube_transcribe,
        inputs=url,
        outputs=youtube_outputs,
    )

# Start the app server.
demo.launch()