Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import gradio as gr | |
| from pytube import YouTube | |
| import os | |
| import requests | |
| import time | |
| from openai import OpenAI | |
# Shared OpenAI client; radio_to_text uses it to summarise transcripts.
# No credentials are passed here, so the client presumably picks them up from
# the environment -- confirm the deployment provides them.
client = OpenAI()

# Speech-recognition pipeline built once at import time and reused by every
# handler (microphone, YouTube and radio).
pipe = pipeline(model="dussen/whisper-small-nl-hc")

# Log the loaded pipeline object at start-up.
print(pipe)
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video.
        output_path: Directory the audio file is downloaded into.

    Returns:
        The transcribed text, or a string starting with ``"Error: "`` when
        the download or the transcription fails. The failure is also printed
        so it shows up in the server log.
    """
    audio_path = None
    try:
        yt = YouTube(url)
        # Take the first audio-only stream in an mp4 container. Note that
        # first() is simply the first match, not necessarily the best bitrate.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise ValueError("no mp4 audio stream available for this video")

        # download() returns the path of the file it wrote, so there is no
        # need to rename it to a fixed name before transcribing.
        audio_path = audio_stream.download(output_path)

        # Use the model to transcribe the audio.
        return pipe(audio_path)["text"]
    except Exception as e:
        # Best-effort handler: report the problem to the user instead of
        # returning None, which would show up as an empty result.
        print(f"Error: {e}")
        return f"Error: {e}"
    finally:
        # Always clean up the downloaded file, even if transcription failed.
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)
def audio_to_text(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or empty
            when the form is submitted without a recording.

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    # Nothing was recorded: skip the model instead of letting it raise.
    if not audio:
        return ""
    text = pipe(audio)["text"]
    print(text)
    return text
def radio_to_text(radio_url):
    """Record a short clip from a radio stream, transcribe and summarise it.

    Roughly ten seconds of the stream are written to a temporary file,
    transcribed with the speech-recognition pipeline, and the transcript is
    then sent to the chat model, which is asked to categorise it and
    summarise it in at most three sentences.

    Args:
        radio_url: URL of the audio stream.

    Returns:
        A string containing the raw transcript followed by the summary.

    Raises:
        requests.RequestException: If the stream cannot be reached, stalls
            longer than the timeout, or answers with an HTTP error status.
    """
    capture_seconds = 10
    stream_path = 'stream.mp3'

    try:
        # The timeout bounds the connect and every read, so a stalled stream
        # cannot hang the request forever; the context manager closes the
        # connection once the clip has been captured.
        with requests.get(radio_url, stream=True, timeout=10) as r:
            # Fail early rather than saving an HTTP error page as "audio".
            r.raise_for_status()
            with open(stream_path, 'wb') as f:
                # Get the stopping time as a UNIX timestamp.
                stop_after = time.time() + capture_seconds
                try:
                    for block in r.iter_content(1024):
                        f.write(block)
                        if time.time() > stop_after:
                            break
                except KeyboardInterrupt:
                    # Allow the capture to be cut short manually; whatever
                    # was recorded so far is still transcribed.
                    pass

        text = pipe(stream_path)["text"]
    finally:
        # Do not leave the captured clip behind, whether or not we succeeded.
        if os.path.exists(stream_path):
            os.remove(stream_path)

    print(text)

    # Ask the chat model whether the clip is news, music, an advert or
    # something else, and to summarise what was said.
    prompt = f"Dit stuk komt uit een radio uitzending en is getranscribeerd door AI. Er kunnen fouten in zitten. Kan je eerst het categorie text geven uit `nieuws`, `muziek`, `advertentie` of `rest`, en dan in max drie zinnen wat er gezegd is?\n\n{text}"
    # Cap the prompt at 3584 characters. This is a character count used as a
    # rough guard on request size, not an exact token limit.
    prompt = prompt[:3584]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=512,
        top_p=1
    )

    text = f"Tekst van de AI die is getranscribeerd: {text}\n\n---\n\nSamenvatting door AI:\n\n{response.choices[0].message.content}"
    return text
# Tab: paste a YouTube URL, get the transcript of its audio track.
iface_video_url = gr.Interface(
    title="Whisper Small Dutch - Use a YouTube URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=download_audio,
    inputs="text",
    outputs="text",
)

# Tab: record from the microphone; the handler receives the recording as a
# file path.
iface_audio = gr.Interface(
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=audio_to_text,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)

# Tab: paste a radio stream URL, get a transcript plus a summary.
iface_radio = gr.Interface(
    title="Whisper Small Dutch - Use a radio URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model. It gets information on what is playing on the given radio URL. It transcribes it and then summarises it using chatGPT.",
    fn=radio_to_text,
    inputs="text",
    outputs="text",
)

# Combine the three interfaces into one tabbed app; the order of the
# interfaces must match the order of the tab labels.
app = gr.TabbedInterface(
    [iface_audio, iface_video_url, iface_radio],
    ["Audio to text", "Video to text", "Radio to text"],
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()