| # from transformers import pipeline | |
| # import gradio as gr | |
| # | |
| # pipe = pipeline(model="dacavi/whisper-small-hi") # change to "your-username/the-name-you-picked" | |
| # def transcribe(audio): | |
| # text = pipe(audio)["text"] | |
| # return text | |
| # | |
| # iface = gr.Interface( | |
| # fn=transcribe, | |
| # inputs=gr.Audio(sources="microphone", type="filepath"), | |
| # outputs="text", | |
| # title="Whisper Small Hindi", | |
| # description="Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.", | |
| # ) | |
| # | |
| # iface.launch() | |
import os
import subprocess
import tempfile

import gradio as gr
from huggingface_hub import login
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline
| # with open("../../token.txt", "r") as file: | |
| # token = file.readline().strip() | |
| # | |
| # | |
| # login(token=token, add_to_git_credential=True) | |
| pipe = pipeline(model="dacavi/whisper-small-hi") | |
def transcribe_video(video_url):
    """Download the audio track of a video URL and transcribe it.

    Parameters
    ----------
    video_url : str
        A URL understood by ``yt-dlp`` (e.g. a YouTube link).

    Returns
    -------
    str
        The transcription produced by the Whisper pipeline.

    Raises
    ------
    subprocess.CalledProcessError
        If the yt-dlp download fails.
    """
    # Download into an isolated temp directory so concurrent requests cannot
    # clobber each other's files (the previous version wrote a fixed
    # "audioSample.wav" into the CWD), and so cleanup is automatic — the old
    # NamedTemporaryFile(delete=False) was never used and leaked on disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Giving yt-dlp an explicit .wav output template means the extracted
        # audio lands at this exact path.
        audio_path = os.path.join(tmp_dir, "audio.wav")
        # Argument-list invocation (shell=False) so a malicious URL cannot
        # inject shell commands; the old os.system(f"... {video_url}") was a
        # command-injection hole for untrusted user input.
        subprocess.run(
            ["yt-dlp", "-o", audio_path, "-x", "--audio-format", "wav", video_url],
            check=True,
        )
        print("Downloaded audio:", audio_path)
        # Transcribe while the file still exists; the temp directory (and the
        # wav inside it) is removed when the `with` block exits.
        return pipe(audio_path)["text"]
# Web UI wiring: one text box for the video URL in, the transcript out.
# live=True re-runs the transcription function as the input changes.
_ui_title = "Video Transcription"
_ui_description = "Paste the URL of a video to transcribe the spoken content."
iface = gr.Interface(
    fn=transcribe_video,
    inputs="text",
    outputs="text",
    title=_ui_title,
    description=_ui_description,
    live=True,
)
# Start the Gradio server (blocks until shut down).
iface.launch()