| import whisper |
| import openai |
| import gradio as gr |
| from gtts import gTTS |
| from moviepy.editor import VideoFileClip |
| import os |
|
|
# SECURITY: never hard-code API keys in source — the previous revision leaked a
# live-looking secret key here; that key must be revoked. Read the key from the
# environment instead (set OPENAI_API_KEY before launching the app).
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
def transcribe_video(video_path):
    """Extract the audio track from a video file and transcribe it with Whisper.

    Args:
        video_path: Path to a video file readable by moviepy.

    Returns:
        The transcribed text of the video's audio track.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = "temp_audio.wav"
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"Video has no audio track: {video_path}")
        # PCM WAV keeps the audio uncompressed for Whisper.
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Release moviepy's ffmpeg readers even if extraction fails.
        video.close()

    try:
        # NOTE: the model is reloaded on every call; acceptable for a demo,
        # but a module-level cache would avoid repeated load time.
        model = whisper.load_model("base")
        result = model.transcribe(audio_path)
        transcription = result["text"]
    finally:
        # Remove the temp WAV whether or not transcription succeeded.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return transcription
|
|
def summarize_text(text):
    """Summarize *text* using the OpenAI chat-completions API.

    The legacy ``openai.Completion`` endpoint and the ``text-davinci-003``
    model were retired; this uses the current chat interface instead.

    Args:
        text: The text to summarize.

    Returns:
        The model's summary, stripped of surrounding whitespace.
    """
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ],
        max_tokens=150,
    )
    summary = response.choices[0].message.content.strip()
    return summary
|
|
def text_to_speech(text, language="en"):
    """Render *text* as spoken audio via Google TTS.

    Args:
        text: The text to speak.
        language: gTTS language code (default ``"en"``).

    Returns:
        Path of the generated MP3 file.
    """
    output_path = "summary_audio.mp3"
    speech = gTTS(text=text, lang=language)
    speech.save(output_path)
    return output_path
|
|
def process_video(video):
    """Full pipeline: transcribe a video, summarize it, and voice the summary.

    Args:
        video: Path to the uploaded video file (supplied by Gradio).

    Returns:
        A ``(transcription, summary, audio_file)`` tuple for the three
        Gradio output components.
    """
    transcription = transcribe_video(video)
    summary = summarize_text(transcription)
    return transcription, summary, text_to_speech(summary)
|
|
| |
# Gradio UI: one video input, three outputs (transcript, summary, spoken summary).
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
        gr.Audio(label="Summary Audio"),
    ],
    title="Video Transcription and Summarization",
    description="Upload a video file to transcribe and summarize its content.",
)


# Guard the launch so importing this module doesn't start a web server
# as an import-time side effect.
if __name__ == "__main__":
    iface.launch()