|
|
import gradio as gr |
|
|
import whisper |
|
|
import os |
|
|
import yt_dlp |
|
|
|
|
|
|
|
|
# Name of the Whisper checkpoint handed to whisper.load_model().
_WHISPER_MODEL_NAME = "tiny"

# Load the model once at import time; transcribe_video() reuses this global.
print("Loading Whisper model...")
model = whisper.load_model(_WHISPER_MODEL_NAME)
|
|
|
|
|
def download_youtube_audio(url):
    """Download the audio of a YouTube video as an MP3 file using yt-dlp.

    The best available audio stream is fetched and converted to MP3 by
    yt-dlp's ``FFmpegExtractAudio`` postprocessor.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the extracted audio file (``"temp_audio.mp3"``, relative to
        the current working directory).

    Raises:
        Exception: If yt-dlp fails to download or convert the video. The
            underlying error is chained as ``__cause__``.
    """
    output_stem = "temp_audio"
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': f'{output_stem}.%(ext)s',
        # Only one output path is returned, so never expand a URL that also
        # carries a playlist parameter into several downloads.
        'noplaylist': True,
        'quiet': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Keep the original traceback attached for debugging.
        raise Exception(f"Failed to download YouTube video: {e}") from e

    return f"{output_stem}.mp3"
|
|
|
|
|
def simple_summarize(text, max_sentences=5):
    """Build a simple extractive summary by sampling sentences evenly.

    The text is split on ``.``, ``!`` and ``?``; fragments of 20 characters
    or fewer (after stripping) are discarded. If more than ``max_sentences``
    remain, the first sentence plus evenly spaced later ones are kept.

    Args:
        text: The text to summarize.
        max_sentences: Maximum number of sentences in the summary. Values
            of 1 or less yield only the first sentence.

    Returns:
        The selected sentences joined with ``". "`` and terminated by a
        period, or ``""`` when no sentence is long enough to keep.
    """
    normalized = text.replace('!', '.').replace('?', '.')
    sentences = [
        stripped
        for stripped in (part.strip() for part in normalized.split('.'))
        if len(stripped) > 20
    ]

    # Nothing usable: return an empty summary rather than a lone ".".
    if not sentences:
        return ""

    if len(sentences) <= max_sentences:
        return '. '.join(sentences) + '.'

    # The sampling step below divides by (max_sentences - 1), so a budget of
    # one sentence (or less) must be handled before reaching it.
    if max_sentences <= 1:
        return sentences[0] + '.'

    step = len(sentences) // (max_sentences - 1)
    last_index = len(sentences) - 1
    # i == 0 selects the first sentence; later indices are clamped to the end.
    picked = [sentences[min(i * step, last_index)] for i in range(max_sentences)]

    return '. '.join(picked) + '.'
|
|
|
|
|
def transcribe_video(youtube_url, video_file, progress=gr.Progress()):
    """Transcribe a YouTube video or an uploaded file and summarize the text.

    Args:
        youtube_url: URL entered by the user. It takes precedence over the
            upload whenever it contains anything other than whitespace.
        video_file: Path of the uploaded video, or ``None`` if nothing was
            uploaded.
        progress: Gradio progress tracker used to report the current step.

    Returns:
        A 4-tuple ``(status, transcription_or_error, summary, file_path)``.
        On success ``file_path`` is the written transcript file; on failure
        ``summary`` is ``""`` and ``file_path`` is ``None``.
    """
    audio_path = None
    # True only when audio_path is a temporary file created by our own
    # download, so cleanup can never delete a file the user uploaded.
    downloaded = False

    try:
        url = youtube_url.strip() if youtube_url else ""
        if url:
            progress(0.2, desc="Downloading YouTube audio...")
            audio_path = download_youtube_audio(url)
            downloaded = True
        elif video_file is not None:
            progress(0.2, desc="Processing uploaded video...")
            audio_path = video_file
        else:
            return "❌ Error", "Please provide a YouTube URL or upload a video file.", "", None

        progress(0.5, desc="Transcribing audio...")
        result = model.transcribe(audio_path, fp16=False)
        transcription = result["text"]

        # Surrounding whitespace must not count towards the minimum length.
        if not transcription or len(transcription.strip()) < 10:
            return "❌ Error", "Transcription failed or audio had no speech.", "", None

        progress(0.8, desc="Generating summary...")
        summary = simple_summarize(transcription, max_sentences=5)

        progress(0.9, desc="Creating download file...")
        separator = "=" * 60
        output_text = "\n".join([
            "VIDEO TRANSCRIPTION & SUMMARY",
            separator,
            "",
            "FULL TRANSCRIPTION:",
            transcription,
            "",
            separator,
            "",
            "SUMMARY:",
            summary,
            "",
            separator,
            "Generated by Video Transcription App",
            "",
        ])

        output_file = "transcript_output.txt"
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(output_text)

        progress(1.0, desc="Done!")
        return "✅ Success", transcription, summary, output_file

    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of raising.
        error_msg = f"Error: {e}"
        print(error_msg)
        return "❌ Error", error_msg, "", None

    finally:
        # Remove the temporary download on every exit path (success, early
        # return or error). Cleanup is best-effort and must not mask the result.
        if downloaded and audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass
|
|
|
|
|
# Two-column layout: inputs on the left, results on the right.
with gr.Blocks(title="Video Transcription") as demo:
    gr.Markdown("""
    # 🎥 Video Transcription & Summary Generator
    Upload a video file or paste a YouTube link to get AI-powered transcription and summary.
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Input")
            youtube_input = gr.Textbox(
                label="YouTube URL (Optional)",
                placeholder="https://www.youtube.com/watch?v=...",
                lines=1
            )
            gr.Markdown("**OR**")
            video_input = gr.Video(label="Upload Video File (Optional)")
            process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")

        with gr.Column():
            gr.Markdown("### Output")
            status_output = gr.Textbox(label="Status", interactive=False)

            with gr.Accordion("Full Transcription", open=True):
                transcription_output = gr.Textbox(label="", lines=10, max_lines=20)

            with gr.Accordion("Summary", open=True):
                summary_output = gr.Textbox(label="", lines=5)

            download_output = gr.File(label="📥 Download Complete Transcript")

    # The four outputs line up with the 4-tuple returned by transcribe_video.
    process_btn.click(
        fn=transcribe_video,
        inputs=[youtube_input, video_input],
        outputs=[status_output, transcription_output, summary_output, download_output]
    )

demo.launch()