Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from moviepy.video.io.VideoFileClip import VideoFileClip | |
| from pydub import AudioSegment | |
| import whisper | |
| from transformers import pipeline, MarianMTModel, MarianTokenizer | |
| import yt_dlp as youtube_dl | |
def download_youtube_video(url):
    """Download a video to ``video.mp4`` with the ``yt-dlp`` command-line tool.

    Args:
        url: Address of the video to fetch (user-supplied text).

    Returns:
        ``"video.mp4"`` when yt-dlp exits successfully; otherwise the text of
        the error that prevented the download. A string is returned either way.
    """
    # Local import keeps this function self-contained.
    import subprocess

    output_path = "video.mp4"
    try:
        # Arguments are passed as a list (no shell), so shell metacharacters in
        # the URL are never interpreted. "--" stops a URL that begins with "-"
        # from being parsed as an option. --force-overwrites prevents a stale
        # video.mp4 from an earlier request being kept and returned.
        subprocess.run(
            ["yt-dlp", "--force-overwrites", "-o", output_path, "--", str(url)],
            check=True,  # a non-zero exit status becomes an exception
        )
        return output_path
    except Exception as e:
        return str(e)
def extract_audio(video_path):
    """Write the audio of ``video_path`` to ``extracted_audio.mp3``.

    Returns the output file name on success. If loading or exporting raises,
    the exception's message is returned instead.
    """
    target = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(target, format="mp3")
    except Exception as err:
        return str(err)
    return target
def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with the Whisper ``"base"`` model.

    Returns the ``'text'`` field of the transcription result, or the
    exception's message if loading the model or transcribing fails.
    """
    try:
        recognizer = whisper.load_model("base")
        return recognizer.transcribe(audio_path)['text']
    except Exception as err:
        return str(err)
def summarize_text(text):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to condense (e.g. a transcription).

    Returns:
        The generated summary; an empty string when ``text`` is empty or only
        whitespace; or the exception's message if summarization fails.
    """
    # Nothing to summarize: skip the expensive model load entirely.
    if not text or not text.strip():
        return ""
    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            # Long transcripts exceed the model's input limit; truncate them
            # instead of failing inside the model.
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return str(e)
def translate_text(text, tgt_lang="es"):
    """Translate English ``text`` with a Helsinki-NLP ``opus-mt-en-*`` model.

    Args:
        text: English text to translate.
        tgt_lang: Target-language suffix of the model name (e.g. ``"es"``).
            A missing value (``None`` or ``""``) falls back to ``"es"``.

    Returns:
        The translated text; an empty string when ``text`` is empty or only
        whitespace; or the exception's message if translation fails.
    """
    # Nothing to translate: skip the model download/load entirely.
    if not text or not text.strip():
        return ""
    try:
        # A dropdown with nothing selected passes None; use the default
        # language rather than building a bogus model name.
        model_name = f"Helsinki-NLP/opus-mt-en-{tgt_lang or 'es'}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # truncation=True keeps over-long input within the model's limit.
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**encoded)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        return str(e)
def process_video(video_file, youtube_url, video_source):
    """Resolve the selected video, extract its audio and transcribe it.

    Args:
        video_file: The uploaded file: either a path string or an object
            exposing the path as ``.name``.
        youtube_url: URL to download when ``video_source`` is ``"YouTube"``.
        video_source: ``"Local Video"`` or ``"YouTube"``.

    Returns:
        A ``(text, video_path)`` tuple. ``text`` is the transcription, or an
        error message. ``video_path`` is ``None`` when no usable video could
        be obtained.
    """
    # Handle video selection based on the radio button.
    if video_source == "Local Video" and video_file:
        # A file component configured with type="filepath" passes a plain
        # string, which has no .name attribute; file-like uploads carry the
        # path in .name. Accept both.
        video_path = getattr(video_file, "name", video_file)
    elif video_source == "YouTube" and youtube_url:
        video_path = download_youtube_video(youtube_url)
    else:
        return "No valid input provided.", None

    # The download helper returns an error message instead of a path on
    # failure; report it rather than passing it on as a file.
    if not os.path.isfile(video_path):
        return f"Could not load video: {video_path}", None

    # Extract audio and transcribe.
    audio_path = extract_audio(video_path)
    if not os.path.isfile(audio_path):
        # extract_audio returned an error message instead of a file name.
        return audio_path, video_path
    transcription = transcribe_audio(audio_path)
    return transcription, video_path
def summarize_and_translate(text, lang):
    """Summarize ``text``, then translate that summary into ``lang``.

    Returns a ``(summary, translation)`` tuple.
    """
    condensed = summarize_text(text)
    return condensed, translate_text(condensed, lang)
# Build the Gradio UI: choose a video source, process it into a transcription,
# then summarize and translate the transcription on demand.
with gr.Blocks(css="""
.glass-card {
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
border: 1px solid rgba(255, 255, 255, 0.2);
border-radius: 20px;
padding: 2rem;
transition: transform 0.3s ease, box-shadow 0.3s ease;
}
.btn-blue {
background-color: #007BFF;
color: white;
border-radius: 10px;
}
.btn-blue:hover {
background-color: #0056b3;
}
.gradient-font {
background: linear-gradient(90deg, #ff7f50, #ff6347);
-webkit-background-clip: text;
color: transparent;
}
.gradio-container {
background-color: black;
color: white;
}
""") as app:
    gr.Markdown("<h1 class='gradient-font'>🎥 Smart Video-to-Text Summarization App</h1>")

    with gr.Row():
        video_source = gr.Radio(["Local Video", "YouTube"], label="Choose Video Source", value="Local Video")

    with gr.Column(visible=True) as video_section:
        # Only the input matching the radio selection is shown.
        local_video_tab = gr.File(label="Upload Local Video File", type="filepath", visible=True)
        youtube_video_tab = gr.Textbox(label="YouTube URL", visible=False)

    def switch_tabs(source):
        """Return visibility updates for (local upload, YouTube URL box)."""
        # Assigning `.visible` on the component objects does not reach the
        # browser; the handler must return updates for its output components.
        if source == "Local Video":
            return gr.update(visible=True), gr.update(visible=False)
        if source == "YouTube":
            return gr.update(visible=False), gr.update(visible=True)
        # Unknown selection: leave both components unchanged.
        return gr.update(), gr.update()

    video_source.change(
        switch_tabs,
        inputs=video_source,
        outputs=[local_video_tab, youtube_video_tab],
    )

    video_display = gr.Video(label="Processed Video", visible=False)
    process_button = gr.Button("🚀 Process Video", elem_classes=["btn-blue"])
    transcription_output = gr.Textbox(label="Transcription", interactive=False)

    def display_video(video_path):
        """Reveal the video player once it holds a video; hide it otherwise."""
        return gr.update(visible=bool(video_path))

    # Run display_video only after process_video has filled the player.
    # Registering both as independent click handlers gave no ordering
    # guarantee and never made the player visible.
    process_button.click(
        process_video,
        inputs=[local_video_tab, youtube_video_tab, video_source],
        outputs=[transcription_output, video_display],
    ).then(display_video, inputs=[video_display], outputs=video_display)

    summarize_button = gr.Button("📝 Summarize Text", elem_classes=["btn-blue"])
    summary_output = gr.Textbox(label="Summary", interactive=False)
    translate_button = gr.Button("🌍 Translate Summary", elem_classes=["btn-blue"])
    # Default selection so the translate handler never receives None.
    language_dropdown = gr.Dropdown(
        choices=["es", "fr", "de", "zh"],
        value="es",
        label="Select Translation Language",
    )
    translated_output = gr.Textbox(label="Translated Summary", interactive=False)

    summarize_button.click(summarize_text, inputs=transcription_output, outputs=summary_output)
    translate_button.click(
        summarize_and_translate,
        inputs=[transcription_output, language_dropdown],
        outputs=[summary_output, translated_output],
    )

app.launch()