# waigyi's picture
# Update app.py
# 8d2033e verified
import os
import tempfile

import gradio as gr
import whisper
from moviepy.editor import VideoFileClip, AudioFileClip
from gtts import gTTS
from google import genai
# -----------------------
# Gemini client
# -----------------------
# The API key comes from the GEMINI_API_KEY environment variable; the lookup
# yields None when the variable is unset and that value is passed through as-is.
gemini_api_key = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=gemini_api_key)

# -----------------------
# Whisper speech-to-text model
# -----------------------
# Loaded once at import time; process_video() reuses this single instance
# for every transcription instead of reloading the "base" checkpoint.
whisper_model = whisper.load_model("base")
def process_video(video_file):
    """Turn an uploaded video into a Myanmar-narrated recap video.

    Pipeline: extract the original audio track, transcribe it with the
    module-level Whisper model, ask Gemini to rewrite the transcript as a
    short recap narration, synthesise that narration with gTTS
    (``lang="my"``), and mux the new voice track onto the muted video.

    Args:
        video_file: Filesystem path of the uploaded video, as supplied by
            the ``gr.Video`` input. ``None``/empty when nothing was uploaded.

    Returns:
        A ``(recap_text, output_path)`` tuple: the narration text returned
        by Gemini and the path of the rendered MP4.

    Raises:
        gr.Error: If no video was supplied, the video has no audio track,
            or Gemini returned no usable text.
    """
    if not video_file:
        raise gr.Error("Please upload a video first.")

    # Every request gets its own scratch directory. The original wrote fixed
    # filenames into the working directory, so two overlapping requests would
    # overwrite each other's audio/video files.
    work_dir = tempfile.mkdtemp(prefix="recap_")
    audio_path = os.path.join(work_dir, "temp_audio.wav")
    voice_path = os.path.join(work_dir, "recap_voice.mp3")
    mux_audio_path = os.path.join(work_dir, "mux_audio.m4a")
    output_path = os.path.join(work_dir, "final_recap_video.mp4")

    # Model id is overridable without a code change; the default is unchanged.
    # NOTE(review): confirm "gemini-1.5-flash" is still served by the API.
    model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")

    video = VideoFileClip(video_file)
    new_audio = None
    try:
        # Silent uploads have no audio clip at all; fail with a clear message
        # instead of an AttributeError on None.
        if video.audio is None:
            raise gr.Error(
                "The uploaded video has no audio track to transcribe."
            )

        # Extract audio
        video.audio.write_audiofile(audio_path)

        # Transcribe
        result = whisper_model.transcribe(audio_path)
        transcript = result["text"]

        # Generate recap. The prompt text is kept identical to the original
        # triple-quoted literal (leading and trailing newline included).
        prompt = (
            "\n"
            "Rewrite this transcript into a short Myanmar movie recap narration.\n"
            "Make it commentary style and transformative.\n"
            "Keep it under 3 minutes reading time.\n"
            "Transcript:\n"
            f"{transcript}\n"
        )
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
        )
        recap_text = response.text
        # gTTS refuses empty input with an opaque assertion; surface a
        # readable error when the model returned nothing usable.
        if not recap_text or not recap_text.strip():
            raise gr.Error("Gemini returned no recap text for this video.")

        # Convert to Myanmar voice
        gTTS(recap_text, lang="my").save(voice_path)

        # Merge new voice onto the muted picture (Safe Mode: the original
        # audio is dropped). The muted clip is no longer rendered to its own
        # file first -- that file was never read back and cost a full extra
        # libx264 encode per request.
        new_audio = AudioFileClip(voice_path)
        final_video = video.without_audio().set_audio(new_audio)
        final_video.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            # Keep MoviePy's intermediate audio file inside the per-request
            # directory; by default it lands in the working directory under
            # a name derived from the output filename.
            temp_audiofile=mux_audio_path,
        )
    finally:
        # Release the ffmpeg readers held by the clips, then drop scratch
        # files. The rendered MP4 stays so Gradio can serve it.
        if new_audio is not None:
            new_audio.close()
        video.close()
        for scratch_path in (audio_path, voice_path):
            try:
                os.remove(scratch_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass

    return recap_text, output_path
# -----------------------
# Gradio UI
# -----------------------
# One video goes in; the recap narration text and the re-voiced video come out.
# NOTE(review): the "Max 5 min" in the label is only a hint to the user --
# confirm whether a hard duration limit should be enforced somewhere.
upload_input = gr.Video(label="Upload Video (Max 5 min)")
recap_outputs = [
    gr.Textbox(label="Myanmar Recap Text"),
    gr.Video(label="Final Recap Video"),
]

interface = gr.Interface(
    fn=process_video,
    inputs=upload_input,
    outputs=recap_outputs,
    title="Auto Movie Recap Tool (Safe Mode)",
)

# Start the web server as soon as this module is executed.
interface.launch()