import gradio as gr
import os
import cv2
import ffmpeg
import numpy as np
from gtts import gTTS
from diffusers import StableDiffusionPipeline
import torch
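
# Assumed dependencies (the original pins none): gradio, opencv-python,
# ffmpeg-python, numpy, gTTS, diffusers (with transformers), torch; the
# ffmpeg binary must also be on PATH for ffmpeg-python to work.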

# Create the directories for generated images and output media.
os.makedirs("generated_images", exist_ok=True)
os.makedirs("output", exist_ok=True)


# Load the Stable Diffusion pipeline once at startup (CPU-friendly float32).
model_id = "runwayml/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32)
pipe.safety_checker = None  # disable the built-in safety checker
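
# Optional sketch (an assumption, not in the original script): move the
# pipeline to the GPU when one is available; float32 generation on CPU is
# extremely slow for Stable Diffusion.
if torch.cuda.is_available():
    pipe = pipe.to("cuda")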


# Path of the most recently generated audio file, shared between the tabs.
global_audio_path = None


# Convert the uploaded Urdu script to speech with gTTS.
def text_to_speech(script_file):
    if script_file is None:
        return None, "⚠️ Please upload an Urdu script file!"

    # gr.File(type="filepath") passes the path in as a plain string.
    with open(script_file, "r", encoding="utf-8") as f:
        urdu_text = f.read().strip()

    audio_path = "output/urdu_audio.mp3"
    tts = gTTS(text=urdu_text, lang="ur")
    tts.save(audio_path)

    # Remember the audio path so the video tab can merge it in later.
    global global_audio_path
    global_audio_path = audio_path

    return audio_path, "✅ Audio generated successfully!"


# Generate one Stable Diffusion image per scene in the script.
def generate_images(script_file, num_images):
    if script_file is None:
        return None, "⚠️ Please upload a script file!"

    num_images = int(num_images)

    # Scenes are separated by blank lines in the uploaded script.
    with open(script_file, "r", encoding="utf-8") as f:
        text_lines = f.read().split("\n\n")

    image_paths = []
    for i, scene in enumerate(text_lines[:num_images]):
        prompt = f"Scene {i+1}: {scene.strip()}"
        image = pipe(prompt).images[0]
        image_path = f"generated_images/image_{i+1}.png"
        image.save(image_path)
        image_paths.append(image_path)

    return image_paths, "✅ Images generated successfully!"


# Stitch the generated images into a silent MP4 slideshow with OpenCV.
def images_to_video(image_paths, fps=1):
    if not image_paths:
        return None

    # Use the first frame's dimensions for the whole video.
    frame = cv2.imread(image_paths[0])
    height, width, _ = frame.shape

    video_path = "output/generated_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

    for image in image_paths:
        frame = cv2.imread(image)
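        # Safeguard (an addition, not in the original): VideoWriter silently
        # drops frames whose dimensions differ from the size it was opened
        # with, so resize any mismatched frame to the first frame's size.
        if frame.shape[:2] != (height, width):
            frame = cv2.resize(frame, (width, height))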
        video.write(frame)

    video.release()
    return video_path
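

# Hypothetical helper (an assumption, not used by the original app): choose an
# fps so the slideshow runs exactly as long as the narration. ffmpeg.probe()
# shells out to ffprobe and returns the container metadata as a dict.
def fps_for_audio(image_count, audio_path):
    duration = float(ffmpeg.probe(audio_path)["format"]["duration"])
    # Spread the frames evenly across the audio; clamp away from zero fps.
    return max(image_count / duration, 0.01)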


# Mux the slideshow with the previously generated Urdu narration via ffmpeg.
def merge_audio_video(video_path):
    if global_audio_path is None:
        return None, "⚠️ No audio found! Please generate Urdu TTS first."

    final_video_path = "output/final_video.mp4"

    # ffmpeg-python builds the command lazily; .run() invokes the ffmpeg binary.
    video = ffmpeg.input(video_path)
    audio = ffmpeg.input(global_audio_path)
    ffmpeg.output(video, audio, final_video_path, vcodec="libx264", acodec="aac").run(overwrite_output=True)
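    # Optional (an assumption about ffmpeg-python's kwarg handling: keyword
    # arguments with value None become bare flags): adding shortest=None to
    # ffmpeg.output(...) would pass -shortest and trim the result to the
    # shorter of the two streams if narration and slideshow lengths differ.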

    return final_video_path, "✅ Video with Urdu voice-over generated successfully!"


# End-to-end pipeline: script -> images -> silent video -> video with narration.
def generate_final_video(script_file, num_images):
    if script_file is None:
        return None, "⚠️ Please upload a script file for image generation!"

    image_paths, img_msg = generate_images(script_file, num_images)
    if not image_paths:
        return None, img_msg

    # One second per scene keeps the slideshow pacing simple.
    video_path = images_to_video(image_paths, fps=1)
    final_video_path, vid_msg = merge_audio_video(video_path)

    return final_video_path, vid_msg


# Two-tab Gradio interface: one tab for TTS, one for the full video pipeline.
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 Urdu Text-to-Speech & AI Video Generator")

    with gr.Tab("🗣️ Urdu Text-to-Speech"):
        script_file_tts = gr.File(label="📄 Upload Urdu Script for Audio", type="filepath")
        generate_audio_btn = gr.Button("🎙️ Generate Audio", variant="primary")
        audio_output = gr.Audio(label="🔊 Urdu Speech Output", interactive=False)
        audio_status = gr.Textbox(label="ℹ️ Status", interactive=False)

        generate_audio_btn.click(text_to_speech, inputs=[script_file_tts], outputs=[audio_output, audio_status])

    with gr.Tab("🎥 AI Video Generator"):
        script_file_video = gr.File(label="📄 Upload Urdu Script for Images", type="filepath")
        num_images = gr.Number(label="📸 Number of Scenes", value=3, minimum=1, maximum=10, step=1)
        generate_video_btn = gr.Button("🎬 Generate Video", variant="primary")
        video_output = gr.Video(label="🎞️ Generated Video")
        video_status = gr.Textbox(label="ℹ️ Status", interactive=False)

        generate_video_btn.click(generate_final_video, inputs=[script_file_video, num_images], outputs=[video_output, video_status])


demo.launch()