Backened committed on
Commit
da99713
·
verified ·
1 Parent(s): 19a55e1

Create urdu_tts_video.py

Browse files
Files changed (1) hide show
  1. urdu_tts_video.py +135 -0
urdu_tts_video.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import cv2
4
+ import ffmpeg
5
+ import numpy as np
6
+ from gtts import gTTS
7
+ from diffusers import StableDiffusionPipeline
8
+ import torch
9
+
10
# Ensure required folders exist before any function writes into them.
os.makedirs("generated_images", exist_ok=True)
os.makedirs("output", exist_ok=True)

# Load Stable Diffusion for image generation.
# Run on the GPU in half precision when CUDA is available; otherwise keep the
# original float32 configuration on the CPU.
model_id = "runwayml/stable-diffusion-v1-5"
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=_dtype)
pipe = pipe.to(_device)
pipe.safety_checker = None  # Disable safety checker

# Global variable to store generated TTS audio path.
# Written by text_to_speech() and read by merge_audio_video().
global_audio_path = None
21
+
22
+
23
### 🗣️ TEXT-TO-SPEECH FUNCTION ###
def text_to_speech(script_file):
    """Convert an uploaded Urdu script into an MP3 voice-over.

    The audio is written to ``output/urdu_audio.mp3`` and its path is also
    stored in the module-level ``global_audio_path``.

    Args:
        script_file: Path to a UTF-8 text file, or an upload object exposing
            that path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is ``None``
        when no audio was produced.
    """
    global global_audio_path

    if script_file is None:
        return None, "⚠️ Please upload an Urdu script file!"

    # The UI declares gr.File(type="filepath"), which hands over a path
    # string; file-like uploads carry the path in ``.name``. Accept either.
    script_path = getattr(script_file, "name", script_file)

    with open(script_path, "r", encoding="utf-8") as f:
        urdu_text = f.read().strip()

    # gTTS rejects empty text, so report the problem instead of crashing.
    if not urdu_text:
        return None, "⚠️ The uploaded script file is empty!"

    audio_path = "output/urdu_audio.mp3"
    tts = gTTS(text=urdu_text, lang="ur")
    tts.save(audio_path)

    # Remember the voice-over so it can be merged into the video later.
    global_audio_path = audio_path

    return audio_path, "✅ Audio generated successfully!"
39
+
40
+
41
### 🏞️ IMAGE GENERATION FUNCTION ###
def generate_images(script_file, num_images):
    """Render one Stable Diffusion image per scene of the uploaded script.

    Scenes are the paragraphs of the file separated by a blank line; at most
    ``num_images`` non-empty scenes are rendered. Images are saved as
    ``generated_images/image_<n>.png``.

    Args:
        script_file: Path to a UTF-8 text file, or an upload object exposing
            that path as ``.name``. ``None`` means nothing was uploaded.
        num_images: Maximum number of scenes to render (anything ``int()``
            accepts).

    Returns:
        A ``(image_paths, status_message)`` tuple. ``image_paths`` is ``None``
        when nothing could be generated.
    """
    if script_file is None:
        return None, "⚠️ Please upload a script file!"

    # The number field can arrive empty or as a float; validate before use.
    try:
        num_images = int(num_images)
    except (TypeError, ValueError, OverflowError):
        return None, "⚠️ Please enter a valid number of scenes!"
    if num_images < 1:
        return None, "⚠️ Please enter a valid number of scenes!"

    # Accept both a plain path string and an upload object with ``.name``.
    script_path = getattr(script_file, "name", script_file)

    with open(script_path, "r", encoding="utf-8") as f:
        raw_scenes = f.read().split("\n\n")  # Splitting scenes by double newlines

    # Drop blank scenes so they neither use up an image slot nor produce an
    # empty prompt.
    scenes = [scene.strip() for scene in raw_scenes if scene.strip()]
    if not scenes:
        return None, "⚠️ The script file contains no scenes!"

    image_paths = []
    for i, scene in enumerate(scenes[:num_images], start=1):
        prompt = f"Scene {i}: {scene}"
        image = pipe(prompt).images[0]
        image_path = f"generated_images/image_{i}.png"
        image.save(image_path)
        image_paths.append(image_path)

    return image_paths, "✅ Images generated successfully!"
60
+
61
+
62
### 🎥 VIDEO CREATION FUNCTION ###
def images_to_video(image_paths, fps=1):
    """Assemble still images into a silent MP4 slideshow.

    The video is written to ``output/generated_video.mp4`` and takes its frame
    size from the first readable image.

    Args:
        image_paths: Image file paths in playback order.
        fps: Frames per second; each image contributes exactly one frame.

    Returns:
        The path of the written video, or ``None`` when there is no readable
        image to write.
    """
    if not image_paths:
        return None

    video_path = "output/generated_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")

    video = None
    width = height = 0
    try:
        for image in image_paths:
            frame = cv2.imread(image)
            # cv2.imread signals an unreadable file by returning None.
            if frame is None:
                continue

            if video is None:
                # The first readable image fixes the size of the whole video.
                height, width = frame.shape[:2]
                video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
            elif frame.shape[:2] != (height, width):
                # VideoWriter expects every frame to match the declared size.
                frame = cv2.resize(frame, (width, height))

            video.write(frame)
    finally:
        # Release the writer even if a frame fails, so the file is closed.
        if video is not None:
            video.release()

    if video is None:
        return None
    return video_path
80
+
81
+
82
### 🔊 AUDIO-VIDEO MERGE FUNCTION ###
def merge_audio_video(video_path):
    """Mux the stored Urdu voice-over into a video file.

    The audio path is read from the module-level ``global_audio_path``; the
    result is written to ``output/final_video.mp4`` (H.264 video, AAC audio).

    Args:
        video_path: Path of the silent video, or ``None`` if none was made.

    Returns:
        A ``(final_video_path, status_message)`` tuple. ``final_video_path``
        is ``None`` when the merge could not be performed.
    """
    if video_path is None:
        return None, "⚠️ No video found! Please generate the images first."

    if global_audio_path is None:
        return None, "⚠️ No audio found! Please generate Urdu TTS first."

    # The stored path can outlive the file it points to.
    if not os.path.exists(global_audio_path):
        return None, "⚠️ No audio found! Please generate Urdu TTS first."

    final_video_path = "output/final_video.mp4"

    video = ffmpeg.input(video_path)
    audio = ffmpeg.input(global_audio_path)

    try:
        ffmpeg.output(
            video,
            audio,
            final_video_path,
            vcodec="libx264",
            acodec="aac",
        ).run(overwrite_output=True)
    except (ffmpeg.Error, FileNotFoundError):
        # ffmpeg.Error: the ffmpeg process exited with an error.
        # FileNotFoundError: the ffmpeg executable could not be started.
        return None, "⚠️ Failed to merge audio and video with ffmpeg."

    return final_video_path, "✅ Video with Urdu voice-over generated successfully!"
95
+
96
+
97
### 🎬 FINAL VIDEO GENERATION PIPELINE ###
def generate_final_video(script_file, num_images):
    """Run the full pipeline: script -> images -> slideshow -> narrated video.

    Args:
        script_file: Uploaded script, forwarded unchanged to
            ``generate_images``. ``None`` means nothing was uploaded.
        num_images: Number of scenes, forwarded to ``generate_images``.

    Returns:
        A ``(final_video_path, status_message)`` tuple. The path is ``None``
        when no script was given or no images were produced.
    """
    if script_file is None:
        return None, "⚠️ Please upload a script file for image generation!"

    generated, image_status = generate_images(script_file, num_images)
    if generated:
        # One frame per second: each scene image becomes one frame.
        slideshow = images_to_video(generated, fps=1)
        merged_path, merge_status = merge_audio_video(slideshow)
        return merged_path, merge_status

    # Nothing to assemble; surface the image stage's own status message.
    return None, image_status
110
+
111
+
112
### 🚀 GRADIO UI ###
# Two tabs: one turns an uploaded script into speech, the other turns a script
# into a narrated slideshow video. Generate the audio first, because the video
# tab reuses the voice-over produced by the speech tab.
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 Urdu Text-to-Speech & AI Video Generator")

    # TTS Section
    with gr.Tab("🗣️ Urdu Text-to-Speech"):
        script_file_tts = gr.File(label="📂 Upload Urdu Script for Audio", type="filepath")
        generate_audio_btn = gr.Button("🎙️ Generate Audio", variant="primary")
        audio_output = gr.Audio(label="🔊 Urdu Speech Output", interactive=False)
        audio_status = gr.Textbox(label="ℹ️ Status", interactive=False)

        generate_audio_btn.click(text_to_speech, inputs=[script_file_tts], outputs=[audio_output, audio_status])

    # Video Generation Section
    with gr.Tab("🎥 AI Video Generator"):
        script_file_video = gr.File(label="📂 Upload Urdu Script for Images", type="filepath")
        num_images = gr.Number(label="📸 Number of Scenes", value=3, minimum=1, maximum=10, step=1)
        generate_video_btn = gr.Button("🎬 Generate Video", variant="primary")
        video_output = gr.Video(label="🎞️ Generated Video")
        video_status = gr.Textbox(label="ℹ️ Status", interactive=False)

        generate_video_btn.click(generate_final_video, inputs=[script_file_video, num_images], outputs=[video_output, video_status])

demo.launch()