GLAkavya committed on
Commit
7112db6
·
verified ·
1 Parent(s): fef93c0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +206 -0
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ import torch
5
+ import gradio as gr
6
+ import google.generativeai as genai
7
+ from PIL import Image
8
+ from huggingface_hub import login
9
+ from diffusers import StableVideoDiffusionPipeline
10
+ from diffusers.utils import export_to_video
11
+
12
# ── ENV SETUP ───────────────────────────────────────────────────────────────
# Credentials are read from the environment. A missing variable yields "",
# in which case the corresponding service is left unconfigured.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")

if HF_TOKEN:
    login(token=HF_TOKEN)

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# ── LOAD SVD MODEL ONCE ─────────────────────────────────────────────────────
# The pipeline is created at import time and shared by every request.
print("⏳ Loading Stable Video Diffusion …")
svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
)
svd_pipe.enable_model_cpu_offload()  # saves VRAM
print("✅ SVD model ready.")

# ── GEMINI HELPER ───────────────────────────────────────────────────────────
# System instruction handed to the Gemini model for every ad-copy request.
GEMINI_SYSTEM = (
    "You are an expert ad copywriter. "
    "Always respond with ONLY valid JSON — no markdown, no extra text. "
    "Keys required: hook, script, cta, video_prompt."
)
38
+
39
def _parse_ad_json(reply_text: str) -> dict:
    """Parse the model reply into a dict, tolerating a surrounding ``` fence.

    Raises:
        json.JSONDecodeError: The (unfenced) reply is not valid JSON.
        ValueError: The reply is valid JSON but not a JSON object.
    """
    raw = reply_text.strip()
    if raw.startswith("```"):
        # Keep only the text between the opening fence and the next fence.
        raw = raw.split("```")[1]
        if raw[:4].lower() == "json":
            raw = raw[4:]
        raw = raw.strip()

    data = json.loads(raw)
    if not isinstance(data, dict):
        # Callers read fields with .get(); anything but an object is unusable.
        raise ValueError(
            f"expected a JSON object from Gemini, got {type(data).__name__}"
        )
    return data


def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
    """Send image + context to Gemini 2.5 Flash and get structured ad copy.

    Args:
        pil_image: Product photo. It is converted to RGB if necessary and
            uploaded as JPEG bytes.
        user_desc: Optional free-text product description. ``None``, empty or
            blank text leaves the description line of the prompt empty.
        language: "English", "Hindi" or "Hinglish"; any other value falls
            back to the English rule.
        style: "Fun", "Premium" or "Energetic"; any other value falls back
            to the "Fun" tone.

    Returns:
        The JSON object parsed from the model reply. The prompt requests the
        keys ``hook``, ``script``, ``cta`` and ``video_prompt``; their
        presence is not verified here.

    Raises:
        json.JSONDecodeError: The reply is not valid JSON.
        ValueError: The reply is valid JSON but not an object.
        Exceptions raised by the Gemini client propagate unchanged.
    """
    import io  # function-scope import: only needed for the in-memory JPEG buffer

    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction=GEMINI_SYSTEM,
    )

    lang_map = {
        "English": "Write everything in English.",
        "Hindi": "सब कुछ हिंदी में लिखें।",
        "Hinglish": "Write in Hinglish (mix of Hindi and English).",
    }
    style_map = {
        "Fun": "tone: playful, witty, youthful",
        "Premium": "tone: luxurious, sophisticated, aspirational",
        "Energetic": "tone: high-energy, bold, action-packed",
    }

    # Tolerate a missing description instead of crashing on None.strip().
    description = (user_desc or "").strip()
    desc_line = f"Product description: {description}" if description else ""

    prompt = f"""
Analyze this product image and create a compelling social-media video ad.

{desc_line}
Language rule : {lang_map.get(language, lang_map['English'])}
Style rule : {style_map.get(style, style_map['Fun'])}

Return ONLY this JSON structure:
{{
"hook": "attention-grabbing opening line (1–2 sentences)",
"script": "full 15–20 second voiceover script",
"cta": "call-to-action phrase",
"video_prompt": "detailed cinematic advertising scene description for video generation"
}}
"""

    # Convert PIL → bytes for Gemini. JPEG cannot store alpha or palette
    # modes (RGBA, LA, P, ...), so normalize to RGB before encoding.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
    buf = io.BytesIO()
    rgb_image.save(buf, format="JPEG")
    image_bytes = buf.getvalue()

    response = model.generate_content(
        [
            {"mime_type": "image/jpeg", "data": image_bytes},
            prompt,
        ]
    )

    return _parse_ad_json(response.text)
94
+
95
+
96
+ # ── VIDEO GENERATION ──────────────────────────────────────────────────────
97
def generate_video(pil_image: Image.Image) -> str:
    """Run SVD on the product image and return path to .mp4 file.

    Args:
        pil_image: Source image; it is converted to RGB and resized to
            1024x576 before being passed to the pipeline.

    Returns:
        Path of a newly created temporary ``.mp4`` file. The file is not
        deleted by this function; the caller owns it.

    Raises:
        Whatever the pipeline or the video exporter raises. If the export
        fails, the temporary file is removed before the error propagates.
    """
    fps = 7  # one value for both generation conditioning and the exported file

    # SVD works best with 1024×576
    img = pil_image.convert("RGB").resize((1024, 576))

    frames = svd_pipe(
        image=img,
        num_frames=14,
        fps=fps,
        decode_chunk_size=4,
        generator=torch.manual_seed(42),
    ).frames[0]

    # Reserve a unique path, then close our handle before the exporter writes
    # to it: an open handle leaks a descriptor and blocks reopening on Windows.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        video_path = tmp_file.name

    try:
        export_to_video(frames, video_path, fps=fps)
    except Exception:
        # Do not leave an empty/partial file behind when the export fails.
        os.remove(video_path)
        raise
    return video_path
113
+
114
+
115
+ # ── MAIN PIPELINE ─────────────────────────────────────────────────────────
116
def generate_ad(image, user_desc, language, style):
    """Produce the ad video and ad copy for one uploaded product image.

    Args:
        image: The uploaded image, either a PIL image or an array accepted by
            ``Image.fromarray``; ``None`` when nothing was uploaded.
        user_desc: Optional product description (``None`` is treated as "").
        language: Language choice forwarded to ``call_gemini``.
        style: Style choice forwarded to ``call_gemini``.

    Returns:
        A 4-tuple ``(video_path, hook, script, cta)``. ``video_path`` is
        ``None`` whenever no video was produced. Input and Gemini failures
        are reported in the hook slot with empty script/cta; a video failure
        keeps the generated copy and prefixes the script with the error.
    """
    if image is None:
        return None, "⚠️ Please upload a product image.", "", ""

    if isinstance(image, Image.Image):
        pil_image = image
    else:
        try:
            pil_image = Image.fromarray(image)
        except Exception as e:
            # Report the bad input directly instead of passing a non-image on.
            return None, f"❌ Image error: {e}", "", ""

    # Step 1 — Gemini ad copy
    try:
        ad_data = call_gemini(pil_image, user_desc or "", language, style)
    except Exception as e:
        return None, f"❌ Gemini error: {e}", "", ""

    if not isinstance(ad_data, dict):
        # Valid JSON that is not an object (e.g. a list) has no fields to read.
        return None, "❌ Gemini error: response was not a JSON object", "", ""

    # Coerce to text so a null or non-string field cannot break the outputs.
    # generate_video takes only the image, so "video_prompt" is not read here.
    hook, script, cta = (
        "" if value is None else str(value)
        for value in (ad_data.get(key) for key in ("hook", "script", "cta"))
    )

    # Step 2 — SVD video
    try:
        video_path = generate_video(pil_image)
    except Exception as e:
        return None, hook, f"❌ Video error: {e}\n\n{script}", cta

    # Step 3 — Return everything
    return video_path, hook, script, cta
144
+
145
+
146
# ── GRADIO UI ───────────────────────────────────────────────────────────────
# Custom CSS: #title and #sub style the two header Markdown elements below.
css = """
#title { text-align: center; font-size: 2.2rem; font-weight: 800; margin-bottom: 0.2rem; }
#sub { text-align: center; color: #888; margin-bottom: 1.5rem; }
.card { border-radius: 12px; padding: 1rem; background: #1a1a2e; }
"""

with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:

    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown("Upload a product image → get a cinematic ad video + copy in seconds.", elem_id="sub")

    with gr.Row():
        # ── LEFT COLUMN — inputs ────────────────────────────────────────────
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="📸 Upload Product Image",
                type="pil",
                height=300,
            )
            desc_input = gr.Textbox(
                label="📝 Describe your product (optional)",
                placeholder="e.g. Organic honey sourced from Himalayan farms …",
                lines=3,
            )
            with gr.Row():
                lang_dropdown = gr.Dropdown(
                    choices=["English", "Hindi", "Hinglish"],
                    value="English",
                    label="🌐 Language",
                )
                style_dropdown = gr.Dropdown(
                    choices=["Fun", "Premium", "Energetic"],
                    value="Fun",
                    label="🎨 Style",
                )
            gen_btn = gr.Button("🚀 Generate Ad", variant="primary", size="lg")

        # ── RIGHT COLUMN — outputs ──────────────────────────────────────────
        with gr.Column(scale=1):
            video_out = gr.Video(label="🎥 Generated Video", height=300)
            hook_out = gr.Textbox(label="⚡ Hook", lines=2, interactive=False)
            script_out = gr.Textbox(label="📄 Script", lines=5, interactive=False)
            cta_out = gr.Textbox(label="🎯 CTA", lines=1, interactive=False)

    # Output order must match the 4-tuple returned by generate_ad.
    gen_btn.click(
        fn=generate_ad,
        inputs=[image_input, desc_input, lang_dropdown, style_dropdown],
        outputs=[video_out, hook_out, script_out, cta_out],
    )

    gr.Markdown(
        "---\n"
        "**How it works:** "
        "1️⃣ Gemini 2.5 Flash reads your image and writes ad copy + a cinematic prompt. "
        "2️⃣ Stable Video Diffusion turns your image into a short video. "
        "3️⃣ You get a ready-to-post reel!"
    )

if __name__ == "__main__":
    demo.launch()