hari7261 committed on
Commit
fd4d7fa
·
verified ·
1 Parent(s): 3b7c58b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +409 -0
app.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ import base64
5
+ import io
6
+ import json
7
+ from PIL import Image
8
+ import numpy as np
9
+
10
# Hugging Face access token, read from the environment. Surrounding whitespace
# (e.g. a trailing newline from a pasted secret) is stripped so it cannot end
# up inside an HTTP header value.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Headers shared by every Inference API request. The Authorization header is
# only included when a token is configured, so a bare "Bearer " value with no
# credential is never sent.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# ── Model IDs ─────────────────────────────────────────────────────────────────
TEXT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # tiny chat LLM
IMAGE_MODEL = "black-forest-labs/FLUX.1-schnell"  # fast image gen
AUDIO_MODEL = "facebook/musicgen-small"  # audio/music gen
VIDEO_MODEL = "ali-vilab/text-to-video-ms-1.7b"  # text-to-video
MULTIMODAL_MODEL = "Salesforce/blip2-opt-2.7b"  # image+text (VQA / caption)
20
+
21
+ # ── HF Inference API helpers ──────────────────────────────────────────────────
22
+
23
def query_text(messages: list, system: str = "") -> str:
    """Chat with ``TEXT_MODEL`` through the HF serverless Inference API.

    The chat-completions route is tried first. If it does not answer with
    HTTP 200, or its body does not have the expected shape, the plain
    text-generation route is tried with the message contents joined by
    newlines.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        system: Optional system prompt. When non-empty and ``messages`` does
            not already start with a system message, it is prepended.

    Returns:
        The model's reply, or a "⚠️ ..." string describing why the primary
        request failed. Network failures are reported the same way rather
        than raised.
    """
    if system and not (messages and messages[0].get("role") == "system"):
        messages = [{"role": "system", "content": system}, *messages]

    base_url = f"https://api-inference.huggingface.co/models/{TEXT_MODEL}"

    # Primary route: chat completions.
    try:
        r = requests.post(
            f"{base_url}/v1/chat/completions",
            headers=HEADERS,
            json={
                "model": TEXT_MODEL,
                "messages": messages,
                "max_tokens": 1024,
                "temperature": 0.7,
            },
            timeout=60,
        )
    except requests.RequestException as exc:
        error = f"Request failed: {exc}"
    else:
        if r.status_code == 200:
            try:
                return r.json()["choices"][0]["message"]["content"]
            except (ValueError, LookupError, TypeError):
                # 200 with a body that is not JSON or lacks choices[0].message.
                error = "Unexpected response from the chat endpoint."
        else:
            error = f"Error {r.status_code}: {r.text[:300]}"

    # Fallback: plain text-generation endpoint. Any failure here reports the
    # primary route's error, as the original did.
    prompt = "\n".join(str(m.get("content") or "") for m in messages)
    try:
        r2 = requests.post(
            base_url,
            headers=HEADERS,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 512}},
            timeout=60,
        )
    except requests.RequestException:
        return f"⚠️ {error}"
    if r2.status_code != 200:
        return f"⚠️ {error}"
    try:
        result = r2.json()
    except ValueError:
        return f"⚠️ {error}"
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", str(result))
    return str(result)
45
+
46
+
47
def query_image(prompt: str) -> Image.Image | str:
    """Generate an image from ``prompt`` with ``IMAGE_MODEL``.

    Returns:
        The decoded PIL image on success; otherwise a "⚠️ ..." string
        describing the failure (request error, non-200 status, or a response
        body that is not a readable image). Nothing is raised for these cases,
        so callers can rely on the ``Image | str`` contract.
    """
    url = f"https://api-inference.huggingface.co/models/{IMAGE_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}"
    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}"
    try:
        image = Image.open(io.BytesIO(r.content))
        # Image.open is lazy; load() forces decoding so that a corrupt or
        # truncated payload is reported here rather than later in the UI.
        image.load()
    except OSError as exc:  # PIL.UnidentifiedImageError is an OSError subclass
        return f"⚠️ Could not decode the returned image: {exc}"
    return image
54
+
55
+
56
def query_audio(prompt: str) -> str | None:
    """Generate audio from ``prompt`` with ``AUDIO_MODEL``.

    Returns:
        Path of a newly created temporary file holding the response body, or
        ``None`` when the request fails, returns a non-200 status, or has an
        empty body.
    """
    import tempfile  # local import keeps this block self-contained

    url = f"https://api-inference.huggingface.co/models/{AUDIO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=120)
    except requests.RequestException:
        return None
    if r.status_code != 200 or not r.content:
        return None
    # A unique file per call: a single fixed path would be overwritten when two
    # users generate audio at the same time.
    # NOTE(review): the response Content-Type is not inspected; the ".wav"
    # suffix is kept from the original — confirm the endpoint returns WAV.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".wav", delete=False
    ) as f:
        f.write(r.content)
    return f.name
66
+
67
+
68
def query_video(prompt: str) -> str | None:
    """Generate a short video from ``prompt`` with ``VIDEO_MODEL``.

    Returns:
        Path of a newly created temporary file holding the response body, or
        ``None`` when the request fails, returns a non-200 status, or has an
        empty body.
    """
    import tempfile  # local import keeps this block self-contained

    url = f"https://api-inference.huggingface.co/models/{VIDEO_MODEL}"
    try:
        r = requests.post(url, headers=HEADERS, json={"inputs": prompt}, timeout=180)
    except requests.RequestException:
        return None
    if r.status_code != 200 or not r.content:
        return None
    # A unique file per call: a single fixed path would be overwritten when two
    # users generate video at the same time.
    # NOTE(review): the response Content-Type is not inspected; the ".mp4"
    # suffix is kept from the original — confirm the endpoint returns MP4.
    with tempfile.NamedTemporaryFile(
        prefix="generated_video_", suffix=".mp4", delete=False
    ) as f:
        f.write(r.content)
    return f.name
78
+
79
+
80
def _extract_answer(result: object) -> str:
    """Pull the answer text out of a decoded Inference API response."""
    # A non-empty list is unwrapped to its first element; an empty list falls
    # through to str() instead of raising IndexError.
    first = result[0] if isinstance(result, list) and result else result
    if isinstance(first, dict):
        for key in ("answer", "generated_text"):
            if key in first:
                return str(first[key])
    return str(first)


def query_multimodal(image: Image.Image | None, text: str) -> tuple[str, Image.Image | None]:
    """Ask ``MULTIMODAL_MODEL`` a question about ``image``.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
        text: The question; a blank question is replaced by a generic
            "describe this image" prompt.

    Returns:
        ``(answer, image)``. ``answer`` is the model's reply or a message
        explaining what went wrong; ``image`` is the input image passed back
        unchanged (``None`` when no image was supplied).
    """
    if image is None:
        # No image → nothing to analyse; tell the user what is missing.
        return "Please upload an image for multimodal analysis.", None

    # Encode the image as base64 PNG for the JSON payload.
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    question = (text or "").strip() or "Describe this image in detail."
    url = f"https://api-inference.huggingface.co/models/{MULTIMODAL_MODEL}"
    payload = {"inputs": {"image": b64, "question": question}}
    try:
        r = requests.post(url, headers=HEADERS, json=payload, timeout=90)
    except requests.RequestException as exc:
        return f"⚠️ Request failed: {exc}", image
    if r.status_code != 200:
        return f"⚠️ Error {r.status_code}: {r.text[:300]}", image
    try:
        result = r.json()
    except ValueError:
        return "⚠️ Unexpected (non-JSON) response from the model.", image
    return _extract_answer(result), image
102
+
103
+
104
+ # ── Chat state helper ─────────────────────────────────────────────────────────
105
+
106
def chat_respond(user_msg: str, history: list) -> tuple[str, list]:
    """Handle one chat turn for the Chat tab.

    Args:
        user_msg: Text from the input box. ``None`` or whitespace-only input
            is ignored.
        history: Previous turns as ``(user, assistant)`` pairs; ``None`` is
            treated as an empty conversation.

    Returns:
        ``("", history)`` — the empty string clears the input box, and
        ``history`` has the new ``(user_msg, reply)`` pair appended when a
        message was actually sent.
    """
    history = [] if history is None else history
    if not user_msg or not user_msg.strip():
        return "", history

    messages = [{"role": "system", "content": "You are NEXUS, an advanced AI assistant. Be helpful, thorough, and thoughtful."}]
    for human, bot in history:
        # Skip missing halves of a turn so no None content is sent to the API.
        if human:
            messages.append({"role": "user", "content": human})
        if bot:
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": user_msg})

    reply = query_text(messages)
    history.append((user_msg, reply))
    return "", history
117
+
118
+
119
def gen_image(prompt: str, progress=gr.Progress()):
    """Gradio handler for the Image Gen tab.

    ``progress`` is declared as a ``gr.Progress()`` default on purpose: that
    is how Gradio is asked to supply a progress tracker to the handler.

    Returns:
        ``(image, status)`` — the generated image and a success message, or
        ``(None, message)`` when the prompt is blank or generation failed.
    """
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Connecting to FLUX…")
    result = query_image(prompt)
    progress(1.0, desc="Done")
    if isinstance(result, str):  # error string
        return None, result
    return result, "✅ Image generated successfully!"
126
+
127
+
128
def gen_audio(prompt: str, progress=gr.Progress()):
    """Gradio handler for the Audio Gen tab.

    ``progress`` is declared as a ``gr.Progress()`` default on purpose: that
    is how Gradio is asked to supply a progress tracker to the handler.

    Returns:
        ``(path, status)`` — the generated audio file path and a success
        message, or ``(None, message)`` when the prompt is blank or
        generation failed.
    """
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Composing music…")
    path = query_audio(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Audio generated!"
    return None, "⚠️ Audio generation failed. The model may be loading – try again in a moment."
135
+
136
+
137
def gen_video(prompt: str, progress=gr.Progress()):
    """Gradio handler for the Video Gen tab.

    ``progress`` is declared as a ``gr.Progress()`` default on purpose: that
    is how Gradio is asked to supply a progress tracker to the handler.

    Returns:
        ``(path, status)`` — the generated video file path and a success
        message, or ``(None, message)`` when the prompt is blank or
        generation failed.
    """
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    progress(0.2, desc="Rendering frames…")
    path = query_video(prompt)
    progress(1.0, desc="Done")
    if path:
        return path, "✅ Video generated!"
    return None, "⚠️ Video generation failed. The model may be loading – try again in a moment."
144
+
145
+
146
def gen_multimodal(image, question: str, progress=gr.Progress()):
    """Gradio handler for the Multimodal tab.

    Forwards the uploaded image and question to ``query_multimodal`` and
    reports progress around the call.

    Returns:
        ``(answer, image)`` exactly as returned by ``query_multimodal``.
    """
    progress(0.3, desc="Analyzing image…")
    answer, img_out = query_multimodal(image, question)
    progress(1.0, desc="Done")
    return answer, img_out
151
+
152
+
153
+ # ── CSS ───────────────────────────────────────────────────────────────────────
154
# Stylesheet handed to gr.Blocks(css=...). The selectors match the
# elem_classes set on the widgets and the class names used in the gr.HTML
# header markup.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:ital,wght@0,300;0,500;0,700;1,300&display=swap');

:root {
    --bg: #0a0a0f;
    --panel: #111118;
    --border: #1e1e2e;
    --accent: #7c3aed;
    --accent2: #06b6d4;
    --accent3: #f59e0b;
    --text: #e2e8f0;
    --muted: #64748b;
    --success: #10b981;
    --danger: #ef4444;
}

body, .gradio-container { background: var(--bg) !important; font-family: 'DM Sans', sans-serif; color: var(--text); }

/* Header */
.nexus-header {
    text-align: center;
    padding: 2.5rem 1rem 1rem;
    background: linear-gradient(135deg, #0a0a0f 0%, #130d22 50%, #0a0a0f 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 1.5rem;
}
.nexus-title {
    font-family: 'Space Mono', monospace;
    font-size: clamp(2rem, 6vw, 3.5rem);
    font-weight: 700;
    background: linear-gradient(90deg, var(--accent) 0%, var(--accent2) 50%, var(--accent3) 100%);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    letter-spacing: -0.02em;
    margin: 0;
}
.nexus-sub {
    color: var(--muted);
    font-size: 0.95rem;
    margin-top: 0.5rem;
    letter-spacing: 0.08em;
    text-transform: uppercase;
}
.badge-row { display: flex; justify-content: center; gap: 0.5rem; flex-wrap: wrap; margin-top: 1rem; }
.badge {
    font-family: 'Space Mono', monospace;
    font-size: 0.65rem;
    padding: 0.25rem 0.75rem;
    border-radius: 999px;
    border: 1px solid;
    letter-spacing: 0.05em;
}
.badge-chat { border-color: var(--accent); color: var(--accent); }
.badge-img { border-color: var(--accent2); color: var(--accent2); }
.badge-audio { border-color: var(--accent3); color: var(--accent3); }
.badge-video { border-color: var(--success); color: var(--success); }
.badge-mm { border-color: var(--danger); color: var(--danger); }

/* Tabs */
.tab-nav { border-bottom: 1px solid var(--border) !important; }
.tab-nav button {
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    color: var(--muted) !important;
    padding: 0.75rem 1.25rem !important;
    border-bottom: 2px solid transparent !important;
    transition: all 0.2s !important;
}
.tab-nav button.selected {
    color: var(--accent2) !important;
    border-bottom-color: var(--accent2) !important;
}

/* Chatbot */
.chatbot .message.user { background: rgba(124,58,237,0.15) !important; border-left: 3px solid var(--accent) !important; }
.chatbot .message.bot { background: rgba(6,182,212,0.08) !important; border-left: 3px solid var(--accent2) !important; }

/* Inputs */
textarea, input[type=text] {
    background: var(--panel) !important;
    border: 1px solid var(--border) !important;
    color: var(--text) !important;
    border-radius: 8px !important;
    font-family: 'DM Sans', sans-serif !important;
}
textarea:focus, input:focus {
    border-color: var(--accent2) !important;
    box-shadow: 0 0 0 2px rgba(6,182,212,0.15) !important;
}

/* Buttons */
.btn-primary {
    background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
    color: #fff !important;
    font-family: 'Space Mono', monospace !important;
    font-size: 0.8rem !important;
    letter-spacing: 0.05em !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 0.65rem 1.5rem !important;
    cursor: pointer !important;
    transition: opacity 0.2s !important;
}
.btn-primary:hover { opacity: 0.85 !important; }

/* Status box */
.status-box {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 0.75rem 1rem;
    font-family: 'Space Mono', monospace;
    font-size: 0.75rem;
    color: var(--muted);
    min-height: 2.5rem;
}

/* Section labels */
.section-label {
    font-family: 'Space Mono', monospace;
    font-size: 0.7rem;
    letter-spacing: 0.1em;
    color: var(--muted);
    text-transform: uppercase;
    margin-bottom: 0.4rem;
}
"""
282
+
283
+ # ── Build UI ──────────────────────────────────────────────────────────────────
284
+
285
# Assemble the Gradio interface: an HTML header, one tab per capability
# (chat, image, audio, video, multimodal), and an HTML footer.
# NOTE(review): widget nesting (which components sit inside which Row/Column)
# was reconstructed from an indentation-stripped listing — confirm the layout
# against the running app.
with gr.Blocks(css=CSS, title="NEXUS AI Studio", theme=gr.themes.Base()) as demo:

    # Header
    gr.HTML("""
    <div class="nexus-header">
        <h1 class="nexus-title">⬡ NEXUS AI STUDIO</h1>
        <p class="nexus-sub">Multi-Modal Intelligence Platform · Powered by Hugging Face</p>
        <div class="badge-row">
            <span class="badge badge-chat">💬 CHAT</span>
            <span class="badge badge-img">🖼 IMAGE GEN</span>
            <span class="badge badge-audio">🎵 AUDIO GEN</span>
            <span class="badge badge-video">🎬 VIDEO GEN</span>
            <span class="badge badge-mm">🔮 MULTIMODAL</span>
        </div>
    </div>
    """)

    with gr.Tabs(elem_classes="tab-nav"):

        # ── TAB 1: Chat ───────────────────────────────────────────────────────
        with gr.Tab("💬 Chat"):
            gr.HTML('<p class="section-label">Conversational AI · Qwen 2.5</p>')
            chatbot = gr.Chatbot(
                label="",
                height=460,
                bubble_full_width=False,
                elem_classes="chatbot",
                show_label=False,
                avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"),
            )
            with gr.Row():
                chat_in = gr.Textbox(
                    placeholder="Ask me anything — code, math, science, creative writing…",
                    show_label=False,
                    scale=8,
                    lines=1,
                )
                send_btn = gr.Button("SEND →", elem_classes="btn-primary", scale=1)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")

            # Button click and Enter in the textbox trigger the same handler;
            # its first output clears the textbox, its second updates the chat.
            send_btn.click(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            chat_in.submit(chat_respond, [chat_in, chatbot], [chat_in, chatbot])
            clear_btn.click(lambda: ([], ""), None, [chatbot, chat_in])

        # ── TAB 2: Image Generation ───────────────────────────────────────────
        with gr.Tab("🖼 Image Gen"):
            gr.HTML('<p class="section-label">Text → Image · FLUX.1-schnell</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="A neon-lit cyberpunk city at midnight, rain reflections, ultra-detailed…",
                        lines=4,
                    )
                    img_btn = gr.Button("✦ GENERATE IMAGE", elem_classes="btn-primary")
                    img_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    img_out = gr.Image(label="Generated Image", show_label=False)

            img_btn.click(gen_image, [img_prompt], [img_out, img_status])

        # ── TAB 3: Audio Generation ───────────────────────────────────────────
        with gr.Tab("🎵 Audio Gen"):
            gr.HTML('<p class="section-label">Text → Music · MusicGen Small</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    audio_prompt = gr.Textbox(
                        label="Describe the music",
                        placeholder="Lo-fi hip hop beat, warm piano chords, gentle rain ambiance, 80 BPM…",
                        lines=4,
                    )
                    audio_btn = gr.Button("♪ GENERATE AUDIO", elem_classes="btn-primary")
                    audio_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    audio_out = gr.Audio(label="Generated Audio", show_label=True)

            audio_btn.click(gen_audio, [audio_prompt], [audio_out, audio_status])

        # ── TAB 4: Video Generation ───────────────────────────────────────────
        with gr.Tab("🎬 Video Gen"):
            gr.HTML('<p class="section-label">Text → Video · ModelScope 1.7B</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    video_prompt = gr.Textbox(
                        label="Describe the video",
                        placeholder="A lone astronaut walking on Mars at sunset, dust swirling around boots…",
                        lines=4,
                    )
                    video_btn = gr.Button("▶ GENERATE VIDEO", elem_classes="btn-primary")
                    video_status = gr.Textbox(label="Status", elem_classes="status-box", show_label=False)
                with gr.Column(scale=1):
                    video_out = gr.Video(label="Generated Video", show_label=True)

            video_btn.click(gen_video, [video_prompt], [video_out, video_status])

        # ── TAB 5: Multimodal ─────────────────────────────────────────────────
        with gr.Tab("🔮 Multimodal"):
            gr.HTML('<p class="section-label">Image + Text → Answer · BLIP-2</p>')
            with gr.Row():
                with gr.Column(scale=1):
                    mm_image = gr.Image(
                        label="Upload an Image",
                        type="pil",
                        show_label=True,
                    )
                    mm_question = gr.Textbox(
                        label="Your question about the image",
                        placeholder="What is happening in this image? What objects do you see?",
                        lines=3,
                    )
                    mm_btn = gr.Button("🔮 ANALYZE", elem_classes="btn-primary")
                with gr.Column(scale=1):
                    mm_answer = gr.Textbox(label="AI Answer", lines=6, show_label=True)
                    mm_img_out = gr.Image(label="Processed Image", show_label=True)

            mm_btn.click(gen_multimodal, [mm_image, mm_question], [mm_answer, mm_img_out])

    # Footer
    gr.HTML("""
    <div style="text-align:center; padding: 1.5rem; border-top: 1px solid #1e1e2e; margin-top: 1.5rem; color: #475569; font-size: 0.75rem; font-family: 'Space Mono', monospace; letter-spacing: 0.05em;">
        NEXUS AI STUDIO · Built with ❤ on Hugging Face Spaces · Models: Qwen2.5 · FLUX.1-schnell · MusicGen · ModelScope · BLIP-2
    </div>
    """)

# Launched at module level, as in the original, so running the file starts
# the app.
demo.launch()