shukdevdattaEX committed on
Commit
514f4b0
·
verified ·
1 Parent(s): ceca5d1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +726 -0
app.py ADDED
@@ -0,0 +1,726 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from openai import OpenAI
4
+ import base64
5
+ import json
6
+ from PIL import Image
7
+ import io
8
+
9
# Module-level OpenAI client; populated by initialize_client().
client = None


def initialize_client(api_key):
    """Create the module-level OpenRouter client from ``api_key``.

    Args:
        api_key: The OpenRouter API key. Surrounding whitespace is ignored.

    Returns:
        True when a client was created and stored in the global ``client``;
        False when ``api_key`` is missing, blank, or not a string (the global
        ``client`` is left untouched in that case).
    """
    global client

    # Reject None and non-string values up front so callers get False
    # rather than an AttributeError from .strip().
    if not isinstance(api_key, str):
        return False

    key = api_key.strip()
    if not key:
        return False

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=key,
    )
    return True
22
+
23
def encode_image(image):
    """Encode an image as a base64 JPEG string.

    Args:
        image: A ``PIL.Image.Image``, any other object accepted by
            ``Image.fromarray``, or ``None``.

    Returns:
        The base64 text of the image re-encoded as a quality-95 JPEG, or
        ``None`` when ``image`` is ``None``.
    """
    if image is None:
        return None

    # Anything that is not already a PIL image is converted via fromarray.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # The image is saved as JPEG below, so normalise every other mode to RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    with io.BytesIO() as buffered:
        image.save(buffered, format="JPEG", quality=95)
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
43
+
44
def create_message_content(text, images=None):
    """Build the ``content`` list of a chat message from text and images.

    Image parts are emitted first, each as an ``image_url`` entry holding a
    base64 ``data:image/jpeg`` URL, followed by a single ``text`` part when
    ``text`` is not blank.

    Args:
        text: The prompt text; ``None`` or whitespace-only text is omitted.
        images: Optional iterable of images; ``None`` entries are skipped.

    Returns:
        A list of content-part dicts. When neither an image nor text was
        added, a single default text part is returned instead.
    """
    content = []

    # Images go first so the text part follows them in the message.
    for img in images or ():
        if img is None:
            continue
        img_base64 = encode_image(img)
        if img_base64:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_base64}"
                },
            })

    # The text is appended unmodified; strip() is only used to detect blanks.
    if text and text.strip():
        content.append({"type": "text", "text": text})

    return content or [{"type": "text", "text": "Please analyze this content."}]
69
+
70
def _failure_payload(message):
    """Serialize a failed result in the JSON shape the UI callbacks parse."""
    return json.dumps({
        "success": False,
        "error": message,
        "response": "",
        "reasoning": "",
    })


def process_request(api_key, task_type, image1=None, image2=None, image3=None, image4=None, text_input="", enable_reasoning=False):
    """Validate the inputs, call the model, and return the result as JSON.

    Args:
        api_key: OpenRouter API key; blank or non-string keys are rejected.
        task_type: One of "ocr", "chart", "video", "multimodal" or
            "reasoning"; selects validation rules and the default prompt.
        image1, image2, image3, image4: Optional images; ``None`` slots are
            ignored.
        text_input: Optional prompt. ``None`` is treated as empty. When
            blank, a task-specific default prompt is used.
        enable_reasoning: Request reasoning output from the model. Always
            on for the "reasoning" task.

    Returns:
        A JSON string with the keys ``success``, ``error``, ``response`` and
        ``reasoning``; successful results also carry ``task_type`` and
        ``image_count``. Failures are reported in ``error`` rather than raised.
    """
    if not (isinstance(api_key, str) and api_key.strip()):
        return _failure_payload("Please enter a valid OpenRouter API key.")

    # A cleared text field may arrive as None; normalise before .strip().
    text_input = text_input or ""
    has_text = bool(text_input.strip())

    # Collect all valid images
    images = [img for img in (image1, image2, image3, image4) if img is not None]

    # Validate inputs based on task type
    if task_type in ("ocr", "chart", "multimodal") and not images and not has_text:
        return _failure_payload("Please upload at least one image or enter text.")

    if task_type == "reasoning" and not has_text:
        return _failure_payload("Please enter a question or problem to solve.")

    # Set default prompts based on task type
    if not has_text:
        prompts = {
            "ocr": "Extract and analyze all text from this image. Provide a detailed analysis of the content, structure, and any key information.",
            "chart": "Analyze this chart in detail. Describe the type of chart, extract all data points, identify trends, and provide insights.",
            "video": "Analyze this video content frame by frame. Describe what you see and provide comprehensive insights.",
            "multimodal": f"Analyze these {len(images)} images. Compare and contrast them, identify relationships, and provide comprehensive insights.",
        }
        text_input = prompts.get(task_type, "Please analyze this content.")

    try:
        # Use a client owned by this request. A shared module-level client
        # can be rebound by a concurrent request carrying a different key
        # between creation and use.
        request_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key.strip(),
        )

        api_params = {
            "model": "nvidia/nemotron-nano-12b-v2-vl:free",
            "messages": [{
                "role": "user",
                "content": create_message_content(text_input, images if images else None),
            }],
            "max_tokens": 3000,
        }

        # Add reasoning if enabled
        if enable_reasoning or task_type == "reasoning":
            api_params["extra_body"] = {"reasoning": {"enabled": True}}

        response = request_client.chat.completions.create(**api_params)
        message = response.choices[0].message

        # reasoning_details is optional on the message. default=str keeps an
        # unexpected non-JSON value from discarding the whole answer.
        details = getattr(message, "reasoning_details", None)
        reasoning_details = json.dumps(details, indent=2, default=str) if details else ""

        return json.dumps({
            "success": True,
            "error": "",
            # content may be None; keep "response" a string like in failures.
            "response": message.content or "",
            "reasoning": reasoning_details,
            "task_type": task_type,
            "image_count": len(images),
        })

    except Exception as e:
        # UI boundary: report any failure as a payload instead of raising.
        return _failure_payload(f"Error: {str(e)}")
155
+
156
# Custom stylesheet handed to gr.Blocks(css=...): dark purple/pink gradient
# theme with translucent, blurred panels.
custom_css = """
/* Base styling */
:root {
    --primary-purple: #7e22ce;
    --primary-pink: #db2777;
    --bg-dark: #0f172a;
    --bg-darker: #020617;
    --border-color: rgba(168, 85, 247, 0.3);
}

body, .gradio-container {
    background: linear-gradient(135deg, #1e1b4b 0%, #7e22ce 50%, #1e1b4b 100%) !important;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
}

/* Main container */
.main-container {
    max-width: 1400px;
    margin: 0 auto;
    padding: 20px;
}

/* Header styling */
#header-section {
    background: rgba(0, 0, 0, 0.3);
    backdrop-filter: blur(20px);
    border-radius: 24px;
    padding: 32px;
    margin-bottom: 24px;
    border: 1px solid var(--border-color);
    box-shadow: 0 8px 32px rgba(126, 34, 206, 0.2);
}

#header-section h1 {
    color: white;
    font-size: 2.5rem;
    font-weight: 700;
    margin: 0;
    letter-spacing: -0.02em;
}

#header-section p {
    color: #c084fc;
    font-size: 1.1rem;
    margin: 8px 0 0 0;
}

/* API Key Section */
#api-key-container {
    background: linear-gradient(135deg, rgba(126, 34, 206, 0.4) 0%, rgba(219, 39, 119, 0.4) 100%);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 28px;
    margin-bottom: 24px;
    border: 1px solid rgba(168, 85, 247, 0.4);
    box-shadow: 0 8px 32px rgba(219, 39, 119, 0.2);
}

#api-key-container .label-wrap {
    color: white !important;
    font-weight: 600;
}

/* Input fields */
.gr-textbox, .gr-file, .gr-image {
    background: rgba(0, 0, 0, 0.4) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 16px !important;
    color: white !important;
    backdrop-filter: blur(10px);
}

.gr-textbox:focus, .gr-file:focus, .gr-image:focus {
    border-color: #a855f7 !important;
    box-shadow: 0 0 0 3px rgba(168, 85, 247, 0.2) !important;
}

/* Tabs */
.tab-nav {
    background: rgba(0, 0, 0, 0.3) !important;
    backdrop-filter: blur(20px) !important;
    border-radius: 20px !important;
    padding: 8px !important;
    border: 1px solid rgba(168, 85, 247, 0.2) !important;
    gap: 8px !important;
}

.tab-nav button {
    background: transparent !important;
    color: #c084fc !important;
    border-radius: 14px !important;
    padding: 14px 24px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
    border: none !important;
}

.tab-nav button:hover {
    background: rgba(255, 255, 255, 0.05) !important;
    color: white !important;
}

.tab-nav button.selected {
    background: linear-gradient(135deg, #7e22ce 0%, #db2777 100%) !important;
    color: white !important;
    box-shadow: 0 4px 16px rgba(126, 34, 206, 0.5) !important;
}

/* Buttons */
.gr-button {
    background: linear-gradient(135deg, #7e22ce 0%, #db2777 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 14px !important;
    padding: 14px 28px !important;
    font-weight: 600 !important;
    font-size: 1rem !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 16px rgba(126, 34, 206, 0.4) !important;
}

.gr-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 24px rgba(126, 34, 206, 0.6) !important;
}

.gr-button:active {
    transform: translateY(0px);
}

.gr-button.secondary {
    background: rgba(255, 255, 255, 0.1) !important;
    backdrop-filter: blur(10px);
}

/* Output boxes */
.output-container {
    background: rgba(0, 0, 0, 0.5) !important;
    backdrop-filter: blur(20px);
    border-radius: 20px !important;
    padding: 24px !important;
    border: 1px solid var(--border-color) !important;
    min-height: 400px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
}

.output-container .label-wrap {
    color: white !important;
    font-weight: 600;
    font-size: 1.1rem;
}

.output-container textarea {
    background: rgba(0, 0, 0, 0.3) !important;
    color: #e9d5ff !important;
    border: none !important;
    font-family: 'SF Mono', 'Monaco', 'Courier New', monospace;
    font-size: 0.95rem;
    line-height: 1.6;
}

/* Reasoning box */
.reasoning-container {
    background: linear-gradient(135deg, rgba(219, 39, 119, 0.3) 0%, rgba(126, 34, 206, 0.3) 100%) !important;
    backdrop-filter: blur(20px);
    border-radius: 20px !important;
    padding: 24px !important;
    border: 1px solid rgba(236, 72, 153, 0.4) !important;
    margin-top: 20px;
    box-shadow: 0 8px 32px rgba(219, 39, 119, 0.2);
}

.reasoning-container .label-wrap {
    color: #fda4af !important;
    font-weight: 600;
    font-size: 1.1rem;
}

/* Feature cards */
.feature-card {
    background: rgba(0, 0, 0, 0.4);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 28px;
    border: 1px solid rgba(168, 85, 247, 0.2);
    transition: all 0.3s ease;
}

.feature-card:hover {
    transform: translateY(-4px);
    border-color: rgba(168, 85, 247, 0.5);
    box-shadow: 0 12px 32px rgba(126, 34, 206, 0.3);
}

.feature-card h3 {
    color: white;
    font-size: 1.3rem;
    margin-bottom: 12px;
    font-weight: 700;
}

.feature-card p {
    color: #c084fc;
    font-size: 0.95rem;
    line-height: 1.6;
}

/* Status badge */
.status-badge {
    display: inline-block;
    background: rgba(34, 197, 94, 0.2);
    border: 1px solid rgba(34, 197, 94, 0.5);
    padding: 8px 20px;
    border-radius: 12px;
    color: #86efac;
    font-weight: 600;
    font-size: 0.9rem;
}

/* Loading animation */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

.loading-spinner {
    border: 4px solid rgba(168, 85, 247, 0.2);
    border-top: 4px solid #a855f7;
    border-radius: 50%;
    width: 48px;
    height: 48px;
    animation: spin 1s linear infinite;
    margin: 0 auto;
}

/* Footer */
#footer-section {
    background: rgba(0, 0, 0, 0.3);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 24px;
    margin-top: 32px;
    text-align: center;
    border: 1px solid rgba(168, 85, 247, 0.2);
    color: #c084fc;
}

/* Markdown styling */
.markdown-content h1, .markdown-content h2, .markdown-content h3 {
    color: white !important;
}

.markdown-content p {
    color: #e9d5ff !important;
}

/* Scrollbar */
::-webkit-scrollbar {
    width: 10px;
}

::-webkit-scrollbar-track {
    background: rgba(0, 0, 0, 0.3);
    border-radius: 10px;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(135deg, #7e22ce 0%, #db2777 100%);
    border-radius: 10px;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(135deg, #6b21a8 0%, #be185d 100%);
}

/* Responsive adjustments */
@media (max-width: 768px) {
    #header-section h1 {
        font-size: 1.8rem;
    }

    #header-section p {
        font-size: 0.95rem;
    }

    .feature-card {
        padding: 20px;
    }
}
"""
448
+
449
# Build the Gradio interface: header, API-key field, one tab per task type,
# a feature summary and a footer. Every tab callback receives the API key
# straight from the textbox and returns plain strings for its output boxes.
with gr.Blocks(css=custom_css, theme=gr.themes.Base(), title="NVIDIA Nemotron Nano 2 VL") as demo:

    # Hidden state for API key (not referenced by any callback below)
    api_key_state = gr.State("")

    # Header
    with gr.Row(elem_id="header-section"):
        with gr.Column(scale=8):
            gr.Markdown("""
            # ⚡ NVIDIA Nemotron Nano 2 VL
            ### 12B Parameter Multimodal Reasoning Model
            Advanced document intelligence, chart analysis, video understanding, and reasoning capabilities
            """, elem_classes="markdown-content")
        with gr.Column(scale=2):
            gr.HTML("""
            <div style='text-align: right; padding: 12px 20px; background: rgba(34, 197, 94, 0.2); border-radius: 12px; border: 1px solid rgba(34, 197, 94, 0.5);'>
            <b style='color: #86efac; font-size: 0.9rem;'>✓ FREE ACCESS</b>
            </div>
            """)

    # API Key Section
    with gr.Row(elem_id="api-key-container"):
        with gr.Column():
            gr.Markdown("""
            ### 🔐 OpenRouter API Key
            Enter your OpenRouter API key to access the NVIDIA Nemotron model. Get yours at [openrouter.ai](https://openrouter.ai)
            """, elem_classes="markdown-content")
            api_key_input = gr.Textbox(
                label="API Key",
                placeholder="sk-or-v1-...",
                type="password",
                scale=4,
                elem_classes="api-key-input"
            )

    # Tabs for different functionalities
    with gr.Tabs(elem_classes="tab-nav"):

        # OCR & Document Intelligence Tab
        with gr.Tab("📄 OCR & Document", elem_classes="tab-item"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📤 Upload Document")
                    ocr_image = gr.Image(type="pil", label="Upload Image/Document", height=300)
                    ocr_text = gr.Textbox(
                        label="Instructions (Optional)",
                        placeholder="Describe what you want to extract or analyze...",
                        lines=4
                    )
                    ocr_btn = gr.Button("🔍 Analyze Document", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### 📊 Analysis Result")
                    ocr_output = gr.Textbox(
                        label="Response",
                        lines=15,
                        elem_classes="output-container",
                        show_copy_button=True
                    )
                    # Created hidden; nothing in this block makes it visible.
                    ocr_reasoning = gr.Textbox(
                        label="Reasoning Details",
                        lines=5,
                        elem_classes="reasoning-container",
                        visible=False
                    )

            def ocr_wrapper(api_key, image, text):
                """Run the "ocr" task and return (response, reasoning) text."""
                result = process_request(api_key, "ocr", image1=image, text_input=text)
                data = json.loads(result)
                if data["success"]:
                    return data["response"], data["reasoning"] if data["reasoning"] else ""
                else:
                    return f"❌ {data['error']}", ""

            ocr_btn.click(
                fn=ocr_wrapper,
                inputs=[api_key_input, ocr_image, ocr_text],
                outputs=[ocr_output, ocr_reasoning]
            )

        # Chart Analysis Tab
        with gr.Tab("📊 Chart Analysis", elem_classes="tab-item"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📈 Upload Chart/Graph")
                    chart_image = gr.Image(type="pil", label="Upload Chart", height=300)
                    chart_question = gr.Textbox(
                        label="Question (Optional)",
                        placeholder="What insights do you want from this chart?",
                        lines=3
                    )
                    chart_btn = gr.Button("📈 Analyze Chart", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### 📊 Chart Insights")
                    chart_output = gr.Textbox(
                        label="Response",
                        lines=15,
                        elem_classes="output-container",
                        show_copy_button=True
                    )

            def chart_wrapper(api_key, image, question):
                """Run the "chart" task and return the response or error text."""
                result = process_request(api_key, "chart", image1=image, text_input=question)
                data = json.loads(result)
                if data["success"]:
                    return data["response"]
                else:
                    return f"❌ {data['error']}"

            chart_btn.click(
                fn=chart_wrapper,
                inputs=[api_key_input, chart_image, chart_question],
                outputs=[chart_output]
            )

        # Video Understanding Tab
        with gr.Tab("🎥 Video Understanding", elem_classes="tab-item"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🎬 Upload Video")
                    gr.Markdown("""
                    **Note**: Full video analysis requires frame extraction and EVS implementation.
                    Upload video frames as images in the Multi-Image tab for now.
                    """)
                    video_input = gr.Video(label="Upload Video")
                    video_question = gr.Textbox(
                        label="Question",
                        placeholder="What would you like to know about this video?",
                        lines=4
                    )
                    video_btn = gr.Button("🎬 Analyze Video", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### 🎥 Video Analysis")
                    video_output = gr.Textbox(
                        label="Response",
                        lines=15,
                        elem_classes="output-container"
                    )

            def video_wrapper(api_key, video, question):
                """Return a fixed placeholder; the inputs are not used."""
                return (
                    "🎥 **Video Analysis Placeholder**\n\n"
                    "Video analysis requires:\n\n"
                    "1. Frame extraction from video\n"
                    "2. EVS (Efficient Video Sampling) implementation\n"
                    "3. Multi-frame context processing\n\n"
                    "For now, extract key frames and use the Multi-Image Analysis tab.\n\n"
                    "Full implementation coming soon!"
                )

            video_btn.click(
                fn=video_wrapper,
                inputs=[api_key_input, video_input, video_question],
                outputs=[video_output]
            )

        # Advanced Reasoning Tab
        with gr.Tab("🧠 Advanced Reasoning", elem_classes="tab-item"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("""
                    ### 💡 Complex Problem Solving
                    Ask complex questions and get detailed step-by-step reasoning
                    """)
                    reasoning_input = gr.Textbox(
                        label="Question",
                        placeholder="Ask a complex reasoning question...\n\nExamples:\n- How many R's are in 'strawberry'?\n- Solve this logic puzzle...\n- Calculate the average speed...",
                        lines=10
                    )
                    reasoning_btn = gr.Button("💡 Start Reasoning", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### 🎯 Answer & Reasoning")
                    reasoning_output = gr.Textbox(
                        label="Response",
                        lines=12,
                        elem_classes="output-container",
                        show_copy_button=True
                    )
                    reasoning_details = gr.Textbox(
                        label="🧠 Reasoning Process",
                        lines=8,
                        elem_classes="reasoning-container",
                        show_copy_button=True
                    )

            def reasoning_wrapper(api_key, question):
                """Run the "reasoning" task and return (response, reasoning)."""
                result = process_request(api_key, "reasoning", text_input=question, enable_reasoning=True)
                data = json.loads(result)
                if data["success"]:
                    reasoning_text = data["reasoning"] if data["reasoning"] else "Reasoning details not available for this response."
                    return data["response"], reasoning_text
                else:
                    return f"❌ {data['error']}", ""

            reasoning_btn.click(
                fn=reasoning_wrapper,
                inputs=[api_key_input, reasoning_input],
                outputs=[reasoning_output, reasoning_details]
            )

        # Multi-Image Analysis Tab
        with gr.Tab("🖼️ Multi-Image Analysis", elem_classes="tab-item"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🖼️ Upload Multiple Images (1-4)")
                    with gr.Row():
                        multi_image1 = gr.Image(type="pil", label="Image 1", height=200)
                        multi_image2 = gr.Image(type="pil", label="Image 2", height=200)
                    with gr.Row():
                        multi_image3 = gr.Image(type="pil", label="Image 3", height=200)
                        multi_image4 = gr.Image(type="pil", label="Image 4", height=200)
                    multi_question = gr.Textbox(
                        label="Question (Optional)",
                        placeholder="Compare these images, find differences, identify patterns...",
                        lines=3
                    )
                    multi_btn = gr.Button("🔍 Analyze Images", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### 🎨 Multi-Image Insights")
                    multi_output = gr.Textbox(
                        label="Response",
                        lines=20,
                        elem_classes="output-container",
                        show_copy_button=True
                    )

            def multi_wrapper(api_key, img1, img2, img3, img4, question):
                """Run the "multimodal" task on up to four images."""
                result = process_request(
                    api_key, "multimodal",
                    image1=img1, image2=img2, image3=img3, image4=img4,
                    text_input=question
                )
                data = json.loads(result)
                if data["success"]:
                    return f"🖼️ **Analyzing {data['image_count']} image(s)**\n\n{data['response']}"
                else:
                    return f"❌ {data['error']}"

            multi_btn.click(
                fn=multi_wrapper,
                inputs=[api_key_input, multi_image1, multi_image2, multi_image3, multi_image4, multi_question],
                outputs=[multi_output]
            )

    # Features Section
    gr.Markdown("## 🚀 Key Features", elem_classes="markdown-content")
    with gr.Row():
        with gr.Column(elem_classes="feature-card"):
            gr.Markdown("""
            ### ⚡ Hybrid Architecture
            Transformer-Mamba fusion for efficient processing with higher throughput and lower latency
            """)

        with gr.Column(elem_classes="feature-card"):
            gr.Markdown("""
            ### 📊 74% Benchmark Average
            Leading performance across MMMU, MathVista, AI2D, OCRBench, ChartQA, DocVQA, and more
            """)

        with gr.Column(elem_classes="feature-card"):
            gr.Markdown("""
            ### 🎥 EVS Technology
            Efficient Video Sampling for long-form video understanding with reduced inference cost
            """)

    # Footer
    with gr.Row(elem_id="footer-section"):
        gr.Markdown("""
        Powered by **NVIDIA Nemotron Nano 12B 2 VL** via OpenRouter API | Open-weights model with permissive NVIDIA license

        Built with ❤️ using Gradio | [Documentation](https://docs.nvidia.com) | [Report Issues](https://github.com)
        """, elem_classes="markdown-content")
718
+
719
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # Listen on all interfaces at port 7860, request a public share link,
    # and show callback errors in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
    )