seawolf2357 commited on
Commit
b1e5e5b
Β·
verified Β·
1 Parent(s): e8db4b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +486 -605
app.py CHANGED
@@ -1,266 +1,255 @@
1
- import gradio as gr
2
- import cv2
3
- import numpy as np
4
- import tempfile
5
- import os
6
  from pathlib import Path
7
- from typing import Optional, Tuple
8
  import torch
9
- from PIL import Image
10
-
11
- # ==============================
12
- # Model loader
13
- # ==============================
14
- def load_model(model_path: str = "yolov8-face-hf.pt", device: Optional[str] = None):
15
- from ultralytics import YOLO
16
- if device is None:
17
- if torch.cuda.is_available():
18
- device = "cuda"
19
- elif torch.backends.mps.is_available():
20
- device = "mps"
21
- else:
22
- device = "cpu"
23
- model = YOLO(model_path)
24
- model.to(device)
25
- return model, device
26
-
27
- # Load model globally
28
- model, device = load_model()
29
-
30
- # ==============================
31
- # Helper functions
32
- # ==============================
33
- def _ensure_odd(x: int) -> int:
34
- return x if x % 2 == 1 else x + 1
35
-
36
- def _choose_writer_size(w: int, h: int) -> Tuple[int, int]:
37
- return (w if w % 2 == 0 else w - 1, h if h % 2 == 0 else h - 1)
38
-
39
- def _apply_anonymization(face_roi: np.ndarray, mode: str, blur_kernel: int, mosaic: int = 15) -> np.ndarray:
40
- if face_roi.size == 0:
41
- return face_roi
42
- if mode == "Gaussian Blur":
43
- k = _ensure_odd(max(blur_kernel, 15))
44
- return cv2.GaussianBlur(face_roi, (k, k), 0)
45
- else:
46
- m = max(2, mosaic)
47
- h, w = face_roi.shape[:2]
48
- face_small = cv2.resize(face_roi, (max(1, w // m), max(1, h // m)), interpolation=cv2.INTER_LINEAR)
49
- return cv2.resize(face_small, (w, h), interpolation=cv2.INTER_NEAREST)
50
-
51
- def blur_faces_image(image_bgr, conf, iou, expand_ratio, mode, blur_kernel, mosaic):
52
- h, w = image_bgr.shape[:2]
53
- face_count = 0
54
-
55
- with torch.no_grad():
56
- results = model.predict(image_bgr, conf=conf, iou=iou, verbose=False, device=device)
57
-
58
- for r in results:
59
- boxes = r.boxes.xyxy.cpu().numpy() if hasattr(r.boxes, "xyxy") else []
60
- face_count = len(boxes)
61
- for x1, y1, x2, y2 in boxes:
62
- x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
63
-
64
- if expand_ratio > 0:
65
- bw = x2 - x1
66
- bh = y2 - y1
67
- dx = int(bw * expand_ratio)
68
- dy = int(bh * expand_ratio)
69
- x1 -= dx; y1 -= dy; x2 += dx; y2 += dy
70
-
71
- x1 = max(0, min(w, x1))
72
- x2 = max(0, min(w, x2))
73
- y1 = max(0, min(h, y1))
74
- y2 = max(0, min(h, y2))
75
- if x2 <= x1 or y2 <= y1:
76
- continue
77
-
78
- roi = image_bgr[y1:y2, x1:x2]
79
- image_bgr[y1:y2, x1:x2] = _apply_anonymization(roi, mode, blur_kernel, mosaic)
80
-
81
- return image_bgr, face_count
82
-
83
- def blur_faces_video(input_path, conf, iou, expand_ratio, mode, blur_kernel, mosaic, progress=gr.Progress()):
84
- from moviepy.editor import VideoFileClip
85
-
86
- cap = cv2.VideoCapture(input_path)
87
- if not cap.isOpened():
88
- raise IOError("Cannot open video")
89
-
90
- in_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
91
- in_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
92
- fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
93
- frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
94
-
95
- out_w, out_h = _choose_writer_size(in_w, in_h)
96
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
97
-
98
- temp_video_path = tempfile.NamedTemporaryFile(delete=False, suffix="_temp.mp4").name
99
- output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_blurred.mp4").name
100
-
101
- out = cv2.VideoWriter(temp_video_path, fourcc, fps, (out_w, out_h))
102
-
103
- idx = 0
104
- total_faces = 0
105
-
106
  try:
107
- while True:
108
- ret, frame = cap.read()
109
- if not ret:
110
- break
111
- frame = cv2.resize(frame, (out_w, out_h))
112
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  with torch.no_grad():
114
- results = model.predict(frame, conf=conf, iou=iou, verbose=False, device=device)
115
-
116
- h, w = frame.shape[:2]
117
- r0 = results[0] if len(results) else None
118
- boxes = r0.boxes.xyxy if (r0 and hasattr(r0, "boxes")) else []
119
- total_faces += len(boxes)
120
-
121
- for b in boxes:
122
- x1, y1, x2, y2 = map(int, b)
123
- if expand_ratio > 0:
124
- bw = x2 - x1
125
- bh = y2 - y1
126
- dx = int(bw * expand_ratio)
127
- dy = int(bh * expand_ratio)
128
- x1 -= dx; y1 -= dy; x2 += dx; y2 += dy
129
-
130
- x1 = max(0, min(w, x1))
131
- x2 = max(0, min(w, x2))
132
- y1 = max(0, min(h, y1))
133
- y2 = max(0, min(h, y2))
134
- if x2 <= x1 or y2 <= y1:
135
- continue
136
-
137
- roi = frame[y1:y2, x1:x2]
138
- frame[y1:y2, x1:x2] = _apply_anonymization(roi, mode, blur_kernel, mosaic)
139
-
140
- out.write(frame)
141
- idx += 1
142
- if frames > 0:
143
- progress(idx / frames, desc=f"Processing frame {idx}/{frames}")
144
- finally:
145
- cap.release()
146
- out.release()
147
 
 
 
148
  try:
149
- progress(0.95, desc="Merging audio...")
150
- original = VideoFileClip(input_path)
151
- processed = VideoFileClip(temp_video_path).set_audio(original.audio)
152
- processed.write_videofile(
153
- output_path,
154
- codec="libx264",
155
- audio_codec="aac",
156
- threads=1,
157
- logger=None
158
- )
159
- original.close()
160
- processed.close()
161
- return output_path, total_faces, frames
162
  except Exception as e:
163
- print("Audio merging failed:", e)
164
- return temp_video_path, total_faces, frames
165
-
166
 
167
- # ==============================
168
- # Main Processing Functions
169
- # ==============================
170
- def process_image(image, conf, iou, expand_ratio, mode_choice, blur_intensity, mosaic_size):
171
- if image is None:
172
- return None, "⚠️ Please upload an image first!"
173
-
174
- # Convert PIL to BGR
175
- image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
176
- h, w = image_bgr.shape[:2]
177
-
178
- # Determine blur settings
179
- if mode_choice == "Gaussian Blur":
180
- blur_kernel = blur_intensity
181
- mosaic = 15
182
- else:
183
- blur_kernel = 51
184
- mosaic = mosaic_size
185
-
186
- # Process
187
- result_bgr, face_count = blur_faces_image(
188
- image_bgr.copy(), conf, iou, expand_ratio,
189
- mode_choice, blur_kernel, mosaic
190
- )
191
-
192
- # Convert back to RGB
193
- result_rgb = cv2.cvtColor(result_bgr, cv2.COLOR_BGR2RGB)
194
- result_pil = Image.fromarray(result_rgb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- # Generate log
197
- info_log = f"""βœ… IMAGE PROCESSING COMPLETE!
198
- {'=' * 50}
199
- πŸ–ΌοΈ Image Info:
200
- β€’ Size: {w} x {h} pixels
201
- β€’ Format: RGB
202
  {'=' * 50}
203
- πŸ” Detection Settings:
204
- β€’ Confidence: {conf}
205
- β€’ IoU Threshold: {iou}
206
- β€’ Box Expansion: {expand_ratio}
207
  {'=' * 50}
208
- 🎨 Blur Settings:
209
- β€’ Style: {mode_choice}
210
- β€’ Intensity: {blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size}
 
211
  {'=' * 50}
212
- πŸ‘€ Results:
213
- β€’ Faces Detected: {face_count}
214
- β€’ Faces Blurred: {face_count}
215
  {'=' * 50}
216
- πŸ’Ύ Ready to download!"""
217
 
218
- return result_pil, info_log
219
-
220
-
221
- def process_video(video, conf, iou, expand_ratio, mode_choice, blur_intensity, mosaic_size, progress=gr.Progress()):
222
- if video is None:
223
- return None, "⚠️ Please upload a video first!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
- # Determine blur settings
226
- if mode_choice == "Gaussian Blur":
227
- blur_kernel = blur_intensity
228
- mosaic = 15
229
- else:
230
- blur_kernel = 51
231
- mosaic = mosaic_size
232
-
233
- try:
234
- output_path, total_faces, total_frames = blur_faces_video(
235
- video, conf, iou, expand_ratio,
236
- mode_choice, blur_kernel, mosaic, progress
237
- )
238
-
239
- info_log = f"""βœ… VIDEO PROCESSING COMPLETE!
240
- {'=' * 50}
241
- πŸŽ₯ Video Info:
242
- β€’ Total Frames: {total_frames}
243
- β€’ Output Path: {os.path.basename(output_path)}
244
  {'=' * 50}
245
- πŸ” Detection Settings:
246
- β€’ Confidence: {conf}
247
- β€’ IoU Threshold: {iou}
248
- β€’ Box Expansion: {expand_ratio}
249
  {'=' * 50}
250
- 🎨 Blur Settings:
251
- β€’ Style: {mode_choice}
252
- β€’ Intensity: {blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size}
 
253
  {'=' * 50}
254
- πŸ‘€ Results:
255
- β€’ Total Faces Detected: {total_faces}
256
- β€’ Frames Processed: {total_frames}
257
  {'=' * 50}
258
- πŸ’Ύ Ready to download!"""
259
-
260
- return output_path, info_log
261
-
262
- except Exception as e:
263
- return None, f"❌ Error: {str(e)}"
264
 
265
 
266
  # ============================================
@@ -324,7 +313,7 @@ a[href*="huggingface.co/spaces"] {
324
 
325
  /* ===== 메인 μ»¨ν…Œμ΄λ„ˆ ===== */
326
  #col-container {
327
- max-width: 1400px;
328
  margin: 0 auto;
329
  }
330
 
@@ -353,36 +342,42 @@ a[href*="huggingface.co/spaces"] {
353
  font-weight: 700 !important;
354
  }
355
 
356
- /* ===== 🎨 Stats μΉ΄λ“œ ===== */
357
- .stats-row {
358
- display: flex !important;
359
- justify-content: center !important;
360
- gap: 1rem !important;
361
- margin: 1.5rem 0 !important;
362
- flex-wrap: wrap !important;
363
- }
364
-
365
- .stat-card {
366
- background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 100%) !important;
367
  border: 3px solid #1F2937 !important;
368
  border-radius: 12px !important;
369
- padding: 1rem 1.5rem !important;
370
- text-align: center !important;
371
- box-shadow: 4px 4px 0px #1F2937 !important;
372
- min-width: 120px !important;
373
  }
374
 
375
- .stat-card .emoji {
376
- font-size: 2rem !important;
377
- display: block !important;
378
- margin-bottom: 0.3rem !important;
 
379
  }
380
 
381
- .stat-card .label {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  color: #FFFFFF !important;
383
- font-family: 'Comic Neue', cursive !important;
384
- font-weight: 700 !important;
385
- font-size: 0.9rem !important;
386
  }
387
 
388
  /* ===== 🎨 μΉ΄λ“œ/νŒ¨λ„ - λ§Œν™” ν”„λ ˆμž„ μŠ€νƒ€μΌ ===== */
@@ -404,41 +399,7 @@ a[href*="huggingface.co/spaces"] {
404
  box-shadow: 8px 8px 0px #1F2937 !important;
405
  }
406
 
407
- /* ===== 🎨 νƒ­ μŠ€νƒ€μΌ ===== */
408
- .gr-tabs {
409
- border: 3px solid #1F2937 !important;
410
- border-radius: 12px !important;
411
- overflow: hidden !important;
412
- box-shadow: 6px 6px 0px #1F2937 !important;
413
- }
414
-
415
- .gr-tab-nav {
416
- background: #FACC15 !important;
417
- border-bottom: 3px solid #1F2937 !important;
418
- }
419
-
420
- .gr-tab-nav button {
421
- font-family: 'Bangers', cursive !important;
422
- font-size: 1.2rem !important;
423
- letter-spacing: 1px !important;
424
- color: #1F2937 !important;
425
- padding: 12px 24px !important;
426
- border: none !important;
427
- background: transparent !important;
428
- transition: all 0.2s ease !important;
429
- }
430
-
431
- .gr-tab-nav button:hover {
432
- background: #FDE68A !important;
433
- }
434
-
435
- .gr-tab-nav button.selected {
436
- background: #3B82F6 !important;
437
- color: #FFFFFF !important;
438
- text-shadow: 1px 1px 0px #1F2937 !important;
439
- }
440
-
441
- /* ===== 🎨 μž…λ ₯ ν•„λ“œ ===== */
442
  textarea,
443
  input[type="text"],
444
  input[type="number"] {
@@ -460,54 +421,15 @@ input[type="number"]:focus {
460
  outline: none !important;
461
  }
462
 
463
- /* ===== 🎨 λ“œλ‘­λ‹€μš΄ μŠ€νƒ€μΌ ===== */
464
- .gr-dropdown {
465
- background: #FFFFFF !important;
466
- border: 3px solid #1F2937 !important;
467
- border-radius: 8px !important;
468
- box-shadow: 3px 3px 0px #1F2937 !important;
469
- }
470
-
471
- .gr-dropdown > div {
472
- background: #FFFFFF !important;
473
- border: none !important;
474
- }
475
-
476
- .gr-dropdown input {
477
- color: #1F2937 !important;
478
- font-family: 'Comic Neue', cursive !important;
479
- font-weight: 700 !important;
480
- }
481
-
482
- .gr-dropdown ul {
483
- background: #FFFFFF !important;
484
- border: 3px solid #1F2937 !important;
485
- border-radius: 8px !important;
486
- box-shadow: 4px 4px 0px #1F2937 !important;
487
- }
488
-
489
- .gr-dropdown ul li {
490
- color: #1F2937 !important;
491
- font-family: 'Comic Neue', cursive !important;
492
- font-weight: 700 !important;
493
- padding: 8px 12px !important;
494
- }
495
-
496
- .gr-dropdown ul li:hover {
497
- background: #FACC15 !important;
498
- color: #1F2937 !important;
499
- }
500
-
501
- .gr-dropdown ul li.selected {
502
- background: #3B82F6 !important;
503
- color: #FFFFFF !important;
504
  }
505
 
506
- /* ===== 🎨 Primary λ²„νŠΌ ===== */
507
  .gr-button-primary,
508
  button.primary,
509
- .gr-button.primary,
510
- .process-btn {
511
  background: #3B82F6 !important;
512
  border: 3px solid #1F2937 !important;
513
  border-radius: 8px !important;
@@ -524,8 +446,7 @@ button.primary,
524
 
525
  .gr-button-primary:hover,
526
  button.primary:hover,
527
- .gr-button.primary:hover,
528
- .process-btn:hover {
529
  background: #2563EB !important;
530
  transform: translate(-2px, -2px) !important;
531
  box-shadow: 7px 7px 0px #1F2937 !important;
@@ -533,12 +454,34 @@ button.primary:hover,
533
 
534
  .gr-button-primary:active,
535
  button.primary:active,
536
- .gr-button.primary:active,
537
- .process-btn:active {
538
  transform: translate(3px, 3px) !important;
539
  box-shadow: 2px 2px 0px #1F2937 !important;
540
  }
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  /* ===== 🎨 둜그 좜λ ₯ μ˜μ—­ ===== */
543
  .info-log textarea {
544
  background: #1F2937 !important;
@@ -551,11 +494,11 @@ button.primary:active,
551
  box-shadow: 4px 4px 0px #10B981 !important;
552
  }
553
 
554
- /* ===== 🎨 이미지/λΉ„λ””μ˜€ μ˜μ—­ ===== */
555
- .gr-image,
556
  .gr-video,
557
- .image-container,
558
- .video-container {
 
559
  border: 4px solid #1F2937 !important;
560
  border-radius: 8px !important;
561
  box-shadow: 8px 8px 0px #1F2937 !important;
@@ -563,6 +506,27 @@ button.primary:active,
563
  background: #FFFFFF !important;
564
  }
565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  /* ===== 🎨 μŠ¬λΌμ΄λ” μŠ€νƒ€μΌ ===== */
567
  input[type="range"] {
568
  accent-color: #3B82F6 !important;
@@ -572,21 +536,6 @@ input[type="range"] {
572
  background: #FFFFFF !important;
573
  }
574
 
575
- /* ===== 🎨 μ•„μ½”λ””μ–Έ ===== */
576
- .gr-accordion {
577
- background: #FACC15 !important;
578
- border: 3px solid #1F2937 !important;
579
- border-radius: 8px !important;
580
- box-shadow: 4px 4px 0px #1F2937 !important;
581
- }
582
-
583
- .gr-accordion-header {
584
- color: #1F2937 !important;
585
- font-family: 'Comic Neue', cursive !important;
586
- font-weight: 700 !important;
587
- font-size: 1.1rem !important;
588
- }
589
-
590
  /* ===== 🎨 라벨 μŠ€νƒ€μΌ ===== */
591
  label,
592
  .gr-input-label,
@@ -597,15 +546,30 @@ label,
597
  font-size: 1rem !important;
598
  }
599
 
600
- /* ===== 🎨 ν”„λ‘œκ·Έλ ˆμŠ€ λ°” ===== */
601
- .progress-bar,
602
- .gr-progress-bar {
603
- background: #3B82F6 !important;
604
- border: 2px solid #1F2937 !important;
605
- border-radius: 4px !important;
 
 
 
 
606
  }
607
 
608
- /* ===== 🎨 μŠ€ν¬λ‘€λ°” ===== */
 
 
 
 
 
 
 
 
 
 
 
609
  ::-webkit-scrollbar {
610
  width: 12px;
611
  height: 12px;
@@ -672,9 +636,9 @@ a:hover {
672
  box-shadow: 4px 4px 0px #1F2937 !important;
673
  }
674
 
675
- .stat-card {
676
- min-width: 100px !important;
677
- padding: 0.8rem 1rem !important;
678
  }
679
  }
680
 
@@ -687,10 +651,8 @@ a:hover {
687
  """
688
 
689
 
690
- # ============================================
691
- # Build the Gradio Interface
692
- # ============================================
693
- with gr.Blocks(fill_height=True, css=css, title="Ansim Blur - Face Privacy Protection") as demo:
694
 
695
  # HOME Badge
696
  gr.HTML("""
@@ -698,270 +660,189 @@ with gr.Blocks(fill_height=True, css=css, title="Ansim Blur - Face Privacy Prote
698
  <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
699
  <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
700
  </a>
701
- <a href="https://discord.gg/openfreeai" target="_blank" style="text-decoration: none; margin-left: 10px;">
702
- <img src="https://img.shields.io/static/v1?label=Discord&message=OpenFree%20AI&color=5865F2&labelColor=1F2937&logo=discord&logoColor=white&style=for-the-badge" alt="Discord">
703
- </a>
704
  </div>
705
  """)
706
 
707
  # Header Title
708
  gr.Markdown(
709
  """
710
- # πŸ”’ ANSIM BLUR - FACE PRIVACY πŸ›‘οΈ
711
  """,
712
  elem_classes="header-text"
713
  )
714
 
715
  gr.Markdown(
716
  """
717
- <p class="subtitle">🎭 Advanced AI-Powered Face Detection & Privacy Protection! ✨</p>
718
  """,
719
  )
720
 
721
- # Stats Cards
722
- gr.HTML("""
723
- <div class="stats-row">
724
- <div class="stat-card">
725
- <span class="emoji">πŸ–ΌοΈ</span>
726
- <span class="label">Image Support</span>
727
- </div>
728
- <div class="stat-card">
729
- <span class="emoji">πŸŽ₯</span>
730
- <span class="label">Video Processing</span>
731
- </div>
732
- <div class="stat-card">
733
- <span class="emoji">⚑</span>
734
- <span class="label">Real-time AI</span>
735
- </div>
736
- <div class="stat-card">
737
- <span class="emoji">πŸ›‘οΈ</span>
738
- <span class="label">Privacy First</span>
739
- </div>
740
- </div>
741
- """)
742
-
743
- # Device Info
744
- gr.Markdown(f"""
745
- <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin: 1rem 0;">
746
- πŸ–₯️ Running on: <span style="color: #3B82F6;">{device.upper()}</span>
747
- </p>
748
- """)
749
-
750
- # Main Tabs
751
  with gr.Tabs():
752
- # ===== IMAGE TAB =====
753
- with gr.Tab("πŸ“Έ Image Processing"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  with gr.Row(equal_height=False):
755
- # Left Column - Input & Settings
756
- with gr.Column(scale=1, min_width=400):
757
- input_image = gr.Image(
758
- label="πŸ–ΌοΈ Upload Image",
759
- type="pil",
760
- height=350
 
 
 
 
 
761
  )
762
 
763
- with gr.Accordion("βš™οΈ Detection Settings", open=True):
764
- conf_img = gr.Slider(
765
- minimum=0.05,
766
- maximum=0.9,
767
- value=0.25,
768
- step=0.01,
769
- label="🎯 Confidence Threshold"
770
- )
771
- iou_img = gr.Slider(
772
- minimum=0.1,
773
- maximum=0.9,
774
- value=0.45,
775
- step=0.01,
776
- label="πŸ“ NMS IoU"
777
- )
778
- expand_img = gr.Slider(
779
- minimum=0.0,
780
- maximum=0.5,
781
- value=0.05,
782
- step=0.01,
783
- label="πŸ”² Box Expansion"
784
- )
785
 
786
- with gr.Accordion("🎨 Blur Settings", open=True):
787
- mode_img = gr.Dropdown(
788
- choices=["Gaussian Blur", "Mosaic Effect"],
789
- value="Gaussian Blur",
790
- label="πŸ–ŒοΈ Style"
791
- )
792
- blur_intensity_img = gr.Slider(
793
- minimum=15,
794
- maximum=151,
795
- value=51,
796
- step=2,
797
- label="πŸ’¨ Blur Intensity"
798
- )
799
- mosaic_size_img = gr.Slider(
800
- minimum=5,
801
- maximum=40,
802
- value=15,
803
- step=1,
804
- label="🧩 Mosaic Size"
805
- )
806
 
807
- process_img_btn = gr.Button(
808
- "πŸ” PROCESS IMAGE! 🎭",
809
  variant="primary",
810
- size="lg",
811
- elem_classes="process-btn"
812
- )
813
-
814
- # Right Column - Output
815
- with gr.Column(scale=1, min_width=400):
816
- output_image = gr.Image(
817
- label="πŸ–ΌοΈ Processed Result",
818
- type="pil",
819
- height=350
820
  )
821
 
822
- with gr.Accordion("πŸ“œ Processing Log", open=True):
823
- info_log_img = gr.Textbox(
824
  label="",
825
- placeholder="Upload an image and click process...",
826
  lines=12,
827
- max_lines=18,
828
  interactive=False,
829
  elem_classes="info-log"
830
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
831
 
832
- # ===== VIDEO TAB =====
833
- with gr.Tab("🎬 Video Processing"):
 
 
 
 
 
 
 
 
834
  with gr.Row(equal_height=False):
835
- # Left Column - Input & Settings
836
- with gr.Column(scale=1, min_width=400):
837
- input_video = gr.Video(
838
- label="πŸŽ₯ Upload Video",
839
- height=350
 
 
 
 
 
840
  )
841
 
842
- with gr.Accordion("βš™οΈ Detection Settings", open=True):
843
- conf_vid = gr.Slider(
844
- minimum=0.05,
845
- maximum=0.9,
846
- value=0.25,
847
- step=0.01,
848
- label="🎯 Confidence Threshold"
849
- )
850
- iou_vid = gr.Slider(
851
- minimum=0.1,
852
- maximum=0.9,
853
- value=0.45,
854
- step=0.01,
855
- label="πŸ“ NMS IoU"
856
- )
857
- expand_vid = gr.Slider(
858
- minimum=0.0,
859
- maximum=0.5,
860
- value=0.05,
861
- step=0.01,
862
- label="πŸ”² Box Expansion"
863
- )
864
 
865
- with gr.Accordion("🎨 Blur Settings", open=True):
866
- mode_vid = gr.Dropdown(
867
- choices=["Gaussian Blur", "Mosaic Effect"],
868
- value="Gaussian Blur",
869
- label="πŸ–ŒοΈ Style"
870
- )
871
- blur_intensity_vid = gr.Slider(
872
- minimum=15,
873
- maximum=151,
874
- value=51,
875
- step=2,
876
- label="πŸ’¨ Blur Intensity"
877
- )
878
- mosaic_size_vid = gr.Slider(
879
- minimum=5,
880
- maximum=40,
881
- value=15,
882
- step=1,
883
- label="🧩 Mosaic Size"
884
- )
885
 
886
- process_vid_btn = gr.Button(
887
- "🎬 PROCESS VIDEO! πŸ›‘οΈ",
888
  variant="primary",
889
- size="lg",
890
- elem_classes="process-btn"
891
- )
892
-
893
- # Right Column - Output
894
- with gr.Column(scale=1, min_width=400):
895
- output_video = gr.Video(
896
- label="πŸŽ₯ Processed Result",
897
- height=350
898
  )
899
 
900
- with gr.Accordion("πŸ“œ Processing Log", open=True):
901
- info_log_vid = gr.Textbox(
902
  label="",
903
- placeholder="Upload a video and click process...",
904
  lines=12,
905
- max_lines=18,
906
  interactive=False,
907
  elem_classes="info-log"
908
  )
909
-
910
- # Instructions
911
- gr.Markdown(
912
- """
913
- <div style="background: linear-gradient(135deg, #EFF6FF 0%, #DBEAFE 100%); border: 3px solid #3B82F6; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 2rem;">
914
- <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem; margin-bottom: 0.5rem;">πŸ“ HOW TO USE</h3>
915
- <ol style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
916
- <li>Upload an image or video containing faces</li>
917
- <li>Adjust detection settings (confidence, IoU, expansion)</li>
918
- <li>Choose blur style (Gaussian or Mosaic)</li>
919
- <li>Click the Process button and wait for results</li>
920
- <li>Download your privacy-protected media!</li>
921
- </ol>
922
- </div>
923
-
924
- <div style="background: linear-gradient(135deg, #FEF3C7 0%, #FDE68A 100%); border: 3px solid #F59E0B; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 1rem;">
925
- <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem; margin-bottom: 0.5rem;">πŸ’‘ TIPS</h3>
926
- <ul style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
927
- <li>Lower confidence = more faces detected (may include false positives)</li>
928
- <li>Higher blur intensity = stronger privacy protection</li>
929
- <li>Mosaic effect works better for artistic results</li>
930
- <li>Video processing may take time depending on length</li>
931
- </ul>
932
- </div>
933
- """
934
- )
935
-
936
- # Event Handlers
937
- process_img_btn.click(
938
- fn=process_image,
939
- inputs=[
940
- input_image,
941
- conf_img,
942
- iou_img,
943
- expand_img,
944
- mode_img,
945
- blur_intensity_img,
946
- mosaic_size_img
947
- ],
948
- outputs=[output_image, info_log_img]
949
- )
950
-
951
- process_vid_btn.click(
952
- fn=process_video,
953
- inputs=[
954
- input_video,
955
- conf_vid,
956
- iou_vid,
957
- expand_vid,
958
- mode_vid,
959
- blur_intensity_vid,
960
- mosaic_size_vid
961
- ],
962
- outputs=[output_video, info_log_vid]
963
- )
964
 
965
 
 
966
  if __name__ == "__main__":
967
- demo.launch()
 
 
 
 
1
+ import spaces
2
+ import logging
3
+ from datetime import datetime
 
 
4
  from pathlib import Path
5
+ import gradio as gr
6
  import torch
7
+ import torchaudio
8
+ import os
9
+ import requests
10
+ from transformers import pipeline
11
+ import tempfile
12
+ import numpy as np
13
+ from einops import rearrange
14
+ import cv2
15
+ from scipy.io import wavfile
16
+ import librosa
17
+ import json
18
+ from typing import Optional, Tuple, List
19
+ import atexit
20
+
21
+ # ν™˜κ²½ λ³€μˆ˜ μ„€μ •μœΌλ‘œ torch.load 체크 우회 (μž„μ‹œ ν•΄κ²°μ±…)
22
+ os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
23
+
24
+ try:
25
+ import mmaudio
26
+ except ImportError:
27
+ os.system("pip install -e .")
28
+ import mmaudio
29
+
30
+ from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
31
+ setup_eval_logging)
32
+ from mmaudio.model.flow_matching import FlowMatching
33
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
34
+ from mmaudio.model.sequence_config import SequenceConfig
35
+ from mmaudio.model.utils.features_utils import FeaturesUtils
36
+
37
+ # λ‘œκΉ… μ„€μ •
38
+ logging.basicConfig(
39
+ level=logging.INFO,
40
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
41
+ )
42
+ log = logging.getLogger()
43
+
44
+ # CUDA μ„€μ •
45
+ if torch.cuda.is_available():
46
+ device = torch.device("cuda")
47
+ torch.backends.cuda.matmul.allow_tf32 = True
48
+ torch.backends.cudnn.allow_tf32 = True
49
+ torch.backends.cudnn.benchmark = True
50
+ else:
51
+ device = torch.device("cpu")
52
+
53
+ dtype = torch.bfloat16
54
+
55
+ # λͺ¨λΈ μ„€μ •
56
+ model: ModelConfig = all_model_cfg['large_44k_v2']
57
+ model.download_if_needed()
58
+ output_dir = Path('./output/gradio')
59
+
60
+ setup_eval_logging()
61
+
62
+ # λ²ˆμ—­κΈ° μ„€μ •
63
+ try:
64
+ translator = pipeline("translation",
65
+ model="Helsinki-NLP/opus-mt-ko-en",
66
+ device="cpu",
67
+ use_fast=True,
68
+ trust_remote_code=False)
69
+ except Exception as e:
70
+ log.warning(f"Failed to load translation model with safetensors: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
+ translator = pipeline("translation",
73
+ model="Helsinki-NLP/opus-mt-ko-en",
74
+ device="cpu")
75
+ except Exception as e2:
76
+ log.error(f"Failed to load translation model: {e2}")
77
+ translator = None
78
+
79
+ PIXABAY_API_KEY = "33492762-a28a596ec4f286f84cd328b17"
80
+
81
+ def cleanup_temp_files():
82
+ temp_dir = tempfile.gettempdir()
83
+ for file in os.listdir(temp_dir):
84
+ if file.endswith(('.mp4', '.flac')):
85
+ try:
86
+ os.remove(os.path.join(temp_dir, file))
87
+ except:
88
+ pass
89
+
90
+ atexit.register(cleanup_temp_files)
91
+
92
+ def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
93
+ with torch.cuda.device(device):
94
+ seq_cfg = model.seq_cfg
95
+ net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
96
+ net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
97
+ log.info(f'Loaded weights from {model.model_path}')
98
+
99
+ feature_utils = FeaturesUtils(
100
+ tod_vae_ckpt=model.vae_path,
101
+ synchformer_ckpt=model.synchformer_ckpt,
102
+ enable_conditions=True,
103
+ mode=model.mode,
104
+ bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
105
+ need_vae_encoder=False
106
+ ).to(device, dtype).eval()
107
+
108
+ return net, feature_utils, seq_cfg
109
+
110
+ net, feature_utils, seq_cfg = get_model()
111
+
112
+ def translate_prompt(text):
113
+ try:
114
+ if translator is None:
115
+ return text
116
+
117
+ if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text):
118
  with torch.no_grad():
119
+ translation = translator(text)[0]['translation_text']
120
+ return translation
121
+ return text
122
+ except Exception as e:
123
+ logging.error(f"Translation error: {e}")
124
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
+ @torch.no_grad()
127
+ def search_videos(query):
128
  try:
129
+ query = translate_prompt(query)
130
+ return search_pixabay_videos(query, PIXABAY_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
+ logging.error(f"Video search error: {e}")
133
+ return []
 
134
 
135
+ def search_pixabay_videos(query, api_key):
136
+ try:
137
+ base_url = "https://pixabay.com/api/videos/"
138
+ params = {
139
+ "key": api_key,
140
+ "q": query,
141
+ "per_page": 40
142
+ }
143
+
144
+ response = requests.get(base_url, params=params)
145
+ if response.status_code == 200:
146
+ data = response.json()
147
+ return [video['videos']['large']['url'] for video in data.get('hits', [])]
148
+ return []
149
+ except Exception as e:
150
+ logging.error(f"Pixabay API error: {e}")
151
+ return []
152
+
153
+ @spaces.GPU
154
+ @torch.inference_mode()
155
+ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
156
+ cfg_strength: float, duration: float):
157
+ prompt = translate_prompt(prompt)
158
+ negative_prompt = translate_prompt(negative_prompt)
159
+
160
+ rng = torch.Generator(device=device)
161
+ rng.manual_seed(seed)
162
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
163
+
164
+ clip_frames, sync_frames, duration = load_video(video, duration)
165
+ clip_frames = clip_frames.unsqueeze(0)
166
+ sync_frames = sync_frames.unsqueeze(0)
167
+ seq_cfg.duration = duration
168
+ net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
169
+
170
+ audios = generate(clip_frames,
171
+ sync_frames, [prompt],
172
+ negative_text=[negative_prompt],
173
+ feature_utils=feature_utils,
174
+ net=net,
175
+ fm=fm,
176
+ rng=rng,
177
+ cfg_strength=cfg_strength)
178
+ audio = audios.float().cpu()[0]
179
+
180
+ video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
181
+ make_video(video,
182
+ video_save_path,
183
+ audio,
184
+ sampling_rate=seq_cfg.sampling_rate,
185
+ duration_sec=seq_cfg.duration)
186
 
187
+ # 정보 둜그 생성
188
+ info_log = f"""βœ… VIDEO TO AUDIO COMPLETE!
 
 
 
 
189
  {'=' * 50}
190
+ 🎬 Video Info:
191
+ β€’ Duration: {duration:.2f} seconds
 
 
192
  {'=' * 50}
193
+ βš™οΈ Generation Settings:
194
+ β€’ Seed: {seed}
195
+ β€’ Steps: {num_steps}
196
+ β€’ CFG Strength: {cfg_strength}
197
  {'=' * 50}
198
+ πŸ“ Prompts:
199
+ β€’ Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
200
+ β€’ Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
201
  {'=' * 50}
202
+ πŸ’Ύ Video with audio ready!"""
203
 
204
+ return video_save_path, info_log
205
+
206
+ @spaces.GPU
207
+ @torch.inference_mode()
208
+ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
209
+ duration: float):
210
+ prompt = translate_prompt(prompt)
211
+ negative_prompt = translate_prompt(negative_prompt)
212
+
213
+ rng = torch.Generator(device=device)
214
+ rng.manual_seed(seed)
215
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
216
+
217
+ clip_frames = sync_frames = None
218
+ seq_cfg.duration = duration
219
+ net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
220
+
221
+ audios = generate(clip_frames,
222
+ sync_frames, [prompt],
223
+ negative_text=[negative_prompt],
224
+ feature_utils=feature_utils,
225
+ net=net,
226
+ fm=fm,
227
+ rng=rng,
228
+ cfg_strength=cfg_strength)
229
+ audio = audios.float().cpu()[0]
230
+
231
+ audio_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.flac').name
232
+ torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)
233
 
234
+ # 정보 둜그 생성
235
+ info_log = f"""βœ… TEXT TO AUDIO COMPLETE!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  {'=' * 50}
237
+ 🎡 Audio Info:
238
+ β€’ Duration: {duration:.2f} seconds
239
+ β€’ Sample Rate: {seq_cfg.sampling_rate} Hz
 
240
  {'=' * 50}
241
+ βš™οΈ Generation Settings:
242
+ β€’ Seed: {seed}
243
+ β€’ Steps: {num_steps}
244
+ β€’ CFG Strength: {cfg_strength}
245
  {'=' * 50}
246
+ πŸ“ Prompts:
247
+ β€’ Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
248
+ β€’ Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
249
  {'=' * 50}
250
+ πŸ’Ύ Audio ready to download!"""
251
+
252
+ return audio_save_path, info_log
 
 
 
253
 
254
 
255
  # ============================================
 
313
 
314
  /* ===== 메인 μ»¨ν…Œμ΄λ„ˆ ===== */
315
  #col-container {
316
+ max-width: 1200px;
317
  margin: 0 auto;
318
  }
319
 
 
342
  font-weight: 700 !important;
343
  }
344
 
345
+ /* ===== 🎨 νƒ­ μŠ€νƒ€μΌ ===== */
346
+ .tabs {
347
+ background: #FFFFFF !important;
 
 
 
 
 
 
 
 
348
  border: 3px solid #1F2937 !important;
349
  border-radius: 12px !important;
350
+ box-shadow: 6px 6px 0px #1F2937 !important;
351
+ padding: 10px !important;
 
 
352
  }
353
 
354
+ .tab-nav {
355
+ background: #FACC15 !important;
356
+ border-radius: 8px !important;
357
+ padding: 5px !important;
358
+ border: 2px solid #1F2937 !important;
359
  }
360
 
361
+ .tab-nav button {
362
+ font-family: 'Bangers', cursive !important;
363
+ font-size: 1.1rem !important;
364
+ letter-spacing: 1px !important;
365
+ color: #1F2937 !important;
366
+ background: transparent !important;
367
+ border: none !important;
368
+ padding: 10px 20px !important;
369
+ border-radius: 6px !important;
370
+ transition: all 0.2s ease !important;
371
+ }
372
+
373
+ .tab-nav button:hover {
374
+ background: #FEF3C7 !important;
375
+ }
376
+
377
+ .tab-nav button.selected {
378
+ background: #3B82F6 !important;
379
  color: #FFFFFF !important;
380
+ box-shadow: 3px 3px 0px #1F2937 !important;
 
 
381
  }
382
 
383
  /* ===== 🎨 μΉ΄λ“œ/νŒ¨λ„ - λ§Œν™” ν”„λ ˆμž„ μŠ€νƒ€μΌ ===== */
 
399
  box-shadow: 8px 8px 0px #1F2937 !important;
400
  }
401
 
402
+ /* ===== 🎨 μž…λ ₯ ν•„λ“œ (Textbox) ===== */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  textarea,
404
  input[type="text"],
405
  input[type="number"] {
 
421
  outline: none !important;
422
  }
423
 
424
+ textarea::placeholder {
425
+ color: #9CA3AF !important;
426
+ font-weight: 400 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  }
428
 
429
+ /* ===== 🎨 Primary λ²„νŠΌ - μ½”λ―Ή 블루 ===== */
430
  .gr-button-primary,
431
  button.primary,
432
+ .gr-button.primary {
 
433
  background: #3B82F6 !important;
434
  border: 3px solid #1F2937 !important;
435
  border-radius: 8px !important;
 
446
 
447
  .gr-button-primary:hover,
448
  button.primary:hover,
449
+ .gr-button.primary:hover {
 
450
  background: #2563EB !important;
451
  transform: translate(-2px, -2px) !important;
452
  box-shadow: 7px 7px 0px #1F2937 !important;
 
454
 
455
  .gr-button-primary:active,
456
  button.primary:active,
457
+ .gr-button.primary:active {
 
458
  transform: translate(3px, 3px) !important;
459
  box-shadow: 2px 2px 0px #1F2937 !important;
460
  }
461
 
462
+ /* ===== 🎨 Secondary λ²„νŠΌ - μ½”λ―Ή λ ˆλ“œ ===== */
463
+ .gr-button-secondary,
464
+ button.secondary {
465
+ background: #EF4444 !important;
466
+ border: 3px solid #1F2937 !important;
467
+ border-radius: 8px !important;
468
+ color: #FFFFFF !important;
469
+ font-family: 'Bangers', cursive !important;
470
+ font-weight: 400 !important;
471
+ font-size: 1.1rem !important;
472
+ letter-spacing: 1px !important;
473
+ box-shadow: 4px 4px 0px #1F2937 !important;
474
+ transition: all 0.1s ease !important;
475
+ text-shadow: 1px 1px 0px #1F2937 !important;
476
+ }
477
+
478
+ .gr-button-secondary:hover,
479
+ button.secondary:hover {
480
+ background: #DC2626 !important;
481
+ transform: translate(-2px, -2px) !important;
482
+ box-shadow: 6px 6px 0px #1F2937 !important;
483
+ }
484
+
485
  /* ===== 🎨 둜그 좜λ ₯ μ˜μ—­ ===== */
486
  .info-log textarea {
487
  background: #1F2937 !important;
 
494
  box-shadow: 4px 4px 0px #10B981 !important;
495
  }
496
 
497
+ /* ===== 🎨 λΉ„λ””μ˜€/μ˜€λ””μ˜€ μ˜μ—­ ===== */
 
498
  .gr-video,
499
+ .gr-audio,
500
+ video,
501
+ audio {
502
  border: 4px solid #1F2937 !important;
503
  border-radius: 8px !important;
504
  box-shadow: 8px 8px 0px #1F2937 !important;
 
506
  background: #FFFFFF !important;
507
  }
508
 
509
+ /* ===== 🎨 가러리 μŠ€νƒ€μΌ ===== */
510
+ .gr-gallery {
511
+ background: #FFFFFF !important;
512
+ border: 3px solid #1F2937 !important;
513
+ border-radius: 8px !important;
514
+ box-shadow: 6px 6px 0px #1F2937 !important;
515
+ padding: 10px !important;
516
+ }
517
+
518
+ .gr-gallery .thumbnail-item {
519
+ border: 3px solid #1F2937 !important;
520
+ border-radius: 6px !important;
521
+ transition: all 0.2s ease !important;
522
+ overflow: hidden !important;
523
+ }
524
+
525
+ .gr-gallery .thumbnail-item:hover {
526
+ transform: scale(1.05) !important;
527
+ box-shadow: 4px 4px 0px #3B82F6 !important;
528
+ }
529
+
530
  /* ===== 🎨 μŠ¬λΌμ΄λ” μŠ€νƒ€μΌ ===== */
531
  input[type="range"] {
532
  accent-color: #3B82F6 !important;
 
536
  background: #FFFFFF !important;
537
  }
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  /* ===== 🎨 라벨 μŠ€νƒ€μΌ ===== */
540
  label,
541
  .gr-input-label,
 
546
  font-size: 1rem !important;
547
  }
548
 
549
+ span.gr-label {
550
+ color: #1F2937 !important;
551
+ }
552
+
553
+ /* ===== 🎨 정보 ν…μŠ€νŠΈ ===== */
554
+ .gr-info,
555
+ .info {
556
+ color: #6B7280 !important;
557
+ font-family: 'Comic Neue', cursive !important;
558
+ font-size: 0.9rem !important;
559
  }
560
 
561
+ /* ===== 🎨 Number Input μŠ€νƒ€μΌ ===== */
562
+ .gr-number input {
563
+ background: #FFFFFF !important;
564
+ border: 3px solid #1F2937 !important;
565
+ border-radius: 8px !important;
566
+ color: #1F2937 !important;
567
+ font-family: 'Comic Neue', cursive !important;
568
+ font-weight: 700 !important;
569
+ box-shadow: 3px 3px 0px #1F2937 !important;
570
+ }
571
+
572
+ /* ===== 🎨 μŠ€ν¬λ‘€λ°” - μ½”λ―Ή μŠ€νƒ€μΌ ===== */
573
  ::-webkit-scrollbar {
574
  width: 12px;
575
  height: 12px;
 
636
  box-shadow: 4px 4px 0px #1F2937 !important;
637
  }
638
 
639
+ .tab-nav button {
640
+ font-size: 0.9rem !important;
641
+ padding: 8px 12px !important;
642
  }
643
  }
644
 
 
651
  """
652
 
653
 
654
# Build the Gradio Blocks interface.
with gr.Blocks(fill_height=True, css=css, title="MMAudio Studio") as demo:

    # HOME badge linking back to the main site.
    # NOTE(review): the wrapping <div> was on a context line not visible in the
    # diff; reconstructed here for centering — confirm against the original markup.
    gr.HTML("""
    <div style="display: flex; justify-content: center;">
        <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
            <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
        </a>
    </div>
    """)

    # Header title.
    gr.Markdown(
        """
        # 🎵 MMAUDIO STUDIO 🎬
        """,
        elem_classes="header-text"
    )

    gr.Markdown(
        """
        <p class="subtitle">🔊 Generate Audio from Text or Video • Korean Supported! 한글지원 🇰🇷</p>
        """,
    )

    with gr.Tabs():
        # Tab 1: Pixabay video search — find candidate input clips.
        with gr.TabItem("🔍 Video Search"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                📹 Search for videos from Pixabay to use as input!
                </p>
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    # Label advertises Korean support only when the translator loaded.
                    search_query = gr.Textbox(
                        label="🔎 Search Query (한글지원)" if translator else "🔎 Search Query",
                        placeholder="Enter search keywords...",
                        lines=1
                    )
                    search_btn = gr.Button(
                        "🔍 SEARCH VIDEOS!",
                        variant="primary",
                        size="lg"
                    )

            search_gallery = gr.Gallery(
                label="📺 Search Results",
                columns=4,
                rows=5,
                height=500
            )

            search_btn.click(
                fn=search_videos,
                inputs=[search_query],
                outputs=[search_gallery]
            )

        # Tab 2: Video-to-Audio — generate audio conditioned on an uploaded video.
        with gr.TabItem("🎬 Video-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                🎥 Upload a video and generate matching audio!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    v2a_video = gr.Video(label="📹 Input Video")
                    v2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want...",
                        lines=2
                    )
                    v2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        value="music",
                        lines=1
                    )

                    with gr.Row():
                        # precision=0 makes Gradio deliver ints: the seed feeds
                        # torch.Generator.manual_seed (rejects floats) and the
                        # step count must be integral.
                        v2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        v2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        v2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        v2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    v2a_btn = gr.Button(
                        "🎬 GENERATE AUDIO! 🔊",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        v2a_log = gr.Textbox(
                            label="",
                            placeholder="Upload video and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    v2a_output = gr.Video(label="🎥 Generated Result", height=400)
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Right-click on the video to save!
                        </p>
                        """
                    )

            v2a_btn.click(
                fn=video_to_audio,
                inputs=[v2a_video, v2a_prompt, v2a_negative, v2a_seed, v2a_steps, v2a_cfg, v2a_duration],
                outputs=[v2a_output, v2a_log]
            )

        # Tab 3: Text-to-Audio — generate audio from a text description only.
        with gr.TabItem("🎵 Text-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                ✨ Generate audio from text description!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    t2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want to generate...",
                        lines=3
                    )
                    t2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        placeholder="What to avoid...",
                        lines=1
                    )

                    with gr.Row():
                        # Same int requirement as the video-to-audio tab.
                        t2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        t2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        t2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        t2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    t2a_btn = gr.Button(
                        "🎵 GENERATE AUDIO! ✨",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        t2a_log = gr.Textbox(
                            label="",
                            placeholder="Enter prompt and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    t2a_output = gr.Audio(label="🔊 Generated Audio")
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Click the download button to save!
                        </p>
                        """
                    )

            t2a_btn.click(
                fn=text_to_audio,
                inputs=[t2a_prompt, t2a_negative, t2a_seed, t2a_steps, t2a_cfg, t2a_duration],
                outputs=[t2a_output, t2a_log]
            )


# Script entry point.
if __name__ == "__main__":
    if translator is None:
        log.warning("Translation model failed to load. Korean translation will be disabled.")

    # allowed_paths lets Gradio serve files written to the output directory.
    demo.launch(allowed_paths=[output_dir])