seawolf2357 committed on
Commit
528db87
·
verified ·
1 Parent(s): b1e5e5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +662 -690
app.py CHANGED
@@ -1,848 +1,820 @@
1
- import spaces
2
- import logging
3
- from datetime import datetime
4
- from pathlib import Path
 
 
 
5
  import gradio as gr
6
- import torch
7
- import torchaudio
8
- import os
9
- import requests
10
- from transformers import pipeline
11
- import tempfile
12
- import numpy as np
13
- from einops import rearrange
14
  import cv2
15
- from scipy.io import wavfile
16
- import librosa
17
- import json
 
18
  from typing import Optional, Tuple, List
19
- import atexit
20
-
21
- # 환경 변수 설정으로 torch.load 체크 우회 (임시 해결책)
22
- os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
23
-
24
- try:
25
- import mmaudio
26
- except ImportError:
27
- os.system("pip install -e .")
28
- import mmaudio
29
-
30
- from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
31
- setup_eval_logging)
32
- from mmaudio.model.flow_matching import FlowMatching
33
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
34
- from mmaudio.model.sequence_config import SequenceConfig
35
- from mmaudio.model.utils.features_utils import FeaturesUtils
36
-
37
# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
log = logging.getLogger()

# CUDA setup: enable TF32 matmul and cudnn autotuning when a GPU is present.
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")

# NOTE(review): bf16 is used unconditionally — assumes the CPU fallback path
# also tolerates bfloat16; confirm on CPU-only deployments.
dtype = torch.bfloat16

# Model configuration: large 44kHz v2 checkpoint, fetched on first run.
model: ModelConfig = all_model_cfg['large_44k_v2']
model.download_if_needed()
output_dir = Path('./output/gradio')

setup_eval_logging()

# Translator setup: Korean->English pipeline on CPU.
# First attempt uses the stricter (safetensors/fast-tokenizer) options; on
# failure it retries with defaults, and finally degrades to None, which
# disables translation throughout the app.
try:
    translator = pipeline("translation",
                          model="Helsinki-NLP/opus-mt-ko-en",
                          device="cpu",
                          use_fast=True,
                          trust_remote_code=False)
except Exception as e:
    log.warning(f"Failed to load translation model with safetensors: {e}")
    try:
        translator = pipeline("translation",
                              model="Helsinki-NLP/opus-mt-ko-en",
                              device="cpu")
    except Exception as e2:
        log.error(f"Failed to load translation model: {e2}")
        translator = None

# NOTE(review): API key hard-coded in source — should be moved to an
# environment variable / Space secret.
PIXABAY_API_KEY = "33492762-a28a596ec4f286f84cd328b17"
80
-
81
def cleanup_temp_files():
    """Best-effort removal of generated media files (.mp4/.flac) from the temp dir.

    Registered via atexit; individual failures must never block shutdown.
    """
    temp_dir = tempfile.gettempdir()
    for file in os.listdir(temp_dir):
        if file.endswith(('.mp4', '.flac')):
            # The file may be held open by another process or already gone.
            # Suppress only OSError rather than a bare `except:`, so real
            # programming errors (e.g. NameError) still surface.
            try:
                os.remove(os.path.join(temp_dir, file))
            except OSError:
                pass
89
-
90
- atexit.register(cleanup_temp_files)
91
-
92
def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
    """Load the MMAudio network and feature extractors onto the module device.

    Returns:
        (net, feature_utils, seq_cfg): the generation network in eval mode,
        the conditioning/feature utilities, and the model's sequence config.
    """
    with torch.cuda.device(device):
        seq_cfg = model.seq_cfg
        # weights_only=True restricts torch.load to tensor payloads (safe loading).
        net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
        net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
        log.info(f'Loaded weights from {model.model_path}')

        # need_vae_encoder=False: inference only decodes latents, never encodes audio.
        feature_utils = FeaturesUtils(
            tod_vae_ckpt=model.vae_path,
            synchformer_ckpt=model.synchformer_ckpt,
            enable_conditions=True,
            mode=model.mode,
            bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
            need_vae_encoder=False
        ).to(device, dtype).eval()

    return net, feature_utils, seq_cfg
109
-
110
- net, feature_utils, seq_cfg = get_model()
111
-
112
def translate_prompt(text):
    """Translate Korean text to English via the ko->en pipeline.

    Returns the input unchanged when the translator is unavailable, the text
    contains no Korean characters, or translation raises.
    """
    try:
        if translator is None:
            return text

        contains_korean = text and any(
            0x3131 <= ord(ch) <= 0xD7A3 for ch in text
        )
        if not contains_korean:
            return text

        with torch.no_grad():
            return translator(text)[0]['translation_text']
    except Exception as e:
        logging.error(f"Translation error: {e}")
        return text
125
 
126
@torch.no_grad()
def search_videos(query):
    """Search Pixabay for videos matching the (possibly Korean) query.

    The query is translated to English first; any failure yields an empty list.
    """
    try:
        translated = translate_prompt(query)
        return search_pixabay_videos(translated, PIXABAY_API_KEY)
    except Exception as e:
        logging.error(f"Video search error: {e}")
        return []
134
 
135
def search_pixabay_videos(query, api_key):
    """Query the Pixabay video API and return large-variant video URLs.

    Args:
        query: Search keywords (already translated to English).
        api_key: Pixabay API key.

    Returns:
        List of video URLs (possibly empty); empty list on any error.
    """
    try:
        base_url = "https://pixabay.com/api/videos/"
        params = {
            "key": api_key,
            "q": query,
            "per_page": 40
        }

        # BUG FIX: requests.get without a timeout can hang the UI thread
        # forever if Pixabay stalls; bound the request instead.
        response = requests.get(base_url, params=params, timeout=15)
        if response.status_code == 200:
            data = response.json()
            return [video['videos']['large']['url'] for video in data.get('hits', [])]
        return []
    except Exception as e:
        logging.error(f"Pixabay API error: {e}")
        return []
152
-
153
@spaces.GPU
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float):
    """Generate audio for an uploaded video and mux it back into an mp4.

    Korean prompts are translated to English first. Mutates the module-level
    `seq_cfg` and `net` sequence lengths to match the clip duration.

    Returns:
        (path to the mp4 with generated audio, human-readable log string)
    """
    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    # Seeded generator makes the flow-matching sampling reproducible.
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # load_video may shorten `duration` to the clip's actual length.
    clip_frames, sync_frames, duration = load_video(video, duration)
    clip_frames = clip_frames.unsqueeze(0)
    sync_frames = sync_frames.unsqueeze(0)
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Write the result next to a fresh temp mp4 Gradio can serve.
    video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
    make_video(video,
               video_save_path,
               audio,
               sampling_rate=seq_cfg.sampling_rate,
               duration_sec=seq_cfg.duration)

    # Build the UI-facing generation log.
    info_log = f"""✅ VIDEO TO AUDIO COMPLETE!
{'=' * 50}
🎬 Video Info:
Duration: {duration:.2f} seconds
{'=' * 50}
⚙️ Generation Settings:
Seed: {seed}
Steps: {num_steps}
CFG Strength: {cfg_strength}
{'=' * 50}
📝 Prompts:
Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
{'=' * 50}
💾 Video with audio ready!"""

    return video_save_path, info_log
205
-
206
@spaces.GPU
@torch.inference_mode()
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
                  duration: float):
    """Generate a .flac audio clip from a text prompt (no visual conditioning).

    Korean prompts are translated to English first. Mutates the module-level
    `seq_cfg` and `net` sequence lengths to match the requested duration.

    Returns:
        (path to the saved .flac, human-readable log string)
    """
    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    # Seeded generator makes the flow-matching sampling reproducible.
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # Text-to-audio: no video frames to condition on.
    clip_frames = sync_frames = None
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Persist to a temp .flac for Gradio to serve/download.
    audio_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.flac').name
    torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)

    # Build the UI-facing generation log.
    info_log = f"""✅ TEXT TO AUDIO COMPLETE!
{'=' * 50}
🎵 Audio Info:
• Duration: {duration:.2f} seconds
Sample Rate: {seq_cfg.sampling_rate} Hz
{'=' * 50}
⚙️ Generation Settings:
Seed: {seed}
Steps: {num_steps}
• CFG Strength: {cfg_strength}
{'=' * 50}
📝 Prompts:
Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
{'=' * 50}
💾 Audio ready to download!"""

    return audio_save_path, info_log
 
253
 
254
 
255
  # ============================================
256
- # 🎨 Comic Classic Theme - Toon Playground
257
  # ============================================
258
-
259
- css = """
260
- /* ===== 🎨 Google Fonts Import ===== */
261
  @import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');
262
 
263
- /* ===== 🎨 Comic Classic 배경 - 빈티지 페이퍼 + 도트 패턴 ===== */
264
  .gradio-container {
265
  background-color: #FEF9C3 !important;
266
- background-image:
267
- radial-gradient(#1F2937 1px, transparent 1px) !important;
268
  background-size: 20px 20px !important;
269
  min-height: 100vh !important;
270
  font-family: 'Comic Neue', cursive, sans-serif !important;
271
  }
272
 
273
- /* ===== 허깅페이스 상단 요소 숨김 ===== */
274
- .huggingface-space-header,
275
- #space-header,
276
- .space-header,
277
- [class*="space-header"],
278
- .svelte-1ed2p3z,
279
- .space-header-badge,
280
- .header-badge,
281
- [data-testid="space-header"],
282
- .svelte-kqij2n,
283
- .svelte-1ax1toq,
284
- .embed-container > div:first-child {
285
  display: none !important;
286
- visibility: hidden !important;
287
- height: 0 !important;
288
- width: 0 !important;
289
- overflow: hidden !important;
290
- opacity: 0 !important;
291
- pointer-events: none !important;
292
  }
293
 
294
- /* ===== Footer 완전 숨김 ===== */
295
- footer,
296
- .footer,
297
- .gradio-container footer,
298
- .built-with,
299
- [class*="footer"],
300
- .gradio-footer,
301
- .main-footer,
302
- div[class*="footer"],
303
- .show-api,
304
- .built-with-gradio,
305
- a[href*="gradio.app"],
306
- a[href*="huggingface.co/spaces"] {
307
  display: none !important;
308
- visibility: hidden !important;
309
- height: 0 !important;
310
- padding: 0 !important;
311
- margin: 0 !important;
312
  }
313
 
314
- /* ===== 메인 컨테이너 ===== */
315
  #col-container {
316
- max-width: 1200px;
317
  margin: 0 auto;
318
  }
319
 
320
- /* ===== 🎨 헤더 타이틀 - 코믹 스타일 ===== */
321
  .header-text h1 {
322
  font-family: 'Bangers', cursive !important;
323
  color: #1F2937 !important;
324
  font-size: 3.5rem !important;
325
- font-weight: 400 !important;
326
  text-align: center !important;
327
- margin-bottom: 0.5rem !important;
328
- text-shadow:
329
- 4px 4px 0px #FACC15,
330
- 6px 6px 0px #1F2937 !important;
331
  letter-spacing: 3px !important;
332
  -webkit-text-stroke: 2px #1F2937 !important;
333
  }
334
 
335
- /* ===== 🎨 서브타이틀 ===== */
336
  .subtitle {
337
  text-align: center !important;
338
  font-family: 'Comic Neue', cursive !important;
339
  font-size: 1.2rem !important;
340
  color: #1F2937 !important;
341
- margin-bottom: 1.5rem !important;
342
  font-weight: 700 !important;
343
  }
344
 
345
- /* ===== 🎨 탭 스타일 ===== */
346
- .tabs {
347
- background: #FFFFFF !important;
 
 
 
 
 
 
 
 
348
  border: 3px solid #1F2937 !important;
349
  border-radius: 12px !important;
350
- box-shadow: 6px 6px 0px #1F2937 !important;
351
- padding: 10px !important;
 
 
352
  }
353
 
354
- .tab-nav {
355
- background: #FACC15 !important;
 
 
 
 
 
356
  border-radius: 8px !important;
357
- padding: 5px !important;
358
- border: 2px solid #1F2937 !important;
359
  }
360
 
361
- .tab-nav button {
362
- font-family: 'Bangers', cursive !important;
363
- font-size: 1.1rem !important;
364
- letter-spacing: 1px !important;
365
- color: #1F2937 !important;
366
- background: transparent !important;
367
- border: none !important;
368
- padding: 10px 20px !important;
369
- border-radius: 6px !important;
370
- transition: all 0.2s ease !important;
371
  }
372
 
373
- .tab-nav button:hover {
374
- background: #FEF3C7 !important;
 
 
 
375
  }
376
 
377
- .tab-nav button.selected {
378
- background: #3B82F6 !important;
379
- color: #FFFFFF !important;
380
- box-shadow: 3px 3px 0px #1F2937 !important;
381
  }
382
 
383
- /* ===== 🎨 카드/패널 - 만화 프레임 스타일 ===== */
384
- .gr-panel,
385
- .gr-box,
386
- .gr-form,
387
- .block,
388
- .gr-group {
389
- background: #FFFFFF !important;
390
- border: 3px solid #1F2937 !important;
391
- border-radius: 8px !important;
392
- box-shadow: 6px 6px 0px #1F2937 !important;
393
- transition: all 0.2s ease !important;
394
  }
395
 
396
- .gr-panel:hover,
397
- .block:hover {
398
- transform: translate(-2px, -2px) !important;
399
- box-shadow: 8px 8px 0px #1F2937 !important;
400
  }
401
 
402
- /* ===== 🎨 입력 필드 (Textbox) ===== */
403
- textarea,
404
- input[type="text"],
405
- input[type="number"] {
406
  background: #FFFFFF !important;
407
  border: 3px solid #1F2937 !important;
408
  border-radius: 8px !important;
409
- color: #1F2937 !important;
410
  font-family: 'Comic Neue', cursive !important;
411
- font-size: 1rem !important;
412
  font-weight: 700 !important;
413
- transition: all 0.2s ease !important;
414
  }
415
 
416
- textarea:focus,
417
- input[type="text"]:focus,
418
- input[type="number"]:focus {
419
  border-color: #3B82F6 !important;
420
  box-shadow: 4px 4px 0px #3B82F6 !important;
421
- outline: none !important;
422
  }
423
 
424
- textarea::placeholder {
425
- color: #9CA3AF !important;
426
- font-weight: 400 !important;
 
 
 
427
  }
428
 
429
- /* ===== 🎨 Primary 버튼 - 코믹 블루 ===== */
430
- .gr-button-primary,
431
- button.primary,
432
- .gr-button.primary {
433
  background: #3B82F6 !important;
434
  border: 3px solid #1F2937 !important;
435
  border-radius: 8px !important;
436
  color: #FFFFFF !important;
437
  font-family: 'Bangers', cursive !important;
438
- font-weight: 400 !important;
439
  font-size: 1.3rem !important;
440
  letter-spacing: 2px !important;
441
  padding: 14px 28px !important;
442
  box-shadow: 5px 5px 0px #1F2937 !important;
443
- transition: all 0.1s ease !important;
444
- text-shadow: 1px 1px 0px #1F2937 !important;
445
  }
446
 
447
- .gr-button-primary:hover,
448
- button.primary:hover,
449
- .gr-button.primary:hover {
450
  background: #2563EB !important;
451
  transform: translate(-2px, -2px) !important;
452
  box-shadow: 7px 7px 0px #1F2937 !important;
453
  }
454
 
455
- .gr-button-primary:active,
456
- button.primary:active,
457
- .gr-button.primary:active {
458
  transform: translate(3px, 3px) !important;
459
  box-shadow: 2px 2px 0px #1F2937 !important;
460
  }
461
 
462
- /* ===== 🎨 Secondary 버튼 - 코믹 레드 ===== */
463
- .gr-button-secondary,
464
- button.secondary {
465
- background: #EF4444 !important;
466
- border: 3px solid #1F2937 !important;
467
- border-radius: 8px !important;
468
- color: #FFFFFF !important;
469
- font-family: 'Bangers', cursive !important;
470
- font-weight: 400 !important;
471
- font-size: 1.1rem !important;
472
- letter-spacing: 1px !important;
473
- box-shadow: 4px 4px 0px #1F2937 !important;
474
- transition: all 0.1s ease !important;
475
- text-shadow: 1px 1px 0px #1F2937 !important;
476
- }
477
-
478
- .gr-button-secondary:hover,
479
- button.secondary:hover {
480
- background: #DC2626 !important;
481
- transform: translate(-2px, -2px) !important;
482
- box-shadow: 6px 6px 0px #1F2937 !important;
483
- }
484
-
485
- /* ===== 🎨 로그 출력 영역 ===== */
486
  .info-log textarea {
487
  background: #1F2937 !important;
488
  color: #10B981 !important;
489
  font-family: 'Courier New', monospace !important;
490
- font-size: 0.9rem !important;
491
- font-weight: 400 !important;
492
  border: 3px solid #10B981 !important;
493
- border-radius: 8px !important;
494
  box-shadow: 4px 4px 0px #10B981 !important;
495
  }
496
 
497
- /* ===== 🎨 비디오/오디오 영역 ===== */
498
- .gr-video,
499
- .gr-audio,
500
- video,
501
- audio {
502
  border: 4px solid #1F2937 !important;
503
  border-radius: 8px !important;
504
  box-shadow: 8px 8px 0px #1F2937 !important;
505
- overflow: hidden !important;
506
- background: #FFFFFF !important;
507
  }
508
 
509
- /* ===== 🎨 갤러리 스타일 ===== */
510
- .gr-gallery {
511
- background: #FFFFFF !important;
512
  border: 3px solid #1F2937 !important;
513
  border-radius: 8px !important;
514
- box-shadow: 6px 6px 0px #1F2937 !important;
515
- padding: 10px !important;
516
- }
517
-
518
- .gr-gallery .thumbnail-item {
519
- border: 3px solid #1F2937 !important;
520
- border-radius: 6px !important;
521
- transition: all 0.2s ease !important;
522
- overflow: hidden !important;
523
- }
524
-
525
- .gr-gallery .thumbnail-item:hover {
526
- transform: scale(1.05) !important;
527
- box-shadow: 4px 4px 0px #3B82F6 !important;
528
- }
529
-
530
- /* ===== 🎨 슬라이더 스타일 ===== */
531
- input[type="range"] {
532
- accent-color: #3B82F6 !important;
533
- }
534
-
535
- .gr-slider {
536
- background: #FFFFFF !important;
537
- }
538
-
539
- /* ===== 🎨 라벨 스타일 ===== */
540
- label,
541
- .gr-input-label,
542
- .gr-block-label {
543
- color: #1F2937 !important;
544
- font-family: 'Comic Neue', cursive !important;
545
- font-weight: 700 !important;
546
- font-size: 1rem !important;
547
- }
548
-
549
- span.gr-label {
550
- color: #1F2937 !important;
551
- }
552
-
553
- /* ===== 🎨 정보 텍스트 ===== */
554
- .gr-info,
555
- .info {
556
- color: #6B7280 !important;
557
- font-family: 'Comic Neue', cursive !important;
558
- font-size: 0.9rem !important;
559
  }
560
 
561
- /* ===== 🎨 Number Input 스타일 ===== */
562
- .gr-number input {
563
- background: #FFFFFF !important;
564
- border: 3px solid #1F2937 !important;
565
- border-radius: 8px !important;
566
  color: #1F2937 !important;
567
  font-family: 'Comic Neue', cursive !important;
568
  font-weight: 700 !important;
569
- box-shadow: 3px 3px 0px #1F2937 !important;
570
  }
571
 
572
- /* ===== 🎨 스크롤바 - 코믹 스타일 ===== */
573
- ::-webkit-scrollbar {
574
- width: 12px;
575
- height: 12px;
576
- }
577
 
578
- ::-webkit-scrollbar-track {
579
- background: #FEF9C3;
580
- border: 2px solid #1F2937;
581
- }
582
 
583
- ::-webkit-scrollbar-thumb {
584
- background: #3B82F6;
585
- border: 2px solid #1F2937;
586
- border-radius: 0px;
587
- }
588
-
589
- ::-webkit-scrollbar-thumb:hover {
590
- background: #EF4444;
591
- }
592
 
593
- /* ===== 🎨 선택 하이라이트 ===== */
594
- ::selection {
595
- background: #FACC15;
596
- color: #1F2937;
597
- }
598
-
599
- /* ===== 🎨 링크 스타일 ===== */
600
- a {
601
- color: #3B82F6 !important;
602
- text-decoration: none !important;
603
- font-weight: 700 !important;
604
- }
605
-
606
- a:hover {
607
- color: #EF4444 !important;
608
- }
609
-
610
- /* ===== 🎨 Row/Column 간격 ===== */
611
- .gr-row {
612
- gap: 1.5rem !important;
613
- }
614
-
615
- .gr-column {
616
- gap: 1rem !important;
617
- }
618
-
619
- /* ===== 반응형 조정 ===== */
620
  @media (max-width: 768px) {
621
- .header-text h1 {
622
- font-size: 2.2rem !important;
623
- text-shadow:
624
- 3px 3px 0px #FACC15,
625
- 4px 4px 0px #1F2937 !important;
626
- }
627
-
628
- .gr-button-primary,
629
- button.primary {
630
- padding: 12px 20px !important;
631
- font-size: 1.1rem !important;
632
- }
633
-
634
- .gr-panel,
635
- .block {
636
- box-shadow: 4px 4px 0px #1F2937 !important;
637
- }
638
-
639
- .tab-nav button {
640
- font-size: 0.9rem !important;
641
- padding: 8px 12px !important;
642
- }
643
- }
644
-
645
- /* ===== 🎨 다크모드 비활성화 ===== */
646
- @media (prefers-color-scheme: dark) {
647
- .gradio-container {
648
- background-color: #FEF9C3 !important;
649
- }
650
  }
651
  """
652
 
653
 
654
- # Gradio Blocks 인터페이스 생성
655
- with gr.Blocks(fill_height=True, css=css, title="MMAudio Studio") as demo:
656
-
657
- # HOME Badge
 
658
  gr.HTML("""
659
  <div style="text-align: center; margin: 20px 0 10px 0;">
660
- <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
661
  <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
662
  </a>
 
 
 
663
  </div>
664
  """)
665
 
666
- # Header Title
667
- gr.Markdown(
668
- """
669
- # 🎵 MMAUDIO STUDIO 🎬
670
- """,
671
- elem_classes="header-text"
672
- )
673
 
674
- gr.Markdown(
675
- """
676
- <p class="subtitle">🔊 Generate Audio from Text or Video • Korean Supported! 한글지원 🇰🇷</p>
677
- """,
678
- )
 
 
 
679
 
680
- with gr.Tabs():
681
- # Tab 1: Video Search
682
- with gr.TabItem("🔍 Video Search"):
683
- gr.Markdown(
684
- """
685
- <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
686
- 📹 Search for videos from Pixabay to use as input!
687
- </p>
688
- """
689
- )
690
-
691
- with gr.Row():
692
- with gr.Column(scale=1):
693
- search_query = gr.Textbox(
694
- label="🔎 Search Query (한글지원)" if translator else "🔎 Search Query",
695
- placeholder="Enter search keywords...",
696
- lines=1
697
- )
698
- search_btn = gr.Button(
699
- "🔍 SEARCH VIDEOS!",
700
- variant="primary",
701
- size="lg"
702
- )
703
-
704
- search_gallery = gr.Gallery(
705
- label="📺 Search Results",
706
- columns=4,
707
- rows=5,
708
- height=500
709
- )
710
-
711
- search_btn.click(
712
- fn=search_videos,
713
- inputs=[search_query],
714
- outputs=[search_gallery]
715
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
716
 
717
- # Tab 2: Video to Audio
718
- with gr.TabItem("🎬 Video-to-Audio"):
719
- gr.Markdown(
720
- """
721
- <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
722
- 🎥 Upload a video and generate matching audio!
723
- </p>
724
- """
725
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
 
727
- with gr.Row(equal_height=False):
728
- with gr.Column(scale=1):
729
- v2a_video = gr.Video(label="📹 Input Video")
730
- v2a_prompt = gr.Textbox(
731
- label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
732
- placeholder="Describe the audio you want...",
733
- lines=2
734
- )
735
- v2a_negative = gr.Textbox(
736
- label="🚫 Negative Prompt",
737
- value="music",
738
- lines=1
739
- )
740
 
741
- with gr.Row():
742
- v2a_seed = gr.Number(label="🎲 Seed", value=0)
743
- v2a_steps = gr.Number(label="🔄 Steps", value=25)
744
-
745
- with gr.Row():
746
- v2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
747
- v2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)
748
-
749
- v2a_btn = gr.Button(
750
- "🎬 GENERATE AUDIO! 🔊",
751
- variant="primary",
752
- size="lg"
753
- )
 
 
 
 
754
 
755
- with gr.Accordion("📜 Generation Log", open=True):
756
- v2a_log = gr.Textbox(
757
- label="",
758
- placeholder="Upload video and click generate...",
759
- lines=12,
760
- interactive=False,
761
- elem_classes="info-log"
762
  )
763
-
764
- with gr.Column(scale=1):
765
- v2a_output = gr.Video(label="🎥 Generated Result", height=400)
766
- gr.Markdown(
767
- """
768
- <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
769
- 💡 Right-click on the video to save!
770
- </p>
771
- """
772
- )
773
 
774
- v2a_btn.click(
775
- fn=video_to_audio,
776
- inputs=[v2a_video, v2a_prompt, v2a_negative, v2a_seed, v2a_steps, v2a_cfg, v2a_duration],
777
- outputs=[v2a_output, v2a_log]
778
- )
779
-
780
- # Tab 3: Text to Audio
781
- with gr.TabItem("🎵 Text-to-Audio"):
782
- gr.Markdown(
783
- """
784
- <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
785
- ✨ Generate audio from text description!
786
- </p>
787
- """
788
- )
789
-
790
- with gr.Row(equal_height=False):
791
- with gr.Column(scale=1):
792
- t2a_prompt = gr.Textbox(
793
- label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
794
- placeholder="Describe the audio you want to generate...",
795
- lines=3
796
- )
797
- t2a_negative = gr.Textbox(
798
- label="🚫 Negative Prompt",
799
- placeholder="What to avoid...",
800
- lines=1
801
- )
802
-
803
- with gr.Row():
804
- t2a_seed = gr.Number(label="🎲 Seed", value=0)
805
- t2a_steps = gr.Number(label="🔄 Steps", value=25)
806
 
807
- with gr.Row():
808
- t2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
809
- t2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)
810
-
811
- t2a_btn = gr.Button(
812
- "🎵 GENERATE AUDIO! ✨",
813
- variant="primary",
814
- size="lg"
815
- )
 
 
 
 
 
 
 
816
 
817
- with gr.Accordion("📜 Generation Log", open=True):
818
- t2a_log = gr.Textbox(
819
- label="",
820
- placeholder="Enter prompt and click generate...",
821
- lines=12,
822
- interactive=False,
823
- elem_classes="info-log"
824
  )
825
-
826
- with gr.Column(scale=1):
827
- t2a_output = gr.Audio(label="🔊 Generated Audio")
828
- gr.Markdown(
829
- """
830
- <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
831
- 💡 Click the download button to save!
832
- </p>
833
- """
834
- )
835
-
836
- t2a_btn.click(
837
- fn=text_to_audio,
838
- inputs=[t2a_prompt, t2a_negative, t2a_seed, t2a_steps, t2a_cfg, t2a_duration],
839
- outputs=[t2a_output, t2a_log]
840
- )
 
 
 
 
 
 
 
 
 
 
 
 
841
 
842
 
843
- # 메인 실행
 
 
844
if __name__ == "__main__":
    # Surface translator status once at startup so the log explains why
    # Korean prompts would pass through untranslated.
    if translator is None:
        log.warning("Translation model failed to load. Korean translation will be disabled.")

    # allowed_paths lets Gradio serve generated files from output_dir.
    demo.launch(allowed_paths=[output_dir])
 
1
+ """
2
+ ANSIM BLUR - Face Privacy Protection
3
+ =====================================
4
+ Advanced AI-Powered Face Detection & Privacy Protection
5
+ Using YOLOv8 for face detection with Gaussian/Mosaic blur options
6
+ """
7
+
8
  import gradio as gr
 
 
 
 
 
 
 
 
9
  import cv2
10
+ import numpy as np
11
+ import tempfile
12
+ import os
13
+ from pathlib import Path
14
  from typing import Optional, Tuple, List
15
+ import torch
16
+ from PIL import Image
17
+
18
+ # ============================================
19
+ # Constants & Configuration
20
+ # ============================================
21
# UI mode names; matched literally against `mode` in apply_blur().
BLUR_MODES = ["Gaussian Blur", "Mosaic Effect"]

# Default values for the Gradio controls.
DEFAULT_CONFIG = {
    "confidence": 0.25,    # YOLO detection confidence threshold
    "iou": 0.45,           # NMS IoU threshold
    "expand_ratio": 0.05,  # fractional bbox expansion so blur covers face edges
    "blur_intensity": 51,  # Gaussian kernel size (odd)
    "mosaic_size": 15,     # mosaic downscale factor (bigger = coarser)
}

# Min/max/step ranges for each slider.
SLIDER_CONFIG = {
    "confidence": {"minimum": 0.05, "maximum": 0.9, "step": 0.01},
    "iou": {"minimum": 0.1, "maximum": 0.9, "step": 0.01},
    "expand": {"minimum": 0.0, "maximum": 0.5, "step": 0.01},
    # step=2 keeps the Gaussian kernel size odd, as OpenCV requires.
    "blur": {"minimum": 15, "maximum": 151, "step": 2},
    "mosaic": {"minimum": 5, "maximum": 40, "step": 1},
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
 
 
 
 
 
 
 
 
39
 
40
+ # ============================================
41
+ # Model Manager
42
+ # ============================================
43
class FaceDetector:
    """YOLOv8 Face Detection Model Manager"""

    def __init__(self, model_path: str = "yolov8-face-hf.pt"):
        self.model = None
        self.device = self._get_device()
        self._load_model(model_path)

    def _get_device(self) -> str:
        """Pick the fastest available backend: CUDA, then Apple MPS, then CPU."""
        if torch.cuda.is_available():
            return "cuda"
        if torch.backends.mps.is_available():
            return "mps"
        return "cpu"

    def _load_model(self, model_path: str) -> None:
        """Load the YOLO weights and move them to the selected device."""
        from ultralytics import YOLO
        self.model = YOLO(model_path)
        self.model.to(self.device)

    def detect(self, image: np.ndarray, conf: float, iou: float) -> List:
        """Run face detection on one image; returns raw ultralytics results."""
        with torch.no_grad():
            return self.model.predict(
                image,
                conf=conf,
                iou=iou,
                verbose=False,
                device=self.device,
            )
76
+
77
+
78
+ # Initialize global detector
79
+ detector = FaceDetector()
80
+
81
+
82
+ # ============================================
83
+ # Image Processing Functions
84
+ # ============================================
85
def ensure_odd(x: int) -> int:
    """Return x when odd, otherwise x + 1 (OpenCV kernels must be odd)."""
    return x + 1 - x % 2
88
+
89
+
90
def get_even_dimensions(w: int, h: int) -> Tuple[int, int]:
    """Round width and height down to even values (mp4 codec requirement)."""
    return w - w % 2, h - h % 2
93
+
94
+
95
def apply_blur(
    face_roi: np.ndarray,
    mode: str,
    blur_kernel: int,
    mosaic_size: int = 15
) -> np.ndarray:
    """Obscure a face crop with a Gaussian blur or a mosaic effect.

    Empty crops are returned untouched; any mode other than "Gaussian Blur"
    falls through to the mosaic path.
    """
    if face_roi.size == 0:
        return face_roi

    if mode == "Gaussian Blur":
        kernel = ensure_odd(max(blur_kernel, 15))
        return cv2.GaussianBlur(face_roi, (kernel, kernel), 0)

    # Mosaic: shrink, then blow back up with nearest-neighbour sampling so
    # the blocky cells stay crisp.
    cell = max(2, mosaic_size)
    h, w = face_roi.shape[:2]
    shrunk = cv2.resize(
        face_roi,
        (max(1, w // cell), max(1, h // cell)),
        interpolation=cv2.INTER_LINEAR,
    )
    return cv2.resize(shrunk, (w, h), interpolation=cv2.INTER_NEAREST)
117
+
118
+
119
def expand_bbox(
    x1: int, y1: int, x2: int, y2: int,
    expand_ratio: float,
    img_w: int, img_h: int
) -> Tuple[int, int, int, int]:
    """Grow a box by expand_ratio on every side, then clamp to image bounds."""
    if expand_ratio > 0:
        dx = int((x2 - x1) * expand_ratio)
        dy = int((y2 - y1) * expand_ratio)
        x1 -= dx
        x2 += dx
        y1 -= dy
        y2 += dy

    # Keep every corner inside the image.
    x1 = max(0, min(img_w, x1))
    y1 = max(0, min(img_h, y1))
    x2 = max(0, min(img_w, x2))
    y2 = max(0, min(img_h, y2))

    return x1, y1, x2, y2
138
+
139
+
140
def blur_faces_in_image(
    image_bgr: np.ndarray,
    conf: float,
    iou: float,
    expand_ratio: float,
    mode: str,
    blur_kernel: int,
    mosaic_size: int
) -> Tuple[np.ndarray, int]:
    """Detect and blur every face in a single BGR image (modified in place).

    Args:
        image_bgr: OpenCV-layout image; face regions are overwritten in place.
        conf / iou: YOLO confidence and NMS thresholds.
        expand_ratio: Fractional bbox expansion applied before blurring.
        mode: "Gaussian Blur" or "Mosaic Effect" (see apply_blur).
        blur_kernel / mosaic_size: Effect strength parameters.

    Returns:
        (image, total number of faces blurred)
    """
    h, w = image_bgr.shape[:2]
    face_count = 0

    results = detector.detect(image_bgr, conf, iou)

    for r in results:
        boxes = r.boxes.xyxy.cpu().numpy() if hasattr(r.boxes, "xyxy") else []
        # BUG FIX: accumulate across all result objects instead of overwriting
        # with only the last one's count (matches blur_faces_in_video).
        face_count += len(boxes)

        for box in boxes:
            x1, y1, x2, y2 = map(int, box[:4])
            x1, y1, x2, y2 = expand_bbox(x1, y1, x2, y2, expand_ratio, w, h)

            # Skip boxes that collapse to nothing after clipping.
            if x2 <= x1 or y2 <= y1:
                continue

            roi = image_bgr[y1:y2, x1:x2]
            image_bgr[y1:y2, x1:x2] = apply_blur(roi, mode, blur_kernel, mosaic_size)

    return image_bgr, face_count
170
+
171
+
172
def blur_faces_in_video(
    input_path: str,
    conf: float,
    iou: float,
    expand_ratio: float,
    mode: str,
    blur_kernel: int,
    mosaic_size: int,
    progress: gr.Progress
) -> Tuple[str, int, int]:
    """Process video file and blur all detected faces.

    Frames are read with OpenCV, blurred, and written to a temp mp4; the
    original audio track is then muxed back in with moviepy. If audio muxing
    fails, the silent blurred video is returned instead.

    Returns:
        (output video path, total face detections across all frames,
         frame count reported by the container)

    Raises:
        IOError: if the input video cannot be opened.
    """
    from moviepy.editor import VideoFileClip

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise IOError("Cannot open video file")

    # Get video properties
    in_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    in_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # fall back when FPS metadata is missing/0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0

    # mp4 encoders require even frame dimensions.
    out_w, out_h = get_even_dimensions(in_w, in_h)

    # Create temp files (closed immediately; only the paths are used).
    temp_video = tempfile.NamedTemporaryFile(delete=False, suffix="_temp.mp4")
    output_video = tempfile.NamedTemporaryFile(delete=False, suffix="_blurred.mp4")
    temp_path = temp_video.name
    output_path = output_video.name
    temp_video.close()
    output_video.close()

    # Setup video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(temp_path, fourcc, fps, (out_w, out_h))

    frame_idx = 0
    total_faces = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (out_w, out_h))
            h, w = frame.shape[:2]

            # Detect faces
            results = detector.detect(frame, conf, iou)

            if results:
                # NOTE(review): only results[0] is inspected here, whereas
                # blur_faces_in_image iterates all result objects — confirm
                # predict() on a single frame always returns one result.
                r = results[0]
                boxes = r.boxes.xyxy.cpu().numpy() if hasattr(r.boxes, "xyxy") else []
                total_faces += len(boxes)

                for box in boxes:
                    x1, y1, x2, y2 = map(int, box[:4])
                    x1, y1, x2, y2 = expand_bbox(x1, y1, x2, y2, expand_ratio, w, h)

                    if x2 <= x1 or y2 <= y1:
                        continue

                    roi = frame[y1:y2, x1:x2]
                    frame[y1:y2, x1:x2] = apply_blur(roi, mode, blur_kernel, mosaic_size)

            writer.write(frame)
            frame_idx += 1

            if total_frames > 0:
                progress(frame_idx / total_frames, desc=f"Processing frame {frame_idx}/{total_frames}")

    finally:
        # Always release handles so the temp mp4 is flushed even on error.
        cap.release()
        writer.release()

    # Merge audio from original video
    try:
        progress(0.95, desc="Merging audio...")
        original = VideoFileClip(input_path)
        processed = VideoFileClip(temp_path).set_audio(original.audio)
        processed.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            threads=1,
            logger=None
        )
        original.close()
        processed.close()

        # Clean up temp file
        if os.path.exists(temp_path):
            os.remove(temp_path)

        return output_path, total_faces, total_frames

    except Exception as e:
        # Best-effort: if muxing fails, hand back the silent blurred video.
        print(f"Audio merging failed: {e}")
        return temp_path, total_faces, total_frames
273
+
274
+
275
+ # ============================================
276
+ # Gradio Processing Handlers
277
+ # ============================================
278
def process_image(
    image: Optional[Image.Image],
    conf: float,
    iou: float,
    expand_ratio: float,
    mode_choice: str,
    blur_intensity: int,
    mosaic_size: int
) -> Tuple[Optional[Image.Image], str]:
    """Main image processing handler.

    Runs face detection + blurring on a single PIL image and returns the
    processed image along with a human-readable status report for the UI.

    Args:
        image: Uploaded PIL image, or None if nothing was uploaded.
        conf: Detector confidence threshold.
        iou: Detector NMS IoU threshold.
        expand_ratio: Fractional padding added around each detected box.
        mode_choice: Blur style label chosen in the UI dropdown.
        blur_intensity: Gaussian kernel slider value.
        mosaic_size: Mosaic cell-size slider value.

    Returns:
        (processed PIL image or None, status/log text).
    """
    if image is None:
        return None, "⚠️ Please upload an image first!"

    # Convert PIL (RGB) to OpenCV's BGR channel order for processing.
    image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    h, w = image_bgr.shape[:2]

    # Only the slider matching the chosen mode is honored; the other
    # parameter falls back to a fixed default (51 kernel / 15 mosaic).
    blur_kernel = blur_intensity if mode_choice == "Gaussian Blur" else 51
    mosaic = mosaic_size if mode_choice == "Mosaic Effect" else 15

    # Process a copy so the caller's array is not mutated in place.
    result_bgr, face_count = blur_faces_in_image(
        image_bgr.copy(), conf, iou, expand_ratio,
        mode_choice, blur_kernel, mosaic
    )

    # Convert back to RGB for display/download as a PIL image.
    result_rgb = cv2.cvtColor(result_bgr, cv2.COLOR_BGR2RGB)
    result_pil = Image.fromarray(result_rgb)

    # Generate the report text shown in the UI log box.
    intensity_value = blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size
    info_log = f"""✅ IMAGE PROCESSING COMPLETE!
{'=' * 50}
🖼️ Image Info:
• Size: {w} x {h} pixels
• Format: RGB
{'=' * 50}
🔍 Detection Settings:
• Confidence: {conf}
• IoU Threshold: {iou}
• Box Expansion: {expand_ratio}
{'=' * 50}
🎨 Blur Settings:
• Style: {mode_choice}
• Intensity: {intensity_value}
{'=' * 50}
👤 Results:
• Faces Detected: {face_count}
• Faces Blurred: {face_count}
{'=' * 50}
💾 Ready to download!"""

    return result_pil, info_log
333
+
334
+
335
def process_video(
    video: Optional[str],
    conf: float,
    iou: float,
    expand_ratio: float,
    mode_choice: str,
    blur_intensity: int,
    mosaic_size: int,
    progress: gr.Progress = gr.Progress()
) -> Tuple[Optional[str], str]:
    """Main video processing handler.

    Delegates the per-frame work to blur_faces_in_video and formats a
    status report for the UI log box.

    Args:
        video: Filesystem path to the uploaded video, or None.
        conf: Detector confidence threshold.
        iou: Detector NMS IoU threshold.
        expand_ratio: Fractional padding added around each detected box.
        mode_choice: Blur style label chosen in the UI dropdown.
        blur_intensity: Gaussian kernel slider value.
        mosaic_size: Mosaic cell-size slider value.
        progress: Gradio progress tracker (default instance is the
            documented Gradio pattern for enabling the progress bar).

    Returns:
        (processed video path or None, status/log text). Errors are caught
        and reported in the log string rather than raised to the UI.
    """
    if video is None:
        return None, "⚠️ Please upload a video first!"

    # Only the slider matching the chosen mode is honored; the other
    # parameter falls back to a fixed default (51 kernel / 15 mosaic).
    blur_kernel = blur_intensity if mode_choice == "Gaussian Blur" else 51
    mosaic = mosaic_size if mode_choice == "Mosaic Effect" else 15

    try:
        output_path, total_faces, total_frames = blur_faces_in_video(
            video, conf, iou, expand_ratio,
            mode_choice, blur_kernel, mosaic, progress
        )

        intensity_value = blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size
        info_log = f"""✅ VIDEO PROCESSING COMPLETE!
{'=' * 50}
🎥 Video Info:
• Total Frames: {total_frames}
• Output Path: {os.path.basename(output_path)}
{'=' * 50}
🔍 Detection Settings:
• Confidence: {conf}
• IoU Threshold: {iou}
• Box Expansion: {expand_ratio}
{'=' * 50}
🎨 Blur Settings:
• Style: {mode_choice}
• Intensity: {intensity_value}
{'=' * 50}
👤 Results:
• Total Faces Detected: {total_faces}
• Frames Processed: {total_frames}
{'=' * 50}
💾 Ready to download!"""

        return output_path, info_log

    except Exception as e:
        # Surface the failure in the UI log instead of crashing the app.
        return None, f"❌ Error: {str(e)}"
385
 
386
 
387
  # ============================================
388
+ # CSS Styling - Comic Classic Theme
389
  # ============================================
390
# Comic-classic theme stylesheet injected into gr.Blocks(css=CSS).
# NOTE(review): selectors such as .gr-tab-nav / .gr-panel target Gradio's
# internal class names, which vary between Gradio versions — verify against
# the pinned Gradio release.
CSS = """
/* Google Fonts */
@import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');

/* Background */
.gradio-container {
    background-color: #FEF9C3 !important;
    background-image: radial-gradient(#1F2937 1px, transparent 1px) !important;
    background-size: 20px 20px !important;
    min-height: 100vh !important;
    font-family: 'Comic Neue', cursive, sans-serif !important;
}

/* Hide HuggingFace header */
.huggingface-space-header, #space-header, .space-header,
[class*="space-header"], .svelte-1ed2p3z, .space-header-badge {
    display: none !important;
}

/* Hide footer */
footer, .footer, .gradio-container footer, .built-with,
[class*="footer"], .show-api, .built-with-gradio {
    display: none !important;
}

/* Main container */
#col-container {
    max-width: 1400px;
    margin: 0 auto;
}

/* Header */
.header-text h1 {
    font-family: 'Bangers', cursive !important;
    color: #1F2937 !important;
    font-size: 3.5rem !important;
    text-align: center !important;
    text-shadow: 4px 4px 0px #FACC15, 6px 6px 0px #1F2937 !important;
    letter-spacing: 3px !important;
    -webkit-text-stroke: 2px #1F2937 !important;
}

.subtitle {
    text-align: center !important;
    font-family: 'Comic Neue', cursive !important;
    font-size: 1.2rem !important;
    color: #1F2937 !important;
    font-weight: 700 !important;
}

/* Stats cards */
.stats-row {
    display: flex !important;
    justify-content: center !important;
    gap: 1rem !important;
    margin: 1.5rem 0 !important;
    flex-wrap: wrap !important;
}

.stat-card {
    background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 100%) !important;
    border: 3px solid #1F2937 !important;
    border-radius: 12px !important;
    padding: 1rem 1.5rem !important;
    text-align: center !important;
    box-shadow: 4px 4px 0px #1F2937 !important;
    min-width: 120px !important;
}

.stat-card .emoji { font-size: 2rem !important; display: block !important; }
.stat-card .label { color: #FFFFFF !important; font-family: 'Comic Neue', cursive !important; font-weight: 700 !important; }

/* Panels */
.gr-panel, .gr-box, .gr-form, .block, .gr-group {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
    transition: all 0.2s ease !important;
}

.gr-panel:hover, .block:hover {
    transform: translate(-2px, -2px) !important;
    box-shadow: 8px 8px 0px #1F2937 !important;
}

/* Tabs */
.gr-tabs {
    border: 3px solid #1F2937 !important;
    border-radius: 12px !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
}

.gr-tab-nav {
    background: #FACC15 !important;
    border-bottom: 3px solid #1F2937 !important;
}

.gr-tab-nav button {
    font-family: 'Bangers', cursive !important;
    font-size: 1.2rem !important;
    color: #1F2937 !important;
    padding: 12px 24px !important;
}

.gr-tab-nav button.selected {
    background: #3B82F6 !important;
    color: #FFFFFF !important;
}

/* Inputs */
textarea, input[type="text"], input[type="number"] {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    font-family: 'Comic Neue', cursive !important;
    font-weight: 700 !important;
}

textarea:focus, input:focus {
    border-color: #3B82F6 !important;
    box-shadow: 4px 4px 0px #3B82F6 !important;
}

/* Dropdown */
.gr-dropdown {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 3px 3px 0px #1F2937 !important;
}

/* Primary button */
.gr-button-primary, button.primary, .process-btn {
    background: #3B82F6 !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    color: #FFFFFF !important;
    font-family: 'Bangers', cursive !important;
    font-size: 1.3rem !important;
    letter-spacing: 2px !important;
    padding: 14px 28px !important;
    box-shadow: 5px 5px 0px #1F2937 !important;
}

.gr-button-primary:hover, button.primary:hover {
    background: #2563EB !important;
    transform: translate(-2px, -2px) !important;
    box-shadow: 7px 7px 0px #1F2937 !important;
}

.gr-button-primary:active, button.primary:active {
    transform: translate(3px, 3px) !important;
    box-shadow: 2px 2px 0px #1F2937 !important;
}

/* Log output */
.info-log textarea {
    background: #1F2937 !important;
    color: #10B981 !important;
    font-family: 'Courier New', monospace !important;
    border: 3px solid #10B981 !important;
    box-shadow: 4px 4px 0px #10B981 !important;
}

/* Image/Video containers */
.gr-image, .gr-video {
    border: 4px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 8px 8px 0px #1F2937 !important;
}

/* Accordion */
.gr-accordion {
    background: #FACC15 !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 4px 4px 0px #1F2937 !important;
}

/* Labels */
label, .gr-input-label, .gr-block-label {
    color: #1F2937 !important;
    font-family: 'Comic Neue', cursive !important;
    font-weight: 700 !important;
}

/* Slider */
input[type="range"] { accent-color: #3B82F6 !important; }

/* Scrollbar */
::-webkit-scrollbar { width: 12px; }
::-webkit-scrollbar-track { background: #FEF9C3; border: 2px solid #1F2937; }
::-webkit-scrollbar-thumb { background: #3B82F6; border: 2px solid #1F2937; }

/* Selection */
::selection { background: #FACC15; color: #1F2937; }

/* Responsive */
@media (max-width: 768px) {
    .header-text h1 { font-size: 2.2rem !important; }
    .gr-button-primary { padding: 12px 20px !important; font-size: 1.1rem !important; }
}
"""
594
 
595
 
596
+ # ============================================
597
+ # UI Component Builders
598
+ # ============================================
599
def create_header() -> None:
    """Create header section.

    Renders (in order): external badge links, the app title/subtitle,
    a row of feature "stat" cards, and the active compute device.
    Must be called inside an open gr.Blocks context.
    """
    # External links rendered as shield badges.
    gr.HTML("""
    <div style="text-align: center; margin: 20px 0 10px 0;">
        <a href="https://www.humangen.ai" target="_blank">
            <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
        </a>
        <a href="https://discord.gg/openfreeai" target="_blank" style="margin-left: 10px;">
            <img src="https://img.shields.io/static/v1?label=Discord&message=OpenFree%20AI&color=5865F2&labelColor=1F2937&logo=discord&logoColor=white&style=for-the-badge" alt="Discord">
        </a>
    </div>
    """)

    # Title and subtitle; elem_classes / class names are styled by CSS above.
    gr.Markdown("# 🔒 ANSIM BLUR - FACE PRIVACY 🛡️", elem_classes="header-text")
    gr.Markdown('<p class="subtitle">🎭 Advanced AI-Powered Face Detection & Privacy Protection! ✨</p>')

    # Feature highlight cards (purely decorative).
    gr.HTML("""
    <div class="stats-row">
        <div class="stat-card"><span class="emoji">🖼️</span><span class="label">Image Support</span></div>
        <div class="stat-card"><span class="emoji">🎥</span><span class="label">Video Processing</span></div>
        <div class="stat-card"><span class="emoji">⚡</span><span class="label">Real-time AI</span></div>
        <div class="stat-card"><span class="emoji">🛡️</span><span class="label">Privacy First</span></div>
    </div>
    """)

    # Show which device the global detector runs on (e.g. CUDA vs CPU).
    gr.Markdown(f"""
    <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937;">
        🖥️ Running on: <span style="color: #3B82F6;">{detector.device.upper()}</span>
    </p>
    """)
629
+
630
+
631
def create_detection_settings(suffix: str = "") -> Tuple[gr.Slider, gr.Slider, gr.Slider]:
    """Build the detection-settings accordion and return its three sliders.

    The sliders are created in a fixed order — confidence, NMS IoU, box
    expansion — matching the order they are unpacked by callers.
    """
    # (SLIDER_CONFIG key, DEFAULT_CONFIG key, visible label) per slider.
    slider_specs = [
        ("confidence", "confidence", "🎯 Confidence Threshold"),
        ("iou", "iou", "📐 NMS IoU"),
        ("expand", "expand_ratio", "🔲 Box Expansion"),
    ]
    with gr.Accordion("⚙️ Detection Settings", open=True):
        made = [
            gr.Slider(
                **SLIDER_CONFIG[cfg_key],
                value=DEFAULT_CONFIG[default_key],
                label=caption
            )
            for cfg_key, default_key, caption in slider_specs
        ]
    return made[0], made[1], made[2]
650
+
651
+
652
def create_blur_settings(suffix: str = "") -> Tuple[gr.Dropdown, gr.Slider, gr.Slider]:
    """Build the blur-settings accordion.

    Returns the style dropdown plus the blur-intensity and mosaic-size
    sliders, in that order (the order callers unpack them in).
    """
    with gr.Accordion("🎨 Blur Settings", open=True):
        # Style selector — first entry in BLUR_MODES is the default.
        style_choice = gr.Dropdown(
            choices=BLUR_MODES,
            value=BLUR_MODES[0],
            label="🖌️ Style"
        )
        # Gaussian kernel strength (used when the blur style is active).
        intensity = gr.Slider(
            **SLIDER_CONFIG["blur"],
            value=DEFAULT_CONFIG["blur_intensity"],
            label="💨 Blur Intensity"
        )
        # Mosaic cell size (used when the mosaic style is active).
        cell_size = gr.Slider(
            **SLIDER_CONFIG["mosaic"],
            value=DEFAULT_CONFIG["mosaic_size"],
            label="🧩 Mosaic Size"
        )
    return style_choice, intensity, cell_size
671
+
672
+
673
def create_footer() -> None:
    """Create footer with instructions.

    Renders two styled info boxes (usage steps and tips) as raw HTML via
    Markdown. Must be called inside an open gr.Blocks context.
    """
    gr.Markdown("""
    <div style="background: linear-gradient(135deg, #EFF6FF 0%, #DBEAFE 100%); border: 3px solid #3B82F6; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 2rem;">
        <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem;">📝 HOW TO USE</h3>
        <ol style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
            <li>Upload an image or video containing faces</li>
            <li>Adjust detection settings (confidence, IoU, expansion)</li>
            <li>Choose blur style (Gaussian or Mosaic)</li>
            <li>Click the Process button and wait for results</li>
            <li>Download your privacy-protected media!</li>
        </ol>
    </div>

    <div style="background: linear-gradient(135deg, #FEF3C7 0%, #FDE68A 100%); border: 3px solid #F59E0B; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 1rem;">
        <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem;">💡 TIPS</h3>
        <ul style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
            <li>Lower confidence = more faces detected (may include false positives)</li>
            <li>Higher blur intensity = stronger privacy protection</li>
            <li>Mosaic effect works better for artistic results</li>
            <li>Video processing may take time depending on length</li>
        </ul>
    </div>
    """)
697
+
698
+
699
+ # ============================================
700
+ # Main Application Builder
701
+ # ============================================
702
def create_app() -> gr.Blocks:
    """Build and return the Gradio application.

    Lays out two tabs (image / video), each with an input column
    (media upload + detection/blur settings + process button) and an
    output column (result + log). Event handlers are wired at the end,
    after all referenced components exist.
    """

    with gr.Blocks(
        fill_height=True,
        css=CSS,
        title="Ansim Blur - Face Privacy Protection",
        theme=gr.themes.Default()
    ) as app:

        # Header
        create_header()

        # Main tabs
        with gr.Tabs():

            # === IMAGE TAB ===
            with gr.Tab("📸 Image Processing"):
                with gr.Row(equal_height=False):

                    # Left column - Input
                    with gr.Column(scale=1, min_width=400):
                        input_image = gr.Image(
                            label="🖼️ Upload Image",
                            type="pil",
                            height=350
                        )

                        # Shared settings builders; the suffix argument is
                        # currently unused by the builders themselves.
                        conf_img, iou_img, expand_img = create_detection_settings("img")
                        mode_img, blur_img, mosaic_img = create_blur_settings("img")

                        process_img_btn = gr.Button(
                            "🔍 PROCESS IMAGE! 🎭",
                            variant="primary",
                            size="lg",
                            elem_classes="process-btn"
                        )

                    # Right column - Output
                    with gr.Column(scale=1, min_width=400):
                        output_image = gr.Image(
                            label="🖼️ Processed Result",
                            type="pil",
                            height=350
                        )

                        with gr.Accordion("📜 Processing Log", open=True):
                            info_log_img = gr.Textbox(
                                label="",
                                placeholder="Upload an image and click process...",
                                lines=12,
                                max_lines=18,
                                interactive=False,
                                elem_classes="info-log"
                            )

            # === VIDEO TAB ===
            with gr.Tab("🎬 Video Processing"):
                with gr.Row(equal_height=False):

                    # Left column - Input
                    with gr.Column(scale=1, min_width=400):
                        input_video = gr.Video(
                            label="🎥 Upload Video",
                            height=350
                        )

                        # Separate component instances from the image tab.
                        conf_vid, iou_vid, expand_vid = create_detection_settings("vid")
                        mode_vid, blur_vid, mosaic_vid = create_blur_settings("vid")

                        process_vid_btn = gr.Button(
                            "🎬 PROCESS VIDEO! 🛡️",
                            variant="primary",
                            size="lg",
                            elem_classes="process-btn"
                        )

                    # Right column - Output
                    with gr.Column(scale=1, min_width=400):
                        output_video = gr.Video(
                            label="🎥 Processed Result",
                            height=350
                        )

                        with gr.Accordion("📜 Processing Log", open=True):
                            info_log_vid = gr.Textbox(
                                label="",
                                placeholder="Upload a video and click process...",
                                lines=12,
                                max_lines=18,
                                interactive=False,
                                elem_classes="info-log"
                            )

        # Footer
        create_footer()

        # === EVENT HANDLERS ===
        # Input order must match the handlers' parameter order exactly.
        process_img_btn.click(
            fn=process_image,
            inputs=[input_image, conf_img, iou_img, expand_img, mode_img, blur_img, mosaic_img],
            outputs=[output_image, info_log_img]
        )

        process_vid_btn.click(
            fn=process_video,
            inputs=[input_video, conf_vid, iou_vid, expand_vid, mode_vid, blur_vid, mosaic_vid],
            outputs=[output_video, info_log_vid]
        )

    return app
813
 
814
 
815
+ # ============================================
816
+ # Entry Point
817
+ # ============================================
818
if __name__ == "__main__":
    # Build the UI once and start the (blocking) Gradio server.
    app = create_app()
    app.launch()