citoreh commited on
Commit
60ec455
Β·
verified Β·
1 Parent(s): e128d93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -609
app.py CHANGED
@@ -1,667 +1,387 @@
1
- # app.py - Lightweight version for Hugging Face Spaces
2
 
3
  import os
4
  import sys
5
- import torch
6
- import numpy as np
7
- from PIL import Image
8
- import gradio as gr
9
  import tempfile
10
- import soundfile as sf
11
  import warnings
12
- import traceback
13
  warnings.filterwarnings('ignore')
14
 
15
- # System info logging
16
- print(f"πŸ” Python version: {sys.version}")
17
- print(f"πŸ” PyTorch version: {torch.__version__}")
18
- print(f"πŸ” CUDA available: {torch.cuda.is_available()}")
19
- print(f"πŸ” Device count: {torch.cuda.device_count() if torch.cuda.is_available() else 'CPU only'}")
 
 
 
 
 
 
 
20
 
21
- # Try imports with detailed error reporting
22
- BLIP_AVAILABLE = False
23
- MUSICGEN_AVAILABLE = False
24
- VIDEO_AVAILABLE = False
 
 
25
 
26
  try:
27
- from transformers import BlipProcessor, BlipForConditionalGeneration
28
- BLIP_AVAILABLE = True
29
- print("βœ… BLIP models imported successfully")
30
  except Exception as e:
31
- print(f"❌ BLIP import failed: {e}")
32
 
33
  try:
34
- from transformers import MusicgenForConditionalGeneration, AutoProcessor
35
- MUSICGEN_AVAILABLE = True
36
- print("βœ… MusicGen models imported successfully")
37
  except Exception as e:
38
- print(f"❌ MusicGen import failed: {e}")
39
 
40
  try:
41
- import imageio
42
- VIDEO_AVAILABLE = True
43
- print("βœ… Video creation available")
44
  except Exception as e:
45
- print(f"❌ Video imports failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- class LightweightPhotoSoundtrackGenerator:
48
  def __init__(self):
49
- print("🎡 Initializing Lightweight Photo-Soundtrack Generator...")
 
50
 
51
- self.caption_processor = None
52
- self.caption_model = None
53
- self.music_processor = None
54
- self.music_model = None
55
-
56
- # Try to load models with error handling
57
- self.load_models()
58
-
59
- def test_string_operations(self, test_input):
60
- """Test function to debug string operation issues"""
61
- print(f"πŸ§ͺ Testing input: {test_input} (type: {type(test_input)})")
62
-
63
- try:
64
- # Test string conversion
65
- str_version = str(test_input) if test_input is not None else "default"
66
- print(f"βœ… String conversion: {str_version}")
67
-
68
- # Test lower() operation
69
- lower_version = str_version.lower()
70
- print(f"βœ… Lower operation: {lower_version}")
71
-
72
- # Test 'in' operation
73
- test_result = 'test' in lower_version
74
- print(f"βœ… 'in' operation: {test_result}")
75
-
76
- return True
77
- except Exception as e:
78
- print(f"❌ String operation failed: {e}")
79
- return False
80
-
81
- def load_models(self):
82
- """Load models with comprehensive error handling"""
83
- try:
84
- if BLIP_AVAILABLE:
85
- print("πŸ“Έ Loading BLIP model (lightweight)...")
86
- self.caption_processor = BlipProcessor.from_pretrained(
87
- "Salesforce/blip-image-captioning-base",
88
- cache_dir="./cache"
89
- )
90
- self.caption_model = BlipForConditionalGeneration.from_pretrained(
91
- "Salesforce/blip-image-captioning-base",
92
- cache_dir="./cache",
93
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
94
- )
95
-
96
- # Move to appropriate device
97
- device = "cuda" if torch.cuda.is_available() else "cpu"
98
- self.caption_model = self.caption_model.to(device)
99
- print(f"βœ… BLIP loaded on {device}")
100
- else:
101
- print("⚠️ BLIP not available - using fallback descriptions")
102
-
103
- except Exception as e:
104
- print(f"❌ Error loading BLIP: {e}")
105
- self.caption_processor = None
106
- self.caption_model = None
107
 
108
  try:
109
- if MUSICGEN_AVAILABLE:
110
- print("🎼 Loading MusicGen model (small)...")
111
- self.music_processor = AutoProcessor.from_pretrained(
112
- "facebook/musicgen-small",
113
- cache_dir="./cache"
114
- )
115
- self.music_model = MusicgenForConditionalGeneration.from_pretrained(
116
- "facebook/musicgen-small",
117
- cache_dir="./cache",
118
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
119
- )
120
-
121
- # Move to appropriate device
122
- device = "cuda" if torch.cuda.is_available() else "cpu"
123
- self.music_model = self.music_model.to(device)
124
- print(f"βœ… MusicGen loaded on {device}")
125
  else:
126
- print("⚠️ MusicGen not available - will use placeholder audio")
127
-
128
- except Exception as e:
129
- print(f"❌ Error loading MusicGen: {e}")
130
- self.music_processor = None
131
- self.music_model = None
132
-
133
- def get_fallback_description(self, image):
134
- """Generate a simple fallback description when BLIP is not available"""
135
- try:
136
- # Simple heuristic based on image properties
137
- img_array = np.array(image)
138
-
139
- # Analyze dominant colors
140
- mean_color = np.mean(img_array, axis=(0,1))
141
- brightness = np.mean(mean_color)
142
-
143
- # Analyze color distribution
144
- red_dominant = mean_color[0] > mean_color[1] and mean_color[0] > mean_color[2]
145
- blue_dominant = mean_color[2] > mean_color[0] and mean_color[2] > mean_color[1]
146
- green_dominant = mean_color[1] > mean_color[0] and mean_color[1] > mean_color[2]
147
 
148
- descriptions = []
149
 
150
- if brightness > 200:
151
- descriptions.append("bright")
152
- elif brightness < 100:
153
- descriptions.append("dark")
154
 
155
- if red_dominant:
156
- descriptions.append("warm tones")
157
- elif blue_dominant:
158
- descriptions.append("cool tones")
159
- elif green_dominant:
160
- descriptions.append("natural colors")
161
 
162
- # Basic description - ensure it's always a string
163
- if descriptions:
164
- base_desc = "image with " + ", ".join(descriptions)
165
- else:
166
- base_desc = "colorful image"
167
-
168
- # Ensure both returns are strings
169
- base_desc = str(base_desc)
170
- enhanced_desc = f"{base_desc}, cinematic, atmospheric"
171
 
172
- return base_desc, enhanced_desc
173
 
174
  except Exception as e:
175
- print(f"Error in fallback description: {e}")
176
- # Return safe default strings
177
- return "uploaded image", "uploaded image, cinematic, atmospheric"
178
 
179
- def generate_description(self, image):
180
- """Generate description using BLIP or fallback"""
181
  try:
182
- if self.caption_model is not None and self.caption_processor is not None:
183
- print("πŸ” Analyzing image with BLIP...")
184
-
185
- # Process the image
186
- inputs = self.caption_processor(image, return_tensors="pt")
187
-
188
- # Move inputs to same device as model
189
- device = next(self.caption_model.parameters()).device
190
- inputs = {k: v.to(device) for k, v in inputs.items()}
191
-
192
- # Generate caption
193
- with torch.no_grad():
194
- out = self.caption_model.generate(**inputs, max_length=30, num_beams=3)
195
-
196
- description = self.caption_processor.decode(out[0], skip_special_tokens=True)
197
-
198
- # Ensure description is a string
199
- if not isinstance(description, str):
200
- print(f"Warning: BLIP returned non-string: {type(description)}")
201
- description = str(description) if description is not None else "image"
202
-
203
- # Clean up the description
204
- description = description.strip()
205
- if not description:
206
- description = "image"
207
-
208
- enhanced_description = self.enhance_description_for_music(description)
209
-
210
- return description, enhanced_description
211
- else:
212
- print("πŸ” Using fallback description method...")
213
- return self.get_fallback_description(image)
214
-
215
- except Exception as e:
216
- print(f"❌ Error in description generation: {e}")
217
- traceback.print_exc()
218
- return self.get_fallback_description(image)
219
-
220
- def enhance_description_for_music(self, description):
221
- """Enhance description for music generation"""
222
- try:
223
- # Ensure description is a string
224
- if not isinstance(description, str):
225
- print(f"Warning: enhance_description received non-string: {type(description)}")
226
- description = str(description) if description is not None else "image"
227
-
228
- # Clean and prepare description
229
- description = description.strip()
230
- if not description:
231
- description = "image"
232
 
233
- # Simplified mood mappings
234
- mood_mappings = {
235
- 'sunset': 'warm acoustic guitar',
236
- 'sunrise': 'bright acoustic',
237
- 'ocean': 'ambient waves',
238
- 'sea': 'ambient waves',
239
- 'mountain': 'epic orchestral',
240
- 'city': 'urban electronic',
241
- 'forest': 'nature folk',
242
- 'night': 'ambient piano',
243
- 'bright': 'upbeat cheerful',
244
- 'light': 'upbeat cheerful',
245
- 'dark': 'mysterious atmospheric',
246
- 'warm': 'acoustic peaceful',
247
- 'cool': 'ambient ethereal',
248
- 'water': 'flowing ambient',
249
- 'sky': 'ethereal atmospheric',
250
- 'street': 'urban contemporary',
251
- 'building': 'modern electronic'
252
- }
253
 
254
- # Safe string operations
255
- description_lower = description.lower()
256
- music_elements = []
257
-
258
- # Check each keyword safely
259
- for keyword, music_style in mood_mappings.items():
260
- try:
261
- if keyword in description_lower:
262
- music_elements.append(music_style)
263
- except (TypeError, AttributeError) as e:
264
- print(f"Error checking keyword '{keyword}' in description: {e}")
265
- continue
266
 
267
- # Build enhanced description
268
- if music_elements:
269
- enhanced = f"{description}, {music_elements[0]}"
270
- else:
271
- enhanced = f"{description}, cinematic atmospheric"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- # Ensure return is a string
274
- return str(enhanced)
 
275
 
276
- except Exception as e:
277
- print(f"Error in enhance_description_for_music: {e}")
278
- traceback.print_exc()
279
- # Return safe fallback
280
- safe_desc = str(description) if description else "image"
281
- return f"{safe_desc}, cinematic atmospheric"
282
-
283
- def generate_placeholder_audio(self, duration=30, sampling_rate=32000):
284
- """Generate placeholder audio when MusicGen is not available"""
285
- print("🎡 Generating placeholder audio...")
286
-
287
- # Create a simple harmonic tone sequence
288
- t = np.linspace(0, duration, int(duration * sampling_rate))
289
-
290
- # Create a pleasant chord progression
291
- frequencies = [261.63, 329.63, 392.00, 523.25] # C, E, G, C (one octave up)
292
- audio = np.zeros_like(t)
293
-
294
- for i, freq in enumerate(frequencies):
295
- # Each note plays for duration/4 with overlap
296
- start_time = i * duration / 5
297
- end_time = start_time + duration / 3
298
 
299
- mask = (t >= start_time) & (t <= end_time)
300
- envelope = np.exp(-3 * (t[mask] - start_time) / (end_time - start_time)) # Decay envelope
301
- audio[mask] += 0.3 * envelope * np.sin(2 * np.pi * freq * t[mask])
302
-
303
- # Add some reverb-like effect
304
- audio = np.convolve(audio, np.exp(-np.linspace(0, 2, 1000)), mode='same')[:len(t)]
305
-
306
- # Normalize
307
- audio = audio / np.max(np.abs(audio)) * 0.7
308
-
309
- return audio.astype(np.float32), sampling_rate
310
-
311
- def generate_soundtrack(self, music_prompt, duration=30):
312
- """Generate soundtrack using MusicGen or placeholder"""
313
- try:
314
- # Ensure music_prompt is a string
315
- if not isinstance(music_prompt, str):
316
- print(f"Warning: music_prompt is not string: {type(music_prompt)}")
317
- music_prompt = str(music_prompt) if music_prompt is not None else "cinematic atmospheric"
318
 
319
- music_prompt = music_prompt.strip()
320
- if not music_prompt:
321
- music_prompt = "cinematic atmospheric"
322
 
323
- if self.music_model is not None and self.music_processor is not None:
324
- print(f"🎼 Generating soundtrack with MusicGen...")
325
- print(f"🎡 Prompt: {music_prompt}")
326
-
327
- # Process the text prompt
328
- inputs = self.music_processor(
329
- text=[music_prompt],
330
- padding=True,
331
- return_tensors="pt",
332
- )
333
-
334
- # Move inputs to same device as model
335
- device = next(self.music_model.parameters()).device
336
- inputs = {k: v.to(device) for k, v in inputs.items()}
337
-
338
- # Generate audio with shorter length for memory efficiency
339
- max_new_tokens = min(int(duration * 30), 800) # Further limit tokens
340
-
341
- with torch.no_grad():
342
- audio_values = self.music_model.generate(
343
- **inputs,
344
- max_new_tokens=max_new_tokens,
345
- do_sample=True,
346
- guidance_scale=3.0,
347
- )
348
-
349
- # Convert to numpy array
350
- sampling_rate = self.music_model.config.audio_encoder.sampling_rate
351
- audio_data = audio_values[0, 0].cpu().numpy()
352
-
353
- # Ensure correct duration
354
- target_length = int(duration * sampling_rate)
355
- if len(audio_data) > target_length:
356
- audio_data = audio_data[:target_length]
357
- elif len(audio_data) < target_length:
358
- audio_data = np.pad(audio_data, (0, target_length - len(audio_data)), mode='constant')
359
-
360
- return audio_data, sampling_rate
361
- else:
362
- print("🎡 Using placeholder audio generation...")
363
- return self.generate_placeholder_audio(duration)
364
-
365
  except Exception as e:
366
- print(f"❌ Error in soundtrack generation: {e}")
367
- traceback.print_exc()
368
- print("🎡 Falling back to placeholder audio...")
369
- return self.generate_placeholder_audio(duration)
370
 
371
- def create_simple_video(self, image, audio_data, sampling_rate, duration=30):
372
- """Create a simple video with basic zoom effect"""
373
- if not VIDEO_AVAILABLE:
374
- print("❌ Video creation not available")
375
- return None
376
-
377
  try:
378
- print("🎬 Creating simple video...")
 
 
379
 
380
- # Create frames with zoom effect
381
- frames = []
382
- fps = 12 # Lower FPS for memory efficiency
383
- total_frames = fps * duration
384
 
385
- # Convert PIL to numpy array and resize if too large
386
- img_array = np.array(image)
387
- h, w = img_array.shape[:2]
388
 
389
- # Limit resolution for memory efficiency
390
- max_size = 512
391
- if max(h, w) > max_size:
392
- ratio = max_size / max(h, w)
393
- new_h, new_w = int(h * ratio), int(w * ratio)
394
- image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)
395
- img_array = np.array(image)
396
- h, w = img_array.shape[:2]
397
 
398
- for i in range(total_frames):
399
- # Simple zoom effect
400
- progress = i / total_frames
401
- zoom_factor = 1.0 + 0.1 * progress # Zoom in by 10%
402
-
403
- # Resize image
404
- new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
405
- pil_img = Image.fromarray(img_array).resize((new_w, new_h), Image.Resampling.LANCZOS)
406
-
407
- # Center crop back to original size
408
- resized_array = np.array(pil_img)
409
- start_y = (new_h - h) // 2
410
- start_x = (new_w - w) // 2
411
- cropped = resized_array[start_y:start_y+h, start_x:start_x+w]
412
-
413
- frames.append(cropped)
414
-
415
- # Create temporary video file
416
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
417
- temp_video_path = temp_video.name
418
 
419
- # Write video using imageio
420
- import imageio
421
- imageio.mimsave(temp_video_path, frames, fps=fps)
422
-
423
- print("βœ… Simple video created successfully!")
424
- return temp_video_path
425
 
426
  except Exception as e:
427
- print(f"❌ Video creation failed: {e}")
428
- traceback.print_exc()
429
- return None
430
 
431
- def process_image(self, image, progress=gr.Progress()):
432
- """Main processing function"""
 
 
433
  if image is None:
434
- return None, "❌ Please upload an image first.", None, None
435
 
436
  try:
437
- progress(0.1, desc="Starting analysis...")
438
-
439
- # System info
440
- device_info = f"πŸ–₯️ Device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
441
-
442
- progress(0.2, desc="Analyzing image...")
443
-
444
- # Generate description
445
- try:
446
- description, music_prompt = self.generate_description(image)
447
-
448
- # Double-check that we have strings
449
- if not isinstance(description, str):
450
- description = "uploaded image"
451
- if not isinstance(music_prompt, str):
452
- music_prompt = "cinematic atmospheric"
453
-
454
- progress(0.4, desc="Description generated")
455
- except Exception as e:
456
- print(f"Error in description: {e}")
457
- traceback.print_exc()
458
- description, music_prompt = "uploaded image", "cinematic atmospheric"
459
-
460
- progress(0.5, desc="Generating soundtrack...")
461
-
462
- # Generate soundtrack
463
- try:
464
- audio_data, sampling_rate = self.generate_soundtrack(music_prompt, duration=20) # Shorter duration
465
- progress(0.7, desc="Soundtrack generated")
466
- except Exception as e:
467
- print(f"Error in soundtrack: {e}")
468
- audio_data, sampling_rate = self.generate_placeholder_audio(duration=20)
469
-
470
- # Save audio to temporary file
471
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as audio_file:
472
- sf.write(audio_file.name, audio_data, sampling_rate)
473
- audio_path = audio_file.name
474
-
475
- progress(0.8, desc="Creating video...")
476
-
477
- # Create video
478
- video_path = None
479
- if VIDEO_AVAILABLE:
480
  try:
481
- video_path = self.create_simple_video(image, audio_data, sampling_rate, duration=20)
482
- progress(0.9, desc="Video created")
 
 
 
483
  except Exception as e:
484
- print(f"Error in video creation: {e}")
485
-
486
- progress(1.0, desc="Complete!")
487
 
488
- # Prepare status message
489
  status_msg = f"""
490
- **βœ… Processing Complete!**
 
 
 
491
 
492
- **Image Description:** {description}
 
 
493
 
494
- **Music Style:** {music_prompt}
 
 
495
 
496
- **Technical Info:**
497
- - {device_info}
498
- - Audio: {len(audio_data)/sampling_rate:.1f} seconds at {sampling_rate}Hz
499
- - BLIP Model: {'βœ… Loaded' if self.caption_model else '❌ Using fallback'}
500
- - MusicGen Model: {'βœ… Loaded' if self.music_model else '❌ Using placeholder'}
501
- - Video: {'βœ… Created' if video_path else '❌ Failed/Unavailable'}
 
 
 
 
502
  """
503
 
504
- return (
505
- video_path,
506
- status_msg,
507
- (sampling_rate, audio_data),
508
- audio_path
509
- )
510
 
511
  except Exception as e:
512
- error_msg = f"❌ **Error during processing:**\n\n{str(e)}\n\n**System Info:**\n- Python: {sys.version[:20]}...\n- PyTorch: {torch.__version__}\n- CUDA: {torch.cuda.is_available()}"
513
- print(f"Main processing error: {e}")
514
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  return None, error_msg, None, None
516
 
517
- # Initialize with error handling
518
- print("πŸš€ Starting application initialization...")
519
  try:
520
- generator = LightweightPhotoSoundtrackGenerator()
521
- print("βœ… Generator initialized successfully")
522
  except Exception as e:
523
- print(f"❌ Failed to initialize generator: {e}")
524
  generator = None
525
 
526
- def create_interface():
527
- """Create Gradio interface with system status"""
528
 
529
- # System status
530
- status_html = f"""
531
- <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0;">
532
- <h3>πŸ”§ System Status</h3>
533
- <ul style="margin: 0;">
534
- <li><strong>Device:</strong> {'🟒 GPU Available' if torch.cuda.is_available() else '🟑 CPU Only'}</li>
535
- <li><strong>BLIP (Image Analysis):</strong> {'🟒 Available' if BLIP_AVAILABLE else '🟑 Fallback Mode'}</li>
536
- <li><strong>MusicGen (AI Music):</strong> {'🟒 Available' if MUSICGEN_AVAILABLE else '🟑 Placeholder Mode'}</li>
537
- <li><strong>Video Creation:</strong> {'🟒 Available' if VIDEO_AVAILABLE else 'πŸ”΄ Unavailable'}</li>
538
- </ul>
539
- </div>
540
- """
541
 
542
- # Simple test function for debugging
543
- def test_function(image):
544
- if image is None:
545
- return "❌ No image provided"
546
-
547
- try:
548
- # Test basic operations
549
- img_array = np.array(image)
550
- print(f"πŸ§ͺ Image shape: {img_array.shape}")
551
-
552
- # Test string operations
553
- test_desc = "test image with bright colors"
554
- test_enhanced = generator.enhance_description_for_music(test_desc) if generator else "test failed"
555
-
556
- return f"""
557
- **πŸ§ͺ Debug Test Results:**
558
-
559
- **Image Info:**
560
- - Shape: {img_array.shape}
561
- - Type: {type(img_array)}
562
-
563
- **String Test:**
564
- - Input: "{test_desc}"
565
- - Enhanced: "{test_enhanced}"
566
- - Types: {type(test_desc)} β†’ {type(test_enhanced)}
567
-
568
- **System Check:**
569
- - Generator: {'βœ… Loaded' if generator else '❌ Failed'}
570
- - Test passed: βœ…
571
- """
572
- except Exception as e:
573
- return f"❌ **Test failed:** {str(e)}\n\n{traceback.format_exc()}"
574
 
575
- with gr.Blocks(
576
- title="πŸ“ΈπŸŽ΅ Lightweight Photo Soundtrack Generator",
577
- theme=gr.themes.Soft()
578
- ) as demo:
579
 
580
  gr.HTML("""
581
- <div style="text-align: center; margin-bottom: 20px;">
582
- <h1>πŸ“ΈπŸŽ΅ AI Photo Soundtrack Generator</h1>
583
- <p>Upload a photo and create an AI-generated soundtrack!</p>
584
  </div>
585
  """)
586
 
 
 
 
 
 
 
 
 
 
587
  gr.HTML(status_html)
588
 
589
  with gr.Tabs():
590
- with gr.TabItem("🎬 Main Generator"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  with gr.Row():
592
  with gr.Column():
593
  image_input = gr.Image(
594
  type="pil",
595
- label="πŸ“€ Upload Your Photo"
596
  )
597
-
598
- generate_btn = gr.Button(
599
- "🎬 Generate Soundtrack & Video",
600
- variant="primary",
601
- size="lg"
602
  )
603
 
604
  with gr.Column():
605
- video_output = gr.Video(label="🎬 Generated Video")
606
  status_output = gr.Markdown(
607
- value="Upload an image to start!",
608
- label="πŸ“Š Status & Analysis"
609
  )
610
 
611
  with gr.Row():
612
- audio_output = gr.Audio(label="🎡 Generated Soundtrack")
613
- audio_file_output = gr.File(label="πŸ“₯ Download Audio")
614
-
615
- with gr.TabItem("πŸ§ͺ Debug Mode"):
616
- with gr.Row():
617
- with gr.Column():
618
- debug_image_input = gr.Image(
619
- type="pil",
620
- label="πŸ“€ Upload Test Image"
621
- )
622
- debug_btn = gr.Button(
623
- "πŸ§ͺ Run Debug Test",
624
- variant="secondary"
625
- )
626
-
627
- with gr.Column():
628
- debug_output = gr.Markdown(
629
- value="Upload an image and click 'Run Debug Test'",
630
- label="πŸ” Debug Results"
631
- )
632
-
633
- # Event handlers
634
- if generator is not None:
635
- generate_btn.click(
636
- fn=generator.process_image,
637
- inputs=[image_input],
638
- outputs=[video_output, status_output, audio_output, audio_file_output],
639
- show_progress=True
640
- )
641
-
642
- debug_btn.click(
643
- fn=test_function,
644
- inputs=[debug_image_input],
645
- outputs=[debug_output]
646
- )
647
- else:
648
- generate_btn.click(
649
- fn=lambda x: (None, "❌ Generator failed to initialize. Please check logs.", None, None),
650
- inputs=[image_input],
651
- outputs=[video_output, status_output, audio_output, audio_file_output]
652
- )
653
-
654
- debug_btn.click(
655
- fn=lambda x: "❌ Generator not available for testing.",
656
- inputs=[debug_image_input],
657
- outputs=[debug_output]
658
- )
659
 
660
  gr.HTML("""
661
- <div style="text-align: center; margin-top: 20px; padding: 15px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 8px;">
662
- <p style="color: white; margin: 0;">
663
- <strong>πŸ’‘ Tip:</strong> Use Debug Mode to test functionality if you encounter errors.
664
- </p>
 
 
 
665
  </div>
666
  """)
667
 
@@ -669,37 +389,39 @@ def create_interface():
669
 
670
  # Launch
671
  if __name__ == "__main__":
 
 
672
  try:
673
- demo = create_interface()
674
- demo.launch(
675
- server_name="0.0.0.0",
676
- server_port=7860,
677
- share=True
678
- )
 
 
 
679
  except Exception as e:
680
- print(f"❌ Failed to launch: {e}")
681
- traceback.print_exc()
682
 
683
- # requirements.txt - MINIMAL VERSION
684
  """
685
- torch>=2.0.0
686
- transformers>=4.30.0
687
  gradio==4.44.0
688
  Pillow>=9.5.0
689
  numpy>=1.24.0
690
  soundfile>=0.12.0
691
- imageio>=2.31.1
692
- scipy>=1.10.0
693
- accelerate>=0.20.0
694
  """
695
 
696
- # README.md - Updated
697
  """
698
  ---
699
- title: AI Photo Soundtrack Generator
700
- emoji: 🎡
701
- colorFrom: blue
702
- colorTo: purple
703
  sdk: gradio
704
  sdk_version: "4.44.0"
705
  app_file: app.py
@@ -707,30 +429,22 @@ pinned: false
707
  license: apache-2.0
708
  ---
709
 
710
- # πŸ“ΈπŸŽ΅ AI Photo Soundtrack Generator
711
-
712
- A lightweight AI app that creates soundtracks from your photos!
713
-
714
- ## 🌟 Features
715
-
716
- - **Smart Image Analysis**: BLIP-based or fallback description
717
- - **AI Music Generation**: MusicGen or harmonic placeholder audio
718
- - **Simple Video Creation**: Basic zoom effects
719
- - **Adaptive Performance**: Works on both CPU and GPU
720
- - **Graceful Degradation**: Functions even when models fail to load
721
-
722
- ## πŸš€ Quick Start
723
 
724
- 1. Upload any photo
725
- 2. Click "Generate Soundtrack & Video"
726
- 3. Download your personalized audio/video
727
 
728
- ## βš™οΈ Technical Details
 
 
 
 
 
729
 
730
- - **Memory Optimized**: Uses smaller models and efficient processing
731
- - **Error Resilient**: Comprehensive fallback systems
732
- - **Resource Adaptive**: Automatically adjusts to available hardware
733
- - **Real-time Status**: Shows what's working and what's not
 
734
 
735
- Perfect for creative projects and social media content! 🎬✨
736
  """
 
1
+ # app.py - Ultra-Safe Debugging Version
2
 
3
  import os
4
  import sys
5
+ import traceback
 
 
 
6
  import tempfile
 
7
  import warnings
 
8
  warnings.filterwarnings('ignore')
9
 
10
+ print("πŸš€ Starting ULTRA-SAFE mode...")
11
+ print(f"Python: {sys.version}")
12
+
13
+ # Safe imports with individual error handling
14
+ IMPORTS = {
15
+ 'torch': False,
16
+ 'numpy': False,
17
+ 'PIL': False,
18
+ 'gradio': False,
19
+ 'soundfile': False,
20
+ 'transformers': False
21
+ }
22
 
23
+ try:
24
+ import torch
25
+ IMPORTS['torch'] = True
26
+ print("βœ… PyTorch imported")
27
+ except Exception as e:
28
+ print(f"❌ PyTorch failed: {e}")
29
 
30
  try:
31
+ import numpy as np
32
+ IMPORTS['numpy'] = True
33
+ print("βœ… NumPy imported")
34
  except Exception as e:
35
+ print(f"❌ NumPy failed: {e}")
36
 
37
  try:
38
+ from PIL import Image
39
+ IMPORTS['PIL'] = True
40
+ print("βœ… PIL imported")
41
  except Exception as e:
42
+ print(f"❌ PIL failed: {e}")
43
 
44
  try:
45
+ import gradio as gr
46
+ IMPORTS['gradio'] = True
47
+ print("βœ… Gradio imported")
48
  except Exception as e:
49
+ print(f"❌ Gradio failed: {e}")
50
+
51
+ try:
52
+ import soundfile as sf
53
+ IMPORTS['soundfile'] = True
54
+ print("βœ… SoundFile imported")
55
+ except Exception as e:
56
+ print(f"❌ SoundFile failed: {e}")
57
+
58
+ # Only try transformers if basic imports work
59
+ if IMPORTS['torch']:
60
+ try:
61
+ from transformers import BlipProcessor, BlipForConditionalGeneration
62
+ from transformers import MusicgenForConditionalGeneration, AutoProcessor
63
+ IMPORTS['transformers'] = True
64
+ print("βœ… Transformers imported")
65
+ except Exception as e:
66
+ print(f"❌ Transformers failed: {e}")
67
 
68
+ class SafeGenerator:
69
  def __init__(self):
70
+ print("πŸ”§ Initializing Safe Generator...")
71
+ self.working = True
72
 
73
+ def safe_string_test(self, input_val):
74
+ """Test string operations safely"""
75
+ print(f"πŸ§ͺ Testing string operations with: {input_val} (type: {type(input_val)})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  try:
78
+ # Convert to string safely
79
+ if input_val is None:
80
+ str_val = "none"
81
+ elif isinstance(input_val, str):
82
+ str_val = input_val
83
+ elif isinstance(input_val, (int, float, bool)):
84
+ str_val = str(input_val)
 
 
 
 
 
 
 
 
 
85
  else:
86
+ str_val = repr(input_val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ print(f"βœ… String conversion: '{str_val}'")
89
 
90
+ # Test lower operation
91
+ lower_val = str_val.lower()
92
+ print(f"βœ… Lower operation: '{lower_val}'")
 
93
 
94
+ # Test 'in' operation with known string
95
+ test_keywords = ['test', 'image', 'bright']
96
+ results = []
 
 
 
97
 
98
+ for keyword in test_keywords:
99
+ try:
100
+ result = keyword in lower_val
101
+ results.append(f"{keyword}: {result}")
102
+ print(f"βœ… '{keyword}' in '{lower_val}': {result}")
103
+ except Exception as e:
104
+ error_msg = f"❌ Error with '{keyword}': {e}"
105
+ results.append(error_msg)
106
+ print(error_msg)
107
 
108
+ return f"String test passed. Results: {', '.join(results)}"
109
 
110
  except Exception as e:
111
+ error_msg = f"❌ String test failed: {e}\n{traceback.format_exc()}"
112
+ print(error_msg)
113
+ return error_msg
114
 
115
+ def create_simple_description(self, image):
116
+ """Create description without any AI models"""
117
  try:
118
+ if not IMPORTS['numpy'] or not IMPORTS['PIL']:
119
+ return "simple image", "simple image, ambient music"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ print("πŸ” Creating simple description...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ # Basic image analysis
124
+ img_array = np.array(image)
125
+ h, w, c = img_array.shape
126
+ print(f"πŸ“ Image dimensions: {w}x{h}x{c}")
 
 
 
 
 
 
 
 
127
 
128
+ # Safe color analysis
129
+ try:
130
+ mean_color = np.mean(img_array, axis=(0, 1))
131
+ brightness = float(np.mean(mean_color))
132
+ print(f"πŸ’‘ Brightness: {brightness}")
133
+
134
+ # Safe brightness categorization
135
+ if brightness > 200:
136
+ brightness_desc = "bright"
137
+ elif brightness < 100:
138
+ brightness_desc = "dark"
139
+ else:
140
+ brightness_desc = "medium"
141
+
142
+ print(f"🏷️ Brightness category: {brightness_desc}")
143
+
144
+ except Exception as e:
145
+ print(f"⚠️ Color analysis failed: {e}")
146
+ brightness_desc = "colorful"
147
 
148
+ # Build description safely
149
+ base_desc = f"{brightness_desc} image"
150
+ enhanced_desc = f"{base_desc}, peaceful music"
151
 
152
+ print(f"πŸ“ Description: '{base_desc}' -> '{enhanced_desc}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Test the strings before returning
155
+ test_result = self.safe_string_test(base_desc)
156
+ print(f"πŸ§ͺ String test result: {test_result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
+ return base_desc, enhanced_desc
 
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  except Exception as e:
161
+ error_msg = f"❌ Description creation failed: {e}\n{traceback.format_exc()}"
162
+ print(error_msg)
163
+ return "error image", "error image, quiet music"
 
164
 
165
+ def create_simple_audio(self, duration=10):
166
+ """Create simple audio without AI"""
 
 
 
 
167
  try:
168
+ if not IMPORTS['numpy']:
169
+ print("❌ NumPy not available for audio")
170
+ return None, None
171
 
172
+ print(f"🎡 Creating {duration}s simple audio...")
 
 
 
173
 
174
+ # Create simple tone
175
+ sample_rate = 22050
176
+ t = np.linspace(0, duration, int(duration * sample_rate))
177
 
178
+ # Simple pleasant tone (C major chord)
179
+ freq1, freq2, freq3 = 261.63, 329.63, 392.00 # C, E, G
180
+ audio = (np.sin(2 * np.pi * freq1 * t) * 0.3 +
181
+ np.sin(2 * np.pi * freq2 * t) * 0.2 +
182
+ np.sin(2 * np.pi * freq3 * t) * 0.2)
 
 
 
183
 
184
+ # Add fade in/out
185
+ fade_samples = int(0.5 * sample_rate) # 0.5 second fade
186
+ audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
187
+ audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
+ print(f"βœ… Audio created: {len(audio)} samples at {sample_rate}Hz")
190
+ return audio.astype(np.float32), sample_rate
 
 
 
 
191
 
192
  except Exception as e:
193
+ print(f"❌ Audio creation failed: {e}")
194
+ return None, None
 
195
 
196
+ def process_image_safe(self, image):
197
+ """Ultra-safe image processing"""
198
+ print("πŸ”„ Starting SAFE image processing...")
199
+
200
  if image is None:
201
+ return None, "❌ No image provided", None, None
202
 
203
  try:
204
+ # Step 1: Test the image
205
+ print("πŸ“Έ Step 1: Testing image...")
206
+ img_info = f"Image type: {type(image)}"
207
+ if hasattr(image, 'size'):
208
+ img_info += f", Size: {image.size}"
209
+ print(img_info)
210
+
211
+ # Step 2: Create description
212
+ print("πŸ“ Step 2: Creating description...")
213
+ description, music_prompt = self.create_simple_description(image)
214
+
215
+ # Step 3: Test strings
216
+ print("πŸ§ͺ Step 3: Testing strings...")
217
+ desc_test = self.safe_string_test(description)
218
+ prompt_test = self.safe_string_test(music_prompt)
219
+
220
+ # Step 4: Create audio
221
+ print("🎡 Step 4: Creating audio...")
222
+ audio_data, sample_rate = self.create_simple_audio(10)
223
+
224
+ # Step 5: Save audio if successful
225
+ audio_file = None
226
+ if audio_data is not None and IMPORTS['soundfile']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  try:
228
+ print("πŸ’Ύ Step 5: Saving audio...")
229
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
230
+ sf.write(f.name, audio_data, sample_rate)
231
+ audio_file = f.name
232
+ print(f"βœ… Audio saved: {audio_file}")
233
  except Exception as e:
234
+ print(f"⚠️ Audio save failed: {e}")
 
 
235
 
236
+ # Prepare results
237
  status_msg = f"""
238
+ **βœ… SAFE MODE PROCESSING COMPLETE**
239
+
240
+ **Image Analysis:**
241
+ - {img_info}
242
 
243
+ **Generated Text:**
244
+ - Description: "{description}"
245
+ - Music Prompt: "{music_prompt}"
246
 
247
+ **String Tests:**
248
+ - Description test: {desc_test[:100]}...
249
+ - Prompt test: {prompt_test[:100]}...
250
 
251
+ **Audio:**
252
+ - Status: {'βœ… Created' if audio_data is not None else '❌ Failed'}
253
+ - Duration: {len(audio_data)/sample_rate:.1f}s if audio_data else 'N/A'
254
+
255
+ **System Status:**
256
+ - PyTorch: {'βœ…' if IMPORTS['torch'] else '❌'}
257
+ - NumPy: {'βœ…' if IMPORTS['numpy'] else '❌'}
258
+ - PIL: {'βœ…' if IMPORTS['PIL'] else '❌'}
259
+ - SoundFile: {'βœ…' if IMPORTS['soundfile'] else '❌'}
260
+ - Transformers: {'βœ…' if IMPORTS['transformers'] else '❌'}
261
  """
262
 
263
+ audio_output = (sample_rate, audio_data) if audio_data is not None else None
264
+
265
+ print("βœ… Safe processing completed successfully!")
266
+ return None, status_msg, audio_output, audio_file
 
 
267
 
268
  except Exception as e:
269
+ error_msg = f"""
270
+ ❌ **SAFE MODE ERROR**
271
+
272
+ **Error:** {str(e)}
273
+
274
+ **Location:** {traceback.format_exc()}
275
+
276
+ **System Info:**
277
+ - Python: {sys.version}
278
+ - Working directory: {os.getcwd()}
279
+ - Environment: {dict(os.environ).get('SPACE_ID', 'Local')}
280
+
281
+ **Import Status:**
282
+ {chr(10).join([f"- {k}: {'βœ…' if v else '❌'}" for k, v in IMPORTS.items()])}
283
+ """
284
+ print(error_msg)
285
  return None, error_msg, None, None
286
 
287
# Build the single shared generator instance at import time. A failure here
# must not crash the app: the UI checks for None and degrades gracefully.
print("🎬 Initializing generator...")
try:
    generator = SafeGenerator()
except Exception as e:
    print(f"❌ Generator failed: {e}")
    generator = None
else:
    print("βœ… Generator ready!")
295
 
296
def create_minimal_interface():
    """Build the debug Gradio UI.

    Lays out two tabs: a string-operation tester and a safe image-processing
    pipeline, plus an import-status panel so the deploy environment is
    visible at a glance.

    Returns:
        The assembled ``gr.Blocks`` app, or None when Gradio is unavailable.
    """
    if not IMPORTS['gradio']:
        print("❌ Cannot create interface - Gradio not available")
        return None

    print("πŸ–₯️ Creating minimal interface...")

    with gr.Blocks(title="πŸ§ͺ Ultra-Safe Debug Mode") as demo:
        gr.HTML("""
        <div style="text-align: center; background: #fff3cd; padding: 20px; border-radius: 10px; margin: 20px 0;">
            <h1>πŸ§ͺ ULTRA-SAFE DEBUG MODE</h1>
            <p><strong>This version isolates and tests each component individually</strong></p>
        </div>
        """)

        # System status panel: one bullet per tracked import.
        status_html = f"""
        <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0;">
            <h3>πŸ”§ Import Status</h3>
            <ul>
                {"".join([f"<li><strong>{k}:</strong> {'🟒 OK' if v else 'πŸ”΄ Failed'}</li>" for k, v in IMPORTS.items()])}
            </ul>
        </div>
        """
        gr.HTML(status_html)

        with gr.Tabs():
            # Tab 1: Basic String Test
            with gr.TabItem("πŸ§ͺ String Test"):
                with gr.Row():
                    test_input = gr.Textbox(
                        value="bright colorful sunset image",
                        label="Test String Input"
                    )
                    test_btn = gr.Button("Test String Operations")

                test_output = gr.Textbox(
                    label="Test Results",
                    lines=10
                )

                # Wire up only when the generator initialized successfully.
                if generator:
                    test_btn.click(
                        fn=generator.safe_string_test,
                        inputs=[test_input],
                        outputs=[test_output]
                    )

            # Tab 2: Image Processing Test
            with gr.TabItem("πŸ“Έ Image Test"):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            type="pil",
                            label="Upload Test Image"
                        )
                        process_btn = gr.Button(
                            "πŸ”„ Process Image (Safe Mode)",
                            variant="primary"
                        )

                    with gr.Column():
                        status_output = gr.Markdown(
                            value="Upload an image and click process",
                            label="Processing Results"
                        )

                with gr.Row():
                    audio_output = gr.Audio(label="Generated Audio")
                    file_output = gr.File(label="Audio File")

                if generator:
                    process_btn.click(
                        fn=generator.process_image_safe,
                        inputs=[image_input],
                        # Hidden Video slot keeps the 4-tuple return shape of
                        # process_image_safe compatible with the callback.
                        outputs=[gr.Video(visible=False), status_output, audio_output, file_output]
                    )

        gr.HTML("""
        <div style="background: #e3f2fd; padding: 15px; border-radius: 8px; margin: 20px 0;">
            <h3>🎯 Debug Instructions</h3>
            <ol>
                <li><strong>String Test:</strong> Test if string operations work correctly</li>
                <li><strong>Image Test:</strong> Upload an image to see where the error occurs</li>
                <li><strong>Check Results:</strong> Look for the exact error location in the output</li>
            </ol>
        </div>
        """)

    # Without this return the launcher's `demo` would be None and the app
    # would never start.
    return demo
 
390
# Launch
if __name__ == "__main__":
    print("πŸš€ Launching debug interface...")

    try:
        app = create_minimal_interface()
        if app is None:
            print("❌ Could not create interface")
        else:
            app.launch(
                server_name="0.0.0.0",
                server_port=7860,
                share=True
            )
    except Exception as e:
        print(f"❌ Launch failed: {e}")
        print(traceback.format_exc())
407
 
408
+ # Minimal requirements.txt
409
  """
 
 
410
  gradio==4.44.0
411
  Pillow>=9.5.0
412
  numpy>=1.24.0
413
  soundfile>=0.12.0
414
+ torch>=2.0.0
415
+ transformers>=4.30.0
 
416
  """
417
 
418
+ # Updated README.md
419
  """
420
  ---
421
+ title: Ultra-Safe Photo Soundtrack Debug
422
+ emoji: πŸ§ͺ
423
+ colorFrom: red
424
+ colorTo: orange
425
  sdk: gradio
426
  sdk_version: "4.44.0"
427
  app_file: app.py
 
429
  license: apache-2.0
430
  ---
431
 
432
+ # πŸ§ͺ Ultra-Safe Debug Mode
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
+ This is a debugging version to isolate the "bool is not iterable" error.
 
 
435
 
436
+ ## Features
437
+ - βœ… Safe imports with individual error handling
438
+ - βœ… Step-by-step processing with detailed logs
439
+ - βœ… String operation testing
440
+ - βœ… Fallbacks for every component
441
+ - βœ… Detailed error reporting
442
 
443
+ ## Debug Process
444
+ 1. Upload to see import status
445
+ 2. Test string operations
446
+ 3. Process an image to find exact error location
447
+ 4. Check detailed logs for the root cause
448
 
449
+ This version will work even with missing dependencies and show exactly where errors occur.
450
  """