citoreh commited on
Commit
60ec455
Β·
verified Β·
1 Parent(s): e128d93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -609
app.py CHANGED
@@ -1,667 +1,387 @@
1
- # app.py - Lightweight version for Hugging Face Spaces
2
 
3
  import os
4
  import sys
5
- import torch
6
- import numpy as np
7
- from PIL import Image
8
- import gradio as gr
9
  import tempfile
10
- import soundfile as sf
11
  import warnings
12
- import traceback
13
  warnings.filterwarnings('ignore')
14
 
15
- # System info logging
16
- print(f"πŸ” Python version: {sys.version}")
17
- print(f"πŸ” PyTorch version: {torch.__version__}")
18
- print(f"πŸ” CUDA available: {torch.cuda.is_available()}")
19
- print(f"πŸ” Device count: {torch.cuda.device_count() if torch.cuda.is_available() else 'CPU only'}")
 
 
 
 
 
 
 
20
 
21
- # Try imports with detailed error reporting
22
- BLIP_AVAILABLE = False
23
- MUSICGEN_AVAILABLE = False
24
- VIDEO_AVAILABLE = False
 
 
25
 
26
  try:
27
- from transformers import BlipProcessor, BlipForConditionalGeneration
28
- BLIP_AVAILABLE = True
29
- print("βœ… BLIP models imported successfully")
30
  except Exception as e:
31
- print(f"❌ BLIP import failed: {e}")
32
 
33
  try:
34
- from transformers import MusicgenForConditionalGeneration, AutoProcessor
35
- MUSICGEN_AVAILABLE = True
36
- print("βœ… MusicGen models imported successfully")
37
  except Exception as e:
38
- print(f"❌ MusicGen import failed: {e}")
39
 
40
  try:
41
- import imageio
42
- VIDEO_AVAILABLE = True
43
- print("βœ… Video creation available")
44
  except Exception as e:
45
- print(f"❌ Video imports failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- class LightweightPhotoSoundtrackGenerator:
48
  def __init__(self):
49
- print("🎡 Initializing Lightweight Photo-Soundtrack Generator...")
 
50
 
51
- self.caption_processor = None
52
- self.caption_model = None
53
- self.music_processor = None
54
- self.music_model = None
55
-
56
- # Try to load models with error handling
57
- self.load_models()
58
-
59
- def test_string_operations(self, test_input):
60
- """Test function to debug string operation issues"""
61
- print(f"πŸ§ͺ Testing input: {test_input} (type: {type(test_input)})")
62
-
63
- try:
64
- # Test string conversion
65
- str_version = str(test_input) if test_input is not None else "default"
66
- print(f"βœ… String conversion: {str_version}")
67
-
68
- # Test lower() operation
69
- lower_version = str_version.lower()
70
- print(f"βœ… Lower operation: {lower_version}")
71
-
72
- # Test 'in' operation
73
- test_result = 'test' in lower_version
74
- print(f"βœ… 'in' operation: {test_result}")
75
-
76
- return True
77
- except Exception as e:
78
- print(f"❌ String operation failed: {e}")
79
- return False
80
-
81
- def load_models(self):
82
- """Load models with comprehensive error handling"""
83
- try:
84
- if BLIP_AVAILABLE:
85
- print("πŸ“Έ Loading BLIP model (lightweight)...")
86
- self.caption_processor = BlipProcessor.from_pretrained(
87
- "Salesforce/blip-image-captioning-base",
88
- cache_dir="./cache"
89
- )
90
- self.caption_model = BlipForConditionalGeneration.from_pretrained(
91
- "Salesforce/blip-image-captioning-base",
92
- cache_dir="./cache",
93
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
94
- )
95
-
96
- # Move to appropriate device
97
- device = "cuda" if torch.cuda.is_available() else "cpu"
98
- self.caption_model = self.caption_model.to(device)
99
- print(f"βœ… BLIP loaded on {device}")
100
- else:
101
- print("⚠️ BLIP not available - using fallback descriptions")
102
-
103
- except Exception as e:
104
- print(f"❌ Error loading BLIP: {e}")
105
- self.caption_processor = None
106
- self.caption_model = None
107
 
108
  try:
109
- if MUSICGEN_AVAILABLE:
110
- print("🎼 Loading MusicGen model (small)...")
111
- self.music_processor = AutoProcessor.from_pretrained(
112
- "facebook/musicgen-small",
113
- cache_dir="./cache"
114
- )
115
- self.music_model = MusicgenForConditionalGeneration.from_pretrained(
116
- "facebook/musicgen-small",
117
- cache_dir="./cache",
118
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
119
- )
120
-
121
- # Move to appropriate device
122
- device = "cuda" if torch.cuda.is_available() else "cpu"
123
- self.music_model = self.music_model.to(device)
124
- print(f"βœ… MusicGen loaded on {device}")
125
  else:
126
- print("⚠️ MusicGen not available - will use placeholder audio")
127
-
128
- except Exception as e:
129
- print(f"❌ Error loading MusicGen: {e}")
130
- self.music_processor = None
131
- self.music_model = None
132
-
133
- def get_fallback_description(self, image):
134
- """Generate a simple fallback description when BLIP is not available"""
135
- try:
136
- # Simple heuristic based on image properties
137
- img_array = np.array(image)
138
-
139
- # Analyze dominant colors
140
- mean_color = np.mean(img_array, axis=(0,1))
141
- brightness = np.mean(mean_color)
142
-
143
- # Analyze color distribution
144
- red_dominant = mean_color[0] > mean_color[1] and mean_color[0] > mean_color[2]
145
- blue_dominant = mean_color[2] > mean_color[0] and mean_color[2] > mean_color[1]
146
- green_dominant = mean_color[1] > mean_color[0] and mean_color[1] > mean_color[2]
147
 
148
- descriptions = []
149
 
150
- if brightness > 200:
151
- descriptions.append("bright")
152
- elif brightness < 100:
153
- descriptions.append("dark")
154
 
155
- if red_dominant:
156
- descriptions.append("warm tones")
157
- elif blue_dominant:
158
- descriptions.append("cool tones")
159
- elif green_dominant:
160
- descriptions.append("natural colors")
161
 
162
- # Basic description - ensure it's always a string
163
- if descriptions:
164
- base_desc = "image with " + ", ".join(descriptions)
165
- else:
166
- base_desc = "colorful image"
167
-
168
- # Ensure both returns are strings
169
- base_desc = str(base_desc)
170
- enhanced_desc = f"{base_desc}, cinematic, atmospheric"
171
 
172
- return base_desc, enhanced_desc
173
 
174
  except Exception as e:
175
- print(f"Error in fallback description: {e}")
176
- # Return safe default strings
177
- return "uploaded image", "uploaded image, cinematic, atmospheric"
178
 
179
- def generate_description(self, image):
180
- """Generate description using BLIP or fallback"""
181
  try:
182
- if self.caption_model is not None and self.caption_processor is not None:
183
- print("πŸ” Analyzing image with BLIP...")
184
-
185
- # Process the image
186
- inputs = self.caption_processor(image, return_tensors="pt")
187
-
188
- # Move inputs to same device as model
189
- device = next(self.caption_model.parameters()).device
190
- inputs = {k: v.to(device) for k, v in inputs.items()}
191
-
192
- # Generate caption
193
- with torch.no_grad():
194
- out = self.caption_model.generate(**inputs, max_length=30, num_beams=3)
195
-
196
- description = self.caption_processor.decode(out[0], skip_special_tokens=True)
197
-
198
- # Ensure description is a string
199
- if not isinstance(description, str):
200
- print(f"Warning: BLIP returned non-string: {type(description)}")
201
- description = str(description) if description is not None else "image"
202
-
203
- # Clean up the description
204
- description = description.strip()
205
- if not description:
206
- description = "image"
207
-
208
- enhanced_description = self.enhance_description_for_music(description)
209
-
210
- return description, enhanced_description
211
- else:
212
- print("πŸ” Using fallback description method...")
213
- return self.get_fallback_description(image)
214
-
215
- except Exception as e:
216
- print(f"❌ Error in description generation: {e}")
217
- traceback.print_exc()
218
- return self.get_fallback_description(image)
219
-
220
- def enhance_description_for_music(self, description):
221
- """Enhance description for music generation"""
222
- try:
223
- # Ensure description is a string
224
- if not isinstance(description, str):
225
- print(f"Warning: enhance_description received non-string: {type(description)}")
226
- description = str(description) if description is not None else "image"
227
-
228
- # Clean and prepare description
229
- description = description.strip()
230
- if not description:
231
- description = "image"
232
 
233
- # Simplified mood mappings
234
- mood_mappings = {
235
- 'sunset': 'warm acoustic guitar',
236
- 'sunrise': 'bright acoustic',
237
- 'ocean': 'ambient waves',
238
- 'sea': 'ambient waves',
239
- 'mountain': 'epic orchestral',
240
- 'city': 'urban electronic',
241
- 'forest': 'nature folk',
242
- 'night': 'ambient piano',
243
- 'bright': 'upbeat cheerful',
244
- 'light': 'upbeat cheerful',
245
- 'dark': 'mysterious atmospheric',
246
- 'warm': 'acoustic peaceful',
247
- 'cool': 'ambient ethereal',
248
- 'water': 'flowing ambient',
249
- 'sky': 'ethereal atmospheric',
250
- 'street': 'urban contemporary',
251
- 'building': 'modern electronic'
252
- }
253
 
254
- # Safe string operations
255
- description_lower = description.lower()
256
- music_elements = []
257
-
258
- # Check each keyword safely
259
- for keyword, music_style in mood_mappings.items():
260
- try:
261
- if keyword in description_lower:
262
- music_elements.append(music_style)
263
- except (TypeError, AttributeError) as e:
264
- print(f"Error checking keyword '{keyword}' in description: {e}")
265
- continue
266
 
267
- # Build enhanced description
268
- if music_elements:
269
- enhanced = f"{description}, {music_elements[0]}"
270
- else:
271
- enhanced = f"{description}, cinematic atmospheric"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- # Ensure return is a string
274
- return str(enhanced)
 
275
 
276
- except Exception as e:
277
- print(f"Error in enhance_description_for_music: {e}")
278
- traceback.print_exc()
279
- # Return safe fallback
280
- safe_desc = str(description) if description else "image"
281
- return f"{safe_desc}, cinematic atmospheric"
282
-
283
- def generate_placeholder_audio(self, duration=30, sampling_rate=32000):
284
- """Generate placeholder audio when MusicGen is not available"""
285
- print("🎡 Generating placeholder audio...")
286
-
287
- # Create a simple harmonic tone sequence
288
- t = np.linspace(0, duration, int(duration * sampling_rate))
289
-
290
- # Create a pleasant chord progression
291
- frequencies = [261.63, 329.63, 392.00, 523.25] # C, E, G, C (one octave up)
292
- audio = np.zeros_like(t)
293
-
294
- for i, freq in enumerate(frequencies):
295
- # Each note plays for duration/4 with overlap
296
- start_time = i * duration / 5
297
- end_time = start_time + duration / 3
298
 
299
- mask = (t >= start_time) & (t <= end_time)
300
- envelope = np.exp(-3 * (t[mask] - start_time) / (end_time - start_time)) # Decay envelope
301
- audio[mask] += 0.3 * envelope * np.sin(2 * np.pi * freq * t[mask])
302
-
303
- # Add some reverb-like effect
304
- audio = np.convolve(audio, np.exp(-np.linspace(0, 2, 1000)), mode='same')[:len(t)]
305
-
306
- # Normalize
307
- audio = audio / np.max(np.abs(audio)) * 0.7
308
-
309
- return audio.astype(np.float32), sampling_rate
310
-
311
- def generate_soundtrack(self, music_prompt, duration=30):
312
- """Generate soundtrack using MusicGen or placeholder"""
313
- try:
314
- # Ensure music_prompt is a string
315
- if not isinstance(music_prompt, str):
316
- print(f"Warning: music_prompt is not string: {type(music_prompt)}")
317
- music_prompt = str(music_prompt) if music_prompt is not None else "cinematic atmospheric"
318
 
319
- music_prompt = music_prompt.strip()
320
- if not music_prompt:
321
- music_prompt = "cinematic atmospheric"
322
 
323
- if self.music_model is not None and self.music_processor is not None:
324
- print(f"🎼 Generating soundtrack with MusicGen...")
325
- print(f"🎡 Prompt: {music_prompt}")
326
-
327
- # Process the text prompt
328
- inputs = self.music_processor(
329
- text=[music_prompt],
330
- padding=True,
331
- return_tensors="pt",
332
- )
333
-
334
- # Move inputs to same device as model
335
- device = next(self.music_model.parameters()).device
336
- inputs = {k: v.to(device) for k, v in inputs.items()}
337
-
338
- # Generate audio with shorter length for memory efficiency
339
- max_new_tokens = min(int(duration * 30), 800) # Further limit tokens
340
-
341
- with torch.no_grad():
342
- audio_values = self.music_model.generate(
343
- **inputs,
344
- max_new_tokens=max_new_tokens,
345
- do_sample=True,
346
- guidance_scale=3.0,
347
- )
348
-
349
- # Convert to numpy array
350
- sampling_rate = self.music_model.config.audio_encoder.sampling_rate
351
- audio_data = audio_values[0, 0].cpu().numpy()
352
-
353
- # Ensure correct duration
354
- target_length = int(duration * sampling_rate)
355
- if len(audio_data) > target_length:
356
- audio_data = audio_data[:target_length]
357
- elif len(audio_data) < target_length:
358
- audio_data = np.pad(audio_data, (0, target_length - len(audio_data)), mode='constant')
359
-
360
- return audio_data, sampling_rate
361
- else:
362
- print("🎡 Using placeholder audio generation...")
363
- return self.generate_placeholder_audio(duration)
364
-
365
  except Exception as e:
366
- print(f"❌ Error in soundtrack generation: {e}")
367
- traceback.print_exc()
368
- print("🎡 Falling back to placeholder audio...")
369
- return self.generate_placeholder_audio(duration)
370
 
371
- def create_simple_video(self, image, audio_data, sampling_rate, duration=30):
372
- """Create a simple video with basic zoom effect"""
373
- if not VIDEO_AVAILABLE:
374
- print("❌ Video creation not available")
375
- return None
376
-
377
  try:
378
- print("🎬 Creating simple video...")
 
 
379
 
380
- # Create frames with zoom effect
381
- frames = []
382
- fps = 12 # Lower FPS for memory efficiency
383
- total_frames = fps * duration
384
 
385
- # Convert PIL to numpy array and resize if too large
386
- img_array = np.array(image)
387
- h, w = img_array.shape[:2]
388
 
389
- # Limit resolution for memory efficiency
390
- max_size = 512
391
- if max(h, w) > max_size:
392
- ratio = max_size / max(h, w)
393
- new_h, new_w = int(h * ratio), int(w * ratio)
394
- image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)
395
- img_array = np.array(image)
396
- h, w = img_array.shape[:2]
397
 
398
- for i in range(total_frames):
399
- # Simple zoom effect
400
- progress = i / total_frames
401
- zoom_factor = 1.0 + 0.1 * progress # Zoom in by 10%
402
-
403
- # Resize image
404
- new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
405
- pil_img = Image.fromarray(img_array).resize((new_w, new_h), Image.Resampling.LANCZOS)
406
-
407
- # Center crop back to original size
408
- resized_array = np.array(pil_img)
409
- start_y = (new_h - h) // 2
410
- start_x = (new_w - w) // 2
411
- cropped = resized_array[start_y:start_y+h, start_x:start_x+w]
412
-
413
- frames.append(cropped)
414
-
415
- # Create temporary video file
416
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
417
- temp_video_path = temp_video.name
418
 
419
- # Write video using imageio
420
- import imageio
421
- imageio.mimsave(temp_video_path, frames, fps=fps)
422
-
423
- print("βœ… Simple video created successfully!")
424
- return temp_video_path
425
 
426
  except Exception as e:
427
- print(f"❌ Video creation failed: {e}")
428
- traceback.print_exc()
429
- return None
430
 
431
- def process_image(self, image, progress=gr.Progress()):
432
- """Main processing function"""
 
 
433
  if image is None:
434
- return None, "❌ Please upload an image first.", None, None
435
 
436
  try:
437
- progress(0.1, desc="Starting analysis...")
438
-
439
- # System info
440
- device_info = f"πŸ–₯️ Device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
441
-
442
- progress(0.2, desc="Analyzing image...")
443
-
444
- # Generate description
445
- try:
446
- description, music_prompt = self.generate_description(image)
447
-
448
- # Double-check that we have strings
449
- if not isinstance(description, str):
450
- description = "uploaded image"
451
- if not isinstance(music_prompt, str):
452
- music_prompt = "cinematic atmospheric"
453
-
454
- progress(0.4, desc="Description generated")
455
- except Exception as e:
456
- print(f"Error in description: {e}")
457
- traceback.print_exc()
458
- description, music_prompt = "uploaded image", "cinematic atmospheric"
459
-
460
- progress(0.5, desc="Generating soundtrack...")
461
-
462
- # Generate soundtrack
463
- try:
464
- audio_data, sampling_rate = self.generate_soundtrack(music_prompt, duration=20) # Shorter duration
465
- progress(0.7, desc="Soundtrack generated")
466
- except Exception as e:
467
- print(f"Error in soundtrack: {e}")
468
- audio_data, sampling_rate = self.generate_placeholder_audio(duration=20)
469
-
470
- # Save audio to temporary file
471
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as audio_file:
472
- sf.write(audio_file.name, audio_data, sampling_rate)
473
- audio_path = audio_file.name
474
-
475
- progress(0.8, desc="Creating video...")
476
-
477
- # Create video
478
- video_path = None
479
- if VIDEO_AVAILABLE:
480
  try:
481
- video_path = self.create_simple_video(image, audio_data, sampling_rate, duration=20)
482
- progress(0.9, desc="Video created")
 
 
 
483
  except Exception as e:
484
- print(f"Error in video creation: {e}")
485
-
486
- progress(1.0, desc="Complete!")
487
 
488
- # Prepare status message
489
  status_msg = f"""
490
- **βœ… Processing Complete!**
 
 
 
491
 
492
- **Image Description:** {description}
 
 
493
 
494
- **Music Style:** {music_prompt}
 
 
495
 
496
- **Technical Info:**
497
- - {device_info}
498
- - Audio: {len(audio_data)/sampling_rate:.1f} seconds at {sampling_rate}Hz
499
- - BLIP Model: {'βœ… Loaded' if self.caption_model else '❌ Using fallback'}
500
- - MusicGen Model: {'βœ… Loaded' if self.music_model else '❌ Using placeholder'}
501
- - Video: {'βœ… Created' if video_path else '❌ Failed/Unavailable'}
 
 
 
 
502
  """
503
 
504
- return (
505
- video_path,
506
- status_msg,
507
- (sampling_rate, audio_data),
508
- audio_path
509
- )
510
 
511
  except Exception as e:
512
- error_msg = f"❌ **Error during processing:**\n\n{str(e)}\n\n**System Info:**\n- Python: {sys.version[:20]}...\n- PyTorch: {torch.__version__}\n- CUDA: {torch.cuda.is_available()}"
513
- print(f"Main processing error: {e}")
514
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  return None, error_msg, None, None
516
 
517
- # Initialize with error handling
518
- print("πŸš€ Starting application initialization...")
519
  try:
520
- generator = LightweightPhotoSoundtrackGenerator()
521
- print("βœ… Generator initialized successfully")
522
  except Exception as e:
523
- print(f"❌ Failed to initialize generator: {e}")
524
  generator = None
525
 
526
- def create_interface():
527
- """Create Gradio interface with system status"""
528
 
529
- # System status
530
- status_html = f"""
531
- <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0;">
532
- <h3>πŸ”§ System Status</h3>
533
- <ul style="margin: 0;">
534
- <li><strong>Device:</strong> {'🟒 GPU Available' if torch.cuda.is_available() else '🟑 CPU Only'}</li>
535
- <li><strong>BLIP (Image Analysis):</strong> {'🟒 Available' if BLIP_AVAILABLE else '🟑 Fallback Mode'}</li>
536
- <li><strong>MusicGen (AI Music):</strong> {'🟒 Available' if MUSICGEN_AVAILABLE else '🟑 Placeholder Mode'}</li>
537
- <li><strong>Video Creation:</strong> {'🟒 Available' if VIDEO_AVAILABLE else 'πŸ”΄ Unavailable'}</li>
538
- </ul>
539
- </div>
540
- """
541
 
542
- # Simple test function for debugging
543
- def test_function(image):
544
- if image is None:
545
- return "❌ No image provided"
546
-
547
- try:
548
- # Test basic operations
549
- img_array = np.array(image)
550
- print(f"πŸ§ͺ Image shape: {img_array.shape}")
551
-
552
- # Test string operations
553
- test_desc = "test image with bright colors"
554
- test_enhanced = generator.enhance_description_for_music(test_desc) if generator else "test failed"
555
-
556
- return f"""
557
- **πŸ§ͺ Debug Test Results:**
558
-
559
- **Image Info:**
560
- - Shape: {img_array.shape}
561
- - Type: {type(img_array)}
562
-
563
- **String Test:**
564
- - Input: "{test_desc}"
565
- - Enhanced: "{test_enhanced}"
566
- - Types: {type(test_desc)} β†’ {type(test_enhanced)}
567
-
568
- **System Check:**
569
- - Generator: {'βœ… Loaded' if generator else '❌ Failed'}
570
- - Test passed: βœ…
571
- """
572
- except Exception as e:
573
- return f"❌ **Test failed:** {str(e)}\n\n{traceback.format_exc()}"
574
 
575
- with gr.Blocks(
576
- title="πŸ“ΈπŸŽ΅ Lightweight Photo Soundtrack Generator",
577
- theme=gr.themes.Soft()
578
- ) as demo:
579
 
580
  gr.HTML("""
581
- <div style="text-align: center; margin-bottom: 20px;">
582
- <h1>πŸ“ΈπŸŽ΅ AI Photo Soundtrack Generator</h1>
583
- <p>Upload a photo and create an AI-generated soundtrack!</p>
584
  </div>
585
  """)
586
 
 
 
 
 
 
 
 
 
 
587
  gr.HTML(status_html)
588
 
589
  with gr.Tabs():
590
- with gr.TabItem("🎬 Main Generator"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  with gr.Row():
592
  with gr.Column():
593
  image_input = gr.Image(
594
  type="pil",
595
- label="πŸ“€ Upload Your Photo"
596
  )
597
-
598
- generate_btn = gr.Button(
599
- "🎬 Generate Soundtrack & Video",
600
- variant="primary",
601
- size="lg"
602
  )
603
 
604
  with gr.Column():
605
- video_output = gr.Video(label="🎬 Generated Video")
606
  status_output = gr.Markdown(
607
- value="Upload an image to start!",
608
- label="πŸ“Š Status & Analysis"
609
  )
610
 
611
  with gr.Row():
612
- audio_output = gr.Audio(label="🎡 Generated Soundtrack")
613
- audio_file_output = gr.File(label="πŸ“₯ Download Audio")
614
-
615
- with gr.TabItem("πŸ§ͺ Debug Mode"):
616
- with gr.Row():
617
- with gr.Column():
618
- debug_image_input = gr.Image(
619
- type="pil",
620
- label="πŸ“€ Upload Test Image"
621
- )
622
- debug_btn = gr.Button(
623
- "πŸ§ͺ Run Debug Test",
624
- variant="secondary"
625
- )
626
-
627
- with gr.Column():
628
- debug_output = gr.Markdown(
629
- value="Upload an image and click 'Run Debug Test'",
630
- label="πŸ” Debug Results"
631
- )
632
-
633
- # Event handlers
634
- if generator is not None:
635
- generate_btn.click(
636
- fn=generator.process_image,
637
- inputs=[image_input],
638
- outputs=[video_output, status_output, audio_output, audio_file_output],
639
- show_progress=True
640
- )
641
-
642
- debug_btn.click(
643
- fn=test_function,
644
- inputs=[debug_image_input],
645
- outputs=[debug_output]
646
- )
647
- else:
648
- generate_btn.click(
649
- fn=lambda x: (None, "❌ Generator failed to initialize. Please check logs.", None, None),
650
- inputs=[image_input],
651
- outputs=[video_output, status_output, audio_output, audio_file_output]
652
- )
653
-
654
- debug_btn.click(
655
- fn=lambda x: "❌ Generator not available for testing.",
656
- inputs=[debug_image_input],
657
- outputs=[debug_output]
658
- )
659
 
660
  gr.HTML("""
661
- <div style="text-align: center; margin-top: 20px; padding: 15px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 8px;">
662
- <p style="color: white; margin: 0;">
663
- <strong>πŸ’‘ Tip:</strong> Use Debug Mode to test functionality if you encounter errors.
664
- </p>
 
 
 
665
  </div>
666
  """)
667
 
@@ -669,37 +389,39 @@ def create_interface():
669
 
670
  # Launch
671
  if __name__ == "__main__":
 
 
672
  try:
673
- demo = create_interface()
674
- demo.launch(
675
- server_name="0.0.0.0",
676
- server_port=7860,
677
- share=True
678
- )
 
 
 
679
  except Exception as e:
680
- print(f"❌ Failed to launch: {e}")
681
- traceback.print_exc()
682
 
683
- # requirements.txt - MINIMAL VERSION
684
  """
685
- torch>=2.0.0
686
- transformers>=4.30.0
687
  gradio==4.44.0
688
  Pillow>=9.5.0
689
  numpy>=1.24.0
690
  soundfile>=0.12.0
691
- imageio>=2.31.1
692
- scipy>=1.10.0
693
- accelerate>=0.20.0
694
  """
695
 
696
- # README.md - Updated
697
  """
698
  ---
699
- title: AI Photo Soundtrack Generator
700
- emoji: 🎡
701
- colorFrom: blue
702
- colorTo: purple
703
  sdk: gradio
704
  sdk_version: "4.44.0"
705
  app_file: app.py
@@ -707,30 +429,22 @@ pinned: false
707
  license: apache-2.0
708
  ---
709
 
710
- # πŸ“ΈπŸŽ΅ AI Photo Soundtrack Generator
711
-
712
- A lightweight AI app that creates soundtracks from your photos!
713
-
714
- ## 🌟 Features
715
-
716
- - **Smart Image Analysis**: BLIP-based or fallback description
717
- - **AI Music Generation**: MusicGen or harmonic placeholder audio
718
- - **Simple Video Creation**: Basic zoom effects
719
- - **Adaptive Performance**: Works on both CPU and GPU
720
- - **Graceful Degradation**: Functions even when models fail to load
721
-
722
- ## πŸš€ Quick Start
723
 
724
- 1. Upload any photo
725
- 2. Click "Generate Soundtrack & Video"
726
- 3. Download your personalized audio/video
727
 
728
- ## βš™οΈ Technical Details
 
 
 
 
 
729
 
730
- - **Memory Optimized**: Uses smaller models and efficient processing
731
- - **Error Resilient**: Comprehensive fallback systems
732
- - **Resource Adaptive**: Automatically adjusts to available hardware
733
- - **Real-time Status**: Shows what's working and what's not
 
734
 
735
- Perfect for creative projects and social media content! 🎬✨
736
  """
 
1
+ # app.py - Ultra-Safe Debugging Version
2
 
3
  import os
4
  import sys
5
+ import traceback
 
 
 
6
  import tempfile
 
7
  import warnings
 
8
  warnings.filterwarnings('ignore')
9
 
10
+ print("πŸš€ Starting ULTRA-SAFE mode...")
11
+ print(f"Python: {sys.version}")
12
+
13
+ # Safe imports with individual error handling
14
+ IMPORTS = {
15
+ 'torch': False,
16
+ 'numpy': False,
17
+ 'PIL': False,
18
+ 'gradio': False,
19
+ 'soundfile': False,
20
+ 'transformers': False
21
+ }
22
 
23
+ try:
24
+ import torch
25
+ IMPORTS['torch'] = True
26
+ print("βœ… PyTorch imported")
27
+ except Exception as e:
28
+ print(f"❌ PyTorch failed: {e}")
29
 
30
  try:
31
+ import numpy as np
32
+ IMPORTS['numpy'] = True
33
+ print("βœ… NumPy imported")
34
  except Exception as e:
35
+ print(f"❌ NumPy failed: {e}")
36
 
37
  try:
38
+ from PIL import Image
39
+ IMPORTS['PIL'] = True
40
+ print("βœ… PIL imported")
41
  except Exception as e:
42
+ print(f"❌ PIL failed: {e}")
43
 
44
  try:
45
+ import gradio as gr
46
+ IMPORTS['gradio'] = True
47
+ print("βœ… Gradio imported")
48
  except Exception as e:
49
+ print(f"❌ Gradio failed: {e}")
50
+
51
+ try:
52
+ import soundfile as sf
53
+ IMPORTS['soundfile'] = True
54
+ print("βœ… SoundFile imported")
55
+ except Exception as e:
56
+ print(f"❌ SoundFile failed: {e}")
57
+
58
+ # Only try transformers if basic imports work
59
+ if IMPORTS['torch']:
60
+ try:
61
+ from transformers import BlipProcessor, BlipForConditionalGeneration
62
+ from transformers import MusicgenForConditionalGeneration, AutoProcessor
63
+ IMPORTS['transformers'] = True
64
+ print("βœ… Transformers imported")
65
+ except Exception as e:
66
+ print(f"❌ Transformers failed: {e}")
67
 
68
+ class SafeGenerator:
69
  def __init__(self):
70
+ print("πŸ”§ Initializing Safe Generator...")
71
+ self.working = True
72
 
73
+ def safe_string_test(self, input_val):
74
+ """Test string operations safely"""
75
+ print(f"πŸ§ͺ Testing string operations with: {input_val} (type: {type(input_val)})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  try:
78
+ # Convert to string safely
79
+ if input_val is None:
80
+ str_val = "none"
81
+ elif isinstance(input_val, str):
82
+ str_val = input_val
83
+ elif isinstance(input_val, (int, float, bool)):
84
+ str_val = str(input_val)
 
 
 
 
 
 
 
 
 
85
  else:
86
+ str_val = repr(input_val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ print(f"βœ… String conversion: '{str_val}'")
89
 
90
+ # Test lower operation
91
+ lower_val = str_val.lower()
92
+ print(f"βœ… Lower operation: '{lower_val}'")
 
93
 
94
+ # Test 'in' operation with known string
95
+ test_keywords = ['test', 'image', 'bright']
96
+ results = []
 
 
 
97
 
98
+ for keyword in test_keywords:
99
+ try:
100
+ result = keyword in lower_val
101
+ results.append(f"{keyword}: {result}")
102
+ print(f"βœ… '{keyword}' in '{lower_val}': {result}")
103
+ except Exception as e:
104
+ error_msg = f"❌ Error with '{keyword}': {e}"
105
+ results.append(error_msg)
106
+ print(error_msg)
107
 
108
+ return f"String test passed. Results: {', '.join(results)}"
109
 
110
  except Exception as e:
111
+ error_msg = f"❌ String test failed: {e}\n{traceback.format_exc()}"
112
+ print(error_msg)
113
+ return error_msg
114
 
115
+ def create_simple_description(self, image):
116
+ """Create description without any AI models"""
117
  try:
118
+ if not IMPORTS['numpy'] or not IMPORTS['PIL']:
119
+ return "simple image", "simple image, ambient music"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ print("πŸ” Creating simple description...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ # Basic image analysis
124
+ img_array = np.array(image)
125
+ h, w, c = img_array.shape
126
+ print(f"πŸ“ Image dimensions: {w}x{h}x{c}")
 
 
 
 
 
 
 
 
127
 
128
+ # Safe color analysis
129
+ try:
130
+ mean_color = np.mean(img_array, axis=(0, 1))
131
+ brightness = float(np.mean(mean_color))
132
+ print(f"πŸ’‘ Brightness: {brightness}")
133
+
134
+ # Safe brightness categorization
135
+ if brightness > 200:
136
+ brightness_desc = "bright"
137
+ elif brightness < 100:
138
+ brightness_desc = "dark"
139
+ else:
140
+ brightness_desc = "medium"
141
+
142
+ print(f"🏷️ Brightness category: {brightness_desc}")
143
+
144
+ except Exception as e:
145
+ print(f"⚠️ Color analysis failed: {e}")
146
+ brightness_desc = "colorful"
147
 
148
+ # Build description safely
149
+ base_desc = f"{brightness_desc} image"
150
+ enhanced_desc = f"{base_desc}, peaceful music"
151
 
152
+ print(f"πŸ“ Description: '{base_desc}' -> '{enhanced_desc}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Test the strings before returning
155
+ test_result = self.safe_string_test(base_desc)
156
+ print(f"πŸ§ͺ String test result: {test_result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
+ return base_desc, enhanced_desc
 
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  except Exception as e:
161
+ error_msg = f"❌ Description creation failed: {e}\n{traceback.format_exc()}"
162
+ print(error_msg)
163
+ return "error image", "error image, quiet music"
 
164
 
165
+ def create_simple_audio(self, duration=10):
166
+ """Create simple audio without AI"""
 
 
 
 
167
  try:
168
+ if not IMPORTS['numpy']:
169
+ print("❌ NumPy not available for audio")
170
+ return None, None
171
 
172
+ print(f"🎡 Creating {duration}s simple audio...")
 
 
 
173
 
174
+ # Create simple tone
175
+ sample_rate = 22050
176
+ t = np.linspace(0, duration, int(duration * sample_rate))
177
 
178
+ # Simple pleasant tone (C major chord)
179
+ freq1, freq2, freq3 = 261.63, 329.63, 392.00 # C, E, G
180
+ audio = (np.sin(2 * np.pi * freq1 * t) * 0.3 +
181
+ np.sin(2 * np.pi * freq2 * t) * 0.2 +
182
+ np.sin(2 * np.pi * freq3 * t) * 0.2)
 
 
 
183
 
184
+ # Add fade in/out
185
+ fade_samples = int(0.5 * sample_rate) # 0.5 second fade
186
+ audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
187
+ audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
+ print(f"βœ… Audio created: {len(audio)} samples at {sample_rate}Hz")
190
+ return audio.astype(np.float32), sample_rate
 
 
 
 
191
 
192
  except Exception as e:
193
+ print(f"❌ Audio creation failed: {e}")
194
+ return None, None
 
195
 
196
+ def process_image_safe(self, image):
197
+ """Ultra-safe image processing"""
198
+ print("πŸ”„ Starting SAFE image processing...")
199
+
200
  if image is None:
201
+ return None, "❌ No image provided", None, None
202
 
203
  try:
204
+ # Step 1: Test the image
205
+ print("πŸ“Έ Step 1: Testing image...")
206
+ img_info = f"Image type: {type(image)}"
207
+ if hasattr(image, 'size'):
208
+ img_info += f", Size: {image.size}"
209
+ print(img_info)
210
+
211
+ # Step 2: Create description
212
+ print("πŸ“ Step 2: Creating description...")
213
+ description, music_prompt = self.create_simple_description(image)
214
+
215
+ # Step 3: Test strings
216
+ print("πŸ§ͺ Step 3: Testing strings...")
217
+ desc_test = self.safe_string_test(description)
218
+ prompt_test = self.safe_string_test(music_prompt)
219
+
220
+ # Step 4: Create audio
221
+ print("🎡 Step 4: Creating audio...")
222
+ audio_data, sample_rate = self.create_simple_audio(10)
223
+
224
+ # Step 5: Save audio if successful
225
+ audio_file = None
226
+ if audio_data is not None and IMPORTS['soundfile']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  try:
228
+ print("πŸ’Ύ Step 5: Saving audio...")
229
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
230
+ sf.write(f.name, audio_data, sample_rate)
231
+ audio_file = f.name
232
+ print(f"βœ… Audio saved: {audio_file}")
233
  except Exception as e:
234
+ print(f"⚠️ Audio save failed: {e}")
 
 
235
 
236
+ # Prepare results
237
  status_msg = f"""
238
+ **βœ… SAFE MODE PROCESSING COMPLETE**
239
+
240
+ **Image Analysis:**
241
+ - {img_info}
242
 
243
+ **Generated Text:**
244
+ - Description: "{description}"
245
+ - Music Prompt: "{music_prompt}"
246
 
247
+ **String Tests:**
248
+ - Description test: {desc_test[:100]}...
249
+ - Prompt test: {prompt_test[:100]}...
250
 
251
+ **Audio:**
252
+ - Status: {'βœ… Created' if audio_data is not None else '❌ Failed'}
253
+ - Duration: {len(audio_data)/sample_rate:.1f}s if audio_data else 'N/A'
254
+
255
+ **System Status:**
256
+ - PyTorch: {'βœ…' if IMPORTS['torch'] else '❌'}
257
+ - NumPy: {'βœ…' if IMPORTS['numpy'] else '❌'}
258
+ - PIL: {'βœ…' if IMPORTS['PIL'] else '❌'}
259
+ - SoundFile: {'βœ…' if IMPORTS['soundfile'] else '❌'}
260
+ - Transformers: {'βœ…' if IMPORTS['transformers'] else '❌'}
261
  """
262
 
263
+ audio_output = (sample_rate, audio_data) if audio_data is not None else None
264
+
265
+ print("βœ… Safe processing completed successfully!")
266
+ return None, status_msg, audio_output, audio_file
 
 
267
 
268
  except Exception as e:
269
+ error_msg = f"""
270
+ ❌ **SAFE MODE ERROR**
271
+
272
+ **Error:** {str(e)}
273
+
274
+ **Location:** {traceback.format_exc()}
275
+
276
+ **System Info:**
277
+ - Python: {sys.version}
278
+ - Working directory: {os.getcwd()}
279
+ - Environment: {dict(os.environ).get('SPACE_ID', 'Local')}
280
+
281
+ **Import Status:**
282
+ {chr(10).join([f"- {k}: {'βœ…' if v else '❌'}" for k, v in IMPORTS.items()])}
283
+ """
284
+ print(error_msg)
285
  return None, error_msg, None, None
286
 
287
# Build the single shared generator instance at import time. A failure here
# must not crash the app: the UI checks for None and degrades gracefully.
print("🎬 Initializing generator...")
try:
    generator = SafeGenerator()
except Exception as e:
    print(f"❌ Generator failed: {e}")
    generator = None
else:
    print("βœ… Generator ready!")
295
 
296
def create_minimal_interface():
    """Build the debug Gradio UI.

    Lays out two tabs: a string-operation tester and a safe image-processing
    pipeline, plus an import-status panel so the deploy environment is
    visible at a glance.

    Returns:
        The assembled ``gr.Blocks`` app, or None when Gradio is unavailable.
    """
    if not IMPORTS['gradio']:
        print("❌ Cannot create interface - Gradio not available")
        return None

    print("πŸ–₯️ Creating minimal interface...")

    with gr.Blocks(title="πŸ§ͺ Ultra-Safe Debug Mode") as demo:
        gr.HTML("""
        <div style="text-align: center; background: #fff3cd; padding: 20px; border-radius: 10px; margin: 20px 0;">
            <h1>πŸ§ͺ ULTRA-SAFE DEBUG MODE</h1>
            <p><strong>This version isolates and tests each component individually</strong></p>
        </div>
        """)

        # System status panel: one bullet per tracked import.
        status_html = f"""
        <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0;">
            <h3>πŸ”§ Import Status</h3>
            <ul>
                {"".join([f"<li><strong>{k}:</strong> {'🟒 OK' if v else 'πŸ”΄ Failed'}</li>" for k, v in IMPORTS.items()])}
            </ul>
        </div>
        """
        gr.HTML(status_html)

        with gr.Tabs():
            # Tab 1: Basic String Test
            with gr.TabItem("πŸ§ͺ String Test"):
                with gr.Row():
                    test_input = gr.Textbox(
                        value="bright colorful sunset image",
                        label="Test String Input"
                    )
                    test_btn = gr.Button("Test String Operations")

                test_output = gr.Textbox(
                    label="Test Results",
                    lines=10
                )

                # Wire up only when the generator initialized successfully.
                if generator:
                    test_btn.click(
                        fn=generator.safe_string_test,
                        inputs=[test_input],
                        outputs=[test_output]
                    )

            # Tab 2: Image Processing Test
            with gr.TabItem("πŸ“Έ Image Test"):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            type="pil",
                            label="Upload Test Image"
                        )
                        process_btn = gr.Button(
                            "πŸ”„ Process Image (Safe Mode)",
                            variant="primary"
                        )

                    with gr.Column():
                        status_output = gr.Markdown(
                            value="Upload an image and click process",
                            label="Processing Results"
                        )

                with gr.Row():
                    audio_output = gr.Audio(label="Generated Audio")
                    file_output = gr.File(label="Audio File")

                if generator:
                    process_btn.click(
                        fn=generator.process_image_safe,
                        inputs=[image_input],
                        # Hidden Video slot keeps the 4-tuple return shape of
                        # process_image_safe compatible with the callback.
                        outputs=[gr.Video(visible=False), status_output, audio_output, file_output]
                    )

        gr.HTML("""
        <div style="background: #e3f2fd; padding: 15px; border-radius: 8px; margin: 20px 0;">
            <h3>🎯 Debug Instructions</h3>
            <ol>
                <li><strong>String Test:</strong> Test if string operations work correctly</li>
                <li><strong>Image Test:</strong> Upload an image to see where the error occurs</li>
                <li><strong>Check Results:</strong> Look for the exact error location in the output</li>
            </ol>
        </div>
        """)

    # Without this return the launcher's `demo` would be None and the app
    # would never start.
    return demo
 
390
# Launch
if __name__ == "__main__":
    print("πŸš€ Launching debug interface...")

    try:
        app = create_minimal_interface()
        if app is None:
            print("❌ Could not create interface")
        else:
            app.launch(
                server_name="0.0.0.0",
                server_port=7860,
                share=True
            )
    except Exception as e:
        print(f"❌ Launch failed: {e}")
        print(traceback.format_exc())
407
 
408
+ # Minimal requirements.txt
409
  """
 
 
410
  gradio==4.44.0
411
  Pillow>=9.5.0
412
  numpy>=1.24.0
413
  soundfile>=0.12.0
414
+ torch>=2.0.0
415
+ transformers>=4.30.0
 
416
  """
417
 
418
+ # Updated README.md
419
  """
420
  ---
421
+ title: Ultra-Safe Photo Soundtrack Debug
422
+ emoji: πŸ§ͺ
423
+ colorFrom: red
424
+ colorTo: orange
425
  sdk: gradio
426
  sdk_version: "4.44.0"
427
  app_file: app.py
 
429
  license: apache-2.0
430
  ---
431
 
432
+ # πŸ§ͺ Ultra-Safe Debug Mode
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
+ This is a debugging version to isolate the "bool is not iterable" error.
 
 
435
 
436
+ ## Features
437
+ - βœ… Safe imports with individual error handling
438
+ - βœ… Step-by-step processing with detailed logs
439
+ - βœ… String operation testing
440
+ - βœ… Fallbacks for every component
441
+ - βœ… Detailed error reporting
442
 
443
+ ## Debug Process
444
+ 1. Upload to see import status
445
+ 2. Test string operations
446
+ 3. Process an image to find exact error location
447
+ 4. Check detailed logs for the root cause
448
 
449
+ This version will work even with missing dependencies and show exactly where errors occur.
450
  """