Spaces:

HSinghHuggingFace
/

stable-diffusion-image-generator

Sleeping

App Files Files Community

HSinghHuggingFace committed on Feb 26, 2025

Commit

2c843c7

1 Parent(s): eff7e87

stable diffusion image generator

Browse files

Files changed (10) hide show

README.md +87 -3
requirements.txt +10 -0
src/app.py +49 -0
src/utils/style_generator.py +195 -0
src/utils/ui_components.py +181 -0
style_embeddings/balloon.bin +3 -0
style_embeddings/dhoni.bin +3 -0
style_embeddings/lion_king.bin +3 -0
style_embeddings/mickey_mouse.bin +3 -0
style_embeddings/rose_flower.bin +3 -0

README.md CHANGED Viewed

@@ -5,10 +5,94 @@ colorFrom: blue
 colorTo: purple
 sdk: streamlit
 sdk_version: 1.42.2
-app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Stable-Diffusion-Image-Generator
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorTo: purple
 sdk: streamlit
 sdk_version: 1.42.2
+app_file: src/app.py
 pinned: false
 license: apache-2.0
+short_description: Transform your ideas into artistic masterpieces using Stable Diffusion with custom style embeddings
 ---
+# 🎨 AI Style Transfer Studio
+Transform your ideas into artistic masterpieces using Stable Diffusion with custom style embeddings.
+## 🚀 Features
+- Multiple pre-trained style embeddings (Dhoni, Mickey Mouse, Balloon, Lion King, Rose Flower)
+- Advanced color enhancement technology
+- User-friendly Streamlit interface
+- Real-time image generation
+- Example gallery with style comparisons
+## 🛠️ Local Setup
+1. Clone the repository:
+```bash
+git clone https://github.com/yourusername/stable-diffusion-image-generator.git
+cd stable-diffusion-image-generator
+```
+2. Create and activate a virtual environment (recommended):
+```bash
+python -m venv venv
+# On Windows
+venv\Scripts\activate
+# On Unix or MacOS
+source venv/bin/activate
+```
+3. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+4. Run the Streamlit app:
+```bash
+streamlit run src/app.py
+```
+The app will open in your default web browser at `http://localhost:8501`.
+## 🌐 Deploying to Hugging Face Spaces
+1. Create a new Space on Hugging Face:
+   - Go to https://huggingface.co/spaces
+   - Click "Create new Space"
+   - Choose "Streamlit" as the SDK
+   - Set the Space name and visibility
+2. Push your code to Hugging Face:
+```bash
+git add .
+git commit -m "Initial commit"
+git remote add space https://huggingface.co/spaces/yourusername/your-space-name
+git push space main
+```
+3. The deployment will start automatically. Monitor the build logs on your Space's page.
+## 🎯 Usage
+1. Enter your creative prompt in the text area
+2. Select a style from the available options
+3. Click "Generate Artwork"
+4. View both the original and color-enhanced versions of your creation
+## 📝 Requirements
+- Python 3.8+
+- CUDA-capable GPU (recommended)
+- 8GB+ RAM
+## 🔑 Environment Variables
+No additional environment variables are required for basic usage.
+## 📄 License
+This project is licensed under the Apache 2.0 License.
+## 🙏 Acknowledgments
+- [Stable Diffusion](https://github.com/CompVis/stable-diffusion) for the base model
+- [Hugging Face](https://huggingface.co/) for model hosting and Spaces
+- [Streamlit](https://streamlit.io/) for the web interface

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch>=2.0.0
+diffusers>=0.19.0
+transformers>=4.30.0
+accelerate>=0.21.0
+streamlit>=1.24.0
+Pillow>=9.5.0
+numpy>=1.24.0
+pathlib>=1.0.1
+tqdm>=4.65.0
+huggingface-hub>=0.16.0

src/app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+from utils.style_generator import StyleTransfer
+from utils.ui_components import (
+    setup_page_config,
+    apply_custom_css,
+    render_header,
+    render_controls,
+    render_image_columns,
+    render_example_gallery,
+    render_info_sections
+)
+# Initialize the application
+setup_page_config()
+apply_custom_css()
+render_header()
+# Initialize session state
+if 'generator' not in st.session_state:
+    st.session_state.generator = StyleTransfer.get_instance()
+    if not st.session_state.generator.is_initialized:
+        st.session_state.generator.initialize_pipeline()
+# Render controls and handle user input
+prompt, selected_style = render_controls(st.session_state.generator.style_names)
+if st.sidebar.button("🚀 Generate Artwork", use_container_width=True):
+    if prompt:
+        try:
+            with st.spinner("Generating your artwork..."):
+                base_image, enhanced_image = st.session_state.generator.generate_artwork(prompt, selected_style)
+                # Store images in session state
+                st.session_state.base_image = base_image
+                st.session_state.enhanced_image = enhanced_image
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+    else:
+        st.warning("Please enter a prompt first!")
+# Display generated images
+render_image_columns(
+    base_image=st.session_state.get('base_image'),
+    enhanced_image=st.session_state.get('enhanced_image')
+)
+# Render example gallery and information sections
+render_example_gallery()
+render_info_sections()

src/utils/style_generator.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import torch
+from diffusers import StableDiffusionPipeline
+from torch import autocast
+from pathlib import Path
+import traceback
+class StyleTransfer:
+    _instance = None
+    @classmethod
+    def get_instance(cls):
+        if cls._instance is None:
+            cls._instance = cls()
+        return cls._instance
+    def __init__(self):
+        self.pipeline = None
+        self.style_tokens = []
+        self.styles = [
+            "dhoni",
+            "mickey_mouse",
+            "balloon",
+            "lion_king",
+            "rose_flower"
+        ]
+        self.style_names = [
+            "Dhoni Style",
+            "Mickey Mouse Style",
+            "Balloon Style",
+            "Lion King Style",
+            "Rose Flower Style"
+        ]
+        self.is_initialized = False
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        if self.device == "cpu":
+            print("NVIDIA GPU not found. Running on CPU (this will be slower)")
+    def initialize_pipeline(self):
+        if self.is_initialized:
+            return
+        try:
+            print("Initializing Stable Diffusion model...")
+            model_id = "runwayml/stable-diffusion-v1-5"
+            self.pipeline = StableDiffusionPipeline.from_pretrained(
+                model_id,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=None
+            )
+            self.pipeline = self.pipeline.to(self.device)
+            # Load style embeddings from current directory
+            current_dir = Path(__file__).parent.parent
+            for style, style_name in zip(self.styles, self.style_names):
+                style_path = current_dir / f"{style}.bin"
+                if not style_path.exists():
+                    raise FileNotFoundError(f"Style embedding not found: {style_path}")
+                print(f"Loading style: {style_name}")
+                token = self._load_style_embedding(str(style_path))
+                self.style_tokens.append(token)
+                print(f"✓ Loaded style: {style_name}")
+            self.is_initialized = True
+            print(f"Model initialization complete! Using device: {self.device}")
+        except Exception as e:
+            print(f"Error during initialization: {str(e)}")
+            print(traceback.format_exc())
+            raise
+    def _load_style_embedding(self, embedding_path, token=None):
+        loaded_embeds = torch.load(embedding_path, map_location="cpu")
+        trained_token = list(loaded_embeds.keys())[0]
+        embeds = loaded_embeds[trained_token]
+        # Get the expected dimension from the text encoder
+        expected_dim = self.pipeline.text_encoder.get_input_embeddings().weight.shape[1]
+        current_dim = embeds.shape[0]
+        # Resize embeddings if dimensions don't match
+        if current_dim != expected_dim:
+            print(f"Resizing embedding from {current_dim} to {expected_dim}")
+            if current_dim > expected_dim:
+                embeds = embeds[:expected_dim]
+            else:
+                embeds = torch.cat([embeds, torch.zeros(expected_dim - current_dim)], dim=0)
+        # Reshape to match expected dimensions
+        embeds = embeds.unsqueeze(0)  # Add batch dimension
+        # Cast to dtype of text_encoder
+        dtype = self.pipeline.text_encoder.get_input_embeddings().weight.dtype
+        embeds = embeds.to(dtype)
+        # Add the token in tokenizer
+        token = token if token is not None else trained_token
+        self.pipeline.tokenizer.add_tokens(token)
+        # Resize the token embeddings
+        self.pipeline.text_encoder.resize_token_embeddings(len(self.pipeline.tokenizer))
+        # Get the id for the token and assign the embeds
+        token_id = self.pipeline.tokenizer.convert_tokens_to_ids(token)
+        self.pipeline.text_encoder.get_input_embeddings().weight.data[token_id] = embeds[0]
+        return token
+    def generate_artwork(self, prompt, selected_style):
+        try:
+            # Find the index of the selected style
+            style_idx = self.style_names.index(selected_style)
+            # Generate single image with selected style
+            styled_prompt = f"{prompt}, {self.style_tokens[style_idx]}"
+            # Set seed for reproducibility
+            generator_seed = 42
+            torch.manual_seed(generator_seed)
+            if self.device == "cuda":
+                torch.cuda.manual_seed(generator_seed)
+            # Generate base image
+            with autocast(self.device):
+                base_image = self.pipeline(
+                    styled_prompt,
+                    num_inference_steps=50,
+                    guidance_scale=7.5,
+                    generator=torch.Generator(self.device).manual_seed(generator_seed)
+                ).images[0]
+            # Generate same image with color enhancement
+            with autocast(self.device):
+                enhanced_image = self.pipeline(
+                    styled_prompt,
+                    num_inference_steps=50,
+                    guidance_scale=7.5,
+                    callback=self._enhance_colors,
+                    callback_steps=5,
+                    generator=torch.Generator(self.device).manual_seed(generator_seed)
+                ).images[0]
+            return base_image, enhanced_image
+        except Exception as e:
+            print(f"Error in generate_artwork: {e}")
+            raise
+    def _enhance_colors(self, i, t, latents):
+        if i % 5 == 0:  # Apply enhancement every 5 steps
+            try:
+                # Create a copy that requires gradients
+                latents_copy = latents.detach().clone()
+                latents_copy.requires_grad_(True)
+                # Compute color distance loss
+                loss = self._calculate_color_distance(latents_copy)
+                # Compute gradients
+                if loss.requires_grad:
+                    grads = torch.autograd.grad(
+                        outputs=loss,
+                        inputs=latents_copy,
+                        allow_unused=True,
+                        retain_graph=False
+                    )[0]
+                    if grads is not None:
+                        # Apply gradients to original latents
+                        return latents - 0.1 * grads.detach()
+            except Exception as e:
+                print(f"Error in color enhancement: {e}")
+        return latents
+    def _calculate_color_distance(self, images):
+        # Ensure we're working with gradients
+        if not images.requires_grad:
+            images = images.detach().requires_grad_(True)
+        # Convert to float32 and normalize
+        images = images.float() / 2 + 0.5
+        # Get RGB channels
+        red = images[:,0:1]
+        green = images[:,1:2]
+        blue = images[:,2:3]
+        # Calculate color distances using L2 norm
+        rg_distance = ((red - green) ** 2).mean()
+        rb_distance = ((red - blue) ** 2).mean()
+        gb_distance = ((green - blue) ** 2).mean()
+        return (rg_distance + rb_distance + gb_distance) * 100  # Scale up the loss

src/utils/ui_components.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import streamlit as st
+from pathlib import Path
+def setup_page_config():
+    st.set_page_config(
+        page_title="AI Style Transfer Studio",
+        page_icon="🎨",
+        layout="wide"
+    )
+def apply_custom_css():
+    st.markdown("""
+    <style>
+        .stApp {
+            background-color: #1f2937;
+        }
+        .stMarkdown {
+            color: #f3f4f6;
+        }
+        .stButton > button {
+            background-color: #6366F1;
+            color: white;
+        }
+        .stButton > button:hover {
+            background-color: #4F46E5;
+        }
+        .dark-theme {
+            background-color: #111827;
+            border-radius: 10px;
+            padding: 20px;
+            margin: 10px 0;
+            border: 1px solid #374151;
+        }
+    </style>
+    """, unsafe_allow_html=True)
+def render_header():
+    st.markdown("""
+    <div class="dark-theme" style="text-align: center;">
+        <h1>🎨 AI Style Transfer Studio</h1>
+        <h3>Transform your ideas into artistic masterpieces</h3>
+    </div>
+    """, unsafe_allow_html=True)
+def render_controls(style_names):
+    with st.sidebar:
+        st.markdown("## 🎯 Controls")
+        prompt = st.text_area(
+            "What would you like to create?",
+            placeholder="e.g., a soccer player celebrating a goal",
+            height=100
+        )
+        selected_style = st.radio(
+            "Choose Your Style",
+            style_names,
+            index=0
+        )
+        return prompt, selected_style
+def render_image_columns(base_image=None, enhanced_image=None):
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("### Original Style")
+        if base_image:
+            st.image(base_image, use_column_width=True)
+    with col2:
+        st.markdown("### Color Enhanced")
+        if enhanced_image:
+            st.image(enhanced_image, use_column_width=True)
+def render_example_gallery():
+    st.markdown("""
+    <div class="dark-theme">
+        <h2>🎆 Example Gallery</h2>
+        <p>Compare original and enhanced versions for each style:</p>
+    </div>
+    """, unsafe_allow_html=True)
+    try:
+        output_dir = Path("Outputs")
+        original_dir = output_dir
+        enhanced_dir = output_dir / "Color_Enhanced"
+        if enhanced_dir.exists():
+            original_images = {
+                Path(f).stem.split('_example')[0]: f
+                for f in original_dir.glob("*.webp")
+                if '_example' in f.name
+            }
+            enhanced_images = {
+                Path(f).stem.split('_example')[0]: f
+                for f in enhanced_dir.glob("*.webp")
+                if '_example' in f.name
+            }
+            styles = [
+                ("ronaldo", "Ronaldo Style"),
+                ("canna_lily", "Canna Lily"),
+                ("three_stooges", "Three Stooges"),
+                ("pop_art", "Pop Art"),
+                ("bird_style", "Bird Style")
+            ]
+            for style_key, style_name in styles:
+                if style_key in original_images and style_key in enhanced_images:
+                    st.markdown(f"### {style_name}")
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        st.image(
+                            str(original_images[style_key]),
+                            caption="Original",
+                            use_column_width=True
+                        )
+                    with col2:
+                        st.image(
+                            str(enhanced_images[style_key]),
+                            caption="Color Enhanced",
+                            use_column_width=True
+                        )
+                    st.markdown("<hr>", unsafe_allow_html=True)
+    except Exception as e:
+        st.error(f"Error loading example gallery: {str(e)}")
+def render_info_sections():
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("""
+        <div class="dark-theme">
+            <h2>🎨 Style Guide</h2>
+            <table>
+                <tr>
+                    <th>Style</th>
+                    <th>Best For</th>
+                </tr>
+                <tr>
+                    <td><strong>Dhoni Style</strong></td>
+                    <td>Cricket scenes, sports action, victory celebrations</td>
+                </tr>
+                <tr>
+                    <td><strong>Mickey Mouse Style</strong></td>
+                    <td>Cartoon characters, playful scenes, whimsical art</td>
+                </tr>
+                <tr>
+                    <td><strong>Balloon Style</strong></td>
+                    <td>Festive scenes, colorful celebrations, light and airy compositions</td>
+                </tr>
+                <tr>
+                    <td><strong>Lion King Style</strong></td>
+                    <td>Animal portraits, majestic scenes, dramatic landscapes</td>
+                </tr>
+                <tr>
+                    <td><strong>Rose Flower Style</strong></td>
+                    <td>Floral art, romantic scenes, delicate compositions</td>
+                </tr>
+            </table>
+            <em>Choose the style that best matches your creative vision</em>
+        </div>
+        """, unsafe_allow_html=True)
+    with col2:
+        st.markdown("""
+        <div class="dark-theme">
+            <h2>🔍 Color Enhancement Technology</h2>
+            <p>Our advanced color processing uses distance loss to maximize the distinction between color channels,
+            resulting in more vibrant and visually striking images. This technique helps to:</p>
+            <ul>
+                <li>Enhance color separation</li>
+                <li>Improve visual contrast</li>
+                <li>Create more dynamic compositions</li>
+                <li>Preserve artistic style while boosting vibrancy</li>
+            </ul>
+        </div>
+        """, unsafe_allow_html=True)

style_embeddings/balloon.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5916ba4a9c011cb7f04df4501b20307b05b115c1aafacd538439db055790e6e1
+size 151785628

style_embeddings/dhoni.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb3894eb1e73b4ee7b22806c4bc74dd1177188e3282d8fe7968aa281de8b2119
+size 151785554

style_embeddings/lion_king.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a7a97e656141710692e65655a6992ddfa783c08f3e80584c4ed4933a8a3471b
+size 151785638

style_embeddings/mickey_mouse.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b576e7a808d880786b0c155e249c18473512ae3c16a6fe23419f586247c2406
+size 151785717

style_embeddings/rose_flower.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88020c89e2fb6ee4e1d89eb55f08ee9762850f46c3b7d6f19dc665ba961aad6c
+size 151785712