Spaces:

lazerkat
/

RandomDiffusion

Sleeping

App Files Community

lazerkat committed 17 days ago

Commit

d4f89b8

verified ·

1 Parent(s): 4ad9a53

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -22

app.py CHANGED Viewed

@@ -8,10 +8,6 @@ from PIL import Image
 import numpy as np
 import json
-# ============================================================================
-# DIFFUSION Model Architecture (from your training code)
-# ============================================================================
 class TextEncoder(nn.Module):
     def __init__(self, vocab_size, embed_dim=256, hidden_dim=512):
         super().__init__()
@@ -169,7 +165,6 @@ class Diffusion:
             x = (1 / torch.sqrt(alpha)) * (x - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * predicted_noise)
             x = x + torch.sqrt(beta) * noise
-            # Report progress
             if progress_callback is not None:
                 progress = (i + 1) / steps
                 progress_callback(progress)
@@ -178,13 +173,11 @@ class Diffusion:
         return x
-# Global variables
 model = None
 device = None
 vocab_data = None
 def download_file(url, filename):
-    """Download with progress tracking"""
     if not os.path.exists(filename):
         print(f"Downloading {filename}...")
         urllib.request.urlretrieve(url, filename)
@@ -192,36 +185,30 @@ def download_file(url, filename):
     else:
         print(f"{filename} already exists")
-# Download and load model
 def initialize_model():
     global model, device, vocab_data
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    # Download model and vocab
     model_url = "https://huggingface.co/lazerkat/randomdiffusion/resolve/main/newest.pth"
     model_path = "newest.pth"
     download_file(model_url, model_path)
-    # Load checkpoint
     checkpoint = torch.load(model_path, map_location=device)
-    # Get vocab info from checkpoint
     vocab_data = {
         'vocab': checkpoint['vocab'],
         'word_to_idx': checkpoint['word_to_idx'],
         'vocab_size': checkpoint['vocab_size']
     }
-    # Create model with correct vocab size
     model = DiffusionUNet(
         vocab_size=vocab_data['vocab_size'],
         image_channels=3,
         base_channels=64
     ).to(device)
-    # Load state dict
     model.load_state_dict(checkpoint['model_state_dict'])
     model.eval()
@@ -229,15 +216,13 @@ def initialize_model():
     return "✅ Model loaded successfully! You can now generate images."
 def tokenize_text(text, max_len=20):
-    """Tokenize text input for the model"""
     words = [w.strip('.,!?"\'') for w in text.lower().split()]
     tokens = words[:max_len]
     indices = [vocab_data['word_to_idx'].get(token, vocab_data['word_to_idx'].get('<UNK>', 1)) for token in tokens]
     while len(indices) < max_len:
-        indices.append(0)  # PAD token
     return torch.tensor(indices).unsqueeze(0).to(device)
-# Generate image with progress
 def generate_image(prompt, progress=gr.Progress()):
     global model, device, vocab_data
@@ -246,7 +231,7 @@ def generate_image(prompt, progress=gr.Progress()):
     progress(0, desc="Starting generation...")
-    diffusion = Diffusion(timesteps=500, device=device)  # Use 500 timesteps like training
     def update_progress(pct):
         progress(pct, desc=f"Generating... {pct*100:.1f}%")
@@ -263,7 +248,6 @@ def generate_image(prompt, progress=gr.Progress()):
     progress(1.0, desc="Converting to image...")
-    # Convert to image
     image = generated.cpu().squeeze(0)
     image = (image + 1) / 2
     image = image.clamp(0, 1)
@@ -272,7 +256,6 @@ def generate_image(prompt, progress=gr.Progress()):
     return Image.fromarray(image)
-# Create interface
 with gr.Blocks(title="RandomDiffusion Text-to-Image") as demo:
     gr.Markdown("# 🎨 RandomDiffusion")
     gr.Markdown("Text-to-Image generation using diffusion model")
@@ -291,13 +274,11 @@ with gr.Blocks(title="RandomDiffusion Text-to-Image") as demo:
     output_image = gr.Image(label="Generated Image", type="pil")
-    # Load model on startup
     demo.load(
         lambda: initialize_model(),
         outputs=[status]
     )
-    # Generate on button click
     generate_btn.click(
         generate_image,
         inputs=[prompt_input],
@@ -305,4 +286,5 @@ with gr.Blocks(title="RandomDiffusion Text-to-Image") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 import numpy as np
 import json
 class TextEncoder(nn.Module):
     def __init__(self, vocab_size, embed_dim=256, hidden_dim=512):
         super().__init__()
             x = (1 / torch.sqrt(alpha)) * (x - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * predicted_noise)
             x = x + torch.sqrt(beta) * noise
             if progress_callback is not None:
                 progress = (i + 1) / steps
                 progress_callback(progress)
         return x
 model = None
 device = None
 vocab_data = None
 def download_file(url, filename):
     if not os.path.exists(filename):
         print(f"Downloading {filename}...")
         urllib.request.urlretrieve(url, filename)
     else:
         print(f"{filename} already exists")
 def initialize_model():
     global model, device, vocab_data
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model_url = "https://huggingface.co/lazerkat/randomdiffusion/resolve/main/newest.pth"
     model_path = "newest.pth"
     download_file(model_url, model_path)
     checkpoint = torch.load(model_path, map_location=device)
     vocab_data = {
         'vocab': checkpoint['vocab'],
         'word_to_idx': checkpoint['word_to_idx'],
         'vocab_size': checkpoint['vocab_size']
     }
     model = DiffusionUNet(
         vocab_size=vocab_data['vocab_size'],
         image_channels=3,
         base_channels=64
     ).to(device)
     model.load_state_dict(checkpoint['model_state_dict'])
     model.eval()
     return "✅ Model loaded successfully! You can now generate images."
 def tokenize_text(text, max_len=20):
     words = [w.strip('.,!?"\'') for w in text.lower().split()]
     tokens = words[:max_len]
     indices = [vocab_data['word_to_idx'].get(token, vocab_data['word_to_idx'].get('<UNK>', 1)) for token in tokens]
     while len(indices) < max_len:
+        indices.append(0)
     return torch.tensor(indices).unsqueeze(0).to(device)
 def generate_image(prompt, progress=gr.Progress()):
     global model, device, vocab_data
     progress(0, desc="Starting generation...")
+    diffusion = Diffusion(timesteps=500, device=device)
     def update_progress(pct):
         progress(pct, desc=f"Generating... {pct*100:.1f}%")
     progress(1.0, desc="Converting to image...")
     image = generated.cpu().squeeze(0)
     image = (image + 1) / 2
     image = image.clamp(0, 1)
     return Image.fromarray(image)
 with gr.Blocks(title="RandomDiffusion Text-to-Image") as demo:
     gr.Markdown("# 🎨 RandomDiffusion")
     gr.Markdown("Text-to-Image generation using diffusion model")
     output_image = gr.Image(label="Generated Image", type="pil")
     demo.load(
         lambda: initialize_model(),
         outputs=[status]
     )
     generate_btn.click(
         generate_image,
         inputs=[prompt_input],
     )
 if __name__ == "__main__":
+    demo.launch()