Update README.md
Browse files
README.md
CHANGED
|
@@ -94,17 +94,8 @@ def flux_shift(t, s=3.0):
|
|
| 94 |
"""Flux-style timestep shifting - biases toward data end."""
|
| 95 |
return s * t / (1 + (s - 1) * t)
|
| 96 |
|
| 97 |
-
def generate(model, t5_emb, clip_pooled, num_steps=25, cfg_scale=4.0):
|
| 98 |
-
"""
|
| 99 |
-
Euler sampling for rectified flow.
|
| 100 |
-
|
| 101 |
-
Flow matching: x_t = (1-t)*noise + t*data
|
| 102 |
-
- t=0: pure noise
|
| 103 |
-
- t=1: pure data
|
| 104 |
-
- v = data - noise
|
| 105 |
-
|
| 106 |
-
Integrate from t=0 → t=1
|
| 107 |
-
"""
|
| 108 |
device = next(model.parameters()).device
|
| 109 |
dtype = next(model.parameters()).dtype
|
| 110 |
|
|
@@ -112,22 +103,17 @@ def generate(model, t5_emb, clip_pooled, num_steps=25, cfg_scale=4.0):
|
|
| 112 |
x = torch.randn(1, 64*64, 16, device=device, dtype=dtype)
|
| 113 |
img_ids = TinyFluxDeep.create_img_ids(1, 64, 64, device)
|
| 114 |
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
-
timesteps = flux_shift(t_linear, s=3.0)
|
| 118 |
-
|
| 119 |
-
# Null embeddings for CFG
|
| 120 |
-
t5_null = torch.zeros_like(t5_emb)
|
| 121 |
-
clip_null = torch.zeros_like(clip_pooled)
|
| 122 |
|
| 123 |
for i in range(num_steps):
|
| 124 |
t_curr = timesteps[i]
|
| 125 |
t_next = timesteps[i + 1]
|
| 126 |
-
dt = t_next - t_curr
|
| 127 |
|
| 128 |
-
t_batch = t_curr.
|
| 129 |
|
| 130 |
-
#
|
| 131 |
v_cond = model(
|
| 132 |
hidden_states=x,
|
| 133 |
encoder_hidden_states=t5_emb,
|
|
@@ -136,10 +122,11 @@ def generate(model, t5_emb, clip_pooled, num_steps=25, cfg_scale=4.0):
|
|
| 136 |
img_ids=img_ids,
|
| 137 |
)
|
| 138 |
|
|
|
|
| 139 |
v_uncond = model(
|
| 140 |
hidden_states=x,
|
| 141 |
-
encoder_hidden_states=
|
| 142 |
-
pooled_projections=
|
| 143 |
timestep=t_batch,
|
| 144 |
img_ids=img_ids,
|
| 145 |
)
|
|
@@ -147,7 +134,7 @@ def generate(model, t5_emb, clip_pooled, num_steps=25, cfg_scale=4.0):
|
|
| 147 |
# Classifier-free guidance
|
| 148 |
v = v_uncond + cfg_scale * (v_cond - v_uncond)
|
| 149 |
|
| 150 |
-
# Euler step
|
| 151 |
x = x + v * dt
|
| 152 |
|
| 153 |
return x # [1, 4096, 16] - decode with VAE
|
|
@@ -418,12 +405,9 @@ vae = AutoencoderKL.from_pretrained(
|
|
| 418 |
model_py = hf_hub_download("AbstractPhil/tiny-flux-deep", "scripts/model_v4.py")
|
| 419 |
exec(open(model_py).read())
|
| 420 |
|
| 421 |
-
config = TinyFluxConfig(
|
| 422 |
-
use_sol_prior=True, # Disabled until trained
|
| 423 |
-
use_t5_vec=True, # Disabled until trained
|
| 424 |
-
)
|
| 425 |
model = TinyFluxDeep(config).to("cuda", torch.bfloat16)
|
| 426 |
-
weights = load_file(hf_hub_download("AbstractPhil/tiny-flux-deep", "
|
| 427 |
model.load_state_dict(weights, strict=False)
|
| 428 |
model.eval()
|
| 429 |
|
|
@@ -444,71 +428,63 @@ def encode_prompt(prompt):
|
|
| 444 |
|
| 445 |
return t5_emb, clip_pooled
|
| 446 |
|
| 447 |
-
|
| 448 |
-
def flux_shift(t, s=3.0):
|
| 449 |
-
"""Flux-style timestep shift."""
|
| 450 |
-
return s * t / (1 + (s - 1) * t)
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
@torch.inference_mode()
|
| 454 |
def generate_image(prompt, num_steps=25, cfg_scale=4.0, seed=None):
|
| 455 |
"""
|
| 456 |
Euler sampling for rectified flow.
|
| 457 |
|
| 458 |
-
Flow
|
| 459 |
-
|
| 460 |
-
At t=0: pure noise
|
| 461 |
-
At t=1: pure data
|
| 462 |
-
Velocity v = data - noise (constant)
|
| 463 |
-
|
| 464 |
-
Sampling: Integrate from t=0 (noise) → t=1 (data)
|
| 465 |
"""
|
| 466 |
if seed is not None:
|
| 467 |
torch.manual_seed(seed)
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
image = generate_image("a photograph of a tiger in natural habitat", seed=42)
|
| 513 |
image.save("tiger.png")
|
| 514 |
```
|
|
|
|
| 94 |
"""Flux-style timestep shifting - biases toward data end."""
|
| 95 |
return s * t / (1 + (s - 1) * t)
|
| 96 |
|
| 97 |
+
def generate(model, t5_emb, clip_emb, clip_pooled, num_steps=25, cfg_scale=4.0):
    """Euler sampling with classifier-free guidance.

    Rectified flow: x_t = (1-t)*noise + t*data, so the velocity target is
    v = data - noise and sampling integrates from t=0 (pure noise) to
    t=1 (data).

    Args:
        model: TinyFluxDeep transformer that predicts velocity.
        t5_emb: T5 sequence embeddings for the prompt.
        clip_emb: unused in this function; kept for signature compatibility.
        clip_pooled: pooled CLIP embedding for the prompt.
        num_steps: number of Euler integration steps.
        cfg_scale: classifier-free guidance scale.

    Returns:
        Latent tensor of shape [1, 4096, 16] — decode with the VAE.
    """
    device = next(model.parameters()).device
    dtype = next(model.parameters()).dtype

    x = torch.randn(1, 64*64, 16, device=device, dtype=dtype)
    img_ids = TinyFluxDeep.create_img_ids(1, 64, 64, device)

    # Rectified flow: integrate from t=0 (noise) to t=1 (data),
    # with Flux-style shift biasing steps toward the data end.
    timesteps = flux_shift(torch.linspace(0, 1, num_steps + 1, device=device))

    for i in range(num_steps):
        t_curr = timesteps[i]
        t_next = timesteps[i + 1]
        dt = t_next - t_curr

        t_batch = t_curr.expand(1)

        # Conditional prediction
        v_cond = model(
            hidden_states=x,
            encoder_hidden_states=t5_emb,
            pooled_projections=clip_pooled,
            timestep=t_batch,
            img_ids=img_ids,
        )

        # Unconditional prediction (for CFG): zeroed text conditioning
        v_uncond = model(
            hidden_states=x,
            encoder_hidden_states=torch.zeros_like(t5_emb),
            pooled_projections=torch.zeros_like(clip_pooled),
            timestep=t_batch,
            img_ids=img_ids,
        )

        # Classifier-free guidance
        v = v_uncond + cfg_scale * (v_cond - v_uncond)

        # Euler step
        x = x + v * dt

    return x  # [1, 4096, 16] - decode with VAE
|
|
|
|
| 405 |
# Download and exec the model definition (defines TinyFluxConfig / TinyFluxDeep).
# NOTE(review): exec() of downloaded code runs arbitrary Python — only do this
# for a repository you trust.
model_py = hf_hub_download("AbstractPhil/tiny-flux-deep", "scripts/model_v4.py")
with open(model_py) as f:
    exec(f.read())

config = TinyFluxConfig()
model = TinyFluxDeep(config).to("cuda", torch.bfloat16)
weights = load_file(hf_hub_download("AbstractPhil/tiny-flux-deep", "model.safetensors"))
model.load_state_dict(weights, strict=False)  # strict=False tolerates key mismatches
model.eval()
|
| 413 |
|
|
|
|
| 428 |
|
| 429 |
return t5_emb, clip_pooled
|
| 430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
def generate_image(prompt, num_steps=25, cfg_scale=4.0, seed=None):
    """Generate a PIL image from a text prompt via Euler sampling of rectified flow.

    Flow: x_t = (1-t)*noise + t*data, velocity v = data - noise.
    Integrates from t=0 (pure noise) to t=1 (data), then decodes with the VAE.

    Args:
        prompt: text prompt to condition on.
        num_steps: number of Euler steps.
        cfg_scale: classifier-free guidance scale.
        seed: optional int for reproducible noise.

    Returns:
        PIL.Image.Image of the decoded sample.
    """
    from PIL import Image  # local import keeps the README snippet self-contained

    if seed is not None:
        torch.manual_seed(seed)

    t5_emb, clip_pooled = encode_prompt(prompt)

    # Null embeddings for CFG: encode the empty prompt
    t5_null, clip_null = encode_prompt("")

    # Start from pure noise (t=0)
    x = torch.randn(1, 64*64, 16, device="cuda", dtype=torch.bfloat16)
    img_ids = TinyFluxDeep.create_img_ids(1, 64, 64, "cuda")

    # Rectified flow schedule: 0 → 1 with Flux-style shift
    # (biases more steps toward the data end, t=1)
    def flux_shift(t, s=3.0):
        return s * t / (1 + (s - 1) * t)

    timesteps = flux_shift(torch.linspace(0, 1, num_steps + 1, device="cuda"))

    with torch.no_grad():
        for i in range(num_steps):
            t = timesteps[i].expand(1)
            dt = timesteps[i + 1] - timesteps[i]  # Positive

            # Conditional
            v_cond = model(x, t5_emb, clip_pooled, t, img_ids)

            # Unconditional
            v_uncond = model(x, t5_null, clip_null, t, img_ids)

            # CFG
            v = v_uncond + cfg_scale * (v_cond - v_uncond)

            # Euler step
            x = x + v * dt

    # Decode with VAE: unpack token sequence back into a latent image
    x = x.reshape(1, 64, 64, 16).permute(0, 3, 1, 2)  # [B, C, H, W]
    # NOTE(review): only scaling_factor is undone here; Flux-style VAEs often
    # also define a shift_factor — confirm against this VAE's config.
    x = x / vae.config.scaling_factor
    with torch.no_grad():
        image = vae.decode(x).sample

    # Convert to PIL: [-1, 1] → [0, 255] uint8 HWC
    image = (image / 2 + 0.5).clamp(0, 1)
    image = image[0].permute(1, 2, 0).cpu().float().numpy()
    image = (image * 255).astype("uint8")

    return Image.fromarray(image)
|
| 486 |
+
|
| 487 |
+
# Generate!
prompt = "a photograph of a tiger in natural habitat"
image = generate_image(prompt, seed=42)
image.save("tiger.png")
|
| 490 |
```
|