Dhenenjay committed on
Commit
98d98f5
·
verified ·
1 Parent(s): 7f03507

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +284 -211
app.py CHANGED
@@ -4,9 +4,8 @@ HuggingFace Spaces Deployment
4
 
5
  Features:
6
  - Full resolution processing with seamless tiling
7
- - Multi-step inference for maximum quality
8
  - TIFF output support
9
- - Professional post-processing
10
  """
11
 
12
  import os
@@ -20,7 +19,7 @@ import gradio as gr
20
  from pathlib import Path
21
  import tempfile
22
  import time
23
- from tqdm import tqdm
24
  from huggingface_hub import hf_hub_download
25
 
26
  # ============================================================================
@@ -59,8 +58,7 @@ class SoftPool2d(nn.Module):
59
  return soft_pool2d(x, self.kernel_size, self.stride)
60
 
61
 
62
- # Monkey-patch SoftPool into the expected location
63
- import sys
64
  class SoftPoolModule:
65
  soft_pool2d = staticmethod(soft_pool2d)
66
  SoftPool2d = SoftPool2d
@@ -198,82 +196,66 @@ class ResnetBlocWithAttn(nn.Module):
198
  if with_attn:
199
  self.attn = SelfAttention(dim_out, norm_groups=norm_groups)
200
 
201
- def forward(self, x, time_emb, c, t=0, save_flag=False, file_i=0):
202
  x = self.res_block(x, time_emb, c)
203
  if self.with_attn:
204
- x = self.attn(x, t=t, save_flag=save_flag, file_num=file_i)
205
  return x
206
 
207
 
208
- class ResBlock_normal(nn.Module):
209
- def __init__(self, dim, dim_out, dropout=0, norm_groups=32):
210
- super().__init__()
211
- self.block1 = Block(dim, dim_out, groups=norm_groups)
212
- self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout)
213
- self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
214
-
215
- def forward(self, x):
216
- h = self.block1(x)
217
- h = self.block2(h)
218
- return h + self.res_conv(x)
219
-
220
-
221
  class CPEN(nn.Module):
222
- def __init__(self, inchannel=1):
223
  super(CPEN, self).__init__()
224
- self.pool = SoftPool2d(kernel_size=(2,2), stride=(2,2))
225
- self.E1 = nn.Sequential(nn.Conv2d(inchannel, 64, kernel_size=3, padding=1), Swish())
226
- self.E2 = nn.Sequential(ResBlock_normal(64, 128, dropout=0, norm_groups=16), ResBlock_normal(128, 128, dropout=0, norm_groups=16))
227
- self.E3 = nn.Sequential(ResBlock_normal(128, 256, dropout=0, norm_groups=16), ResBlock_normal(256, 256, dropout=0, norm_groups=16))
228
- self.E4 = nn.Sequential(ResBlock_normal(256, 512, dropout=0, norm_groups=16), ResBlock_normal(512, 512, dropout=0, norm_groups=16))
229
- self.E5 = nn.Sequential(ResBlock_normal(512, 512, dropout=0, norm_groups=16), ResBlock_normal(512, 1024, dropout=0, norm_groups=16))
 
 
 
 
 
230
 
231
  def forward(self, x):
232
- x1 = self.E1(x)
233
- x2 = self.pool(x1)
234
- x2 = self.E2(x2)
235
- x3 = self.pool(x2)
236
- x3 = self.E3(x3)
237
- x4 = self.pool(x3)
238
- x4 = self.E4(x4)
239
- x5 = self.pool(x4)
240
- x5 = self.E5(x5)
241
- return x1, x2, x3, x4, x5
242
 
243
 
244
  class UNet(nn.Module):
245
  def __init__(self, in_channel=6, out_channel=3, inner_channel=32, norm_groups=32,
246
- channel_mults=(1, 2, 4, 8, 8), attn_res=(8), res_blocks=3, dropout=0,
247
  with_noise_level_emb=True, image_size=128, condition_ch=3):
248
  super().__init__()
249
 
250
- if with_noise_level_emb:
251
- noise_level_channel = inner_channel
252
- self.noise_level_mlp = nn.Sequential(
253
- PositionalEncoding(inner_channel),
254
- nn.Linear(inner_channel, inner_channel * 4),
255
- Swish(),
256
- nn.Linear(inner_channel * 4, inner_channel)
257
- )
258
- else:
259
- noise_level_channel = None
260
- self.noise_level_mlp = None
261
-
262
  self.res_blocks = res_blocks
 
 
 
 
 
 
 
 
263
  num_mults = len(channel_mults)
264
- self.num_mults = num_mults
265
  pre_channel = inner_channel
266
  feat_channels = [pre_channel]
267
  now_res = image_size
268
-
269
  downs = [nn.Conv2d(in_channel, inner_channel, kernel_size=3, padding=1)]
270
  for ind in range(num_mults):
271
  is_last = (ind == num_mults - 1)
272
  use_attn = (now_res in attn_res)
273
  channel_mult = inner_channel * channel_mults[ind]
274
  for _ in range(0, res_blocks):
275
- downs.append(ResnetBlocWithAttn(pre_channel, channel_mult, noise_level_emb_dim=noise_level_channel,
276
- norm_groups=norm_groups, dropout=dropout, with_attn=use_attn, size=now_res))
277
  feat_channels.append(channel_mult)
278
  pre_channel = channel_mult
279
  if not is_last:
@@ -283,7 +265,7 @@ class UNet(nn.Module):
283
  self.downs = nn.ModuleList(downs)
284
 
285
  self.mid = nn.ModuleList([
286
- ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
287
  norm_groups=norm_groups, dropout=dropout, with_attn=True, size=now_res),
288
  ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
289
  norm_groups=norm_groups, dropout=dropout, with_attn=False, size=now_res)
@@ -359,25 +341,135 @@ class UNet(nn.Module):
359
 
360
 
361
  # ============================================================================
362
- # E3Diff High-Resolution Inference
363
  # ============================================================================
364
 
365
- class E3DiffHighRes:
366
- def __init__(self, device="cuda"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  self.device = torch.device(device if torch.cuda.is_available() else "cpu")
368
- self.model = None
369
  self.image_size = 256
 
370
 
371
- def load_model(self, weights_path=None):
372
- if weights_path is None:
373
- # Download from HuggingFace
374
- weights_path = hf_hub_download(
375
- repo_id="Dhenenjay/E3Diff-SAR2Optical",
376
- filename="I700000_E719_gen.pth"
377
- )
378
 
379
- # Build UNet
380
- self.model = UNet(
 
 
 
 
 
381
  in_channel=3,
382
  out_channel=3,
383
  norm_groups=16,
@@ -388,88 +480,115 @@ class E3DiffHighRes:
388
  dropout=0,
389
  image_size=self.image_size,
390
  condition_ch=3
391
- ).to(self.device)
392
-
393
- # Load weights
394
- state_dict = torch.load(weights_path, map_location=self.device, weights_only=False)
395
 
396
- # Filter only UNet weights
397
- unet_dict = {k.replace('denoise_fn.', ''): v for k, v in state_dict.items()
398
- if k.startswith('denoise_fn.')}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
- self.model.load_state_dict(unet_dict, strict=False)
401
- self.model.eval()
402
- print(f"Model loaded on {self.device}")
 
 
 
 
 
403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  @torch.no_grad()
405
- def translate_tile(self, tile_tensor, num_steps=1):
406
- """Translate a single 256x256 tile."""
407
- batch_size = tile_tensor.shape[0]
408
-
409
- # Initialize noise
410
- noise = torch.randn(batch_size, 3, self.image_size, self.image_size, device=self.device)
411
-
412
- # DDIM sampling
413
- total_timesteps = 1000
414
- ts = torch.linspace(total_timesteps, 0, num_steps + 1).to(self.device).long()
415
-
416
- # Create beta schedule
417
- betas = torch.linspace(1e-6, 1e-2, total_timesteps, device=self.device)
418
- alphas = 1. - betas
419
- alphas_cumprod = torch.cumprod(alphas, dim=0)
420
- sqrt_alphas_cumprod_prev = torch.sqrt(torch.cat([torch.ones(1, device=self.device), alphas_cumprod]))
421
-
422
- x = noise
423
- for i in range(1, num_steps + 1):
424
- cur_t = ts[i - 1] - 1
425
- prev_t = ts[i] - 1
426
-
427
- noise_level = sqrt_alphas_cumprod_prev[cur_t].repeat(batch_size, 1)
428
-
429
- alpha_prod_t = alphas_cumprod[cur_t]
430
- alpha_prod_t_prev = alphas_cumprod[prev_t] if prev_t >= 0 else torch.tensor(1.0, device=self.device)
431
- beta_prod_t = 1 - alpha_prod_t
432
-
433
- # Model prediction
434
- model_input = torch.cat([tile_tensor, x], dim=1)
435
- model_output = self.model(model_input, noise_level)
436
-
437
- # DDIM update
438
- pred_original = (x - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
439
- pred_original = pred_original.clamp(-1, 1)
440
-
441
- sigma_2 = 0.8 * (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
442
- pred_dir = (1 - alpha_prod_t_prev - sigma_2) ** 0.5 * model_output
443
-
444
- if i < num_steps:
445
- noise = torch.randn_like(x)
446
- x = alpha_prod_t_prev ** 0.5 * pred_original + pred_dir + sigma_2 ** 0.5 * noise
447
- else:
448
- x = pred_original
449
 
450
- return x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
  def create_blend_weights(self, tile_size, overlap):
453
- """Create smooth blending weights for seamless tiling."""
454
- # Linear ramp for overlap regions
455
  ramp = np.linspace(0, 1, overlap)
456
-
457
- # Create 2D weight matrix
458
  weight = np.ones((tile_size, tile_size))
459
-
460
- # Apply ramps to edges
461
- weight[:overlap, :] *= ramp[:, np.newaxis] # Top
462
- weight[-overlap:, :] *= ramp[::-1, np.newaxis] # Bottom
463
- weight[:, :overlap] *= ramp[np.newaxis, :] # Left
464
- weight[:, -overlap:] *= ramp[np.newaxis, ::-1] # Right
465
-
466
  return weight[:, :, np.newaxis]
467
 
468
- def translate_full_resolution(self, image, num_steps=1, overlap=64, progress_callback=None):
469
- """
470
- Translate full resolution image using seamless tiling.
471
- """
472
- # Convert to numpy if PIL
473
  if isinstance(image, Image.Image):
474
  if image.mode != 'RGB':
475
  image = image.convert('RGB')
@@ -478,77 +597,51 @@ class E3DiffHighRes:
478
  img_np = image
479
 
480
  h, w = img_np.shape[:2]
481
- tile_size = self.image_size
482
  step = tile_size - overlap
483
 
484
- # Pad image to ensure full coverage
485
  pad_h = (step - (h - overlap) % step) % step
486
  pad_w = (step - (w - overlap) % step) % step
487
  img_padded = np.pad(img_np, ((0, pad_h), (0, pad_w), (0, 0)), mode='reflect')
488
 
489
  h_pad, w_pad = img_padded.shape[:2]
490
 
491
- # Output arrays
492
  output = np.zeros((h_pad, w_pad, 3), dtype=np.float32)
493
  weights = np.zeros((h_pad, w_pad, 1), dtype=np.float32)
494
-
495
- # Blending weights
496
  blend_weight = self.create_blend_weights(tile_size, overlap)
497
 
498
- # Calculate tile positions
499
  y_positions = list(range(0, h_pad - tile_size + 1, step))
500
  x_positions = list(range(0, w_pad - tile_size + 1, step))
501
  total_tiles = len(y_positions) * len(x_positions)
502
 
503
- print(f"Processing {total_tiles} tiles ({len(x_positions)}x{len(y_positions)})...")
504
 
505
  tile_idx = 0
506
  for y in y_positions:
507
  for x in x_positions:
508
- # Extract tile
509
  tile = img_padded[y:y+tile_size, x:x+tile_size]
 
510
 
511
- # Convert to tensor [-1, 1]
512
- tile_tensor = torch.from_numpy(tile).permute(2, 0, 1).unsqueeze(0)
513
- tile_tensor = tile_tensor * 2.0 - 1.0
514
- tile_tensor = tile_tensor.to(self.device)
515
-
516
- # Translate
517
- result_tensor = self.translate_tile(tile_tensor, num_steps)
518
 
519
- # Convert back to numpy [0, 1]
520
- result = result_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
521
- result = (result + 1.0) / 2.0
522
- result = np.clip(result, 0, 1)
523
-
524
- # Add to output with blending
525
  output[y:y+tile_size, x:x+tile_size] += result * blend_weight
526
  weights[y:y+tile_size, x:x+tile_size] += blend_weight
527
 
528
  tile_idx += 1
529
- if progress_callback:
530
- progress_callback(tile_idx / total_tiles)
531
 
532
- # Normalize by weights
533
  output = output / (weights + 1e-8)
534
-
535
- # Crop to original size
536
  output = output[:h, :w]
537
 
538
- return output
539
 
540
- def enhance_output(self, image, contrast=1.1, sharpness=1.15, color=1.1):
541
- """Apply professional post-processing."""
542
  if isinstance(image, np.ndarray):
543
- image = Image.fromarray((image * 255).astype(np.uint8))
544
-
545
- # Contrast
546
  image = ImageEnhance.Contrast(image).enhance(contrast)
547
- # Sharpness
548
  image = ImageEnhance.Sharpness(image).enhance(sharpness)
549
- # Color saturation
550
  image = ImageEnhance.Color(image).enhance(color)
551
-
552
  return image
553
 
554
 
@@ -556,7 +649,7 @@ class E3DiffHighRes:
556
  # Gradio Interface
557
  # ============================================================================
558
 
559
- model = None
560
 
561
  def load_sar_image(filepath):
562
  """Load SAR image from various formats."""
@@ -581,58 +674,44 @@ def load_sar_image(filepath):
581
  return Image.open(filepath).convert('RGB')
582
 
583
 
584
- def translate_sar(file, num_steps, overlap, enhance):
585
  """Main translation function."""
586
- global model
587
 
588
  if file is None:
589
  return None, None, "Please upload a SAR image"
590
 
591
- if model is None:
592
- print("Loading model...")
593
- model = E3DiffHighRes()
594
- model.load_model()
595
 
596
- print("Processing image...")
597
 
598
- # Handle file upload - get the filepath
599
  filepath = file.name if hasattr(file, 'name') else file
600
  image = load_sar_image(filepath)
601
 
602
  w, h = image.size
603
  print(f"Input size: {w}x{h}")
604
 
605
- # Translate
606
  start = time.time()
607
- result = model.translate_full_resolution(
608
- image,
609
- num_steps=num_steps,
610
- overlap=overlap,
611
- progress_callback=None
612
- )
613
  elapsed = time.time() - start
614
 
615
- print("Post-processing...")
616
-
617
- # Convert to PIL
618
- result_pil = Image.fromarray((result * 255).astype(np.uint8))
619
 
620
- # Enhance if requested
621
- if enhance:
622
- result_pil = model.enhance_output(result_pil)
623
 
624
- # Save as TIFF
625
  tiff_path = tempfile.mktemp(suffix='.tiff')
626
  result_pil.save(tiff_path, format='TIFF', compression='lzw')
627
 
628
- print("Complete!")
629
 
630
  info = f"Processed in {elapsed:.1f}s | Output: {result_pil.size[0]}x{result_pil.size[1]}"
631
 
632
  return result_pil, tiff_path, info
633
 
634
 
635
- # Create Gradio interface
636
  with gr.Blocks(title="E3Diff: SAR-to-Optical Translation") as demo:
637
  gr.Markdown("""
638
  # 🛰️ E3Diff: High-Resolution SAR-to-Optical Translation
@@ -641,7 +720,6 @@ with gr.Blocks(title="E3Diff: SAR-to-Optical Translation") as demo:
641
 
642
  - Supports full resolution processing with seamless tiling
643
  - Multiple quality levels (1-8 inference steps)
644
- - Professional post-processing
645
  - TIFF output for commercial use
646
  """)
647
 
@@ -650,16 +728,16 @@ with gr.Blocks(title="E3Diff: SAR-to-Optical Translation") as demo:
650
  input_file = gr.File(label="SAR Input (TIFF, PNG, JPG supported)", file_types=[".tif", ".tiff", ".png", ".jpg", ".jpeg"])
651
 
652
  with gr.Row():
653
- num_steps = gr.Slider(1, 8, value=1, step=1, label="Quality Steps (1=fast, 4-8=high quality)")
654
- overlap = gr.Slider(16, 128, value=64, step=16, label="Tile Overlap (higher=smoother)")
655
 
656
- enhance = gr.Checkbox(value=True, label="Apply post-processing enhancement")
657
 
658
  submit_btn = gr.Button("🚀 Translate to Optical", variant="primary")
659
 
660
  with gr.Column():
661
  output_image = gr.Image(label="Optical Output")
662
- output_file = gr.File(label="Download TIFF (full resolution)")
663
  info_text = gr.Textbox(label="Processing Info")
664
 
665
  submit_btn.click(
@@ -670,12 +748,7 @@ with gr.Blocks(title="E3Diff: SAR-to-Optical Translation") as demo:
670
 
671
  gr.Markdown("""
672
  ---
673
- **Tips for best results:**
674
- - For aerial/satellite SAR: Use steps=1-2 for speed, steps=4-8 for quality
675
- - For noisy SAR: Apply speckle filtering first (Lee or PPB filter)
676
- - The model works best with Sentinel-1 style imagery
677
-
678
- **Citation:** Qin et al., "Efficient End-to-End Diffusion Model for One-step SAR-to-Optical Translation", IEEE GRSL 2024
679
  """)
680
 
681
 
 
4
 
5
  Features:
6
  - Full resolution processing with seamless tiling
7
+ - Proper diffusion sampling (matching local inference)
8
  - TIFF output support
 
9
  """
10
 
11
  import os
 
19
  from pathlib import Path
20
  import tempfile
21
  import time
22
+ from functools import partial
23
  from huggingface_hub import hf_hub_download
24
 
25
  # ============================================================================
 
58
  return soft_pool2d(x, self.kernel_size, self.stride)
59
 
60
 
61
+ # Monkey-patch SoftPool
 
62
  class SoftPoolModule:
63
  soft_pool2d = staticmethod(soft_pool2d)
64
  SoftPool2d = SoftPool2d
 
196
  if with_attn:
197
  self.attn = SelfAttention(dim_out, norm_groups=norm_groups)
198
 
199
+ def forward(self, x, time_emb, c):
200
  x = self.res_block(x, time_emb, c)
201
  if self.with_attn:
202
+ x = self.attn(x, time_emb)
203
  return x
204
 
205
 
206
+ # CPEN Condition Encoder
 
 
 
 
 
 
 
 
 
 
 
 
207
  class CPEN(nn.Module):
208
+ def __init__(self, inchannel=3):
209
  super(CPEN, self).__init__()
210
+ from SoftPool import SoftPool2d
211
+
212
+ self.conv1 = nn.Conv2d(inchannel, 64, 3, 1, 1)
213
+ self.pool1 = SoftPool2d(kernel_size=(2, 2), stride=(2, 2))
214
+ self.conv2 = nn.Conv2d(64, 128, 3, 1, 1)
215
+ self.pool2 = SoftPool2d(kernel_size=(2, 2), stride=(2, 2))
216
+ self.conv3 = nn.Conv2d(128, 256, 3, 1, 1)
217
+ self.pool3 = SoftPool2d(kernel_size=(2, 2), stride=(2, 2))
218
+ self.conv4 = nn.Conv2d(256, 512, 3, 1, 1)
219
+ self.pool4 = SoftPool2d(kernel_size=(2, 2), stride=(2, 2))
220
+ self.conv5 = nn.Conv2d(512, 1024, 3, 1, 1)
221
 
222
  def forward(self, x):
223
+ c1 = self.pool1(F.leaky_relu(self.conv1(x)))
224
+ c2 = self.pool2(F.leaky_relu(self.conv2(c1)))
225
+ c3 = self.pool3(F.leaky_relu(self.conv3(c2)))
226
+ c4 = self.pool4(F.leaky_relu(self.conv4(c3)))
227
+ c5 = F.leaky_relu(self.conv5(c4))
228
+ return c1, c2, c3, c4, c5
 
 
 
 
229
 
230
 
231
  class UNet(nn.Module):
232
  def __init__(self, in_channel=6, out_channel=3, inner_channel=32, norm_groups=32,
233
+ channel_mults=(1, 2, 4, 8, 8), attn_res=(8,), res_blocks=3, dropout=0,
234
  with_noise_level_emb=True, image_size=128, condition_ch=3):
235
  super().__init__()
236
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  self.res_blocks = res_blocks
238
+ noise_level_channel = inner_channel
239
+ self.noise_level_mlp = nn.Sequential(
240
+ PositionalEncoding(inner_channel),
241
+ nn.Linear(inner_channel, inner_channel * 4),
242
+ Swish(),
243
+ nn.Linear(inner_channel * 4, inner_channel)
244
+ ) if with_noise_level_emb else None
245
+
246
  num_mults = len(channel_mults)
 
247
  pre_channel = inner_channel
248
  feat_channels = [pre_channel]
249
  now_res = image_size
250
+
251
  downs = [nn.Conv2d(in_channel, inner_channel, kernel_size=3, padding=1)]
252
  for ind in range(num_mults):
253
  is_last = (ind == num_mults - 1)
254
  use_attn = (now_res in attn_res)
255
  channel_mult = inner_channel * channel_mults[ind]
256
  for _ in range(0, res_blocks):
257
+ downs.append(ResnetBlocWithAttn(pre_channel, channel_mult, noise_level_emb_dim=noise_level_channel,
258
+ norm_groups=norm_groups, dropout=dropout, with_attn=use_attn, size=now_res))
259
  feat_channels.append(channel_mult)
260
  pre_channel = channel_mult
261
  if not is_last:
 
265
  self.downs = nn.ModuleList(downs)
266
 
267
  self.mid = nn.ModuleList([
268
+ ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
269
  norm_groups=norm_groups, dropout=dropout, with_attn=True, size=now_res),
270
  ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
271
  norm_groups=norm_groups, dropout=dropout, with_attn=False, size=now_res)
 
341
 
342
 
343
  # ============================================================================
344
+ # GaussianDiffusion - Proper DDIM Sampling
345
  # ============================================================================
346
 
347
+ def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2):
348
+ if schedule == 'linear':
349
+ betas = np.linspace(linear_start, linear_end, n_timestep, dtype=np.float64)
350
+ else:
351
+ raise NotImplementedError(schedule)
352
+ return betas
353
+
354
+
355
+ class GaussianDiffusion(nn.Module):
356
+ def __init__(self, denoise_fn, image_size, channels=3, schedule_opt=None, opt=None):
357
+ super().__init__()
358
+ self.channels = channels
359
+ self.image_size = image_size
360
+ self.denoise_fn = denoise_fn
361
+ self.opt = opt
362
+ self.ddim = schedule_opt.get('ddim', 1) if schedule_opt else 1
363
+
364
+ def set_new_noise_schedule(self, schedule_opt, device, num_train_timesteps=1000):
365
+ self.ddim = schedule_opt['ddim']
366
+ self.num_train_timesteps = num_train_timesteps
367
+ to_torch = partial(torch.tensor, dtype=torch.float32, device=device)
368
+
369
+ betas = make_beta_schedule(
370
+ schedule=schedule_opt['schedule'],
371
+ n_timestep=num_train_timesteps,
372
+ linear_start=schedule_opt['linear_start'],
373
+ linear_end=schedule_opt['linear_end']
374
+ )
375
+
376
+ alphas = 1. - betas
377
+ alphas_cumprod = np.cumprod(alphas, axis=0)
378
+ self.sqrt_alphas_cumprod_prev = np.sqrt(np.append(1., alphas_cumprod))
379
+
380
+ self.num_timesteps = int(betas.shape[0])
381
+ self.register_buffer('betas', to_torch(betas))
382
+ self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
383
+
384
+ self.ddim_num_steps = schedule_opt['n_timestep']
385
+ print(f'DDIM sampling steps: {self.ddim_num_steps}')
386
+
387
+ def ddim_sample(self, condition_x, img_or_shape, device, seed=1):
388
+ """DDIM sampling - matches the original E3Diff implementation."""
389
+ eta = 0.8 # ddim_sampling_eta for linear schedule
390
+
391
+ batch = img_or_shape[0]
392
+ total_timesteps = self.num_train_timesteps
393
+ sampling_timesteps = self.ddim_num_steps
394
+
395
+ ts = torch.linspace(total_timesteps, 0, sampling_timesteps + 1).to(device).long()
396
+ x = torch.randn(img_or_shape, device=device)
397
+ batch_size = x.shape[0]
398
+
399
+ imgs = [x]
400
+ img_onestep = [condition_x[:, :self.channels, ...]]
401
+
402
+ for i in range(1, sampling_timesteps + 1):
403
+ cur_t = ts[i - 1] - 1
404
+ prev_t = ts[i] - 1
405
+
406
+ noise_level = torch.FloatTensor(
407
+ [self.sqrt_alphas_cumprod_prev[cur_t.item()]]
408
+ ).repeat(batch_size, 1).to(device)
409
+
410
+ alpha_prod_t = self.alphas_cumprod[cur_t]
411
+ alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else torch.tensor(1.0, device=device)
412
+ beta_prod_t = 1 - alpha_prod_t
413
+
414
+ # Model prediction
415
+ model_output = self.denoise_fn(torch.cat([condition_x, x], dim=1), noise_level)
416
+
417
+ # Compute sigma
418
+ sigma_2 = eta * (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
419
+ noise = torch.randn_like(x)
420
+
421
+ # Predict original sample
422
+ pred_original_sample = (x - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
423
+ pred_original_sample = pred_original_sample.clamp(-1, 1)
424
+
425
+ pred_sample_direction = (1 - alpha_prod_t_prev - sigma_2) ** 0.5 * model_output
426
+
427
+ x = alpha_prod_t_prev ** 0.5 * pred_original_sample + pred_sample_direction + sigma_2 ** 0.5 * noise
428
+
429
+ imgs.append(x)
430
+ img_onestep.append(pred_original_sample)
431
+
432
+ imgs = torch.cat(imgs, dim=0)
433
+ img_onestep = torch.cat(img_onestep, dim=0)
434
+
435
+ return imgs, img_onestep
436
+
437
+ @torch.no_grad()
438
+ def super_resolution(self, x_in, continous=False, seed=1, img_s1=None):
439
+ """Main inference method."""
440
+ device = self.betas.device
441
+ x = x_in
442
+ shape = (x.shape[0], self.channels, x.shape[-2], x.shape[-1])
443
+
444
+ self.ddim_num_steps = self.opt['ddim_steps']
445
+ ret_img, img_onestep = self.ddim_sample(condition_x=x, img_or_shape=shape, device=device, seed=seed)
446
+
447
+ if continous:
448
+ return ret_img, img_onestep
449
+ else:
450
+ return ret_img[-x_in.shape[0]:], img_onestep
451
+
452
+
453
+ # ============================================================================
454
+ # E3Diff Inference Class
455
+ # ============================================================================
456
+
457
+ class E3DiffInference:
458
+ def __init__(self, weights_path=None, device="cuda", num_inference_steps=1):
459
  self.device = torch.device(device if torch.cuda.is_available() else "cpu")
 
460
  self.image_size = 256
461
+ self.num_inference_steps = num_inference_steps
462
 
463
+ print(f"[E3Diff] Initializing on device: {self.device}")
464
+ print(f"[E3Diff] Inference steps: {num_inference_steps}")
 
 
 
 
 
465
 
466
+ self.model = self._build_model()
467
+ self._load_weights(weights_path)
468
+ self.model.eval()
469
+ print("[E3Diff] Model ready!")
470
+
471
+ def _build_model(self):
472
+ unet = UNet(
473
  in_channel=3,
474
  out_channel=3,
475
  norm_groups=16,
 
480
  dropout=0,
481
  image_size=self.image_size,
482
  condition_ch=3
483
+ )
 
 
 
484
 
485
+ schedule_opt = {
486
+ 'schedule': 'linear',
487
+ 'n_timestep': self.num_inference_steps,
488
+ 'linear_start': 1e-6,
489
+ 'linear_end': 1e-2,
490
+ 'ddim': 1,
491
+ 'lq_noiselevel': 0
492
+ }
493
+
494
+ opt = {
495
+ 'stage': 2,
496
+ 'ddim_steps': self.num_inference_steps,
497
+ }
498
+
499
+ model = GaussianDiffusion(
500
+ denoise_fn=unet,
501
+ image_size=self.image_size,
502
+ channels=3,
503
+ schedule_opt=schedule_opt,
504
+ opt=opt
505
+ )
506
 
507
+ return model.to(self.device)
508
+
509
+ def _load_weights(self, weights_path):
510
+ if weights_path is None:
511
+ weights_path = hf_hub_download(
512
+ repo_id="Dhenenjay/E3Diff-SAR2Optical",
513
+ filename="I700000_E719_gen.pth"
514
+ )
515
 
516
+ print(f"[E3Diff] Loading weights from: {weights_path}")
517
+ state_dict = torch.load(weights_path, map_location=self.device, weights_only=False)
518
+ self.model.load_state_dict(state_dict, strict=False)
519
+ print("[E3Diff] Weights loaded!")
520
+
521
+ def preprocess(self, image):
522
+ if image.mode != 'RGB':
523
+ image = image.convert('RGB')
524
+ if image.size != (self.image_size, self.image_size):
525
+ image = image.resize((self.image_size, self.image_size), Image.LANCZOS)
526
+
527
+ img_np = np.array(image).astype(np.float32) / 255.0
528
+ img_tensor = torch.from_numpy(img_np).permute(2, 0, 1)
529
+ img_tensor = img_tensor * 2.0 - 1.0
530
+ return img_tensor.unsqueeze(0).to(self.device)
531
+
532
+ def postprocess(self, tensor):
533
+ tensor = tensor.squeeze(0).cpu()
534
+ tensor = torch.clamp(tensor, -1, 1)
535
+ tensor = (tensor + 1.0) / 2.0
536
+ img_np = (tensor.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
537
+ return Image.fromarray(img_np)
538
+
539
  @torch.no_grad()
540
+ def translate(self, sar_image, seed=42):
541
+ if seed is not None:
542
+ torch.manual_seed(seed)
543
+ np.random.seed(seed)
544
+
545
+ sar_tensor = self.preprocess(sar_image)
546
+
547
+ self.model.set_new_noise_schedule(
548
+ {
549
+ 'schedule': 'linear',
550
+ 'n_timestep': self.num_inference_steps,
551
+ 'linear_start': 1e-6,
552
+ 'linear_end': 1e-2,
553
+ 'ddim': 1,
554
+ 'lq_noiselevel': 0
555
+ },
556
+ self.device,
557
+ num_train_timesteps=1000
558
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
+ output, _ = self.model.super_resolution(sar_tensor, continous=False, seed=seed, img_s1=sar_tensor)
561
+ return self.postprocess(output)
562
+
563
+
564
+ # ============================================================================
565
+ # High-Resolution Processor
566
+ # ============================================================================
567
+
568
+ class HighResProcessor:
569
+ def __init__(self, device="cuda"):
570
+ self.device = device
571
+ self.model = None
572
+ self.tile_size = 256
573
+
574
+ def load_model(self, num_steps=1):
575
+ print("Loading E3Diff model...")
576
+ self.model = E3DiffInference(device=self.device, num_inference_steps=num_steps)
577
+ self.num_steps = num_steps
578
 
579
  def create_blend_weights(self, tile_size, overlap):
 
 
580
  ramp = np.linspace(0, 1, overlap)
 
 
581
  weight = np.ones((tile_size, tile_size))
582
+ weight[:overlap, :] *= ramp[:, np.newaxis]
583
+ weight[-overlap:, :] *= ramp[::-1, np.newaxis]
584
+ weight[:, :overlap] *= ramp[np.newaxis, :]
585
+ weight[:, -overlap:] *= ramp[np.newaxis, ::-1]
 
 
 
586
  return weight[:, :, np.newaxis]
587
 
588
+ def process(self, image, overlap=64, num_steps=1):
589
+ if self.model is None or self.num_steps != num_steps:
590
+ self.load_model(num_steps)
591
+
 
592
  if isinstance(image, Image.Image):
593
  if image.mode != 'RGB':
594
  image = image.convert('RGB')
 
597
  img_np = image
598
 
599
  h, w = img_np.shape[:2]
600
+ tile_size = self.tile_size
601
  step = tile_size - overlap
602
 
 
603
  pad_h = (step - (h - overlap) % step) % step
604
  pad_w = (step - (w - overlap) % step) % step
605
  img_padded = np.pad(img_np, ((0, pad_h), (0, pad_w), (0, 0)), mode='reflect')
606
 
607
  h_pad, w_pad = img_padded.shape[:2]
608
 
 
609
  output = np.zeros((h_pad, w_pad, 3), dtype=np.float32)
610
  weights = np.zeros((h_pad, w_pad, 1), dtype=np.float32)
 
 
611
  blend_weight = self.create_blend_weights(tile_size, overlap)
612
 
 
613
  y_positions = list(range(0, h_pad - tile_size + 1, step))
614
  x_positions = list(range(0, w_pad - tile_size + 1, step))
615
  total_tiles = len(y_positions) * len(x_positions)
616
 
617
+ print(f"Processing {total_tiles} tiles at {w}x{h}...")
618
 
619
  tile_idx = 0
620
  for y in y_positions:
621
  for x in x_positions:
 
622
  tile = img_padded[y:y+tile_size, x:x+tile_size]
623
+ tile_pil = Image.fromarray((tile * 255).astype(np.uint8))
624
 
625
+ result_pil = self.model.translate(tile_pil, seed=42)
626
+ result = np.array(result_pil).astype(np.float32) / 255.0
 
 
 
 
 
627
 
 
 
 
 
 
 
628
  output[y:y+tile_size, x:x+tile_size] += result * blend_weight
629
  weights[y:y+tile_size, x:x+tile_size] += blend_weight
630
 
631
  tile_idx += 1
632
+ print(f" Tile {tile_idx}/{total_tiles}")
 
633
 
 
634
  output = output / (weights + 1e-8)
 
 
635
  output = output[:h, :w]
636
 
637
+ return (output * 255).astype(np.uint8)
638
 
639
+ def enhance(self, image, contrast=1.1, sharpness=1.15, color=1.1):
 
640
  if isinstance(image, np.ndarray):
641
+ image = Image.fromarray(image)
 
 
642
  image = ImageEnhance.Contrast(image).enhance(contrast)
 
643
  image = ImageEnhance.Sharpness(image).enhance(sharpness)
 
644
  image = ImageEnhance.Color(image).enhance(color)
 
645
  return image
646
 
647
 
 
649
  # Gradio Interface
650
  # ============================================================================
651
 
652
+ processor = None
653
 
654
  def load_sar_image(filepath):
655
  """Load SAR image from various formats."""
 
674
  return Image.open(filepath).convert('RGB')
675
 
676
 
677
+ def translate_sar(file, num_steps, overlap, enhance_output):
678
  """Main translation function."""
679
+ global processor
680
 
681
  if file is None:
682
  return None, None, "Please upload a SAR image"
683
 
684
+ if processor is None:
685
+ processor = HighResProcessor()
 
 
686
 
687
+ print("Processing SAR image...")
688
 
 
689
  filepath = file.name if hasattr(file, 'name') else file
690
  image = load_sar_image(filepath)
691
 
692
  w, h = image.size
693
  print(f"Input size: {w}x{h}")
694
 
 
695
  start = time.time()
696
+ result = processor.process(image, overlap=int(overlap), num_steps=int(num_steps))
 
 
 
 
 
697
  elapsed = time.time() - start
698
 
699
+ result_pil = Image.fromarray(result)
 
 
 
700
 
701
+ if enhance_output:
702
+ result_pil = processor.enhance(result_pil)
 
703
 
 
704
  tiff_path = tempfile.mktemp(suffix='.tiff')
705
  result_pil.save(tiff_path, format='TIFF', compression='lzw')
706
 
707
+ print(f"Complete in {elapsed:.1f}s!")
708
 
709
  info = f"Processed in {elapsed:.1f}s | Output: {result_pil.size[0]}x{result_pil.size[1]}"
710
 
711
  return result_pil, tiff_path, info
712
 
713
 
714
+ # Create interface
715
  with gr.Blocks(title="E3Diff: SAR-to-Optical Translation") as demo:
716
  gr.Markdown("""
717
  # 🛰️ E3Diff: High-Resolution SAR-to-Optical Translation
 
720
 
721
  - Supports full resolution processing with seamless tiling
722
  - Multiple quality levels (1-8 inference steps)
 
723
  - TIFF output for commercial use
724
  """)
725
 
 
728
  input_file = gr.File(label="SAR Input (TIFF, PNG, JPG supported)", file_types=[".tif", ".tiff", ".png", ".jpg", ".jpeg"])
729
 
730
  with gr.Row():
731
+ num_steps = gr.Slider(1, 8, value=1, step=1, label="Quality Steps (1=fast, 8=best)")
732
+ overlap = gr.Slider(16, 128, value=64, step=16, label="Tile Overlap")
733
 
734
+ enhance = gr.Checkbox(value=True, label="Apply enhancement")
735
 
736
  submit_btn = gr.Button("🚀 Translate to Optical", variant="primary")
737
 
738
  with gr.Column():
739
  output_image = gr.Image(label="Optical Output")
740
+ output_file = gr.File(label="Download TIFF")
741
  info_text = gr.Textbox(label="Processing Info")
742
 
743
  submit_btn.click(
 
748
 
749
  gr.Markdown("""
750
  ---
751
+ **Tips:** The model works best with Sentinel-1 style SAR imagery. Use steps=1 for speed, steps=4-8 for quality.
 
 
 
 
 
752
  """)
753
 
754