mutou0308
/

GSASR

Model card Files Files and versions

xet

Community

mutou0308 committed on Jun 21, 2025

Commit

bc3e084

verified ·

1 Parent(s): 3652801

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -359

app.py DELETED Viewed

@@ -1,359 +0,0 @@
-import torch
-import numpy as np
-import gradio as gr
-from PIL import Image
-import math
-import torch.nn.functional as F
-import os
-import tempfile
-import time
-import threading
-from utils.hatropeamp import HATNOUP_ROPE_AMP
-from utils.fea2gsropeamp import Fea2GS_ROPE_AMP
-from utils.edsrbaseline import EDSRNOUP
-from utils.hatropeamp import HATNOUP_ROPE_AMP
-from utils.rdn import RDNNOUP
-from utils.swinir import SwinIRNOUP
-from utils.fea2gsropeamp import Fea2GS_ROPE_AMP
-from utils.gaussian_splatting import generate_2D_gaussian_splatting_step
-from utils.split_and_joint_image import split_and_joint_image
-from huggingface_hub import hf_hub_download
-import subprocess
-import sys
-import spaces
-# Device setup: use CUDA when torch reports it as available, otherwise the CPU.
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# Global stop flag for interrupting inference. It is written by
-# set_stop_flag()/reset_stop_flag() and read by check_stop_flag().
-stop_inference = False
-# Lock held by those helpers while they read or write stop_inference.
-inference_lock = threading.Lock()
-def load_model(
-    pretrained_model_name_or_path: str = "mutou0308/GSASR",
-    model_name: str = "HATL_SA1B",
-    device: str | torch.device = "cuda"
-):
-    """Build a GSASR encoder/decoder pair and load its pretrained weights.
-
-    Args:
-        pretrained_model_name_or_path: Hugging Face Hub repo id that hosts
-            the checkpoints.
-        model_name: Model variant to build; it is also the top-level folder
-            of the checkpoint files inside the repo.
-        device: Device both networks are moved to before they are returned.
-
-    Returns:
-        ``(encoder, decoder)``, both switched to eval mode and moved to
-        ``device``.
-
-    Raises:
-        ValueError: If ``model_name`` is not one of the supported variants.
-    """
-    # Choose the architecture first so an unsupported name fails fast,
-    # before any checkpoint is downloaded.
-    if model_name in ['EDSR_DIV2K', 'EDSR_DF2K']:
-        encoder = EDSRNOUP()
-        decoder = Fea2GS_ROPE_AMP()
-    elif model_name in ['RDN_DIV2K', 'RDN_DF2K']:
-        encoder = RDNNOUP()
-        decoder = Fea2GS_ROPE_AMP(num_crossattn_blocks=2)
-    elif model_name in ['SwinIR_DIV2K', 'SwinIR_DF2K']:
-        encoder = SwinIRNOUP()
-        decoder = Fea2GS_ROPE_AMP(num_crossattn_blocks=2, num_crossattn_layers=4, num_gs_seed=256, window_size=16)
-    elif model_name in ['HATL_SA1B']:
-        encoder = HATNOUP_ROPE_AMP()
-        decoder = Fea2GS_ROPE_AMP(channel=192, num_crossattn_blocks=4, num_crossattn_layers=4, num_selfattn_blocks=8, num_selfattn_layers=6,
-                                  num_gs_seed=256, window_size=16)
-    else:
-        raise ValueError(f"args.model-{model_name} must be in ['EDSR_DIV2K', 'EDSR_DF2K', 'RDN_DIV2K', 'RDN_DF2K', 'SwinIR_DIV2K', 'SwinIR_DF2K', 'HATL_SA1B']")
-    # Paths inside a Hub repo use forward slashes, so build them explicitly
-    # rather than with os.path.join, whose separator depends on the OS.
-    weight_dir = f"{model_name}/GSASR_enhenced_ultra"
-    enc_path = hf_hub_download(
-        repo_id=pretrained_model_name_or_path, filename=f"{weight_dir}/encoder.pth"
-    )
-    dec_path = hf_hub_download(
-        repo_id=pretrained_model_name_or_path, filename=f"{weight_dir}/decoder.pth"
-    )
-    # Deserialize onto the CPU so checkpoints saved from a GPU can be read on
-    # hosts without CUDA; the modules are moved to `device` afterwards.
-    enc_weight = torch.load(enc_path, map_location='cpu', weights_only=True)['params_ema']
-    dec_weight = torch.load(dec_path, map_location='cpu', weights_only=True)['params_ema']
-    encoder.load_state_dict(enc_weight, strict=True)
-    decoder.load_state_dict(dec_weight, strict=True)
-    encoder.eval()
-    decoder.eval()
-    encoder = encoder.to(device)
-    decoder = decoder.to(device)
-    return encoder, decoder
-def preprocess(x, denominator=16):
-    """Pad an image batch so height and width are multiples of ``denominator``.
-
-    Padding is appended to the bottom and right edges only, so the original
-    pixels stay in the top-left corner of the result.
-
-    Args:
-        x: Tensor with four dimensions; the last two are unpacked as height
-            and width.
-        denominator: Value the padded height and width must be divisible by.
-
-    Returns:
-        The padded tensor (same content as ``x`` when no padding is needed).
-    """
-    _, _, h, w = x.shape
-    # (-n) % d is the distance from n up to the next multiple of d (0 if
-    # n is already a multiple).
-    pad_h = (-h) % denominator
-    pad_w = (-w) % denominator
-    # Reflection padding requires the pad to be smaller than the padded
-    # dimension; for very small inputs fall back to edge replication
-    # instead of raising.
-    mode = 'reflect' if pad_h < h and pad_w < w else 'replicate'
-    return F.pad(x, (0, pad_w, 0, pad_h), mode)
-def postprocess(x, gt_size_h, gt_size_w):
-    """Crop axes 2 and 3 of ``x`` to at most ``gt_size_h`` x ``gt_size_w``.
-
-    The crop keeps the leading rows and columns; the first two axes are
-    returned in full.
-    """
-    rows = slice(None, gt_size_h)
-    cols = slice(None, gt_size_w)
-    return x[:, :, rows, cols]
-def should_use_tile(image_height, image_width, threshold=1024):
-    """Return whether the longer image side is strictly above ``threshold``."""
-    longest_side = image_height if image_height >= image_width else image_width
-    return longest_side > threshold
-def set_stop_flag():
-    """Raise the global stop flag and return the UI updates for a stop click.
-
-    Returns:
-        A ``(status message, gr.update(...))`` pair; the update marks its
-        target component as non-interactive.
-    """
-    global stop_inference
-    with inference_lock:
-        stop_inference = True
-    status_message = "🛑 Stopping inference..."
-    button_update = gr.update(interactive=False)
-    return status_message, button_update
-def reset_stop_flag():
-    """Reset the global stop flag to False while holding ``inference_lock``."""
-    global stop_inference
-    with inference_lock:
-        stop_inference = False
-def check_stop_flag():
-    """Return the current value of the global stop flag, read under the lock."""
-    with inference_lock:
-        stop_requested = stop_inference
-    return stop_requested
-@spaces.GPU
-def super_resolution_inference(image, scale=4.0):
-    """Upscale ``image`` by ``scale`` with the HATL_SA1B GSASR model.
-
-    On the first call the bundled ``gscuda`` wheel is pip-installed and a
-    marker file is written so the install is skipped afterwards. Inputs for
-    which ``should_use_tile`` is true are processed tile by tile; all other
-    inputs are padded to a multiple of 16 and processed in a single pass.
-    The global stop flag is polled between stages.
-
-    Args:
-        image: PIL image to upscale, or ``None``.
-        scale: Magnification factor applied to both height and width.
-
-    Returns:
-        ``(output_pil, message, output_pil)`` on success, otherwise
-        ``(None, message, None)`` when setup failed, no image was given,
-        a stop was requested, or processing raised an exception.
-    """
-    # Check if gscuda setup has been run
-    setup_marker = ".setup_complete"
-    if not os.path.exists(setup_marker):
-        print("First run detected, installing dependencies...")
-        try:
-            # Invoke pip through the running interpreter so the wheel lands
-            # in the environment this process actually imports from.
-            subprocess.check_call([sys.executable, "-m", "pip", "install", "dist/gscuda-0.0.0-cp310-cp310-linux_x86_64.whl"])
-            # Create marker file to indicate setup is complete
-            with open(setup_marker, "w") as f:
-                f.write("Setup completed")
-            print("Setup completed successfully!")
-        except subprocess.CalledProcessError as e:
-            return None, f"❌ Setup failed with error: {e}", None
-    if image is None:
-        return None, "Please upload an image", None
-    # Clear any stale stop request *before* the slow model load, so a stop
-    # issued while the model is loading is still seen by the checks below.
-    reset_stop_flag()
-    # Load the model onto the same device the input tensor is sent to.
-    encoder, decoder = load_model(model_name="HATL_SA1B", device=device)
-    # Fixed parameters
-    tile_overlap = 16  # Fixed overlap size
-    crop_size = 8     # Fixed crop size
-    tile_size = 1024   # Fixed tile size for large images
-    try:
-        # Check for interruption
-        if check_stop_flag():
-            return None, "❌ Inference interrupted", None
-        # Convert PIL image to numpy array. Normalising to RGB first lets
-        # grayscale and palette images through as three-channel arrays.
-        if isinstance(image, Image.Image):
-            image = image.convert("RGB")
-        img_np = np.array(image)
-        if img_np.ndim == 3:
-            img_np = img_np[:, :, [2, 1, 0]]  # RGB to BGR
-        # Convert to a 1 x C x H x W float tensor scaled by 1/255
-        img = torch.from_numpy(np.transpose(img_np.astype(np.float32) / 255., (2, 0, 1))).float()
-        img = img.unsqueeze(0).to(device)
-        # Check for interruption
-        if check_stop_flag():
-            return None, "❌ Inference interrupted", None
-        # Calculate target size
-        gt_size = [math.floor(scale * img.shape[2]), math.floor(scale * img.shape[3])]
-        # Determine if tile processing should be used
-        use_tile = should_use_tile(img.shape[2], img.shape[3])
-        # Force AMP mixed precision on whichever device type is in use.
-        # inference_mode already disables gradient tracking, so no extra
-        # no_grad context is needed inside it.
-        with torch.inference_mode(), torch.amp.autocast(device_type=device.type, dtype=torch.bfloat16):
-            # Check for interruption before main processing
-            if check_stop_flag():
-                return None, "❌ Inference interrupted", None
-            if use_tile:
-                # Use tile processing
-                assert tile_size % 16 == 0, f"tile_size-{tile_size} must be divisible by 16"
-                assert 2 * tile_overlap < tile_size, f"2 * tile_overlap must be less than tile_size"
-                assert 2 * crop_size <= tile_overlap, f"2 * crop_size must be less than or equal to tile_overlap"
-                output = split_and_joint_image(
-                    lq=img,
-                    scale_factor=scale,
-                    split_size=tile_size,
-                    overlap_size=tile_overlap,
-                    model_g=encoder,
-                    model_fea2gs=decoder,
-                    crop_size=crop_size,
-                    scale_modify=torch.tensor([scale, scale]),
-                    default_step_size=1.2,
-                    cuda_rendering=True,
-                    mode='scale_modify',
-                    if_dmax=True,
-                    dmax_mode='fix',
-                    dmax=0.1
-                )
-            else:
-                # Direct processing without tiles
-                lq_pad = preprocess(img, 16)  # denominator=16 for HATL
-                gt_size_pad = torch.tensor([math.floor(scale * lq_pad.shape[2]),
-                                            math.floor(scale * lq_pad.shape[3])])
-                gt_size_pad = gt_size_pad.unsqueeze(0)
-                # Check for interruption before encoder
-                if check_stop_flag():
-                    return None, "❌ Inference interrupted", None
-                # Encoder output
-                encoder_output = encoder(lq_pad)  # b,c,h,w
-                # Check for interruption before decoder
-                if check_stop_flag():
-                    return None, "❌ Inference interrupted", None
-                scale_vector = torch.tensor(scale, dtype=torch.float32).unsqueeze(0).to(device)
-                # Decoder output
-                batch_gs_parameters = decoder(encoder_output, scale_vector)
-                gs_parameters = batch_gs_parameters[0, :]
-                # Check for interruption before gaussian rendering
-                if check_stop_flag():
-                    return None, "❌ Inference interrupted", None
-                # Gaussian rendering
-                b_output = generate_2D_gaussian_splatting_step(
-                    gs_parameters=gs_parameters,
-                    sr_size=gt_size_pad[0],
-                    scale=scale,
-                    sample_coords=None,
-                    scale_modify=torch.tensor([scale, scale]),
-                    default_step_size=1.2,
-                    cuda_rendering=True,
-                    mode='scale_modify',
-                    if_dmax=True,
-                    dmax_mode='fix',
-                    dmax=0.1
-                )
-                output = b_output.unsqueeze(0)
-        # Check for interruption before post-processing
-        if check_stop_flag():
-            return None, "❌ Inference interrupted", None
-        # Post-processing: crop away the part that came from input padding
-        output = postprocess(output, gt_size[0], gt_size[1])
-        # Convert back to PIL image format. Only the batch axis is squeezed
-        # so a 1-pixel-high or 1-pixel-wide result keeps its spatial axes,
-        # and the clamp is out-of-place so it never mutates `output`.
-        output = output.detach().squeeze(0).float().cpu().clamp(0, 1).numpy()
-        output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))  # BGR to RGB
-        output = (output * 255.0).round().astype(np.uint8)
-        # Convert to PIL image
-        output_pil = Image.fromarray(output)
-        # Generate result information
-        original_size = f"{img.shape[3]}x{img.shape[2]}"
-        output_size = f"{output.shape[1]}x{output.shape[0]}"
-        tile_info = f"Tile processing enabled (size: {tile_size})" if use_tile else "Direct processing (no tiles)"
-        result_info = f"✅ Processing completed successfully!\nOriginal size: {original_size}\nSuper-resolution size: {output_size}\nScale factor: {scale:.2f}x\nProcessing mode: {tile_info}\nAMP acceleration: Force enabled\nOverlap size: {tile_overlap}\nCrop size: {crop_size}"
-        return output_pil, result_info, output_pil
-    except Exception as e:
-        # UI boundary: report the failure as a status message instead of
-        # letting the exception escape to the caller.
-        if check_stop_flag():
-            return None, "❌ Inference interrupted", None
-        return None, f"❌ Error during processing: {str(e)}", None
-def predict(image, scale):
-    """Gradio prediction function.
-
-    Runs super-resolution and, on success, writes the result to a PNG in
-    the system temp directory so it can be offered for download.
-
-    Args:
-        image: Input image passed through to ``super_resolution_inference``.
-        scale: Magnification factor; also embedded in the download filename.
-
-    Returns:
-        ``(output image or None, download path or None, status text,
-        gr.update(interactive=True))``.
-    """
-    output_image, info, _ = super_resolution_inference(image, scale)
-    if output_image is None:
-        return None, None, info if info else "❌ Processing failed", gr.update(interactive=True)
-    # Let tempfile pick a unique name: a timestamp with one-second
-    # resolution would let two requests at the same scale overwrite each
-    # other's download. delete=False keeps the file after the handle closes.
-    with tempfile.NamedTemporaryFile(
-        prefix=f"GSASR_SR_result_{scale}x_", suffix=".png", delete=False
-    ) as tmp_file:
-        output_image.save(tmp_file, "PNG")
-        temp_path = tmp_file.name
-    return output_image, temp_path, "✅ Ready", gr.update(interactive=True)
-# Create Gradio interface: inputs and controls in the first column, the
-# result, status and download widgets in the second.
-with gr.Blocks(title="🚀 GSASR (2D Gaussian Splatting Super-Resolution)") as demo:
-    gr.Markdown("# **🚀 GSASR (Generalized and efficient 2d gaussian splatting for arbitrary-scale super-resolution)**")
-    gr.Markdown("Official demo for GSASR. Please refer to our [paper](https://arxiv.org/pdf/2501.06838), [project page](https://mt-cly.github.io/GSASR.github.io/), and [github](https://github.com/ChrisDud0257/GSASR) for more details.")
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(type="pil", label="Input Image")
-            # Scale parameters
-            with gr.Group():
-                gr.Markdown("### SR Scale")
-                scale_slider = gr.Slider(minimum=1.0, maximum=30.0, value=4.0, step=0.1, label="SR Scale")
-            # Control buttons
-            with gr.Row():
-                submit_btn = gr.Button("🚀 Start Super-Resolution", variant="primary")
-                stop_btn = gr.Button("🛑 Stop Inference", variant="stop")
-        with gr.Column():
-            output_image = gr.Image(type="pil", label="Super-Resolution Result")
-            # Status display
-            status_text = gr.Textbox(label="Status", value="✅ Ready", interactive=False)
-            # Download component
-            with gr.Group():
-                gr.Markdown("### 📥 Download Super-Resolution Result")
-                download_btn = gr.File(visible=True)
-    # Event handlers
-    # The four outputs line up with the 4-tuple returned by predict().
-    submit_event = submit_btn.click(
-        fn=predict,
-        inputs=[input_image, scale_slider],
-        outputs=[output_image, download_btn, status_text, stop_btn]
-    )
-    # Stop sets the global flag via set_stop_flag() and also cancels the
-    # pending submit event.
-    stop_btn.click(
-        fn=set_stop_flag,
-        inputs=[],
-        outputs=[status_text, stop_btn],
-        cancels=[submit_event]
-    )
-    # Example images
-    gr.Markdown("### 📚 Example Images")
-    gr.Markdown("Try these examples with different scales:")
-    # Each example is an (image path, scale) pair fed into the two inputs.
-    gr.Examples(
-        examples=[
-            ["assets/0846x4.png", 1.5],
-            ["assets/0892x4.png", 2.8],
-            ["assets/0873x4_cropped_120x120.png", 30.0]
-        ],
-        inputs=[input_image, scale_slider],
-        examples_per_page=3,
-        cache_examples=False,
-        label="Examples"
-    )
-# Launch the demo only when this file is run as a script.
-if __name__ == "__main__":
-    demo.launch(share=True, server_name="0.0.0.0")