Spaces:

jabiru55
/

Image_Quality_inhancer_API

Sleeping

App Files Files Community

ihtesham0345 committed 24 days ago

Commit

1ea9514

1 Parent(s): 3cab236

Add real InvSR model with CPU/float32 support (SD-Turbo + noise predictor)

Browse files

Files changed (3) hide show

Dockerfile +5 -2
app.py +219 -99
requirements.txt +7 -0

Dockerfile CHANGED Viewed

@@ -1,7 +1,7 @@
 FROM python:3.12-slim
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgl1 libglib2.0-0 git curl \
     && rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
@@ -9,9 +9,12 @@ USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY --chown=user app.py .
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.12-slim
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 git curl \
     && rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
 ENV PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
+# Clone InvSR source (custom diffusers pipeline + noise predictor support)
+RUN git clone --depth 1 https://github.com/zsyOAOA/InvSR.git /app/InvSR
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY --chown=user app.py .
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "600"]

app.py CHANGED Viewed

@@ -1,6 +1,4 @@
-import json
-import logging
-import time
 from io import BytesIO
 from pathlib import Path
 from contextlib import asynccontextmanager
@@ -15,112 +13,243 @@ from fastapi import FastAPI, File, UploadFile, Query, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
-from torchvision.io import decode_image, ImageReadMode
-from torchvision.transforms.v2 import ToDtype, ToPILImage
 from mewzoom.model import MewZoom
-MODELS_CONFIG = {"2x": "andrewdalpino/MewZoom-V1-2X-Unet", "4x": "andrewdalpino/MewZoom-V1-4X-Unet"}
-MAX_DIM = {"2x": 2048, "4x": 1024}
 CACHE_DIR = Path("models")
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
-_models: dict[str, MewZoom] = {}
-_image_to_tensor = ToDtype(torch.float32, scale=True)
-_tensor_to_pil = ToPILImage()
-_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def _load_model(scale: str) -> MewZoom:
-    if scale in _models:
-        return _models[scale]
-    model_id = MODELS_CONFIG[scale]
-    logger.info("Loading %s (%s) on %s ...", scale, model_id, _DEVICE)
     CACHE_DIR.mkdir(exist_ok=True)
-    model = MewZoom.from_pretrained(model_id, cache_dir=str(CACHE_DIR))
-    model.to(_DEVICE).eval()
-    _models[scale] = model
-    logger.info("%s loaded (%s params)", scale, f"{sum(p.numel() for p in model.parameters()):,}")
-    return model
 def _resize_if_needed(img: Image.Image, scale: str) -> tuple[Image.Image, bool]:
-    max_dim = MAX_DIM[scale]
     w, h = img.size
     if max(w, h) <= max_dim:
         return img, False
-    ratio = max_dim / max(w, h)
-    return img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS), True
-def _pil_to_tensor(img: Image.Image) -> torch.Tensor:
-    arr = np.array(img, dtype=np.float32) / 255.0
-    return torch.from_numpy(arr).permute(2, 0, 1)
-def upscale_image(image_bytes: bytes, scale: str) -> tuple[bytes, dict]:
-    model = _load_model(scale)
     factor = int(scale[0])
-    try:
-        pil = Image.open(BytesIO(image_bytes)).convert("RGB")
-    except Exception as e:
-        raise HTTPException(400, f"Bad image: {e}")
     orig = (pil.width, pil.height)
     pil, resized = _resize_if_needed(pil, scale)
     out_mp = pil.width * factor * pil.height * factor / 1e6
     if out_mp > 64:
-        raise HTTPException(400, f"Output too large ({out_mp:.0f}MP). Use smaller image.")
     x = _pil_to_tensor(pil).unsqueeze(0).to(_DEVICE)
     with torch.inference_mode():
         y = model.upscale(x)
-    result = _tensor_to_pil(y.squeeze(0).cpu())
-    buf = BytesIO()
-    result.save(buf, format="PNG")
-    buf.seek(0)
     return buf.getvalue(), {"scale": scale, "input": f"{orig[0]}x{orig[1]}", "output": f"{result.width}x{result.height}", "resized": resized}
-def _laplacian_variance(img: Image.Image) -> float:
-    lap = ndimage.laplace(np.array(img.convert("L"), dtype=np.float64))
-    return float(lap.var())
-def _entropy(img: Image.Image) -> float:
-    hist = np.histogram(np.array(img.convert("L")), bins=256, range=(0, 256))[0]
-    hist = hist[hist > 0] / hist.sum()
-    return float(-np.sum(hist * np.log2(hist)))
-def _edge_density(img: Image.Image) -> float:
-    arr = np.array(img.convert("L"), dtype=np.float64)
-    mag = np.hypot(ndimage.sobel(arr, axis=0), ndimage.sobel(arr, axis=1))
-    return float(np.mean(mag > mag.mean() + mag.std()))
 def compute_metrics(img: Image.Image) -> dict:
-    return {"size": f"{img.width}x{img.height}", "sharpness": round(_laplacian_variance(img), 4), "entropy": round(_entropy(img), 4), "edge_density": round(_edge_density(img), 4), "contrast_std": round(float(np.array(img).std()), 2)}
 def generate_comparison(image_bytes: bytes) -> tuple[bytes, dict]:
     original = Image.open(BytesIO(image_bytes)).convert("RGB")
     metrics = {"original": compute_metrics(original)}
     upscaled = {}
-    for scale in MODELS_CONFIG:
         t0 = time.perf_counter()
-        result_bytes, info = upscale_image(image_bytes, scale)
-        elapsed = time.perf_counter() - t0
-        img = Image.open(BytesIO(result_bytes)).convert("RGB")
         upscaled[scale] = img
-        metrics[scale] = {**compute_metrics(img), "time_s": round(elapsed, 3), **info}
     orig_r = original.resize(upscaled["2x"].size, Image.LANCZOS)
     images = [orig_r, upscaled["2x"], upscaled["4x"]]
     labels = ["Original", "MewZoom 2X", "MewZoom 4X"]
-    label_h, gap = 30, 8
-    max_h = max(i.height for i in images)
-    total_w = sum(i.width for i in images) + gap * (len(images) - 1)
-    canvas = Image.new("RGB", (total_w, max_h + label_h), (30, 30, 30))
     draw = ImageDraw.Draw(canvas)
     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
@@ -128,29 +257,28 @@ def generate_comparison(image_bytes: bytes) -> tuple[bytes, dict]:
         font = ImageFont.load_default()
     x = 0
     for img, lbl in zip(images, labels):
-        canvas.paste(img, (x, label_h))
-        bbox = draw.textbbox((0, 0), lbl, font=font)
-        tw = bbox[2] - bbox[0]
-        draw.text((x + (img.width - tw) // 2, 6), lbl, fill=(255, 255, 255), font=font)
         x += img.width + gap
-    buf = BytesIO()
-    canvas.save(buf, format="PNG")
-    buf.seek(0)
     return buf.getvalue(), metrics
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    logger.info("Starting on %s, loading models...", _DEVICE)
-    for scale in MODELS_CONFIG:
-        _load_model(scale)
     yield
 app = FastAPI(
     title="Super-Resolution API",
-    description="MewZoom 2X/4X upscaling + comparison + quality metrics. InvSR requires GPU (not on free tier).",
-    version="1.0.0",
     lifespan=lifespan,
 )
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
@@ -159,50 +287,42 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 @app.get("/")
 @app.get("/health")
 async def health():
-    return JSONResponse({"status": "healthy", "device": str(_DEVICE), "models": list(MODELS_CONFIG.keys()), "gpu": torch.cuda.is_available()})
 @app.post("/upscale/2x")
 async def route_2x(file: UploadFile = File(...)):
-    result, info = upscale_image(await file.read(), "2x")
-    return StreamingResponse(BytesIO(result), media_type="image/png", headers={"X-Info": json.dumps(info)})
 @app.post("/upscale/4x")
 async def route_4x(file: UploadFile = File(...)):
-    result, info = upscale_image(await file.read(), "4x")
-    return StreamingResponse(BytesIO(result), media_type="image/png", headers={"X-Info": json.dumps(info)})
 @app.post("/upscale/compare")
 async def route_compare(file: UploadFile = File(...), format: Literal["image", "json", "both"] = Query("both")):
-    img, metrics = generate_comparison(await file.read())
-    if format == "json":
-        return JSONResponse(metrics)
-    if format == "image":
-        return StreamingResponse(BytesIO(img), media_type="image/png")
-    return StreamingResponse(BytesIO(img), media_type="image/png", headers={"X-Metrics": json.dumps(metrics)})
 @app.post("/upscale/metrics")
 async def route_metrics(file: UploadFile = File(...)):
-    _, metrics = generate_comparison(await file.read())
-    return JSONResponse(metrics)
 @app.post("/upscale/invsr")
 async def route_invsr(
     file: UploadFile = File(...),
-    num_steps: int = Query(1, ge=1, le=5),
-    tile_size: int = Query(128, ge=64, le=512),
 ):
-    if torch.cuda.is_available():
-        raise HTTPException(501, detail="InvSR GPU pipeline not bundled in this Space. Use the Colab notebook.")
-    # Fallback to MewZoom 4X on CPU
-    logger.info("InvSR endpoint called on CPU — falling back to MewZoom 4X")
-    result, info = upscale_image(await file.read(), "4x")
-    info["fallback"] = "InvSR not available on CPU, used MewZoom 4X instead"
-    return StreamingResponse(
-        BytesIO(result), media_type="image/png",
-        headers={"X-Info": json.dumps(info)},
-    )

+import json, logging, time, sys, os, tempfile
 from io import BytesIO
 from pathlib import Path
 from contextlib import asynccontextmanager
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
 from mewzoom.model import MewZoom
+# ── Config ──────────────────────────────────────────────────
+MEWZOOM_MODELS = {"2x": "andrewdalpino/MewZoom-V1-2X-Unet", "4x": "andrewdalpino/MewZoom-V1-4X-Unet"}
+MAX_DIM = {"2x": 2048, "4x": 1024, "invsr": 256}
 CACHE_DIR = Path("models")
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
+_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+logger.info("Device: %s", _DEVICE)
+# ── MewZoom Models ──────────────────────────────────────────
+_mz_models: dict[str, MewZoom] = {}
+def _load_mewzoom(scale: str) -> MewZoom:
+    """Return the MewZoom model for ``scale``, loading and caching it on first use."""
+    cached = _mz_models.get(scale)
+    if cached is not None:
+        return cached
+    repo_id = MEWZOOM_MODELS[scale]
+    logger.info("Loading MewZoom %s (%s) ...", scale, repo_id)
     CACHE_DIR.mkdir(exist_ok=True)
+    model = MewZoom.from_pretrained(repo_id, cache_dir=str(CACHE_DIR))
+    model.to(_DEVICE).eval()
+    _mz_models[scale] = model
+    param_count = sum(p.numel() for p in model.parameters())
+    logger.info("MewZoom %s ready (%s params)", scale, f"{param_count:,}")
+    return model
+def _pil_to_tensor(img: Image.Image) -> torch.Tensor:
+    """Convert ``img`` to a float32 tensor divided by 255, with the last axis moved first."""
+    pixels = np.array(img, dtype=np.float32)
+    pixels = pixels / 255.0
+    tensor = torch.from_numpy(pixels)
+    return tensor.permute(2, 0, 1)
 def _resize_if_needed(img: Image.Image, scale: str) -> tuple[Image.Image, bool]:
+    """Downscale ``img`` so its longer side fits the limit configured for ``scale``.
+
+    The limit is read from ``MAX_DIM`` (1024 when ``scale`` has no entry).
+    Returns ``(image, resized)``; an image that already fits is returned as is
+    with ``resized`` set to ``False``.
+    """
+    max_dim = MAX_DIM.get(scale, 1024)
     w, h = img.size
     if max(w, h) <= max_dim:
         return img, False
+    r = max_dim / max(w, h)
+    # Truncation can produce 0 for the short side of a very elongated image
+    # (e.g. 1x5000); Image.resize rejects zero-sized targets, so clamp to 1.
+    new_size = (max(1, int(w * r)), max(1, int(h * r)))
+    return img.resize(new_size, Image.LANCZOS), True
+def upscale_mewzoom(image_bytes: bytes, scale: str) -> tuple[bytes, dict]:
+    """Upscale an encoded image with MewZoom and return ``(png_bytes, info)``.
+
+    ``scale`` selects the model; its first character is parsed as the integer
+    upscaling factor. ``info`` reports the scale, the input and output sizes,
+    and whether the input was downscaled first.
+
+    Raises:
+        HTTPException: 400 when the upload cannot be decoded as an image or
+            when the projected output exceeds 64 megapixels.
+    """
+    model = _load_mewzoom(scale)
     factor = int(scale[0])
+    try:
+        pil = Image.open(BytesIO(image_bytes)).convert("RGB")
+    except Exception as e:
+        # An undecodable upload is a client error; without this it surfaces
+        # as an unhandled exception (HTTP 500).
+        raise HTTPException(400, f"Bad image: {e}") from e
     orig = (pil.width, pil.height)
     pil, resized = _resize_if_needed(pil, scale)
     out_mp = pil.width * factor * pil.height * factor / 1e6
     if out_mp > 64:
+        raise HTTPException(400, f"Output too large ({out_mp:.0f}MP)")
     x = _pil_to_tensor(pil).unsqueeze(0).to(_DEVICE)
     with torch.inference_mode():
         y = model.upscale(x)
+    # Channel-first tensor -> channel-last uint8 array for PIL.
+    result_np = (y.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
+    result = Image.fromarray(result_np)
+    buf = BytesIO()
+    result.save(buf, format="PNG")
     return buf.getvalue(), {"scale": scale, "input": f"{orig[0]}x{orig[1]}", "output": f"{result.width}x{result.height}", "resized": resized}
+# ── InvSR Model (Diffusion 4X) ──────────────────────────────
+_INVSR_PATH = Path("/app/InvSR")
+_sampler_invsr = None
+def _patch_invsr():
+    """Rewrite InvSR's ``sampler_invsr.py`` on disk for CPU/float32 support.
+
+    The patch is a sequence of literal text substitutions: it drops the
+    module-level ``create_dataset`` import, gives ``BaseSampler`` a ``device``
+    argument with matching ``self.device`` / ``self.dtype`` attributes,
+    replaces hard-coded CUDA / float16 calls with device-aware ones, and
+    re-imports ``create_dataset`` lazily in the directory-input branch.
+
+    Calling it again on an already patched file is a no-op.
+    """
+    p = _INVSR_PATH / "sampler_invsr.py"
+    code = p.read_text()
+    lazy_import = "if in_path.is_dir():\n            from datapipe.datasets import create_dataset\n            data_config"
+    # A second pass would strip the lazy import inserted below and could not
+    # re-add it (the insertion anchor no longer matches), so stop here. This
+    # matters when _load_invsr() is retried after failing past the patch step.
+    if lazy_import in code:
+        logger.info("InvSR sampler already patched; skipping")
+        return
+    # Remove basicsr import chain (not needed for inference)
+    code = code.replace("from datapipe.datasets import create_dataset", "")
+    # Add device param to BaseSampler
+    old_init = """class BaseSampler:
+    def __init__(self, configs):
+        '''
+        Input:
+            configs: config, see the yaml file in folder ./configs/
+                configs.sampler_config.{start_timesteps, padding_mod, seed, sf, num_sample_steps}
+            seed: int, random seed
+        '''
+        self.configs = configs
+        self.setup_seed()
+        self.build_model()
+    def setup_seed(self, seed=None):
+        seed = self.configs.seed if seed is None else seed
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)"""
+    new_init = """class BaseSampler:
+    def __init__(self, configs, device='auto'):
+        self.configs = configs
+        if device == 'auto':
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = torch.device(device)
+        self.dtype = torch.float16 if self.device.type == 'cuda' else torch.float32
+        self.setup_seed()
+        self.build_model()
+    def setup_seed(self, seed=None):
+        seed = self.configs.seed if seed is None else seed
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(seed)"""
+    if old_init not in code:
+        # str.replace would silently do nothing; say so, because the sampler
+        # is later constructed with a ``device`` argument this patch adds.
+        logger.warning("InvSR BaseSampler source did not match the expected text; device patch not applied")
+    code = code.replace(old_init, new_init)
+    # Replace .cuda() and .type(torch.float16) with device-aware versions
+    code = code.replace('sd_pipe.to(f"cuda")', "sd_pipe.to(self.device)")
+    code = code.replace("model_start.cuda()", "model_start.to(self.device)")
+    code = code.replace('map_location=f"cuda"', "map_location=self.device")
+    code = code.replace("im_cond.type(torch.float16)", "im_cond.type(self.dtype)")
+    code = code.replace(".type(torch.float16)", ".type(self.dtype)")
+    code = code.replace("data['lq'].cuda()", "data['lq'].to(self.device)")
+    code = code.replace("util_image.img2tensor(im_cond).cuda()", "util_image.img2tensor(im_cond).to(self.device)")
+    # Lazy import create_dataset in inference method
+    code = code.replace(
+        "if in_path.is_dir():\n            data_config",
+        lazy_import,
+    )
+    p.write_text(code)
+    logger.info("InvSR sampler patched for CPU/float32")
+def _load_invsr():
+    """Build the InvSR sampler on first use and cache it in ``_sampler_invsr``.
+
+    Later calls return the cached sampler. The first call patches the InvSR
+    source on disk, loads the sample config, downloads the noise predictor
+    checkpoint when it is not already on disk, and constructs the sampler.
+    """
+    global _sampler_invsr
+    if _sampler_invsr is not None:
+        return _sampler_invsr
+    # The source must be patched before ``sampler_invsr`` is imported below.
+    _patch_invsr()
+    # Make the cloned repo importable (``sampler_invsr`` lives at its root).
+    sys.path.insert(0, str(_INVSR_PATH))
+    # NOTE(review): presumably ``src`` holds InvSR's custom diffusers pipeline — confirm.
+    sys.path.insert(0, str(_INVSR_PATH / "src"))
+    from omegaconf import OmegaConf
+    from sampler_invsr import InvSamplerSR
+    cfg = OmegaConf.load(str(_INVSR_PATH / "configs" / "sample-sd-turbo.yaml"))
+    # The dtype is written into the config as a string: float32 on CPU, float16 otherwise.
+    cfg.sd_pipe.params.torch_dtype = "torch.float32" if _DEVICE == "cpu" else "torch.float16"
+    cfg.sd_pipe.params.cache_dir = str(CACHE_DIR / "invsr")
+    CACHE_DIR.mkdir(exist_ok=True)
+    # Download noise predictor
+    from torch.hub import download_url_to_file
+    ckpt = CACHE_DIR / "invsr" / "noise_predictor_sd_turbo_v5.pth"
+    ckpt.parent.mkdir(exist_ok=True)
+    if not ckpt.exists():
+        logger.info("Downloading noise predictor (~800MB)...")
+        download_url_to_file(
+            "https://huggingface.co/OAOA/InvSR/resolve/main/noise_predictor_sd_turbo_v5.pth",
+            str(ckpt), progress=True,
+        )
+    cfg.model_start.ckpt_path = str(ckpt)
+    # Sampling defaults; upscale_invsr() overwrites timesteps and pch_size per request.
+    cfg.timesteps = [200]; cfg.bs = 1; cfg.tiled_vae = True
+    cfg.color_fix = "wavelet"; cfg.basesr.chopping.pch_size = 128
+    cfg.basesr.chopping.extra_bs = 8
+    logger.info("Loading InvSR sampler (SD-Turbo ~5GB download on first run)...")
+    # ``device`` is the keyword that _patch_invsr() adds to BaseSampler.__init__.
+    _sampler_invsr = InvSamplerSR(cfg, device="auto")
+    if _DEVICE == "cpu":
+        # Cast the whole pipeline to float32 when running on CPU.
+        _sampler_invsr.sd_pipe = _sampler_invsr.sd_pipe.to(dtype=torch.float32)
+    logger.info("InvSR ready on %s", _DEVICE)
+    return _sampler_invsr
+def upscale_invsr(image_bytes: bytes, num_steps: int = 1) -> bytes:
+    """Upscale an encoded image with the InvSR sampler and return PNG bytes.
+
+    ``num_steps`` (1-5) selects the timestep schedule from ``steps_map``;
+    any other value falls back to the single-step schedule ``[200]``.
+    """
+    sampler = _load_invsr()
+    # Inserting unconditionally on every request grows sys.path without
+    # bound; only add the repo root when it is missing.
+    invsr_root = str(_INVSR_PATH)
+    if invsr_root not in sys.path:
+        sys.path.insert(0, invsr_root)
+    from utils import util_image
+    # util_image.imread takes a file path, so stage the upload in a temp file.
+    tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    try:
+        with tmp:  # closes the handle even if the write fails
+            tmp.write(image_bytes)
+        im = util_image.imread(tmp.name, chn="rgb", dtype="float32")
+    finally:
+        os.unlink(tmp.name)
+    # NOTE(review): MAX_DIM defines an "invsr" limit of 256, but nothing here
+    # applies it — confirm whether the input should be downscaled first.
+    im_cond = util_image.img2tensor(im).to(sampler.device)
+    steps_map = {1: [200], 2: [200, 100], 3: [200, 100, 50], 4: [200, 150, 100, 50], 5: [250, 200, 150, 100, 50]}
+    # These settings are written onto the shared cached sampler's config.
+    sampler.configs.timesteps = steps_map.get(num_steps, [200])
+    sampler.configs.basesr.chopping.pch_size = 128
+    result = sampler.sample_func(im_cond).squeeze(0)
+    result = (result * 255).clip(0, 255).astype(np.uint8)
+    img = Image.fromarray(result)
+    buf = BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+# ── Metrics ─────────────────────────────────────────────────
 def compute_metrics(img: Image.Image) -> dict:
+    """Return size, sharpness, entropy, edge density and contrast for ``img``.
+
+    Sharpness, entropy and edge density are computed on the "L" conversion
+    of the image; ``contrast_std`` is the standard deviation of the image as given.
+    """
+    gray = np.array(img.convert("L"), dtype=np.float64)
+    counts, _ = np.histogram(gray, bins=256, range=(0, 256))
+    probs = counts[counts > 0] / counts.sum()
+    entropy = float(-np.sum(probs * np.log2(probs)))
+    grad = np.hypot(ndimage.sobel(gray, axis=0), ndimage.sobel(gray, axis=1))
+    # An "edge" pixel is one whose gradient exceeds mean + one standard deviation.
+    strong_edges = grad > grad.mean() + grad.std()
+    metrics = {"size": f"{img.width}x{img.height}"}
+    metrics["sharpness"] = round(float(ndimage.laplace(gray).var()), 4)
+    metrics["entropy"] = round(entropy, 4)
+    metrics["edge_density"] = round(float(np.mean(strong_edges)), 4)
+    metrics["contrast_std"] = round(float(np.array(img).std()), 2)
+    return metrics
 def generate_comparison(image_bytes: bytes) -> tuple[bytes, dict]:
     original = Image.open(BytesIO(image_bytes)).convert("RGB")
     metrics = {"original": compute_metrics(original)}
     upscaled = {}
+    for scale in MEWZOOM_MODELS:
         t0 = time.perf_counter()
+        rb, info = upscale_mewzoom(image_bytes, scale)
+        t = time.perf_counter() - t0
+        img = Image.open(BytesIO(rb)).convert("RGB")
         upscaled[scale] = img
+        metrics[scale] = {**compute_metrics(img), "time_s": round(t, 3), **info}
     orig_r = original.resize(upscaled["2x"].size, Image.LANCZOS)
     images = [orig_r, upscaled["2x"], upscaled["4x"]]
     labels = ["Original", "MewZoom 2X", "MewZoom 4X"]
+    lh, gap = 30, 8
+    mh = max(i.height for i in images)
+    tw = sum(i.width for i in images) + gap * (len(images) - 1)
+    canvas = Image.new("RGB", (tw, mh + lh), (30, 30, 30))
     draw = ImageDraw.Draw(canvas)
     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
         font = ImageFont.load_default()
     x = 0
     for img, lbl in zip(images, labels):
+        canvas.paste(img, (x, lh))
+        bb = draw.textbbox((0, 0), lbl, font=font)
+        tw2 = bb[2] - bb[0]
+        draw.text((x + (img.width - tw2) // 2, 6), lbl, fill=(255, 255, 255), font=font)
         x += img.width + gap
+    buf = BytesIO(); canvas.save(buf, format="PNG"); buf.seek(0)
     return buf.getvalue(), metrics
+# ── FastAPI App ─────────────────────────────────────────────
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Load every configured MewZoom model before the app starts serving."""
+    logger.info("Loading MewZoom models...")
+    for scale in MEWZOOM_MODELS:
+        _load_mewzoom(scale)
     yield
 app = FastAPI(
     title="Super-Resolution API",
+    description="MewZoom 2X/4X + InvSR 4X diffusion + comparison + quality metrics",
+    version="2.0.0",
     lifespan=lifespan,
 )
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 @app.get("/")
 @app.get("/health")
 async def health():
+    """Report service status, the active device and the available model names."""
+    payload = {
+        "status": "healthy",
+        "device": _DEVICE,
+        "models": ["2x", "4x", "invsr"],
+        "gpu": torch.cuda.is_available(),
+    }
+    return JSONResponse(payload)
 @app.post("/upscale/2x")
 async def route_2x(file: UploadFile = File(...)):
+    """Upscale the upload 2x with MewZoom; the info dict is returned as JSON in ``X-Info``."""
+    png_bytes, info = upscale_mewzoom(await file.read(), "2x")
+    info_header = {"X-Info": json.dumps(info)}
+    return StreamingResponse(BytesIO(png_bytes), media_type="image/png", headers=info_header)
 @app.post("/upscale/4x")
 async def route_4x(file: UploadFile = File(...)):
+    """Upscale the upload 4x with MewZoom; the info dict is returned as JSON in ``X-Info``."""
+    png_bytes, info = upscale_mewzoom(await file.read(), "4x")
+    info_header = {"X-Info": json.dumps(info)}
+    return StreamingResponse(BytesIO(png_bytes), media_type="image/png", headers=info_header)
 @app.post("/upscale/compare")
 async def route_compare(file: UploadFile = File(...), format: Literal["image", "json", "both"] = Query("both")):
+    """Return the comparison image, its metrics, or both.
+
+    ``json`` returns only the metrics, ``image`` only the PNG, and any other
+    value the PNG with the metrics serialised into the ``X-Metrics`` header.
+    """
+    png_bytes, metrics = generate_comparison(await file.read())
+    if format == "json":
+        return JSONResponse(metrics)
+    # Only the plain "image" format omits the metrics header.
+    extra_headers = {"X-Metrics": json.dumps(metrics)} if format != "image" else None
+    return StreamingResponse(BytesIO(png_bytes), media_type="image/png", headers=extra_headers)
 @app.post("/upscale/metrics")
 async def route_metrics(file: UploadFile = File(...)):
+    """Run the full comparison and return only its metrics as JSON."""
+    comparison = generate_comparison(await file.read())
+    metrics = comparison[1]  # the rendered image is discarded
+    return JSONResponse(metrics)
 @app.post("/upscale/invsr")
 async def route_invsr(
     file: UploadFile = File(...),
+    num_steps: int = Query(1, ge=1, le=5, description="1=fast, 5=best quality"),
 ):
+    """Upscale the upload with InvSR and stream the result back as PNG.
+
+    Raises:
+        HTTPException: 500 carrying the failure message when InvSR raises;
+            an ``HTTPException`` raised further down is passed through unchanged.
+    """
+    try:
+        result = upscale_invsr(await file.read(), num_steps=num_steps)
+    except HTTPException:
+        # Keep a deliberate status code instead of masking it as a 500.
+        raise
+    except Exception as e:
+        # The response carries only the message, so log the traceback here.
+        logger.exception("InvSR upscaling failed")
+        raise HTTPException(500, detail=f"InvSR failed: {e}") from e
+    return StreamingResponse(BytesIO(result), media_type="image/png")

requirements.txt CHANGED Viewed

@@ -7,3 +7,10 @@ torchvision>=0.15.0
 Pillow>=10.0.0
 scipy>=1.10.0
 numpy>=1.23.0

 Pillow>=10.0.0
 scipy>=1.10.0
 numpy>=1.23.0
+diffusers>=0.28.0
+transformers>=4.37.0
+accelerate>=0.28.0
+omegaconf>=2.3.0
+loguru>=0.7.0
+einops>=0.7.0
+opencv-python-headless>=4.8.0