Spaces:

faststager
/

clean_vs_messy

Sleeping

App Files Files Community

Nightfury16 committed on Aug 31, 2025

Commit

7011a64

1 Parent(s): 3579e8e

Initial commit

Browse files

Files changed (11) hide show

Dockerfile +14 -0
README.md +6 -6
app.py +117 -0
checkpoints/convnext_v2_atto_best.pth +3 -0
checkpoints/effnet_b0_best.pth +3 -0
checkpoints/effnet_b3_best.pth +3 -0
checkpoints/vit_b_16_best.pth +3 -0
cm.yaml +21 -0
convnext_config.json +28 -0
main.py +202 -0
requirements.txt +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+FROM python:3.9-slim
+ENV TRANSFORMERS_CACHE=/data/.cache/transformers
+ENV HF_HOME=/data/.cache/huggingface
+ENV MPLCONFIGDIR=/data/.cache/matplotlib
+WORKDIR /code
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Clean Vs Messy
-emoji: 🦀
-colorFrom: green
-colorTo: pink
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Messy vs Clean Image Classifier
+emoji: 🔥
+colorFrom: indigo
+colorTo: green
 sdk: docker
+app_file: main.py
 ---
+Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+os.environ['TRANSFORMERS_CACHE'] = '/data/.cache/transformers'
+os.environ['HF_HOME'] = '/data/.cache/huggingface'
+os.environ['MPLCONFIGDIR'] = '/data/.cache/matplotlib'
+import torch
+import torch.nn as nn
+import yaml
+from torchvision import models, transforms
+from PIL import Image
+import gradio as gr
+from transformers import ConvNextV2ForImageClassification
+from typing import Dict, Tuple
+MODEL_CHECKPOINTS = {
+    "convnext_tiny_best": "checkpoints/convnext_v2_tiny_best.pth",
+    "efficientnet_b0": "checkpoints/effnet_b0_best.pth",
+    "efficientnet_b3": "checkpoints/effnet_b3_best.pth",
+    "vit_b_16": "checkpoints/vit_b_16_best.pth"
+}
+DEFAULT_MODEL_NAME = "vit_b_16"
+MODELS: Dict[str, Tuple[nn.Module, Dict[int, str]]] = {}
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+class HFConvNeXtWrapper(nn.Module):
+    def __init__(self, model_name, num_labels):
+        super(HFConvNeXtWrapper, self).__init__()
+        self.model = ConvNextV2ForImageClassification.from_pretrained(
+            model_name, num_labels=num_labels, ignore_mismatched_sizes=True)
+    def forward(self, x):
+        return self.model(x).logits
+def get_model(model_name: str, num_classes: int) -> nn.Module:
+    model = None
+    if model_name == "efficientnet_b0":
+        model = models.efficientnet_b0(weights=None)
+        num_ftrs = model.classifier[1].in_features
+        model.classifier[1] = nn.Linear(num_ftrs, num_classes)
+    elif model_name == "efficientnet_b3":
+        model = models.efficientnet_b3(weights=None)
+        num_ftrs = model.classifier[1].in_features
+        model.classifier[1] = nn.Linear(num_ftrs, num_classes)
+    elif model_name == "vit_b_16":
+        model = models.vit_b_16(weights=None)
+        num_ftrs = model.heads.head.in_features
+        model.heads.head = nn.Linear(num_ftrs, num_classes)
+    elif "convnextv2" in model_name:
+        model = HFConvNeXtWrapper(model_name, num_labels=num_classes)
+    else:
+        raise ValueError(f"Model '{model_name}' not supported.")
+    return model
+def load_checkpoint(checkpoint_path: str, device: torch.device) -> Tuple[nn.Module, Dict[int, str]]:
+    if not os.path.exists(checkpoint_path):
+        raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_path}")
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    model_name_from_ckpt = checkpoint['model_name']
+    model = get_model(model_name_from_ckpt, num_classes=1)
+    model.load_state_dict(checkpoint['state_dict'])
+    model.to(device)
+    model.eval()
+    # The idx_to_class is no longer needed as we hardcode labels
+    return model, {}
+print("--- Loading all models into memory ---")
+for display_name, ckpt_path in MODEL_CHECKPOINTS.items():
+    if os.path.exists(ckpt_path):
+        model, _ = load_checkpoint(ckpt_path, DEVICE)
+        MODELS[display_name] = model
+        print(f"Loaded '{display_name}' on {DEVICE}.")
+    else:
+        print(f"WARNING: Checkpoint for '{display_name}' not found. Skipping.")
+if not MODELS:
+    raise RuntimeError("No models were loaded. Please check your checkpoints directory.")
+with open('cm_config.yaml', 'r') as f:
+    config = yaml.safe_load(f)
+IMG_SIZE = config['data_params']['image_size']
+inference_transform = transforms.Compose([
+    transforms.Resize((IMG_SIZE, IMG_SIZE)),
+    transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+def predict(pil_image, model_name: str):
+    if pil_image is None: return None
+    model = MODELS[model_name]
+    pil_image = pil_image.convert("RGB")
+    image_tensor = inference_transform(pil_image).unsqueeze(0).to(DEVICE)
+    with torch.no_grad():
+        output = model(image_tensor)
+        prob = torch.sigmoid(output).item()
+    # Per user request: Class 0 is "clean", Class 1 is "messy"
+    return {"clean": 1 - prob, "messy": prob}
+iface = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Dropdown(
+            choices=list(MODELS.keys()),
+            value=DEFAULT_MODEL_NAME,
+            label="Select Model"
+        )
+    ],
+    outputs=gr.Label(num_top_classes=2, label="Predictions"),
+    title="Messy vs Clean Image Classifier",
+    description="Upload an image and select a model to see its classification for 'messy' vs 'clean'.",
+)
+iface.launch()

checkpoints/convnext_v2_atto_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d04e828a64aa572a9b9ef741d8a083bf89be2e669a065cca8d3e49f9c69c6da3
+size 111553930

checkpoints/effnet_b0_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b6c672c601de9710c9aa39b93cce5fd3a3332748aadb5a0d3ac878e75602ae5
+size 16336022

checkpoints/effnet_b3_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8012eb905fe5ea97301a1737ad5f340bcac733aa036edf737af0ed4f677cfcb
+size 43350212

checkpoints/vit_b_16_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad719487e643001a11294878db6d7336cacfe4d7e61b31272c27919d4b896e3b
+size 343259114

cm.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+data_params:
+  data_path: "dataset"
+  image_size: 224
+model_params:
+  name: "efficientnet_b0"
+  pretrained: True
+train_params:
+  epochs: 25
+  batch_size: 64
+  optimizer: "AdamW"
+  learning_rate: 0.001
+  unfreeze_epoch: 5
+ddp_params:
+  master_port: '12355'
+output_params:
+  save_dir: "runs/staging_classifier"
+  checkpoint_name: "best_model.pth"

convnext_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "architectures": [
+        "ConvNextV2ForImageClassification"
+    ],
+    "depths": [
+        3,
+        3,
+        9,
+        3
+    ],
+    "drop_path_rate": 0.1,
+    "hidden_act": "gelu",
+    "hidden_sizes": [
+        96,
+        192,
+        384,
+        768
+    ],
+    "image_size": 224,
+    "initializer_range": 0.02,
+    "layer_norm_eps": 1e-06,
+    "model_type": "convnextv2",
+    "num_channels": 3,
+    "num_stages": 4,
+    "patch_size": 4,
+    "torch_dtype": "float32",
+    "transformers_version": "4.35.2"
+}

main.py ADDED Viewed

	@@ -0,0 +1,202 @@

+import os
+import json
+os.environ['HF_HOME'] = './hf_cache'
+os.environ['MPLCONFIGDIR'] = './mpl_cache'
+import torch
+import torch.nn as nn
+import yaml
+from torchvision import models, transforms
+from PIL import Image
+import gradio as gr
+import base64
+import io
+import time
+import threading
+from typing import List, Dict, Union, Tuple, Optional
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from transformers import ConvNextV2Config, ConvNextV2ForImageClassification
+MODEL_CHECKPOINTS = {
+    "convnext_tiny_best": "checkpoints/convnext_v2_tiny_best.pth",
+    "efficientnet_b0": "checkpoints/effnet_b0_best.pth",
+    "efficientnet_b3": "checkpoints/effnet_b3_best.pth",
+    "vit_b_16": "checkpoints/vit_b_16_best.pth"
+}
+DEFAULT_MODEL_NAME = "vit_b_16"
+CONVNEXT_CONFIG_PATH = "convnext_config.json"
+GPU_MODELS: Dict[str, nn.Module] = {}
+CPU_MODELS: Dict[str, nn.Module] = {}
+CONFIG_PATH: str = os.getenv('CONFIG_PATH', 'cm_config.yaml')
+model_lock: threading.Lock = threading.Lock()
+def get_model(model_name: str, num_classes: int) -> nn.Module:
+    model: Optional[nn.Module] = None
+    if model_name == "efficientnet_b0":
+        model = models.efficientnet_b0(weights=None)
+        num_ftrs = model.classifier[1].in_features
+        model.classifier[1] = nn.Linear(num_ftrs, num_classes)
+    elif model_name == "efficientnet_b3":
+        model = models.efficientnet_b3(weights=None)
+        num_ftrs = model.classifier[1].in_features
+        model.classifier[1] = nn.Linear(num_ftrs, num_classes)
+    elif model_name == "vit_b_16":
+        model = models.vit_b_16(weights=None)
+        num_ftrs = model.heads.head.in_features
+        model.heads.head = nn.Linear(num_ftrs, num_classes)
+    elif "convnextv2" in model_name:
+        config = ConvNextV2Config.from_json_file(CONVNEXT_CONFIG_PATH)
+        config.num_labels = num_classes
+        model = ConvNextV2ForImageClassification(config)
+    else:
+        raise ValueError(f"Model '{model_name}' not supported.")
+    return model
+def load_checkpoint(checkpoint_path: str, device: torch.device) -> nn.Module:
+    if not os.path.exists(checkpoint_path):
+        raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_path}")
+    checkpoint: dict = torch.load(checkpoint_path, map_location=device)
+    model_name_from_ckpt: str = checkpoint['model_name']
+    state_dict = checkpoint['state_dict']
+    if any(key.startswith("model.") for key in state_dict.keys()):
+        print(f"  > Unwrapping state_dict for {model_name_from_ckpt}...")
+        state_dict = {k.replace("model.", ""): v for k, v in state_dict.items()}
+    model: nn.Module = get_model(model_name_from_ckpt, num_classes=1)
+    model.load_state_dict(state_dict)
+    model.to(device)
+    model.eval()
+    return model
+print("--- Loading all models into memory ---")
+cpu_device = torch.device("cpu")
+gpu_device = torch.device("cuda") if torch.cuda.is_available() else None
+for display_name, ckpt_path in MODEL_CHECKPOINTS.items():
+    if os.path.exists(ckpt_path):
+        print(f"Loading '{display_name}'...")
+        try:
+            cpu_model = load_checkpoint(ckpt_path, cpu_device)
+            CPU_MODELS[display_name] = cpu_model
+            print(f"  > Loaded '{display_name}' for CPU.")
+            if gpu_device:
+                gpu_model = load_checkpoint(ckpt_path, gpu_device)
+                GPU_MODELS[display_name] = gpu_model
+                print(f"  > Loaded '{display_name}' for GPU.")
+        except Exception as e:
+            print(f"  > FAILED to load '{display_name}'. Error: {e}")
+    else:
+        print(f"WARNING: Checkpoint for '{display_name}' not found at {ckpt_path}. It will not be available.")
+if not CPU_MODELS:
+    raise RuntimeError("No models were loaded. Please check the `checkpoints` directory.")
+try:
+    with open(CONFIG_PATH, 'r') as f: config: dict = yaml.safe_load(f)
+except FileNotFoundError:
+    raise RuntimeError(f"ERROR: Config file not found at '{CONFIG_PATH}'.")
+IMG_SIZE: int = config['data_params']['image_size']
+inference_transform = transforms.Compose([
+    transforms.Resize((IMG_SIZE, IMG_SIZE)),
+    transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+def base64_to_pil(base64_str: str) -> Image.Image:
+    try:
+        if "base64," in base64_str: base64_str = base64_str.split("base64,")[1]
+        image_data: bytes = base64.b64decode(base64_str)
+        return Image.open(io.BytesIO(image_data))
+    except Exception as e:
+        raise ValueError(f"Invalid base64 string: {e}")
+class Base64Image(BaseModel): image_data: str
+class BatchBase64Images(BaseModel):
+    image_data_list: List[str]
+    model_name: str = DEFAULT_MODEL_NAME
+    use_gpu: bool = True
+def predict_batch(pil_images: List[Image.Image], use_gpu: bool, model_name: str) -> List[Dict[str, Union[dict, float]]]:
+    model_dict = GPU_MODELS if use_gpu and gpu_device else CPU_MODELS
+    if model_name not in model_dict:
+        raise ValueError(f"Model '{model_name}' not loaded or not available. Available: {list(model_dict.keys())}")
+    model = model_dict[model_name]
+    device = gpu_device if use_gpu and gpu_device else cpu_device
+    image_tensors = [inference_transform(img.convert("RGB")) for img in pil_images]
+    batch_tensor = torch.stack(image_tensors).to(device)
+    with model_lock, torch.no_grad():
+        start_time = time.time()
+        output_obj = model(batch_tensor)
+        batch_time = time.time() - start_time
+        if hasattr(output_obj, 'logits'):
+            logits = output_obj.logits
+        else:
+            logits = output_obj
+    results = []
+    probs = torch.sigmoid(logits).squeeze().tolist()
+    if not isinstance(probs, list): probs = [probs]
+    for prob in probs:
+        results.append({
+            "prediction": {"clean": 1 - prob, "messy": prob},
+            "metadata": {"device": str(device), "inference_ms": (batch_time * 1000) / len(pil_images)}
+        })
+    return results
+app = FastAPI(title="Messy vs Clean Image Classifier API")
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
+@app.post("/predict", response_model=dict)
+async def predict_api(request: Base64Image, model_name: str = DEFAULT_MODEL_NAME, use_gpu: bool = True):
+    try:
+        pil_image = base64_to_pil(request.image_data)
+        return predict_batch([pil_image], use_gpu, model_name)[0]
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
+@app.post("/batch_predict", response_model=List[dict])
+async def batch_predict_api(request: BatchBase64Images):
+    try:
+        pil_images = [base64_to_pil(b64) for b64 in request.image_data_list]
+        return predict_batch(pil_images, request.use_gpu, request.model_name)
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
+@app.get("/models", response_model=List[str])
+async def get_available_models():
+    return list(CPU_MODELS.keys())
+def predict_gradio(pil_image: Image.Image, model_name: str) -> Optional[dict]:
+    if pil_image is None: return None
+    result = predict_batch([pil_image], use_gpu=True, model_name=model_name)[0]
+    return result["prediction"]
+gradio_iface = gr.Interface(
+    fn=predict_gradio,
+    inputs=[
+        gr.Image(type="pil", label="Input Image", sources=["upload", "webcam", "clipboard"]),
+        gr.Dropdown(
+            choices=list(CPU_MODELS.keys()),
+            value=DEFAULT_MODEL_NAME,
+            label="Select Model"
+        )
+    ],
+    outputs=gr.Label(num_top_classes=2, label="Predictions"),
+    title="Messy vs Clean Image Classifier",
+    description="Upload an image and select a model to see its classification for 'messy' vs 'clean'. The API is available at the /docs endpoint.",
+    allow_flagging="never"
+)
+app = gr.mount_gradio_app(app, gradio_iface, path="/")

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch
+torchvision
+fastapi==0.104.1
+uvicorn==0.24.0
+gradio==3.50.2
+gradio-client==0.6.1
+PyYAML==6.0.1
+python-multipart==0.0.6
+pydantic==2.5.2
+transformers