import base64
import io
import os
import sys
from typing import Dict, List, Any

import numpy as np
import torch
from mmcv.runner import load_checkpoint
from mmcv.utils import Config
from PIL import Image

# Add current directory to path to import local modules
sys.path.append(os.path.dirname(os.path.realpath(__file__)))

# Now we can import from the local mmseg and mmcv_custom
from modelsforIML.mmseg.datasets.pipelines import Compose
from modelsforIML.mmseg.models import build_segmentor


class Pipeline:
    """Hugging Face inference pipeline wrapping an APSC-Net segmentor.

    Loads the mmseg config/checkpoint once at construction, then maps a
    single PIL image to a base64-encoded PNG tamper mask on each call.
    """

    def __init__(self, model_path: str):
        """Load the segmentor and build the preprocessing pipeline.

        Args:
            model_path: Path to the model checkpoint file, passed in
                automatically by the Hugging Face infrastructure.
                NOTE(review): this argument is currently ignored — the
                hard-coded repository-relative paths below are used
                instead; consider honoring it.

        Raises:
            FileNotFoundError: If the config or checkpoint file is missing.
        """
        # --- Device ---
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # --- Model configuration (paths relative to the repository root) ---
        # NOTE(review): these paths use 'models for IML' (with spaces) while
        # the imports above use 'modelsforIML' — confirm both locations exist
        # in the repository layout.
        config_path = 'models for IML/apscnet.py'
        checkpoint_path = 'models for IML/APSC-Net.pth'

        # Fail fast with actionable messages before handing the paths to
        # mmcv, whose own errors are less helpful.
        if not os.path.exists(config_path):
            raise FileNotFoundError(
                f"Config file not found at {config_path}. "
                "Please place it in the 'models for IML' directory."
            )
        if not os.path.exists(checkpoint_path):
            raise FileNotFoundError(
                f"Checkpoint file not found at {checkpoint_path}. "
                "Please download it and place it in the 'models for IML' directory."
            )

        cfg = Config.fromfile(config_path)

        # --- Build Model ---
        self.model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
        load_checkpoint(self.model, checkpoint_path, map_location='cpu')
        self.model.to(self.device)
        self.model.eval()

        # --- Build Preprocessing Pipeline ---
        # Extract the transforms nested under step 1 of the configured test
        # pipeline (in mmseg configs this is conventionally the
        # MultiScaleFlipAug wrapper — verify against apscnet.py).
        test_pipeline_cfg = cfg.data.test.pipeline[1]['transforms']
        self.pipeline = Compose(test_pipeline_cfg)

    def __call__(self, inputs: Image.Image) -> Dict[str, Any]:
        """Perform inference on a single image.

        Args:
            inputs: A PIL image to be processed.

        Returns:
            Dict with key "image" holding the predicted mask as a
            base64-encoded PNG string (0 = authentic, 255 = tampered).
        """
        # Convert PIL image to numpy array (RGB).
        img = np.array(inputs.convert('RGB'))

        # Prepare data for the preprocessing pipeline.
        data = {'img': img, 'img_shape': img.shape, 'ori_shape': img.shape}
        data = self.pipeline(data)

        # Move the image tensor to the device, adding a batch dimension.
        img_tensor = data['img'][0].unsqueeze(0).to(self.device)

        # --- Inference ---
        # NOTE(review): stock mmseg test-mode forward also requires
        # `img_metas`; this call assumes the local fork accepts `img` alone —
        # confirm against modelsforIML.mmseg.
        with torch.no_grad():
            result = self.model(return_loss=False, img=[img_tensor])

        # --- Post-process ---
        # Per-image output is class logits of shape (C, H, W); argmax over
        # the class axis gives 0 = authentic, 1 = tampered.
        mask_pred = result[0].argmax(0).astype(np.uint8)

        # Convert mask to a visual format (0 -> 0, 1 -> 255).
        mask_pred *= 255

        # Create a single-channel PIL image from the numpy mask.
        mask_image = Image.fromarray(mask_pred, mode='L')

        # --- Encode to Base64 ---
        buffered = io.BytesIO()
        mask_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        return {"image": img_str}