Kalaoke committed on
Commit
7d66d82
·
verified ·
1 Parent(s): 7a3a24a

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +184 -0
handler.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ from dataclasses import dataclass
5
+ from io import BytesIO
6
+ from typing import Any, Dict, Optional, List
7
+
8
+ import torch
9
+ from PIL import Image
10
+ from transformers import AutoProcessor, LlavaForConditionalGeneration
11
+ from transformers.utils import logging
12
+
13
+
14
+ logger = logging.get_logger(__name__)
15
+ logging.set_verbosity_info()
16
+
17
+
18
# Hub id of the base vision-language checkpoint loaded by EndpointHandler.
BASE_MODEL_ID = "mistral-community/pixtral-12b"


# Default prompt (adjust it here); used when a request carries no "prompt" field.
DEFAULT_PROMPT = (
    "Here is a photo showing some food waste. "
    "Identify each type of food item and the corresponding weight in grams. "
    "Reply like: Milk, 120g; Coffee, 45g. "
    "Do not add any explanation, no extra text."
)
28
+
29
+
30
@dataclass
class GenerationConfig:
    """Default decoding parameters passed to `model.generate`.

    `max_new_tokens` and `temperature` can be overridden per request
    (see `EndpointHandler.__call__`); the repetition controls are fixed.
    """

    # Upper bound on the number of newly generated tokens per request.
    max_new_tokens: int = 64
    # 0.0 → greedy decoding; sampling is only enabled when > 0.
    temperature: float = 0.0
    # Passed to generate(); blocks exact repeats of any 6-gram.
    no_repeat_ngram_size: int = 6
    # Mild penalty (> 1.0) discouraging already-generated tokens.
    repetition_penalty: float = 1.1
37
+
38
+
39
class EndpointHandler:
    """Hugging Face Inference Endpoint handler for a Pixtral-12B vision model.

    Protocol: `__init__(path)` loads the processor and model once at startup;
    `__call__(data)` serves one request containing a base64-encoded image and
    an optional prompt, returning `{"generated_text": ...}`.
    """

    def __init__(self, path: str = ".") -> None:
        """Load the processor and model and prepare generation defaults.

        Args:
            path: Endpoint repository directory. NOTE(review): weights are
                actually pulled from ``BASE_MODEL_ID``, not from ``path``;
                the parameter is kept for handler-protocol compatibility.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info("Initializing EndpointHandler on device: %s", self.device)

        self.processor = AutoProcessor.from_pretrained(
            BASE_MODEL_ID,
            trust_remote_code=True,
        )

        # bf16 on GPU; fall back to fp32 on CPU.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        self.model = LlavaForConditionalGeneration.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
            device_map={"": self.device},
            trust_remote_code=True,
        )
        self.model.eval()
        # BUGFIX: the previous message claimed the model was loaded from
        # `path`, but loading actually uses BASE_MODEL_ID.
        logger.info(
            "Model and processor successfully loaded from '%s'.", BASE_MODEL_ID
        )

        # Ensure a pad token exists (some checkpoints ship without one).
        tokenizer = getattr(self.processor, "tokenizer", None)
        if tokenizer is not None and tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id

        # Collect EOS ids for generate(). BUGFIX: config.eos_token_id may be
        # a list on some configs; accept both int and list forms.
        eos_candidates: List[int] = []
        config_eos = self.model.config.eos_token_id
        if isinstance(config_eos, (list, tuple)):
            eos_candidates.extend(config_eos)
        elif config_eos is not None:
            eos_candidates.append(config_eos)
        if tokenizer is not None and tokenizer.eos_token_id is not None:
            eos_candidates.append(tokenizer.eos_token_id)

        # BUGFIX: de-duplicate with dict.fromkeys to keep a deterministic
        # order — list({...}) made eos_token_ids[0] (the pad fallback below)
        # arbitrary.
        self.eos_token_ids: List[int] = list(dict.fromkeys(eos_candidates))
        if not self.eos_token_ids:
            raise ValueError("No EOS token id found on model or tokenizer.")

        # Pad id resolution order: model config → tokenizer → first EOS id.
        pad_id: Optional[int] = getattr(self.model.config, "pad_token_id", None)
        if pad_id is None and tokenizer is not None:
            pad_id = tokenizer.pad_token_id
        if pad_id is None:
            pad_id = self.eos_token_ids[0]

        self.pad_token_id: int = pad_id

        self.gen_config = GenerationConfig()
        logger.info(
            "Generation config: max_new_tokens=%d, temperature=%.3f",
            self.gen_config.max_new_tokens,
            self.gen_config.temperature,
        )

    @staticmethod
    def _decode_image(image_b64: str) -> Image.Image:
        """Decode a base64 string into an RGB PIL image.

        Raises:
            ValueError: if the payload is not valid base64 or not an image.
        """
        try:
            img_bytes = base64.b64decode(image_b64)
            return Image.open(BytesIO(img_bytes)).convert("RGB")
        except Exception as exc:  # pragma: no cover - log production
            raise ValueError(f"Could not decode base64 image: {exc}") from exc

    def _build_chat_text(self, prompt: str) -> str:
        """Render a single-turn user message (text + one image slot) through
        the processor's chat template, appending the generation prompt."""
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image"},
                ],
            }
        ]
        return self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Serve one inference request.

        Expected payload: ``{"inputs": {"image": <base64>, "prompt": str?,
        "max_new_tokens": int?, "temperature": float?}}``; a flat dict
        without the "inputs" wrapper is also accepted.

        Returns:
            ``{"generated_text": <decoded model output>}``

        Raises:
            ValueError: on a malformed payload or missing/invalid image.
        """
        inputs = data.get("inputs", data)
        # BUGFIX/robustness: a non-dict "inputs" (e.g. a bare string) used to
        # crash with AttributeError on .get(); fail with a clear error instead.
        if not isinstance(inputs, dict):
            raise ValueError("'inputs' must be an object with an 'image' field.")

        prompt: str = inputs.get("prompt") or DEFAULT_PROMPT

        image_b64: Optional[str] = inputs.get("image")
        if image_b64 is None:
            raise ValueError("Missing 'image' field (base64-encoded) in 'inputs'.")

        image = self._decode_image(image_b64)

        # Per-request overrides of the generation defaults.
        max_new_tokens = int(inputs.get("max_new_tokens", self.gen_config.max_new_tokens))
        temperature = float(inputs.get("temperature", self.gen_config.temperature))

        logger.info(
            "Received request: max_new_tokens=%d, temperature=%.3f",
            max_new_tokens,
            temperature,
        )

        chat_text = self._build_chat_text(prompt)

        enc = self.processor(
            text=[chat_text],
            images=[image],
            return_tensors="pt",
            truncation=False,
        )
        enc = {k: v.to(self.device) for k, v in enc.items()}
        # Match the vision tensor dtype to the model's (bf16 on GPU).
        if "pixel_values" in enc:
            enc["pixel_values"] = enc["pixel_values"].to(self.device, dtype=self.model.dtype)

        gen_kwargs: Dict[str, Any] = {
            "max_new_tokens": max_new_tokens,
            "do_sample": temperature > 0.0,  # temperature == 0 → greedy
            "eos_token_id": self.eos_token_ids,
            "pad_token_id": self.pad_token_id,
            "no_repeat_ngram_size": self.gen_config.no_repeat_ngram_size,
            "repetition_penalty": self.gen_config.repetition_penalty,
        }
        if temperature > 0.0:
            gen_kwargs["temperature"] = temperature

        with torch.inference_mode():
            output_ids = self.model.generate(**enc, **gen_kwargs)

        # Strip the prompt tokens; decode only the newly generated part.
        generated_only = output_ids[:, enc["input_ids"].shape[1]:]
        generated_text = self.processor.batch_decode(
            generated_only,
            skip_special_tokens=True,
        )[0].strip()

        logger.info("Generated text: %s", generated_text)

        return {"generated_text": generated_text}