sreejith8100 commited on
Commit
856dd67
·
verified ·
1 Parent(s): b6f80b8

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +52 -22
handler.py CHANGED
@@ -6,26 +6,40 @@ from io import BytesIO
6
  import base64
7
  import ssl
8
  import urllib3
9
-
10
  urllib3.disable_warnings()
11
  ssl._create_default_https_context = ssl._create_unverified_context
12
 
13
- class EndpointHandler:
14
- def __init__(self, path=""):
15
- model_name = "openbmb/MiniCPM-V-2_6-int4"
 
 
 
 
16
  self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
17
  self.model = AutoModel.from_pretrained(
18
  model_name,
19
  trust_remote_code=True,
20
- # Explicitly disable 4-bit loading
21
- device_map="auto"
22
- ).eval()
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- def __call__(self, data):
25
- image_input = data.get("image")
26
- question = data.get("question", "What is in this image?")
27
  if not image_input:
28
- return {"error": "Image is required."}
29
 
30
  try:
31
  if image_input.startswith("http"):
@@ -36,21 +50,37 @@ class EndpointHandler:
36
  except Exception as e:
37
  return {"error": f"Failed to load image: {e}"}
38
 
39
- msgs = [{"role": "user", "content": question}]
40
- result_text = ""
41
 
42
  try:
43
- with torch.no_grad():
44
- for chunk in self.model.chat(
45
- image=image,
 
46
  msgs=msgs,
47
  tokenizer=self.tokenizer,
48
- stream=True,
49
- max_new_tokens=128,
50
- temperature=0.3
51
  ):
52
- result_text += chunk
 
 
 
 
 
 
 
 
53
  except Exception as e:
54
- return {"error": f"Model inference failed: {e}"}
 
55
 
56
- return {"output": result_text}
 
 
 
 
 
 
 
 
 
6
  import base64
7
  import ssl
8
  import urllib3
 
9
  urllib3.disable_warnings()
10
  ssl._create_default_https_context = ssl._create_unverified_context
11
 
12
class ModelHandler:
    """Handler for MiniCPM-V-2.6 visual question answering.

    Construction is cheap: the heavyweight model/tokenizer objects are
    only created when load_model() is called.
    """

    def __init__(self):
        # Both are populated by load_model(); they stay None until then.
        self.tokenizer = None
        self.model = None
16
+
17
+ def load_model(self):
18
+ model_name = "openbmb/MiniCPM-V-2_6"
19
  self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
20
+
21
  self.model = AutoModel.from_pretrained(
22
  model_name,
23
  trust_remote_code=True,
24
+ attn_implementation="sdpa",
25
+ torch_dtype=torch.bfloat16
26
+ ).eval().cuda()
27
+
28
+ def predict(self, request):
29
+ """
30
+ Expected request format:
31
+ {
32
+ "image": "<url or base64 string>",
33
+ "question": "What is shown in the image?",
34
+ "stream": false (optional)
35
+ }
36
+ """
37
+ image_input = request.get("image")
38
+ question = request.get("question", "What is in the image?")
39
+ stream = request.get("stream", False)
40
 
 
 
 
41
  if not image_input:
42
+ return {"error": "Image input is required."}
43
 
44
  try:
45
  if image_input.startswith("http"):
 
50
  except Exception as e:
51
  return {"error": f"Failed to load image: {e}"}
52
 
53
+ msgs = [{"role": "user", "content": [image, question]}]
 
54
 
55
  try:
56
+ if stream:
57
+ generated_text = ""
58
+ for new_text in self.model.chat(
59
+ image=None,
60
  msgs=msgs,
61
  tokenizer=self.tokenizer,
62
+ sampling=True,
63
+ stream=True
 
64
  ):
65
+ generated_text += new_text
66
+ return {"output": generated_text}
67
+ else:
68
+ output = self.model.chat(
69
+ image=None,
70
+ msgs=msgs,
71
+ tokenizer=self.tokenizer
72
+ )
73
+ return {"output": output}
74
  except Exception as e:
75
+ return {"error": f"Inference failed: {e}"}
76
+
77
 
78
# Test block (optional, remove in production)
if __name__ == "__main__":
    # Smoke-test the handler end to end with a public sample image.
    demo_request = {
        "image": "https://upload.wikimedia.org/wikipedia/commons/9/9e/Ours_brun_parcanimalierpyrenees_1.jpg",
        "question": "What animal is this?",
    }
    handler = ModelHandler()
    handler.load_model()
    result = handler.predict(demo_request)
    print(result)