Mindify-AI
/

neon-q12-v1-250M

Model card Files Files and versions

xet

Community

MarkChenX committed on Apr 22, 2025

Commit

573941c

verified ·

1 Parent(s): 31b59c5

Update handler.py

Browse files

Files changed (1) hide show

handler.py +47 -8

handler.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import torch
 from model import GPT, GPTConfig
 class EndpointHandler:
@@ -13,7 +14,7 @@ class EndpointHandler:
         checkpoint_path = f"{path}/ckpt.pt"
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
-        # Check if it's a full training checkpoint or a clean state_dict
         if isinstance(checkpoint, dict) and "model" in checkpoint:
             state_dict = checkpoint["model"]
         else:
@@ -26,33 +27,71 @@ class EndpointHandler:
             new_key = key[len(prefix):] if key.startswith(prefix) else key
             cleaned_state_dict[new_key] = val
-        # Load state dict with non-strict to see any mismatches
         missing, unexpected = self.model.load_state_dict(cleaned_state_dict, strict=False)
         if missing:
             print("Warning: missing keys in state_dict:", missing)
         if unexpected:
             print("Warning: unexpected keys in state_dict:", unexpected)
         self.model.eval()
         print("Model loaded and ready.")
     def __call__(self, data):
         """
-        data: {"inputs": {"input_ids": [[int, int, ...]]}}
-        Returns: {"generated_ids": [[...]]}
         """
         try:
-            input_ids = data.get("inputs", {}).get("input_ids")
-            if not input_ids:
-                return {"error": "Missing 'input_ids' in inputs"}
             input_tensor = torch.tensor(input_ids).long()
             with torch.no_grad():
                 output_tensor = self.model.generate(input_tensor, max_new_tokens=32)
                 output_ids = output_tensor.tolist()
-            return {"generated_ids": output_ids}
         except Exception as e:
             return {"error": str(e)}

 import torch
+import tiktoken
 from model import GPT, GPTConfig
 class EndpointHandler:
         checkpoint_path = f"{path}/ckpt.pt"
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
+        # Extract state_dict if wrapped
         if isinstance(checkpoint, dict) and "model" in checkpoint:
             state_dict = checkpoint["model"]
         else:
             new_key = key[len(prefix):] if key.startswith(prefix) else key
             cleaned_state_dict[new_key] = val
+        # Load state dict non-strict to inspect mismatches
         missing, unexpected = self.model.load_state_dict(cleaned_state_dict, strict=False)
         if missing:
             print("Warning: missing keys in state_dict:", missing)
         if unexpected:
             print("Warning: unexpected keys in state_dict:", unexpected)
+        # Ready model
         self.model.eval()
+        # Initialize tokenizer for text inputs
+        self.tokenizer = tiktoken.get_encoding("gpt2")
         print("Model loaded and ready.")
     def __call__(self, data):
         """
+        Accept either:
+          - A raw prompt string (data is str)
+          - A dict: {"inputs": "prompt text"}
+          - A dict: {"inputs": {"input_ids": [[...]]}}
+        Returns:
+          {"generated_ids": [[...]], optional "generated_text": str}
         """
         try:
+            # Determine input format
+            if isinstance(data, str):
+                text = data
+            elif isinstance(data, dict):
+                inputs = data.get("inputs")
+                if isinstance(inputs, str):
+                    text = inputs
+                elif isinstance(inputs, dict) and "input_ids" in inputs:
+                    input_ids = inputs["input_ids"]
+                else:
+                    return {"error": "Invalid 'inputs'; expected string or dict with 'input_ids'"}
+            else:
+                return {"error": "Invalid request format"}
+            # If text prompt given, tokenize
+            if 'text' in locals():
+                # encode text into token IDs
+                tokens = self.tokenizer.encode(text)
+                input_ids = [tokens]
+            # Convert to tensor
             input_tensor = torch.tensor(input_ids).long()
+            # Generate
             with torch.no_grad():
                 output_tensor = self.model.generate(input_tensor, max_new_tokens=32)
                 output_ids = output_tensor.tolist()
+            # Build response
+            result = {"generated_ids": output_ids}
+            if 'text' in locals():
+                # Decode the first sequence
+                generated_tokens = output_ids[0]
+                try:
+                    generated_text = self.tokenizer.decode(generated_tokens)
+                except Exception:
+                    generated_text = None
+                if generated_text is not None:
+                    result["generated_text"] = generated_text
+            return result
         except Exception as e:
             return {"error": str(e)}