Create handler.py
handler.py +56 -0
handler.py
ADDED
@@ -0,0 +1,56 @@
from typing import Dict, List, Any

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


class EndpointHandler:
    def __init__(self, path=""):
        """
        Initialize the model and tokenizer from the local path.
        Uses Zenith Coder v1.1 custom code (modeling_deepseek.py,
        configuration_deepseek.py, tokenization_deepseek_fast.py).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
        )
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Accepts a dictionary with the prompt under `inputs` (or `prompt`) and
        optional sampling parameters (`max_new_tokens`, `temperature`, `top_p`,
        `top_k`). Returns the generated text.
        """
        prompt = data.get("inputs") or data.get("prompt")
        if not prompt or not isinstance(prompt, str):
            return [{"error": "No valid input prompt provided."}]

        max_new_tokens = int(data.get("max_new_tokens", 256))
        temperature = float(data.get("temperature", 1.0))
        top_p = float(data.get("top_p", 0.95))
        top_k = int(data.get("top_k", 50))

        # Tokenize and move inputs to the model's device; this also carries the
        # attention mask along, which model.generate expects for padded inputs.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            generated_ids = self.model.generate(
                **encoded,
                do_sample=True,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                # Fall back to the EOS token when the tokenizer defines no pad token.
                pad_token_id=self.tokenizer.pad_token_id
                if self.tokenizer.pad_token_id is not None
                else self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens, skipping the prompt.
        gen_text = self.tokenizer.decode(
            generated_ids[0][encoded["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )
        return [{"generated_text": gen_text}]
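For a quick local smoke test before deploying the endpoint, the handler can be invoked directly with the same payload shape it accepts in production. The snippet below is a minimal sketch, not part of this commit: the model path ./zenith-coder-v1.1 and the example prompt are assumptions, standing in for wherever the repository (including its custom code files) has been downloaded.

    # Local smoke test for EndpointHandler (sketch; model path is hypothetical).
    from handler import EndpointHandler

    handler = EndpointHandler(path="./zenith-coder-v1.1")  # hypothetical local checkout
    result = handler({
        "inputs": "Write a Python function that reverses a string.",
        "max_new_tokens": 128,
        "temperature": 0.7,
    })
    # On success the list holds {"generated_text": ...}; on a bad payload, {"error": ...}.
    print(result[0].get("generated_text") or result[0].get("error"))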