Create handle.py
handle.py ADDED
@@ -0,0 +1,58 @@
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch

class EndpointHandler():
    def __init__(self, path=""):
        # Load the model and tokenizer during initialization
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(path).to("cuda")
        self.model.eval()  # Set the model to evaluation mode

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            messages (:obj:`List[Dict[str, Any]]`): A list of dictionaries representing the conversation messages.
        Return:
            A list containing the responses generated by the model.
        """
        # Extract messages from input
        messages = data.pop("messages", data)

        # Apply chat template to messages and tokenize
        inputs = self.tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to("cuda")

        # Use TextStreamer to print the text in a streaming fashion as it is generated
        text_streamer = TextStreamer(self.tokenizer)
        # Generate response from the model
        outputs = self.model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=6048,
            use_cache=True
        )

        # TextStreamer only prints the text as it streams and does not capture it,
        # so decode the newly generated tokens (everything after the prompt) for the response
        response = {"generated_text": self.tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)}

        return [response]

# Example to test the EndpointHandler locally
if __name__ == "__main__":
    handler = EndpointHandler(path="ChevalierJoseph/typtop4")

    # Example conversation (ShareGPT-style "from"/"value" keys; this assumes the
    # model's chat template accepts that format rather than "role"/"content")
    messages = [
        {"from": "human", "value": "Based on the following text, give me the svgpath of the glyphs from A to Z.\nI want a classic LINEAL font"},
    ]

    # Simulate a request to the endpoint
    response = handler({"messages": messages})
    print(response)
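
Once deployed as a custom inference endpoint, the handler receives the same {"messages": [...]} payload over HTTP. Below is a minimal client-side sketch; the endpoint URL and token are hypothetical placeholders, not values from this repository:

import requests

# Hypothetical values; replace with your own endpoint URL and access token
API_URL = "https://my-endpoint.endpoints.huggingface.cloud"
HEADERS = {"Authorization": "Bearer <hf-token>"}

# The payload mirrors the structure EndpointHandler.__call__ expects
payload = {
    "messages": [
        {"from": "human", "value": "I want a classic LINEAL font"}
    ]
}

response = requests.post(API_URL, headers=HEADERS, json=payload)
print(response.json())  # e.g. [{"generated_text": "..."}]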