d-s-b committed on
Commit
7c170e5
·
verified ·
1 Parent(s): 8a8aa76

adding a handler file

Browse files
Files changed (1) hide show
  1. handler.py +53 -0
handler.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# handler.py
from typing import Dict, List, Any
import torch

class EndpointHandler:
    """Hugging Face Inference Endpoints handler for a causal-LM storyteller.

    Loads a tokenizer and fp16 causal language model from ``path`` and, on
    each call, formats the request into a fixed story-writing prompt,
    generates a completion, and returns the text after the response marker.
    """

    def __init__(self, path=""):
        # Local import keeps module import cheap; transformers is only
        # needed when the endpoint actually instantiates the handler.
        from transformers import AutoModelForCausalLM, AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # device_map="auto" may place the model on GPU; inputs must be
        # moved to self.model.device before generate() (see __call__).
        self.model = AutoModelForCausalLM.from_pretrained(
            path, torch_dtype=torch.float16, device_map="auto"
        )

        # Prompt template; {} is filled with the user's prompt text.
        # NOTE: the trailing "<think>" primes the model's reasoning style —
        # keep byte-identical to the training-time template.
        self.inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Create an engaging and educational story that combines whimsical elements with real-world facts.

### Instruction:
You are a creative storyteller who specializes in writing whimsical children's stories that incorporate educational facts about the real world.
Please create a story based on the following prompt.

### Prompt:
{}

### Response:
<think>
"""

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: Request payload; expects ``"inputs"`` (the story prompt)
                and optionally ``"parameters"`` (extra ``generate`` kwargs).

        Returns:
            A one-element list ``[{"generated_text": ...}]`` containing the
            text generated after the ``### Response:`` marker.
        """
        # Fall back to the whole payload if no "inputs" key is present.
        question = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # BUGFIX: pop (not get) — otherwise a caller-supplied
        # "max_new_tokens" would be passed to generate() twice (once
        # explicitly, once via **parameters) and raise TypeError.
        max_new_tokens = parameters.pop("max_new_tokens", 1200)

        # Format prompt (eos_token appended to match the training format).
        prompt = self.inference_prompt_style.format(question) + self.tokenizer.eos_token

        # BUGFIX: move tensors to the model's device — with
        # device_map="auto" the model is typically on GPU while the
        # tokenizer output defaults to CPU.
        inputs = self.tokenizer([prompt], return_tensors="pt").to(self.model.device)

        outputs = self.model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            eos_token_id=self.tokenizer.eos_token_id,
            use_cache=True,
            **parameters,
        )

        # Decode full sequence (prompt + completion) and keep only the part
        # after the response marker; the marker is always present because
        # the echoed prompt contains it.
        response = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        result = response[0].split("### Response:")[1]

        return [{"generated_text": result}]