kangqiao-ctrl
/

mistral_pathfinder

Safetensors

Model card Files Files and versions

xet

Community

kangqiao-ctrl committed on Nov 1, 2023

Commit

08c0847

1 Parent(s): 5be07b8

Update handler.py

Browse files

Files changed (1) hide show

handler.py +37 -60

handler.py CHANGED Viewed

@@ -1,67 +1,44 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-import peft
-from peft import PeftModel
-# def run():
-#     base_model_id = "mistralai/Mistral-7B-v0.1"
-#     bnb_config = BitsAndBytesConfig(
-#         load_in_4bit=True,
-#         bnb_4bit_use_double_quant=True,
-#         bnb_4bit_quant_type="nf4",
-#         bnb_4bit_compute_dtype=torch.bfloat16
-#     )
-#     base_model = AutoModelForCausalLM.from_pretrained(
-#         base_model_id,  # Mistral, same as before
-#         quantization_config=bnb_config,  # Same quantization config as before
-#         device_map="auto",
-#         trust_remote_code=True,
-#        # use_auth_token=True
-#     )
-#     tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
-#     ft_model = PeftModel.from_pretrained(base_model, "./checkpoint-100")
-#     return ft_model
 class EndpointHandler():
     def __init__(self, path=""):
-        base_model_id = "mistralai/Mistral-7B-v0.1"
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16
-        )
-        base_model = AutoModelForCausalLM.from_pretrained(
-            base_model_id,  # Mistral, same as before
-            quantization_config=bnb_config,  # Same quantization config as before
-            device_map="auto",
-            trust_remote_code=True,
-           # use_auth_token=True
-        )
-        tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
-        self.ft_model = PeftModel.from_pretrained(base_model, "./checkpoint-100")
-    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        eval_prompt = data.get("inputs", data)
-        model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
-        ft_model.eval()
-        with torch.no_grad():
-            prediction = (tokenizer.decode(self.ft_model.generate(**model_input, max_new_tokens=100, repetition_penalty=1.15)[0], skip_special_tokens=True))
-        return prediction

+from typing import Dict, Any
+import logging
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftConfig, PeftModel
+import torch.cuda
+LOGGER = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+device = "cuda" if torch.cuda.is_available() else "cpu"
 class EndpointHandler():
     def __init__(self, path=""):
+        config = PeftConfig.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, load_in_8bit=True, device_map='auto')
+        self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+        # Load the Lora model
+        self.model = PeftModel.from_pretrained(model, path)
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Args:
+            data (Dict): The payload with the text prompt and generation parameters.
+        """
+        LOGGER.info(f"Received data: {data}")
+        # Get inputs
+        prompt = data.pop("inputs", None)
+        parameters = data.pop("parameters", None)
+        if prompt is None:
+            raise ValueError("Missing prompt.")
+        # Preprocess
+        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+        # Forward
+        LOGGER.info(f"Start generation.")
+        if parameters is not None:
+            output = self.model.generate(input_ids=input_ids, **parameters)
+        else:
+            output = self.model.generate(input_ids=input_ids)
+        # Postprocess
+        prediction = self.tokenizer.decode(output[0])
+        LOGGER.info(f"Generated text: {prediction}")
+        return {"generated_text": prediction}