parallelstudios
/

mpt-7b-instruct-parallel-colony-memory-importance-ft

@@ -3,23 +3,44 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Any, Dict
-class EndpointHandler:
-    def __init__(self, path='', torch_dtype=torch.bfloat16, trust_remote_code=True):
         self.model = AutoModelForCausalLM.from_pretrained(
             path,
             torch_dtype=torch_dtype,
             trust_remote_code=trust_remote_code
         )
-        tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
         if tokenizer.pad_token_id is None:
             warnings.warn(
                 "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
             )
             tokenizer.pad_token = tokenizer.eos_token
-        tokenizer.padding_side = "right" # "left"
         self.tokenizer = tokenizer
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -39,21 +60,7 @@ class EndpointHandler:
         }
     def format_instruction(self, instruction):
-        INSTRUCTION_KEY = "### Instruction:"
-        RESPONSE_KEY = "### Response:"
-        END_KEY = "### End"
-        INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
-        PROMPT_FOR_GENERATION_FORMAT = """{intro}
-        {instruction_key}
-        {instruction}
-        {response_key}
-        """.format(
-            intro=INTRO_BLURB,
-            instruction_key=INSTRUCTION_KEY,
-            instruction="{instruction}",
-            response_key=RESPONSE_KEY,
-        )
-        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
         # process input
@@ -61,7 +68,7 @@ class EndpointHandler:
         parameters = data.pop("parameters", None)
         # preprocess
-        s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=inputs)
         input_ids = self.tokenizer(s, return_tensors="pt").input_ids.to(self.device)
         gkw = {**self.generate_kwargs, **parameters}
         # pass inputs with all kwargs in data

 from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Any, Dict
+class InstructionTextGenerationPipeline:
+    # Building blocks of the instruction prompt. They are class attributes, so
+    # the template below is assembled once, when the class body executes.
+    INSTRUCTION_KEY = "### Instruction:"
+    RESPONSE_KEY = "### Response:"
+    # Not referenced in the lines visible in this diff; presumably an
+    # end-of-response marker -- TODO confirm against the rest of the file.
+    END_KEY = "### End"
+    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
+    # Two-stage formatting: the .format() call below fills in the fixed pieces
+    # now and re-inserts a literal "{instruction}" placeholder, so the result
+    # can be formatted again later with the actual instruction text.
+    # NOTE(review): the continuation lines of this triple-quoted string are
+    # indented as shown in this diff; any such leading spaces are part of the
+    # string and therefore of the prompt -- confirm that this is intended.
+    PROMPT_FOR_GENERATION_FORMAT = """{intro}
+    {instruction_key}
+    {instruction}
+    {response_key}
+    """.format(
+        intro=INTRO_BLURB,
+        instruction_key=INSTRUCTION_KEY,
+        instruction="{instruction}",
+        response_key=RESPONSE_KEY,
+    )
+    def __init__(
+        self,
+        path,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+    ) -> None:
         self.model = AutoModelForCausalLM.from_pretrained(
             path,
             torch_dtype=torch_dtype,
             trust_remote_code=trust_remote_code
         )
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mosaicml/mpt-7b-instruct",
+            trust_remote_code=trust_remote_code
+        )
         if tokenizer.pad_token_id is None:
             warnings.warn(
                 "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
             )
             tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "right"
         self.tokenizer = tokenizer
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         }
     def format_instruction(self, instruction):
+        """Wrap ``instruction`` in the class-level prompt template.
+
+        Substitutes ``instruction`` for the ``{instruction}`` placeholder of
+        ``PROMPT_FOR_GENERATION_FORMAT`` and returns the resulting string.
+        """
+        return self.PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
         # process input
         parameters = data.pop("parameters", None)
         # preprocess
+        s = self.format_instruction(instruction=inputs)
         input_ids = self.tokenizer(s, return_tensors="pt").input_ids.to(self.device)
         gkw = {**self.generate_kwargs, **parameters}
         # pass inputs with all kwargs in data