mrm8488 committed
Commit 463b96d · verified · 1 parent: 56a7af3

Update README.md

Files changed (1)
  1. README.md +40 -59
README.md CHANGED
@@ -63,76 +63,57 @@ TBD
 ```py
 from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria

 tokenizer = AutoTokenizer.from_pretrained("mrm8488/mistral-7b-ft-AgentInstruct")
-model = AutoModelForCausalLM.from_pretrained("mrm8488/mistral-7b-ft-AgentInstruct")

 class MyStoppingCriteria(StoppingCriteria):
-    def __init__(self, target_sequence, prompt):
-        self.target_sequence = target_sequence
-        self.prompt = prompt
-
-    def __call__(self, input_ids, scores, **kwargs):
-        # Get the generated text as a string
-        generated_text = tokenizer.decode(input_ids[0])
-        generated_text = generated_text.replace(self.prompt, '')
-        # Check if the target sequence appears in the generated text
-        if self.target_sequence in generated_text:
-            return True  # Stop generation
-        return False  # Continue generation
-
-    def __len__(self):
-        return 1
-
-    def __iter__(self):
-        yield self
-
-def generate(
-    context,
-    max_new_tokens=256,
-    min_new_tokens=64,
-    temperature=0.3,
-    top_p=0.75,
-    top_k=40,
-    do_sample=False,
-    num_beams=2,
-    **kwargs,
-):
-    prompt = context
-    # print(prompt)
-    inputs = tokenizer(prompt, return_tensors="pt")
     input_ids = inputs["input_ids"].to("cuda")
     attention_mask = inputs["attention_mask"].to("cuda")
-    generation_config = GenerationConfig(
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        do_sample=do_sample,
-        num_beams=num_beams,
-        **kwargs,
-    )
     with torch.no_grad():
-        generation_output = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            # generation_config=generation_config,
-            do_sample=True,
-            return_dict_in_generate=True,
-            output_scores=True,
-            max_new_tokens=max_new_tokens,
-            min_new_tokens=min_new_tokens,
-            early_stopping=False,
-            use_cache=True,
-            stopping_criteria=MyStoppingCriteria("### human:", prompt)
-        )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s)
     return output

 human = """### human: Among the reference ID of under 10 who got response by marketing department, compare their education status.
 There are 2 tables involved with this task. The name of the 1st table is Customers, and the headers of this table are ID,SEX,MARITAL_STATUS,GEOID,EDUCATIONNUM,OCCUPATION,age. The name of the 2nd table is Mailings1_2, and the headers of this table are REFID,REF_DATE,RESPONSE."""
-context = context + '\n' + human

 solution = generate(context)
 print(solution)
 ```
 ```py
 from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria
+from transformers import StoppingCriteriaList
+import torch

+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("mrm8488/mistral-7b-ft-AgentInstruct")
+model = AutoModelForCausalLM.from_pretrained("mrm8488/mistral-7b-ft-AgentInstruct").to("cuda")

 class MyStoppingCriteria(StoppingCriteria):
+    def __init__(self, target_sequence, prompt):
+        self.target_sequence = target_sequence
+        self.prompt = prompt
+
+    def __call__(self, input_ids, scores, **kwargs):
+        # Decode without the prompt and check for the target sequence
+        generated_text = tokenizer.decode(input_ids[0]).replace(self.prompt, "")
+        return self.target_sequence in generated_text
+
+def generate(context, max_new_tokens=256, min_new_tokens=64, temperature=0.3,
+             top_p=0.75, top_k=40, do_sample=True, num_beams=2):
+    # Prepare input data
+    inputs = tokenizer(context, return_tensors="pt")
     input_ids = inputs["input_ids"].to("cuda")
     attention_mask = inputs["attention_mask"].to("cuda")
+
+    # Generation settings (return_dict_in_generate is required for .sequences below)
+    generation_settings = {
+        "max_new_tokens": max_new_tokens,
+        "min_new_tokens": min_new_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "do_sample": do_sample,
+        "num_beams": num_beams,
+        "early_stopping": False,
+        "use_cache": True,
+        "return_dict_in_generate": True,
+        "stopping_criteria": StoppingCriteriaList([MyStoppingCriteria("### human:", context)]),
+    }
+
+    # Generate response
     with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids, attention_mask=attention_mask, **generation_settings
+        )
+
+    output = tokenizer.decode(generation_output.sequences[0])
     return output

+# Example usage
 human = """### human: Among the reference ID of under 10 who got response by marketing department, compare their education status.
 There are 2 tables involved with this task. The name of the 1st table is Customers, and the headers of this table are ID,SEX,MARITAL_STATUS,GEOID,EDUCATIONNUM,OCCUPATION,age. The name of the 2nd table is Mailings1_2, and the headers of this table are REFID,REF_DATE,RESPONSE."""
+context = human

 solution = generate(context)
 print(solution)
 ```
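
One caveat when reusing the snippet above: `MyStoppingCriteria` only fires once the `### human:` marker has already been generated, so the decoded output still contains the echoed prompt plus the trailing marker. Below is a minimal post-processing sketch; the `extract_reply` helper is hypothetical (not part of the model card), and the exact-string replace is approximate, since decoding can alter whitespace.

```py
# Hypothetical post-processing helper (an assumption, not part of the README):
# the stopping criterion fires only after "### human:" has been generated, so
# the decoded output still carries the echoed prompt and the stop marker.
def extract_reply(output: str, prompt: str, stop_marker: str = "### human:") -> str:
    reply = output.replace(prompt, "", 1)          # drop the echoed prompt
    return reply.split(stop_marker, 1)[0].strip()  # cut at the first stop marker

print(extract_reply(solution, context))
```

On memory-constrained GPUs, loading the weights in half precision (passing `torch_dtype=torch.float16` to `from_pretrained`) roughly halves the footprint of the 7B parameters relative to the default float32.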