double output
app.py CHANGED
@@ -16,6 +16,7 @@ device = 'cuda'
 
 model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
 model = model.to(device)
+model.load_adapter('theostos/zLlamask', adapter_name="zzlamask")
 tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
 
 prepare_tokenizer(tokenizer)
@@ -27,6 +28,7 @@ def respond(
     max_tokens,
     temperature,
 ):
+
     prompt = f"""<|start_header_id|>system<|end_header_id|>
 
 You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
@@ -34,12 +36,18 @@ def respond(
 <|eot_id|><|start_header_id|>assistant<|end_header_id|>
 """
     model_inputs = generate_custom_mask(tokenizer, [prompt], device)
-
+
+    model.disable_adapters()
     outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
     outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
-
+    result_no_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+    model.enable_adapters()
+    outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
+    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
+    result_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
-    return
+    return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
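
The commit generates twice from the same inputs, once with the zLlamask adapter disabled and once enabled, and returns both completions so the chat output compares the base model against the finetuned one. A few caveats, hedged because LlamaskForCausalLM is a custom class: if its generate() follows the standard transformers signature, the length cap is max_new_tokens rather than max_tokens, temperature only takes effect with do_sample=True, the respond() arguments max_tokens and temperature are shadowed by the hardcoded 32 and 0.7, and batch_decode() returns a list, so the f-string above prints ['...']. A minimal sketch of the function body under those assumptions; the signature's leading parameters and the {message} placeholder are not visible in this diff and are taken from the stock Gradio ChatInterface template:

# Sketch only: assumes LlamaskForCausalLM.generate() matches the standard
# transformers signature and that respond() takes (message, history, ...)
# as in the stock Gradio template. Not the Space's exact code.
def respond(message, history, max_tokens, temperature):
    prompt = f"""<|start_header_id|>system<|end_header_id|>

You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>

{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    model_inputs = generate_custom_mask(tokenizer, [prompt], device)

    def complete():
        out = model.generate(
            do_sample=True,             # temperature is ignored without sampling
            temperature=temperature,    # use the UI value, not a hardcoded 0.7
            max_new_tokens=max_tokens,  # standard transformers name for the cap
            **model_inputs,
        )
        out = out[:, model_inputs['input_ids'].shape[1]:]  # strip prompt tokens
        return tokenizer.batch_decode(out, skip_special_tokens=True)[0]

    model.disable_adapters()            # base weights only
    result_no_ft = complete()

    model.enable_adapters()             # zzlamask adapter back on
    result_ft = complete()

    return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"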
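
The docstring's pointer to the ChatInterface docs suggests the stock template wiring for the sliders that feed max_tokens and temperature. A hypothetical hookup, with labels and ranges as assumptions since they are not shown in this diff:

import gradio as gr

# Hypothetical wiring per the ChatInterface docs linked above; the Space's
# actual sliders and ranges are not part of this commit.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=512, value=32, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()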