b-mc2/sql-create-context
Viewer • Updated • 78.6k • 4.08k • 498
How to use delayedkarma/mistral-7b-text-to-sql with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM
base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
model = PeftModel.from_pretrained(base_model, "delayedkarma/mistral-7b-text-to-sql")import torch
from transformers import AutoTokenizer, pipeline
from datasets import load_dataset
from peft import AutoPeftModelForCausalLM
from random import randint
peft_model_id = "delayedkarma/mistral-7b-text-to-sql"
# Load Model with PEFT adapter
model = AutoPeftModelForCausalLM.from_pretrained(
peft_model_id,
device_map="auto",
torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# load into pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Load dataset and Convert dataset to OAI messages
system_message = """You are a text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.
SCHEMA:
{schema}"""
def create_conversation(sample):
return {
"messages": [
{"role": "system", "content": system_message.format(schema=sample["context"])},
{"role": "user", "content": sample["question"]},
{"role": "assistant", "content": sample["answer"]}
]
}
# Load dataset from the hub
dataset = load_dataset("b-mc2/sql-create-context", split="train")
dataset = dataset.shuffle().select(range(100))
# Convert dataset to OAI messages
dataset = dataset.map(create_conversation, remove_columns=dataset.features, batched=False)
dataset = dataset.train_test_split(test_size=20/100)
# Evaluate
eval_dataset = dataset['test']
rand_idx = randint(0, len(eval_dataset))
# Test on sample
prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
The following hyperparameters were used during training:
Base model
mistralai/Mistral-7B-v0.1
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1") model = PeftModel.from_pretrained(base_model, "delayedkarma/mistral-7b-text-to-sql")