| | import torch |
| | from datasets import Dataset |
| | from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig |
| | from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training |
# Hugging Face Hub id of the base checkpoint that gets fine-tuned below.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 4-bit NF4 quantization with double quantization (QLoRA-style loading).
# The compute dtype is bfloat16 so that it agrees with the `bf16=True`
# precision requested in the TrainingArguments further down in this script;
# mixing a float16 compute dtype with bf16 training forces dtype round-trips
# inside the quantized linear layers.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the quantized base model; `device_map="auto"` delegates device
# placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
| |
|
# --- Model preparation for k-bit (quantized) training ----------------------
# `use_cache` is switched off on the model config and gradient checkpointing
# is enabled before the PEFT helper prepares the quantized model.
model.config.use_cache = False
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# --- Tokenizer --------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# The EOS token doubles as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# --- LoRA adapters ----------------------------------------------------------
# Rank-8 adapters on the query/value projections only, no bias training.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared base model so it carries the LoRA adapters configured above.
model = get_peft_model(model, lora_config)
| | import pandas as pd |
| | from datasets import Dataset |
| |
|
| |
|
| | |
# Read the support log and rename its columns to the names the prompt
# formatter expects ("query" -> "instruction", "response" -> "output").
df = pd.read_csv("Customer-Support.csv").rename(
    columns={"query": "instruction", "response": "output"}
)

# Keep only the two prompt columns, replace missing cells with empty strings,
# and turn the frame into a list of {"instruction": ..., "output": ...} dicts.
data = (
    df[["instruction", "output"]]
    .fillna("")
    .to_dict(orient="records")
)

# Build the Hugging Face dataset from the row dictionaries.
dataset = Dataset.from_list(data)
| |
|
| |
|
| | |
def format_instruction(example):
    """Render one record as an instruction/response training prompt.

    Args:
        example: Mapping with the keys ``"instruction"`` and ``"output"``.

    Returns:
        The text ``"### Instruction:\\n<instruction>\\n\\n### Response:\\n<output>"``.

    Raises:
        KeyError: If either key is missing from ``example``.
    """
    instruction = example["instruction"]
    output = example["output"]
    # A missing value must not leak into the prompt as the literal text "None".
    if instruction is None:
        instruction = ""
    if output is None:
        output = ""
    return f"### Instruction:\n{instruction}\n\n### Response:\n{output}"
| |
|
| |
|
| | |
def _add_text_column(row):
    """Return the extra ``text`` column holding the formatted prompt for ``row``."""
    return {"text": format_instruction(row)}


# Attach the formatted prompt to every record (one row at a time).
dataset = dataset.map(_add_text_column)
| |
|
def tokenize_function(example):
    """Tokenize formatted prompts and build loss labels that skip padding.

    ``example["text"]`` may be a single string or a list of strings (the
    latter is what ``Dataset.map(..., batched=True)`` passes in). Every
    sequence is truncated / padded to 512 tokens.

    Labels start out as a copy of ``input_ids``. Positions whose attention
    mask is 0 are then set to -100, the label value the Hugging Face
    causal-LM loss ignores, so the padding no longer dominates the loss.
    This script pads with the EOS token, so the first padding position after
    the real tokens (right padding only) keeps its EOS label: that single
    target is what teaches the model to stop after the response.

    Returns:
        The tokenizer output with an added ``"labels"`` entry shaped like
        ``"input_ids"``.
    """
    tokenized = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    ignore_index = -100
    eos_id = tokenizer.eos_token_id

    def build_labels(ids, mask):
        # Copy the ids, blanking out every position the attention mask hides.
        labels = [tok if keep else ignore_index for tok, keep in zip(ids, mask)]
        real_len = sum(mask)
        # Right padding <=> the attended tokens form a prefix of the sequence.
        is_right_padded = real_len < len(ids) and all(mask[:real_len])
        if is_right_padded and ids[real_len] == eos_id:
            # Keep exactly one end-of-sequence target after the real tokens.
            labels[real_len] = eos_id
        return labels

    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]
    if input_ids and isinstance(input_ids[0], list):
        # Batched call: one id list per example.
        tokenized["labels"] = [
            build_labels(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single example: a flat list of token ids.
        tokenized["labels"] = build_labels(input_ids, attention_mask)
    return tokenized
| |
|
# Tokenize the whole dataset in batches.
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Optimisation schedule: 3 epochs, per-device batch of 2 with 4 accumulation
# steps, bf16 precision and the paged 8-bit AdamW optimizer.
training_args = TrainingArguments(
    output_dir="./tinyllama-qlora-support-bot",
    logging_dir="./logs",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Persist the trained model first, then the tokenizer, into the same folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained("tinyllama-qlora-support-bot")
| | from transformers import pipeline |
| |
|
# Switch from training to inference behaviour before generating:
# - eval() turns off dropout layers (the LoRA adapters were configured with
#   lora_dropout=0.05), which would otherwise add noise to every generation
#   if the model is still in training mode after trainer.train();
# - use_cache was set to False on the model config for training; turn it back
#   on so generation is not run with the cache flag disabled.
model.eval()
model.config.use_cache = True

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Quick smoke test using the same prompt template as the training data.
instruction = "How do I update the app?"
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

output = pipe(prompt, max_new_tokens=100)
print(output[0]['generated_text'])
| | import gradio as gr |
| |
|
def generate_response(instruction):
    """Generate the support bot's answer to a single user question.

    The question is wrapped in the instruction/response prompt template,
    sampled from the module-level ``pipe`` (up to 100 new tokens,
    temperature 0.7), and only the model's answer is returned.

    Args:
        instruction: The user's question as plain text.

    Returns:
        The generated answer with the prompt removed and surrounding
        whitespace stripped.
    """
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
    generated = output[0]["generated_text"]

    if generated.startswith(prompt):
        # Drop exactly the prompt; splitting on the marker and taking the
        # last piece would discard the real answer whenever the model (or
        # the user's question) repeats "### Response:".
        completion = generated[len(prompt):]
    else:
        # Fallback if the prompt is not echoed verbatim: take what follows
        # the first response marker, or the whole text if there is none.
        _, marker, tail = generated.partition("### Response:\n")
        completion = tail if marker else generated

    # If the model keeps going and invents a follow-up turn, keep only the
    # answer to the question that was actually asked.
    completion = completion.split("### Instruction:", 1)[0]
    return completion.strip()
| |
|
# Input and output widgets for the demo page.
question_box = gr.Textbox(lines=3, label="Enter your question")
answer_box = gr.Textbox(lines=5, label="Support Bot's Response")

# NOTE(review): the leading "π" in the title looks like a mis-decoded emoji —
# confirm the intended character.
demo = gr.Interface(
    fn=generate_response,
    inputs=question_box,
    outputs=answer_box,
    title="π Customer Support Chatbot",
    description="Ask a question and get a response from your fine-tuned TinyLLaMA model.",
)
demo.launch()
| |
|