File size: 3,506 Bytes
917310d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
model.config.use_cache = False
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
import pandas as pd
from datasets import Dataset


# Load data from CSV
df = pd.read_csv("Customer-Support.csv")


# Rename columns to match expected keys
df = df.rename(columns={"query": "instruction", "response": "output"})


# Select required columns
data = df[["instruction", "output"]].fillna("")


# Convert DataFrame to list of dictionaries
data = data.to_dict(orient="records")


# Create Hugging Face Dataset
dataset = Dataset.from_list(data)


# Format each example
def format_instruction(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"


# Map formatted text
dataset = dataset.map(lambda x: {"text": format_instruction(x)})

def tokenize_function(example):
    tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True)
training_args = TrainingArguments(
    output_dir="./tinyllama-qlora-support-bot",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_dir="./logs",
    num_train_epochs=3,
    logging_steps=10,
    save_total_limit=2,
    save_strategy="epoch",
    bf16=True,
    optim="paged_adamw_8bit"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()
model.save_pretrained("tinyllama-qlora-support-bot")
tokenizer.save_pretrained("tinyllama-qlora-support-bot")
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

instruction = "How do I update the app?"
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

output = pipe(prompt, max_new_tokens=100)
print(output[0]['generated_text'])
import gradio as gr

def generate_response(instruction):
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
    # Extract only the response part
    response = output[0]["generated_text"].split("### Response:\n")[-1].strip()
    return response

gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=3, label="Enter your question"),
    outputs=gr.Textbox(lines=5, label="Support Bot's Response"),
    title="πŸ“ž Customer Support Chatbot",
    description="Ask a question and get a response from your fine-tuned TinyLLaMA model.",
).launch()