"""Fine-tune TinyLlama-1.1B with QLoRA on a customer-support CSV, then
serve the tuned model through a Gradio chat interface.

Pipeline: 4-bit (NF4) quantized base model -> LoRA adapters on the
attention projections -> causal-LM fine-tuning on instruction/response
pairs -> text-generation demo.
"""

import pandas as pd
import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    pipeline,
)

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Pick ONE compute dtype and use it consistently for both the quantized
# matmuls and the trainer's mixed precision. The original mixed fp16
# quantization compute with bf16 training, which wastes casts and makes
# bf16=True fail outright on pre-Ampere GPUs.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
compute_dtype = torch.bfloat16 if use_bf16 else torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Llama tokenizers ship without a pad token; reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token

model.config.use_cache = False  # KV cache is incompatible with checkpointing
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# --------------------------------------------------------------- data -----
df = pd.read_csv("Customer-Support.csv")
# Normalize CSV headers to the instruction-tuning keys used below.
df = df.rename(columns={"query": "instruction", "response": "output"})
records = df[["instruction", "output"]].fillna("").to_dict(orient="records")
dataset = Dataset.from_list(records)


def format_instruction(example):
    """Render one instruction/response pair as a training prompt.

    The text is terminated with the tokenizer's EOS token so the model
    learns where a response ends (the original omitted it, so the tuned
    model had no learned stopping point).
    """
    return (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Response:\n{example['output']}{tokenizer.eos_token}"
    )


dataset = dataset.map(lambda x: {"text": format_instruction(x)})


def tokenize_function(batch):
    """Tokenize a batch to a fixed length and build LM labels.

    Padded positions are masked with -100 so the cross-entropy loss
    ignores them; the original copied input_ids verbatim, which trained
    the model on pad tokens (pad == EOS here) and biased generation.
    """
    tokenized = tokenizer(
        batch["text"], truncation=True, padding="max_length", max_length=512
    )
    tokenized["labels"] = [
        [tok if keep == 1 else -100 for tok, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized


# Drop the raw text columns so only model inputs reach the Trainer.
tokenized_dataset = dataset.map(
    tokenize_function, batched=True, remove_columns=dataset.column_names
)

# ----------------------------------------------------------- training -----
training_args = TrainingArguments(
    output_dir="./tinyllama-qlora-support-bot",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_dir="./logs",
    num_train_epochs=3,
    logging_steps=10,
    save_total_limit=2,
    save_strategy="epoch",
    bf16=use_bf16,        # match the quantization compute dtype
    fp16=not use_bf16,
    optim="paged_adamw_8bit",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Saves only the LoRA adapter weights (PEFT model), not the full base model.
model.save_pretrained("tinyllama-qlora-support-bot")
tokenizer.save_pretrained("tinyllama-qlora-support-bot")

# ---------------------------------------------------------- inference -----
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

instruction = "How do I update the app?"
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
output = pipe(prompt, max_new_tokens=100)
print(output[0]["generated_text"])

import gradio as gr


def generate_response(instruction):
    """Generate a support answer for *instruction*.

    Wraps the question in the training prompt template and strips the
    scaffolding from the pipeline output so only the answer remains.
    """
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
    # The pipeline echoes the prompt; keep only the generated response.
    response = output[0]["generated_text"].split("### Response:\n")[-1].strip()
    return response


gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=3, label="Enter your question"),
    outputs=gr.Textbox(lines=5, label="Support Bot's Response"),
    title="📞 Customer Support Chatbot",
    description="Ask a question and get a response from your fine-tuned TinyLLaMA model.",
).launch()