```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 4-bit NF4 quantization config for QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the base model in 4-bit precision
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token

# Disable the KV cache (incompatible with gradient checkpointing)
# and prepare the quantized model for k-bit training
model.config.use_cache = False
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# LoRA adapter config: low-rank updates on the attention query/value projections
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
```
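With the adapters attached, it is worth confirming how small the trainable footprint actually is; PEFT ships a helper for exactly this:

```python
# Sanity check: only the LoRA adapter weights should be trainable
model.print_trainable_parameters()
# Expect the trainable share to be a small fraction (well under 1%) of all parameters
```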
```python
import pandas as pd
from datasets import Dataset

# Load data from CSV
df = pd.read_csv("Customer-Support.csv")

# Rename columns to match the expected instruction/output keys
df = df.rename(columns={"query": "instruction", "response": "output"})

# Select the required columns and fill missing values with empty strings
data = df[["instruction", "output"]].fillna("")

# Convert the DataFrame to records and build a Hugging Face Dataset
dataset = Dataset.from_list(data.to_dict(orient="records"))

# Format each example into an instruction-style prompt
def format_instruction(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"

dataset = dataset.map(lambda x: {"text": format_instruction(x)})
```
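Before tokenizing, a quick look at one formatted example confirms the prompt template came out as intended (a minimal check, nothing model-specific):

```python
# Inspect the first formatted training example
print(dataset[0]["text"])
```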
```python
def tokenize_function(examples):
    tokenized = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    # Use the input ids as labels, masking padding positions with -100 so the loss ignores them
    tokenized["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True)
```
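If you want to monitor held-out loss during training, the datasets library can split off a small evaluation set. An optional sketch; it is not wired into the Trainer call below, but you could pass the test split as `eval_dataset`:

```python
# Optional: hold out 10% of the examples for evaluation
splits = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_ds, eval_ds = splits["train"], splits["test"]
```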
```python
training_args = TrainingArguments(
    output_dir="./tinyllama-qlora-support-bot",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size of 2 x 4 = 8
    learning_rate=2e-4,
    logging_dir="./logs",
    num_train_epochs=3,
    logging_steps=10,
    save_total_limit=2,
    save_strategy="epoch",
    fp16=True,  # matches the fp16 compute dtype above; use bf16=True instead on Ampere+ GPUs
    optim="paged_adamw_8bit"
)
```
```python
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)
trainer.train()

# Save the LoRA adapter weights and the tokenizer
model.save_pretrained("tinyllama-qlora-support-bot")
tokenizer.save_pretrained("tinyllama-qlora-support-bot")
```
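Note that `save_pretrained` on a PEFT model writes only the small adapter weights, not a standalone model. If you later need a self-contained checkpoint for deployment, a common pattern is to reload the base model at half precision and fold the adapter into it; a minimal sketch, assuming the adapter directory saved above:

```python
from peft import PeftModel

# Reload the base model in fp16 so the adapter can be merged cleanly
base = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
# Load the trained adapter and fold its weights into the base model
merged = PeftModel.from_pretrained(base, "tinyllama-qlora-support-bot").merge_and_unload()
merged.save_pretrained("tinyllama-qlora-support-bot-merged")
tokenizer.save_pretrained("tinyllama-qlora-support-bot-merged")
```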
```python
from transformers import pipeline

# Re-enable the KV cache for faster generation now that training is done
model.config.use_cache = True

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

instruction = "How do I update the app?"
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
output = pipe(prompt, max_new_tokens=100)
print(output[0]["generated_text"])
```
```python
import gradio as gr

def generate_response(instruction):
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
    # Keep only the generated answer, stripping the echoed prompt
    return output[0]["generated_text"].split("### Response:\n")[-1].strip()

gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=3, label="Enter your question"),
    outputs=gr.Textbox(lines=5, label="Support Bot's Response"),
    title="📞 Customer Support Chatbot",
    description="Ask a question and get a response from your fine-tuned TinyLLaMA model.",
).launch()
```