# --- Setup: imports and Hugging Face authentication ---
import torch
import pandas as pd
from datasets import Dataset
from huggingface_hub import notebook_login
from IPython.display import display
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)

# Clear any stale Hub cache, then log in (required for the gated Llama 3 weights).
!rm -rf ~/.cache/huggingface/
notebook_login()

# --- Load the customer-support tweet dataset ---
df = pd.read_csv("/content/sample.csv")
print("Shape of dataset:", df.shape)
display(df.head(5))

# Split into inbound (customer) and outbound (brand) tweets, then join each
# customer tweet with the brand reply that answered it.
inbound_df = df[df["inbound"] == True]
outbound_df = df[df["inbound"] == False]

merged_df = pd.merge(
    inbound_df,
    outbound_df,
    left_on="tweet_id",
    right_on="in_response_to_tweet_id",
    suffixes=("_customer", "_brand"),
)
merged_df = merged_df[["tweet_id_customer", "text_customer", "tweet_id_brand", "text_brand"]]
display(merged_df.head())

# Turn each customer/brand pair into a prompt/response chat example.
def build_chat_example(row):
    return {
        "prompt": f"User: {row['text_customer']}\nAssistant:",
        "response": row["text_brand"],
    }

paired_data = merged_df.apply(build_chat_example, axis=1).to_list()

# Build a Hugging Face Dataset and hold out 10% for evaluation.
dataset = Dataset.from_list(paired_data).train_test_split(test_size=0.1, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]
print(train_dataset[0])

# --- Load Llama 3 8B with 4-bit NF4 quantization (QLoRA-style) ---
model_id = "meta-llama/Meta-Llama-3-8B"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Llama 3 has no pad token; reuse EOS.

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Optional: a quick generation pipeline for sanity-checking the base model.
# The model is already loaded and placed, so no dtype/device arguments are needed.
llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# --- Attach LoRA adapters ---
# prepare_model_for_kbit_training casts norm/output layers to fp32 and enables
# input gradients, which stabilizes fine-tuning on a 4-bit base model.
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, lora_config)
print("LoRA layers added to the model!")

# --- Tokenize prompt + response as a single causal-LM sequence ---
# The raw text columns are dropped so batches contain only tensor fields.
def tokenize_function(examples):
    full_texts = [
        f"{p}\n{r}" for p, r in zip(examples["prompt"], examples["response"])
    ]
    return tokenizer(full_texts, truncation=True, max_length=512)

train_tokenized = train_dataset.map(
    tokenize_function, batched=True, remove_columns=["prompt", "response"]
)
eval_tokenized = eval_dataset.map(
    tokenize_function, batched=True, remove_columns=["prompt", "response"]
)

# --- Train ---
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    bf16=True,  # match the bfloat16 compute dtype set in bnb_config
    report_to="none",
)

# With mlm=False the collator pads each batch dynamically and copies input_ids
# into labels, which is what causal-LM fine-tuning needs.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    data_collator=data_collator,
)
trainer.train()

# --- Try the fine-tuned model ---
model.eval()
test_prompt = "User: What is an iPhone?\nAssistant:"
inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )

print("=== Model Reply ===")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
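
# --- Persist the adapter (a minimal sketch; the directory name is an assumption) ---
# On a PEFT model, save_pretrained() writes only the small LoRA adapter weights,
# not the 8B base model, so the checkpoint stays small.
adapter_dir = "./llama3-support-lora"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# To reuse it later, reload the 4-bit base model and attach the saved adapter.
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
reloaded_model = PeftModel.from_pretrained(base_model, adapter_dir)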
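
# --- Optional refinement (not part of the pipeline above): mask the prompt in the loss ---
# tokenize_function trains the model to reproduce the customer prompt as well as
# the reply. A common alternative is to set the prompt positions in labels to
# -100 so the loss is computed only on the brand response. A sketch, assuming
# the same 512-token budget:
def tokenize_with_masked_prompt(examples):
    input_ids_batch, labels_batch, mask_batch = [], [], []
    for p, r in zip(examples["prompt"], examples["response"]):
        prompt_ids = tokenizer(f"{p}\n", add_special_tokens=False)["input_ids"]
        response_ids = tokenizer(r + tokenizer.eos_token, add_special_tokens=False)["input_ids"]
        input_ids = (prompt_ids + response_ids)[:512]
        labels = ([-100] * len(prompt_ids) + response_ids)[:512]
        input_ids_batch.append(input_ids)
        labels_batch.append(labels)
        mask_batch.append([1] * len(input_ids))
    return {
        "input_ids": input_ids_batch,
        "labels": labels_batch,
        "attention_mask": mask_batch,
    }

# Pair this with DataCollatorForSeq2Seq, which pads the precomputed labels with
# -100 instead of regenerating them from input_ids the way
# DataCollatorForLanguageModeling does.
from transformers import DataCollatorForSeq2Seq

masked_collator = DataCollatorForSeq2Seq(tokenizer, label_pad_token_id=-100)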