{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🏥 Fine-Tune Mistral/Llama-3 for Nursing SBAR Summarization\n",
    "\n",
    "This notebook fine-tunes a Large Language Model (LLM) to convert clinical transcripts into SBAR (Situation, Background, Assessment, Recommendation) summaries.\n",
    "\n",
    "**Hardware Requirement**: NVIDIA T4 GPU (Google Colab Free Tier is sufficient)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "# 1. Install dependencies.\n",
    "# NOTE: %%capture is a cell magic and MUST be the first line of the cell;\n",
    "# putting a comment above it makes IPython reject the whole cell.\n",
    "# %pip (rather than !pip) guarantees the install targets this kernel's environment.\n",
    "%pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
    "%pip install --no-deps \"xformers<0.0.26\" trl peft accelerate bitsandbytes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Load Base Model (Unsloth optimized)\n",
    "from unsloth import FastLanguageModel\n",
    "import torch\n",
    "\n",
    "max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!\n",
    "dtype = None  # None for auto detection. Float16 for Tesla T4, Bfloat16 for Ampere+\n",
    "load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.\n",
    "\n",
    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "    model_name = \"unsloth/llama-3-8b-bnb-4bit\",  # or \"unsloth/mistral-7b-v0.3-bnb-4bit\"\n",
    "    max_seq_length = max_seq_length,\n",
    "    dtype = dtype,\n",
    "    load_in_4bit = load_in_4bit,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Add LoRA Adapters\n",
    "model = FastLanguageModel.get_peft_model(\n",
    "    model,\n",
    "    r = 16,  # LoRA rank. Choose any number > 0; suggested: 8, 16, 32, 64, 128\n",
    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
    "    lora_alpha = 16,\n",
    "    lora_dropout = 0,  # Supports any, but = 0 is optimized\n",
    "    bias = \"none\",  # Supports any, but = \"none\" is optimized\n",
    "    use_gradient_checkpointing = \"unsloth\",  # True or \"unsloth\" for very long context\n",
    "    random_state = 3407,\n",
    "    use_rslora = False,  # We support rank stabilized LoRA\n",
    "    loftq_config = None,  # And LoftQ\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Load Dataset (Update with YOUR repo name)\n",
    "from datasets import load_dataset\n",
    "\n",
    "# REPLACE WITH YOUR REPO NAME from the previous step\n",
    "HF_REPO_NAME = \"YOUR_USERNAME/nursing-sbar-instruct\"\n",
    "\n",
    "dataset = load_dataset(HF_REPO_NAME, split = \"train\")\n",
    "\n",
    "# Use the tokenizer's REAL end-of-sequence token. A hard-coded literal such as\n",
    "# \"<|endoftext|>\" is not a special token in the Llama-3 vocabulary, so the\n",
    "# model would never learn a stop signal and would generate until it hit the\n",
    "# max_new_tokens limit at inference time.\n",
    "EOS_TOKEN = tokenizer.eos_token\n",
    "\n",
    "def formatting_prompts_func(examples):\n",
    "    \"\"\"Flatten each 2-turn chat example into a single training string.\n",
    "\n",
    "    Expects examples[\"messages\"] to be a batch of conversations where\n",
    "    convo[0] is the user turn and convo[1] is the assistant turn\n",
    "    (each a dict with a \"content\" key).\n",
    "    \"\"\"\n",
    "    convos = examples[\"messages\"]\n",
    "    texts = []\n",
    "    for convo in convos:\n",
    "        # Simple chat format: <|user|> ... <|assistant|> ... EOS\n",
    "        user_msg = convo[0][\"content\"]\n",
    "        assistant_msg = convo[1][\"content\"]\n",
    "        text = f\"<|user|>\\n{user_msg}\\n<|assistant|>\\n{assistant_msg}{EOS_TOKEN}\"\n",
    "        texts.append(text)\n",
    "    return { \"text\" : texts, }\n",
    "\n",
    "dataset = dataset.map(formatting_prompts_func, batched = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Train\n",
    "from trl import SFTTrainer\n",
    "from transformers import TrainingArguments\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model = model,\n",
    "    tokenizer = tokenizer,\n",
    "    train_dataset = dataset,\n",
    "    dataset_text_field = \"text\",\n",
    "    max_seq_length = max_seq_length,\n",
    "    dataset_num_proc = 2,\n",
    "    packing = False,  # Can make training 5x faster for short sequences.\n",
    "    args = TrainingArguments(\n",
    "        per_device_train_batch_size = 2,\n",
    "        gradient_accumulation_steps = 4,\n",
    "        warmup_steps = 5,\n",
    "        max_steps = 60,  # Adjust based on dataset size (e.g. 1 epoch)\n",
    "        learning_rate = 2e-4,\n",
    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
    "        bf16 = torch.cuda.is_bf16_supported(),\n",
    "        logging_steps = 1,\n",
    "        optim = \"adamw_8bit\",\n",
    "        weight_decay = 0.01,\n",
    "        lr_scheduler_type = \"linear\",\n",
    "        seed = 3407,\n",
    "        output_dir = \"outputs\",\n",
    "    ),\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Inference Test\n",
    "FastLanguageModel.for_inference(model)  # Enable native 2x faster inference\n",
    "\n",
    "# The prompt must use the same chat format the model was trained on above.\n",
    "inputs = tokenizer(\n",
    "[\n",
    "    \"<|user|>\\nTranscript: I have a headache and my BP is 150/90. What should I do?\\n<|assistant|>\"\n",
    "], return_tensors = \"pt\").to(\"cuda\")\n",
    "\n",
    "outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)\n",
    "print(tokenizer.batch_decode(outputs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7. Save Model to Hugging Face\n",
    "# Authorization: NEVER hardcode tokens in a notebook (they leak via git /\n",
    "# shared outputs). Prompt for the token interactively instead.\n",
    "from getpass import getpass\n",
    "from huggingface_hub import login\n",
    "\n",
    "login(getpass(\"Hugging Face token: \"))\n",
    "\n",
    "model.push_to_hub_merged(\"YOUR_USERNAME/nursing-llama3-sbar\", tokenizer, save_method = \"lora\")\n",
    "# Or save locally:\n",
    "# model.save_pretrained(\"lora_model\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}