"""Generate and save a Colab (T4) LoRA fine-tuning notebook for MedicalChatBot.

Builds the notebook as an embedded JSON document, validates it, writes it to
disk as UTF-8, and evaluates to the written file's name (notebook-style
display expression at the end).
"""
import json
from pathlib import Path

# Notebook JSON document.
# NOTE: this MUST be a raw string (r'''…'''). In a plain triple-quoted string
# Python would turn every "\n" escape inside the JSON string literals into a
# real newline character, producing an invalid .ipynb (unescaped control
# characters inside JSON strings).
notebook_code = r'''
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# 🧠 MedicalChatBot Training Hub (Google Colab T4)\n",
        "\n",
        "Fine-tune your MedicalChatBot using LoRA + Hugging Face on a T4 GPU."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Install required libraries\n",
        "!pip install -q transformers datasets peft accelerate evaluate bitsandbytes"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Login to Hugging Face\n",
        "from huggingface_hub import notebook_login\n",
        "notebook_login()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Load dataset (MedQuAD or PubMedQA)\n",
        "from datasets import load_dataset\n",
        "\n",
        "# Use medquad or pubmed_qa\n",
        "# NOTE(review): confirm the exact dataset id on the Hub — 'medquad' may\n",
        "# require a namespace prefix (e.g. 'owner/medquad').\n",
        "dataset = load_dataset('medquad')\n",
        "dataset = dataset['train'].train_test_split(test_size=0.1)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Load tokenizer & model (e.g., Mistral 7B or base model)\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
        "\n",
        "base_model = 'mistralai/Mistral-7B-v0.1'  # change if needed\n",
        "tokenizer = AutoTokenizer.from_pretrained(base_model)\n",
        "# Mistral ships no pad token; padding is required by the data collator below\n",
        "tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token\n",
        "model = AutoModelForCausalLM.from_pretrained(base_model, device_map='auto', load_in_4bit=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Apply LoRA with PEFT\n",
        "from peft import get_peft_model, LoraConfig, TaskType\n",
        "\n",
        "peft_config = LoraConfig(\n",
        "    task_type=TaskType.CAUSAL_LM,\n",
        "    inference_mode=False,\n",
        "    r=8,\n",
        "    lora_alpha=16,\n",
        "    lora_dropout=0.1\n",
        ")\n",
        "model = get_peft_model(model, peft_config)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Tokenize dataset\n",
        "def tokenize(batch):\n",
        "    # With batched=True, map() passes lists of values — join the\n",
        "    # question/answer pairs element-wise rather than concatenating lists.\n",
        "    texts = [q + ' ' + a for q, a in zip(batch['question'], batch['answer'])]\n",
        "    return tokenizer(texts, truncation=True)\n",
        "\n",
        "tokenized = dataset.map(tokenize, batched=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Train with Trainer\n",
        "from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
        "\n",
        "args = TrainingArguments(\n",
        "    output_dir='./results',\n",
        "    per_device_train_batch_size=2,\n",
        "    per_device_eval_batch_size=2,\n",
        "    num_train_epochs=2,\n",
        "    logging_steps=10,\n",
        "    evaluation_strategy='epoch',\n",
        "    save_strategy='epoch',\n",
        "    fp16=True\n",
        ")\n",
        "\n",
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=args,\n",
        "    train_dataset=tokenized['train'],\n",
        "    eval_dataset=tokenized['test'],\n",
        "    # Causal-LM collator pads batches and supplies the 'labels' the\n",
        "    # Trainer needs to compute a loss (mlm=False => labels = input_ids).\n",
        "    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)\n",
        ")\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# ✅ Save and push model to Hugging Face Hub\n",
        "model.push_to_hub('kberta2014/MedicalChatBot-Lora')\n",
        "tokenizer.push_to_hub('kberta2014/MedicalChatBot-Lora')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
'''

# Fail fast if the embedded document is not valid JSON (e.g. a stray escape
# or trailing comma) rather than writing a broken .ipynb.
json.loads(notebook_code)

# Save to the notebook file. Ensure the directory exists and write UTF-8
# explicitly so the emoji in the markdown cell survive regardless of the
# platform's default locale encoding.
notebook_path = Path("/mnt/data/MedicalChatBot_TrainingHub_Colab_T4.ipynb")
notebook_path.parent.mkdir(parents=True, exist_ok=True)
notebook_path.write_text(notebook_code, encoding="utf-8")
notebook_path.name