Kberta2014
/

Medical_Chat_Bot

# Generate the "MedicalChatBot Training Hub" Colab notebook and save it as an
# .ipynb file.
#
# The notebook is assembled as a plain dict and serialised with json.dumps so
# that newlines inside cell sources are escaped correctly. Embedding the JSON
# in a non-raw triple-quoted string turns every "\n" into a real line break
# inside a JSON string value, which is not valid JSON.
import json
import textwrap
from pathlib import Path


def _source(text):
    """Split cell text into nbformat ``source`` lines.

    Every line keeps its trailing newline except the last one, which is the
    layout the notebook JSON uses for multi-line sources.
    """
    return textwrap.dedent(text).strip().splitlines(keepends=True)


def _markdown_cell(text):
    """Return a markdown cell dict whose source is *text*."""
    return {"cell_type": "markdown", "metadata": {}, "source": _source(text)}


def _code_cell(text):
    """Return an unexecuted code cell dict whose source is *text*."""
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": _source(text),
    }


def build_notebook():
    """Return the training notebook as an nbformat-4 style dict."""
    cells = [
        _markdown_cell(
            """
            # 🧠 MedicalChatBot Training Hub (Google Colab T4)

            Fine-tune your MedicalChatBot using LoRA + Hugging Face on a T4 GPU.
            """
        ),
        _code_cell(
            """
            # ✅ Install required libraries
            !pip install -q transformers datasets peft accelerate evaluate bitsandbytes
            """
        ),
        _code_cell(
            """
            # ✅ Login to Hugging Face
            from huggingface_hub import notebook_login
            notebook_login()
            """
        ),
        _code_cell(
            """
            # ✅ Load dataset (MedQuAD or PubMedQA)
            from datasets import load_dataset

            # Use medquad or pubmed_qa
            dataset = load_dataset('medquad')
            dataset = dataset['train'].train_test_split(test_size=0.1)
            """
        ),
        _code_cell(
            """
            # ✅ Load tokenizer & model (e.g., Mistral 7B or base model)
            from transformers import AutoModelForCausalLM, AutoTokenizer

            base_model = 'mistralai/Mistral-7B-v0.1'  # change if needed
            tokenizer = AutoTokenizer.from_pretrained(base_model)
            model = AutoModelForCausalLM.from_pretrained(base_model, device_map='auto', load_in_4bit=True)
            """
        ),
        _code_cell(
            """
            # ✅ Apply LoRA with PEFT
            from peft import get_peft_model, LoraConfig, TaskType

            peft_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
                r=8,
                lora_alpha=16,
                lora_dropout=0.1
            )
            model = get_peft_model(model, peft_config)
            """
        ),
        # With batched=True the mapped function receives lists of column
        # values, so question/answer pairs are joined row by row instead of
        # concatenating the two columns directly (list + str raises TypeError).
        _code_cell(
            """
            # ✅ Tokenize dataset
            def tokenize(batch):
                texts = [q + ' ' + a for q, a in zip(batch['question'], batch['answer'])]
                return tokenizer(texts, truncation=True)

            tokenized = dataset.map(tokenize, batched=True)
            """
        ),
        # NOTE(review): the Trainer below gets no labels column and no data
        # collator — confirm the run actually produces a loss before relying
        # on this cell.
        _code_cell(
            """
            # ✅ Train with Trainer
            from transformers import TrainingArguments, Trainer

            args = TrainingArguments(
                output_dir='./results',
                per_device_train_batch_size=2,
                per_device_eval_batch_size=2,
                num_train_epochs=2,
                logging_steps=10,
                evaluation_strategy='epoch',
                save_strategy='epoch',
                fp16=True
            )

            trainer = Trainer(
                model=model,
                args=args,
                train_dataset=tokenized['train'],
                eval_dataset=tokenized['test']
            )
            trainer.train()
            """
        ),
        _code_cell(
            """
            # ✅ Save and push model to Hugging Face Hub
            model.push_to_hub('kberta2014/MedicalChatBot-Lora')
            tokenizer.push_to_hub('kberta2014/MedicalChatBot-Lora')
            """
        ),
    ]
    return {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            },
            "language_info": {
                "name": "python",
                "version": "3.9",
            },
        },
        "nbformat": 4,
        "nbformat_minor": 0,
    }


# Serialised notebook text. ensure_ascii=False keeps the emoji readable in the
# file; the explicit UTF-8 encoding on write makes that safe on any platform.
notebook_code = json.dumps(build_notebook(), indent=1, ensure_ascii=False) + "\n"

# Save to notebook file
notebook_path = Path("/mnt/data/MedicalChatBot_TrainingHub_Colab_T4.ipynb")
if __name__ == "__main__":
    # Only touch the filesystem when executed as a script or notebook cell,
    # not when this module is merely imported.
    notebook_path.write_text(notebook_code, encoding="utf-8")
notebook_path.name