# Medical_Chat_Bot / Sample Dataset
# Author: Kberta2014
# Sample Dataset
# Revision: e68c9a0 (verified)
from pathlib import Path
# Re-define and save the notebook after kernel reset.
#
# NOTE: this MUST be a raw string (r'''...'''). The notebook JSON encodes
# newlines inside cell sources as the two-character sequence "\n"; in a
# non-raw literal Python would translate each one into a real newline
# character inside the JSON string values — an unescaped control
# character, which makes the JSON invalid and the .ipynb unopenable.
notebook_code = r'''
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 🧠 MedicalChatBot Training Hub (Google Colab T4)\n",
"\n",
"Fine-tune your MedicalChatBot using LoRA + Hugging Face on a T4 GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Install required libraries\n",
"!pip install -q transformers datasets peft accelerate evaluate bitsandbytes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Login to Hugging Face\n",
"from huggingface_hub import notebook_login\n",
"notebook_login()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Load dataset (MedQuAD or PubMedQA)\n",
"from datasets import load_dataset\n",
"\n",
"# Use medquad or pubmed_qa\n",
"dataset = load_dataset('medquad')\n",
"dataset = dataset['train'].train_test_split(test_size=0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Load tokenizer & model (e.g., Mistral 7B or base model)\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"base_model = 'mistralai/Mistral-7B-v0.1' # change if needed\n",
"tokenizer = AutoTokenizer.from_pretrained(base_model)\n",
"model = AutoModelForCausalLM.from_pretrained(base_model, device_map='auto', load_in_4bit=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Apply LoRA with PEFT\n",
"from peft import get_peft_model, LoraConfig, TaskType\n",
"\n",
"peft_config = LoraConfig(\n",
" task_type=TaskType.CAUSAL_LM,\n",
" inference_mode=False,\n",
" r=8,\n",
" lora_alpha=16,\n",
" lora_dropout=0.1\n",
")\n",
"model = get_peft_model(model, peft_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Tokenize dataset (batched: each field arrives as a list)\n",
"def tokenize(batch):\n",
"    texts = [q + ' ' + a for q, a in zip(batch['question'], batch['answer'])]\n",
"    return tokenizer(texts, truncation=True)\n",
"\n",
"tokenized = dataset.map(tokenize, batched=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Train with Trainer\n",
"from transformers import TrainingArguments, Trainer\n",
"\n",
"args = TrainingArguments(\n",
" output_dir='./results',\n",
" per_device_train_batch_size=2,\n",
" per_device_eval_batch_size=2,\n",
" num_train_epochs=2,\n",
" logging_steps=10,\n",
" evaluation_strategy='epoch',\n",
" save_strategy='epoch',\n",
" fp16=True\n",
")\n",
"\n",
"trainer = Trainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=tokenized['train'],\n",
" eval_dataset=tokenized['test']\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# βœ… Save and push model to Hugging Face Hub\n",
"model.push_to_hub('kberta2014/MedicalChatBot-Lora')\n",
"tokenizer.push_to_hub('kberta2014/MedicalChatBot-Lora')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
'''
# Save the generated notebook JSON to a .ipynb file and surface its name.
# encoding must be explicit: the notebook text contains non-ASCII
# characters (emoji in the cell headers), and write_text otherwise falls
# back to the platform default encoding, which can raise or mangle them
# on non-UTF-8 systems.
notebook_path = Path("/mnt/data/MedicalChatBot_TrainingHub_Colab_T4.ipynb")
notebook_path.write_text(notebook_code, encoding="utf-8")
notebook_path.name  # expression result shown when run in a notebook cell