{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 🧪 Relational Ai for Nursing Evaluation Notebook\n",
"\n",
"This notebook evaluates the fine-tuned nursing model using Azure GPT-4o as an \"Expert Judge\".\n",
"\n",
"**Model:** `NurseCitizenDeveloper/nursing-llama-3-8b-fons`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Install Dependencies (run this first, then restart the runtime)\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q -U bitsandbytes transformers accelerate langchain-openai\n",
"print(\"✅ Installed! Now go to Runtime → Restart runtime, then run Cell 2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 2. Load Model from Hugging Face\n",
"import torch\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"HF_MODEL = \"NurseCitizenDeveloper/nursing-llama-3-8b-fons\"\n",
"print(f\"🔄 Loading model: {HF_MODEL}\")\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)\n",
"# device_map=\"auto\" lets accelerate place the weights on the available device(s);\n",
"# float16 halves the memory footprint compared with float32.\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
"    HF_MODEL,\n",
"    device_map=\"auto\",\n",
"    torch_dtype=torch.float16,\n",
")\n",
"print(\"✅ Model loaded successfully!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 3. Setup Azure OpenAI Judge\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"from langchain_openai import AzureChatOpenAI\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
"# Project defaults; values already present in the environment take precedence.\n",
"os.environ.setdefault(\"AZURE_OPENAI_ENDPOINT\", \"https://nursing-brain-uk-685.openai.azure.com/\")\n",
"os.environ.setdefault(\"AZURE_OPENAI_DEPLOYMENT\", \"gpt-4o\")\n",
"os.environ.setdefault(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n",
"\n",
"# Never store the key in the notebook: reuse one from the environment,\n",
"# otherwise prompt for it without echoing it to the cell output.\n",
"api_key = os.environ.get(\"AZURE_OPENAI_API_KEY\", \"\")\n",
"if not api_key or api_key == \"YOUR_AZURE_KEY\":\n",
"    os.environ[\"AZURE_OPENAI_API_KEY\"] = getpass(\"Azure OpenAI API key: \")\n",
"\n",
"llm = AzureChatOpenAI(\n",
"    azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT\"],\n",
"    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n",
"    azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\n",
"    api_key=os.environ[\"AZURE_OPENAI_API_KEY\"],\n",
")\n",
"print(\"✅ Azure GPT-4o Judge ready!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 4. Define Test Cases\n",
"# Alpaca-style template; the three slots are instruction, input and response.\n",
"alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
"\n",
"### Instruction:\n",
"{}\n",
"\n",
"### Input:\n",
"{}\n",
"\n",
"### Response:\n",
"{}\"\"\"\n",
"\n",
"# Each case supplies the instruction and the clinical context fed to the model.\n",
"test_cases = [\n",
"    {\n",
"        \"instruction\": \"Summarize the key nursing interventions for a patient with delirium.\",\n",
"        \"input\": \"Patient is an 85-year-old male with acute confusion, fluctuating consciousness, and visual hallucinations.\"\n",
"    },\n",
"    {\n",
"        \"instruction\": \"What are the FONS principles for person-centred care?\",\n",
"        \"input\": \"A nurse is documenting care for a patient with dementia.\"\n",
"    },\n",
"    {\n",
"        \"instruction\": \"Explain why skin tone documentation is important in pressure ulcer risk assessment.\",\n",
"        \"input\": \"Using the Braden Scale for a patient with darker skin.\"\n",
"    },\n",
"    {\n",
"        \"instruction\": \"How should a nurse communicate using person-centred language?\",\n",
"        \"input\": \"Writing clinical notes about a patient with mental health needs.\"\n",
"    },\n",
"    {\n",
"        \"instruction\": \"Describe the ADPIE nursing process.\",\n",
"        \"input\": \"Training a new nursing student on documentation.\"\n",
"    },\n",
"]\n",
"print(f\"📋 {len(test_cases)} test cases loaded\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 5. Run Evaluation\n",
"# Generation below samples (do_sample=True); seed torch so reruns are comparable.\n",
"SEED = 42\n",
"torch.manual_seed(SEED)\n",
"\n",
"print(\"\\n\" + \"=\"*60)\n",
"print(\"🚀 Relational Ai for Nursing EVALUATION\")\n",
"print(\"=\"*60)\n",
"\n",
"# One record per test case: instruction, model response, judge evaluation.\n",
"results = []\n",
"\n",
"for i, case in enumerate(test_cases, 1):\n",
"    print(f\"\\n--- Test {i}/{len(test_cases)} ---\")\n",
"    print(f\"📝 Instruction: {case['instruction']}\")\n",
"\n",
"    # Generate response (the response slot of the template is left empty)\n",
"    prompt = alpaca_prompt.format(case[\"instruction\"], case[\"input\"], \"\")\n",
"    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
"    prompt_len = inputs[\"input_ids\"].shape[-1]\n",
"\n",
"    with torch.no_grad():\n",
"        outputs = model.generate(\n",
"            **inputs,\n",
"            max_new_tokens=200,\n",
"            do_sample=True,\n",
"            temperature=0.7,\n",
"            top_p=0.9,\n",
"        )\n",
"\n",
"    # Decode only the tokens generated after the prompt. Splitting the full text\n",
"    # on '### Response:' would truncate the answer whenever the model itself\n",
"    # emits that marker.\n",
"    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()\n",
"\n",
"    print(f\"🤖 Model Response: {response[:300]}...\")\n",
"\n",
"    # Azure Judge Evaluation\n",
"    eval_prompt = f\"\"\"You are an expert nursing educator. Evaluate this AI response on a scale of 1-10:\n",
"\n",
"1. **Clinical Accuracy** (1-10): Is the information clinically correct?\n",
"2. **Person-Centred Language** (1-10): Does it use respectful, dignified language?\n",
"3. **FONS Alignment** (1-10): Does it reflect FONS principles (relational care, practice development)?\n",
"\n",
"**Instruction:** {case['instruction']}\n",
"**Context:** {case['input']}\n",
"**Model Response:** {response}\n",
"\n",
"Provide scores and brief rationale for each:\"\"\"\n",
"\n",
"    evaluation = llm.invoke([HumanMessage(content=eval_prompt)])\n",
"    print(f\"\\n⚖️ Expert Evaluation:\\n{evaluation.content}\")\n",
"    print(\"-\" * 50)\n",
"\n",
"    results.append({\n",
"        \"test\": case[\"instruction\"],\n",
"        \"response\": response,\n",
"        \"evaluation\": evaluation.content\n",
"    })\n",
"\n",
"print(\"\\n\" + \"=\"*60)\n",
"print(\"✅ EVALUATION COMPLETE\")\n",
"print(\"=\"*60)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 6. Summary Report\n",
"# Prints a truncated preview of each instruction and model response.\n",
"print(\"\\n📊 EVALUATION SUMMARY\")\n",
"print(\"=\"*40)\n",
"for i, r in enumerate(results, 1):\n",
"    print(f\"\\nTest {i}: {r['test'][:50]}...\")\n",
"    print(f\"Response preview: {r['response'][:100]}...\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}