{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🧪 Relational Ai for Nursing Evaluation Notebook\n",
    "\n",
    "This notebook evaluates the fine-tuned nursing model using Azure GPT-4o as an \"Expert Judge\".\n",
    "\n",
    "**Model:** `NurseCitizenDeveloper/nursing-llama-3-8b-fons`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Install Dependencies (Run this first, then restart runtime)\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -U bitsandbytes transformers accelerate langchain-openai -q\n",
    "print(\"✅ Installed! Now go to Runtime → Restart runtime, then run Cell 2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Load Model from Hugging Face\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "HF_MODEL = \"NurseCitizenDeveloper/nursing-llama-3-8b-fons\"\n",
    "print(f\"🔄 Loading model: {HF_MODEL}\")\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    HF_MODEL,\n",
    "    device_map=\"auto\",\n",
    "    torch_dtype=torch.float16,\n",
    ")\n",
    "print(\"✅ Model loaded successfully!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Setup Azure OpenAI Judge\n",
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "from langchain_openai import AzureChatOpenAI\n",
    "from langchain_core.messages import HumanMessage\n",
    "\n",
    "# Non-secret settings: setdefault keeps any value already exported in the environment.\n",
    "os.environ.setdefault(\"AZURE_OPENAI_ENDPOINT\", \"https://nursing-brain-uk-685.openai.azure.com/\")\n",
    "os.environ.setdefault(\"AZURE_OPENAI_DEPLOYMENT\", \"gpt-4o\")\n",
    "os.environ.setdefault(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n",
    "\n",
    "# The key is never stored in the notebook. It is taken from the environment, or\n",
    "# prompted for (without echo) when it is missing or still the old placeholder.\n",
    "if os.environ.get(\"AZURE_OPENAI_API_KEY\", \"\") in (\"\", \"YOUR_AZURE_KEY\"):\n",
    "    os.environ[\"AZURE_OPENAI_API_KEY\"] = getpass(\"Azure OpenAI API key: \")\n",
    "\n",
    "llm = AzureChatOpenAI(\n",
    "    azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT\"],\n",
    "    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n",
    "    azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\n",
    "    api_key=os.environ[\"AZURE_OPENAI_API_KEY\"],\n",
    ")\n",
    "print(\"✅ Azure GPT-4o Judge ready!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Define Test Cases\n",
    "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Input:\n",
    "{}\n",
    "\n",
    "### Response:\n",
    "{}\"\"\"\n",
    "\n",
    "test_cases = [\n",
    "    {\n",
    "        \"instruction\": \"Summarize the key nursing interventions for a patient with delirium.\",\n",
    "        \"input\": \"Patient is an 85-year-old male with acute confusion, fluctuating consciousness, and visual hallucinations.\"\n",
    "    },\n",
    "    {\n",
    "        \"instruction\": \"What are the FONS principles for person-centred care?\",\n",
    "        \"input\": \"A nurse is documenting care for a patient with dementia.\"\n",
    "    },\n",
    "    {\n",
    "        \"instruction\": \"Explain why skin tone documentation is important in pressure ulcer risk assessment.\",\n",
    "        \"input\": \"Using the Braden Scale for a patient with darker skin.\"\n",
    "    },\n",
    "    {\n",
    "        \"instruction\": \"How should a nurse communicate using person-centred language?\",\n",
    "        \"input\": \"Writing clinical notes about a patient with mental health needs.\"\n",
    "    },\n",
    "    {\n",
    "        \"instruction\": \"Describe the ADPIE nursing process.\",\n",
    "        \"input\": \"Training a new nursing student on documentation.\"\n",
    "    },\n",
    "]\n",
    "print(f\"📋 {len(test_cases)} test cases loaded\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Run Evaluation\n",
    "print(\"\\n\" + \"=\"*60)\n",
    "print(\"🏁 Relational Ai for Nursing EVALUATION\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "# Generation below samples (do_sample=True); seed the RNG so that re-running\n",
    "# this cell on the same setup starts from the same random state.\n",
    "SEED = 42\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "results = []\n",
    "\n",
    "for i, case in enumerate(test_cases, 1):\n",
    "    print(f\"\\n--- Test {i}/{len(test_cases)} ---\")\n",
    "    print(f\"📝 Instruction: {case['instruction']}\")\n",
    "\n",
    "    # Generate response\n",
    "    prompt = alpaca_prompt.format(case[\"instruction\"], case[\"input\"], \"\")\n",
    "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
    "\n",
    "    with torch.no_grad():\n",
    "        outputs = model.generate(\n",
    "            **inputs,\n",
    "            max_new_tokens=200,\n",
    "            do_sample=True,\n",
    "            temperature=0.7,\n",
    "            top_p=0.9,\n",
    "        )\n",
    "\n",
    "    # generate() returns prompt + completion. Decode only the tokens after the\n",
    "    # prompt: splitting the full text on the last \"### Response:\" would pick the\n",
    "    # wrong segment whenever the model itself emits that marker again.\n",
    "    prompt_len = inputs[\"input_ids\"].shape[1]\n",
    "    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()\n",
    "\n",
    "    print(f\"🤖 Model Response: {response[:300]}...\")\n",
    "\n",
    "    # Azure Judge Evaluation\n",
    "    eval_prompt = f\"\"\"You are an expert nursing educator. Evaluate this AI response on a scale of 1-10:\n",
    "\n",
    "1. **Clinical Accuracy** (1-10): Is the information clinically correct?\n",
    "2. **Person-Centred Language** (1-10): Does it use respectful, dignified language?\n",
    "3. **FONS Alignment** (1-10): Does it reflect FONS principles (relational care, practice development)?\n",
    "\n",
    "**Instruction:** {case['instruction']}\n",
    "**Context:** {case['input']}\n",
    "**Model Response:** {response}\n",
    "\n",
    "Provide scores and brief rationale for each:\"\"\"\n",
    "\n",
    "    evaluation = llm.invoke([HumanMessage(content=eval_prompt)])\n",
    "    print(f\"\\n⚖️ Expert Evaluation:\\n{evaluation.content}\")\n",
    "    print(\"-\" * 50)\n",
    "\n",
    "    # Keep the full response and the judge's free-text verdict for the summary cell.\n",
    "    results.append({\n",
    "        \"test\": case[\"instruction\"],\n",
    "        \"response\": response,\n",
    "        \"evaluation\": evaluation.content\n",
    "    })\n",
    "\n",
    "print(\"\\n\" + \"=\"*60)\n",
    "print(\"✅ EVALUATION COMPLETE\")\n",
    "print(\"=\"*60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Summary Report\n",
    "print(\"\\n📊 EVALUATION SUMMARY\")\n",
    "print(\"=\"*40)\n",
    "for i, r in enumerate(results, 1):\n",
    "    print(f\"\\nTest {i}: {r['test'][:50]}...\")\n",
    "    print(f\"Response preview: {r['response'][:100]}...\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}