Nursing Citizen Development committed on
Commit
2b9519d
·
1 Parent(s): 5ee6662

Feat: Switch to relational-intelligence-unsloth-medgemma with token auth

Browse files
Files changed (2) hide show
  1. README.md +2 -1
  2. pna_client.py +36 -22
README.md CHANGED
@@ -17,8 +17,9 @@ An AI-powered tutor designed to guide nursing professionals through the **A-EQUI
17
  Automatically synced from GitHub via GitHub Actions.
18
 
19
  ## 🧠 Model Strategy
20
- - **Base Voice**: `NurseCitizenDeveloper/nursing-llama-3-8b-fons`
21
  - **Knowledge Base**: RAG implementation using the official PNA A-EQUIP guide.
 
22
 
23
  ## ⚖️ Disclaimer
24
  This tool is for educational and supportive purposes for Professional Nurse Advocates and nursing staff. It does not provide direct clinical advice.
 
17
  Automatically synced from GitHub via GitHub Actions.
18
 
19
  ## 🧠 Model Strategy
20
+ - **Base Model**: `NurseCitizenDeveloper/relational-intelligence-unsloth-medgemma` (person-centred, fine-tuned)
21
  - **Knowledge Base**: RAG implementation using the official PNA A-EQUIP guide.
22
+ - **Persona**: Strong PNA Tutor system prompting for restorative supervision focus.
23
 
24
  ## ⚖️ Disclaimer
25
  This tool is for educational and supportive purposes for Professional Nurse Advocates and nursing staff. It does not provide direct clinical advice.
pna_client.py CHANGED
@@ -1,10 +1,11 @@
1
  import os
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, Pipeline
4
  import spaces
5
 
6
  class PNAAssistantClient:
7
- def __init__(self, model_id="NurseCitizenDeveloper/nursing-llama-3-8b-fons"):
 
8
  self.model_id = model_id
9
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
10
  self.tokenizer = None
@@ -16,44 +17,57 @@ class PNAAssistantClient:
16
  def _load_model(self):
17
  if self.model is None:
18
  print(f"Loading model {self.model_id}...")
19
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
 
20
  self.model = AutoModelForCausalLM.from_pretrained(
21
  self.model_id,
22
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
23
- device_map="auto" if self.device == "cuda" else None
 
24
  )
 
25
 
26
  @spaces.GPU()
27
  def generate_response(self, prompt, context="", history=[]):
28
  self._load_model()
29
 
30
- system_prompt = f"""You are a Professional Nurse Advocate (PNA) AI tutor.
31
- Your goal is to guide users in understanding the PNA role and the A-EQUIP model (Normative, Formative, Restorative, Personal Action).
32
- You focus heavily on Restorative Supervision.
33
-
34
- CONSTRAINTS:
35
- 1. Diversity: Always include one of these emojis in every response: {', '.join(self.diversity_emojis)}.
36
- 2. Pedagogical Style: Use open-ended questions. Avoid giving immediate answers. Guide the user to reflect.
37
- 3. Content Scope: Only assist with PNA, A-EQUIP, or listed nursing fields.
38
- 4. Voice: Maintain the person-centred, compassionate tone you were trained on.
39
- 5. Formatting: Max 2 short paragraphs or 6 bullet points.
40
-
41
- CONTEXT FROM A-EQUIP GUIDE:
 
 
 
 
 
 
 
 
42
  {context}
43
  """
44
 
45
- full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"
 
 
46
 
47
- inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.device)
48
 
49
  with torch.no_grad():
50
  outputs = self.model.generate(
51
- **inputs,
52
- max_new_tokens=256,
53
  temperature=0.7,
54
  do_sample=True,
55
  pad_token_id=self.tokenizer.eos_token_id
56
  )
57
 
58
- response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
59
  return response.strip()
 
1
  import os
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import spaces
5
 
6
  class PNAAssistantClient:
7
+ # Using user's fine-tuned MedGemma model trained on person-centred language
8
+ def __init__(self, model_id="NurseCitizenDeveloper/relational-intelligence-unsloth-medgemma"):
9
  self.model_id = model_id
10
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
11
  self.tokenizer = None
 
17
  def _load_model(self):
18
  if self.model is None:
19
  print(f"Loading model {self.model_id}...")
20
+ # Use token=True to leverage HF_TOKEN for gated models
21
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=True)
22
  self.model = AutoModelForCausalLM.from_pretrained(
23
  self.model_id,
24
+ torch_dtype=torch.bfloat16 if self.device == "cuda" else torch.float32,
25
+ device_map="auto" if self.device == "cuda" else None,
26
+ token=True
27
  )
28
+ print("Model loaded successfully!")
29
 
30
  @spaces.GPU()
31
  def generate_response(self, prompt, context="", history=[]):
32
  self._load_model()
33
 
34
+ system_prompt = f"""You are a Professional Nurse Advocate (PNA) AI tutor. Your role is to guide nursing professionals through the A-EQUIP model (Advocating and Educating for Quality Improvement).
35
+
36
+ **Your Core Functions (A-EQUIP):**
37
+ 1. Normative: Monitoring, evaluation, quality control
38
+ 2. Formative: Education and development
39
+ 3. Restorative: Clinical supervision (your primary focus)
40
+ 4. Personal Action: Quality improvement
41
+
42
+ **Communication Style:**
43
+ - Use person-centred, compassionate language
44
+ - Always include a diversity emoji: {', '.join(self.diversity_emojis)}
45
+ - Ask open-ended questions before giving answers
46
+ - Focus on reflection and restorative supervision
47
+ - Keep responses to 2 short paragraphs or 6 bullet points max
48
+
49
+ **Scope:**
50
+ - Only discuss PNA, A-EQUIP, nursing fields
51
+ - For out-of-scope topics: "I can only assist with topics related to the Professional Nurse Advocate role and the A-EQUIP model."
52
+
53
+ **Reference Material:**
54
  {context}
55
  """
56
 
57
+ messages = [
58
+ {"role": "user", "content": f"{system_prompt}\n\nUser question: {prompt}"}
59
+ ]
60
 
61
+ inputs = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True).to(self.device)
62
 
63
  with torch.no_grad():
64
  outputs = self.model.generate(
65
+ inputs,
66
+ max_new_tokens=300,
67
  temperature=0.7,
68
  do_sample=True,
69
  pad_token_id=self.tokenizer.eos_token_id
70
  )
71
 
72
+ response = self.tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
73
  return response.strip()