SsebaA committed on
Commit
c3d84fa
·
verified ·
1 Parent(s): 57b898e

Update vips_classifier.py

Browse files
Files changed (1) hide show
  1. vips_classifier.py +73 -118
vips_classifier.py CHANGED
@@ -1,9 +1,5 @@
1
  """
2
  VoiceNote AI - VIPS Classifier
3
- Classifies patient information into VIPS categories using three prompt strategies.
4
-
5
- Input: English-translated anonymized text (via DeepL)
6
- Output: VIPS dict for each of zero_shot / few_shot / chain_of_thought
7
  """
8
 
9
  import logging
@@ -11,150 +7,116 @@ from config import Config
11
 
12
  logger = logging.getLogger(__name__)
13
 
 
 
 
 
 
14
 
15
- # ══════════════════════════════════════════════════════════
16
- # SYSTEM INSTRUCTION (shared base for all prompts)
17
- # ══════════════════════════════════════════════════════════
18
-
19
- _SYSTEM = """You are a clinical documentation assistant for Swedish nursing.
20
- Classify patient-nurse conversations into the four VIPS categories below.
21
-
22
- VIPS categories:
23
- - V (Wellbeing / Välbefinnande): Subjective symptoms — pain, fatigue, nausea, mood, sleep, appetite
24
- - I (Integrity / Integritet): Autonomy, personal habits, living situation, social relations, preferences
25
- - P (Prevention / Prevention): Risk factors, preventive measures, lifestyle (smoking, diet, exercise)
26
- - S (Safety / Säkerhet): Allergies, fall risk, medication, infection risk, safety concerns
27
 
28
- Rules:
29
- 1. ONLY include information explicitly stated in the conversation.
30
- 2. NEVER invent or assume information that was not mentioned.
31
- 3. If a category has no relevant information, write exactly: Ingen relevant information.
32
- 4. Keep each category concise and clinical."""
33
 
 
 
34
 
35
- # ══════════════════════════════════════════════════════════
36
- # 1. ZERO-SHOT
37
- # ══════════════════════════════════════════════════════════
38
 
39
- def build_prompt_zero_shot(english_text: str) -> str:
40
- """
41
- Zero-shot: no examples — pure instruction only.
42
- Reference: Sivarajkumar et al. (2022), HealthPrompt
43
- """
44
- return f"""{_SYSTEM}
45
 
46
- Patient-nurse conversation:
47
  \"\"\"{english_text}\"\"\"
48
 
49
- Generate the VIPS nursing note:
50
- V:
51
- I:
52
- P:
53
- S:"""
54
 
 
 
 
 
55
 
56
- # ══════════════════════════════════════════════════════════
57
- # 2. FEW-SHOT
58
- # ══════════════════════════════════════════════════════════
59
 
60
  def build_prompt_few_shot(english_text: str) -> str:
61
- """
62
- Few-shot: 3 concrete examples before the task.
63
- Reference: Brown et al. (2020), Language Models are Few-Shot Learners
64
- """
65
- return f"""{_SYSTEM}
66
 
67
  --- EXAMPLE 1 ---
68
  Conversation: "I have a headache and feel very tired. I haven't slept well in three days."
69
- V: Headache reported. Severe fatigue. Sleep disturbance for three days.
70
- I: Ingen relevant information.
71
- P: Ingen relevant information.
72
- S: Ingen relevant information.
73
 
74
  --- EXAMPLE 2 ---
75
- Conversation: "I take Metoprolol every day. I smoke about ten cigarettes a day and drink alcohol on weekends. I'm allergic to penicillin."
76
- V: Ingen relevant information.
77
- I: Smoker (approx. 10 cigarettes/day). Alcohol consumption on weekends.
78
- P: Smoking cessation may be discussed. Lifestyle risk factors identified.
79
- S: Known allergy to penicillin. Daily medication: Metoprolol.
80
 
81
  --- EXAMPLE 3 ---
82
- Conversation: "I have chest pain and difficulty breathing. I feel dizzy when I stand up. I live alone and have no one to help me."
83
- V: Chest pain and dyspnea reported. Dizziness on standing.
84
- I: Lives alone. No social support available.
85
- P: Ingen relevant information.
86
- S: Acute symptoms immediate clinical assessment required. Elevated fall risk due to dizziness.
87
 
88
- --- NOW YOUR TURN ---
89
  Conversation: \"\"\"{english_text}\"\"\"
90
 
91
- V:
92
- I:
93
- P:
94
- S:"""
95
-
96
 
97
- # ══════════════════════════════════════════════════════════
98
- # 3. CHAIN-OF-THOUGHT
99
- # ══════════════════════════════════════════════════════════
100
 
101
  def build_prompt_chain_of_thought(english_text: str) -> str:
102
- """
103
- Chain-of-Thought: explicit step-by-step reasoning before output.
104
- Reference: Wei et al. (2022), Chain-of-Thought Prompting Elicits Reasoning
105
- """
106
- return f"""{_SYSTEM}
107
 
108
- Use this step-by-step process:
109
 
110
- STEP 1 — List every clinical detail mentioned in the conversation.
111
- STEP 2 — Assign each detail to the correct VIPS category.
112
- STEP 3 — Verify: Does every item come directly from the conversation? Remove hallucinated content.
113
- STEP 4 — Write the final VIPS note.
114
 
115
  Patient-nurse conversation:
116
  \"\"\"{english_text}\"\"\"
117
 
118
- STEP 1 Clinical details identified:
119
- (list all relevant information)
120
 
121
- STEP 2VIPS mapping:
122
- (assign each item to V / I / P / S)
123
 
124
- STEP 3Verification:
125
- (confirm nothing was invented)
 
 
 
126
 
127
- STEP 4Final VIPS Note:
128
- V:
129
- I:
130
- P:
131
- S:"""
132
 
 
 
 
 
 
133
 
134
- # ══════════════════════════════════════════════════════════
135
- # PARSER
136
- # ══════════════════════════════════════════════════════════
137
 
138
  def parse_vips_response(response: str) -> dict:
139
- """
140
- Parse raw LLM response into a VIPS dict.
141
- Handles both 'V:' and 'V (Wellbeing):' label formats.
142
-
143
- Returns:
144
- {"V": "...", "I": "...", "P": "...", "S": "..."}
145
- """
146
  default = "Ingen relevant information."
147
  vips = {"V": default, "I": default, "P": default, "S": default}
148
 
149
- # Only parse the STEP 4 section if Chain-of-Thought
150
  if "STEP 4" in response:
151
  response = response.split("STEP 4")[-1]
152
 
153
  for line in response.strip().splitlines():
154
  line = line.strip()
155
  for key in ["V", "I", "P", "S"]:
156
- if line.startswith(f"{key}:") or line.startswith(f"{key} ("):
157
- # Extract content after the first colon
158
  content = line.split(":", 1)[-1].strip()
159
  if content:
160
  vips[key] = content
@@ -163,25 +125,18 @@ def parse_vips_response(response: str) -> dict:
163
  return vips
164
 
165
 
166
- # ══════════════════════════════════════════════════════════
167
- # MAIN: run all 3 strategies
168
- # ══════════════════════════════════════════════════════════
 
 
 
 
 
 
 
169
 
170
  def classify_all(english_text: str, mistral_client) -> dict:
171
- """
172
- Run all three prompt strategies against the same English text.
173
-
174
- Args:
175
- english_text: DeepL-translated, GDPR-anonymized text
176
- mistral_client: MistralClient instance
177
-
178
- Returns:
179
- {
180
- "zero_shot": {"V":..., "I":..., "P":..., "S":...},
181
- "few_shot": {"V":..., "I":..., "P":..., "S":...},
182
- "chain_of_thought":{"V":..., "I":..., "P":..., "S":...},
183
- }
184
- """
185
  strategies = {
186
  "zero_shot": (build_prompt_zero_shot, Config.LLM_MAX_TOKENS_ZERO_SHOT),
187
  "few_shot": (build_prompt_few_shot, Config.LLM_MAX_TOKENS_FEW_SHOT),
 
1
  """
2
  VoiceNote AI - VIPS Classifier
 
 
 
 
3
  """
4
 
5
  import logging
 
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
+ _VIPS_DEFINITIONS = """VIPS categories — extract ANY mention of:
11
+ V (Wellbeing): pain levels, fatigue, nausea, dizziness, sleep quality, mood, anxiety, appetite, physical symptoms
12
+ I (Integrity): living situation, mobility needs, personal habits, social support, preferences, independence level
13
+ P (Prevention): mobilization plans, exercises, lifestyle factors (smoking, diet), follow-up plans, physiotherapy
14
+ S (Safety): fall risk, allergies, medications, postoperative risks, clot risk, infection risk, safety equipment needed"""
15
 
16
+ _RULES = """CRITICAL RULES:
17
+ 1. Read EVERY sentence carefully extract ALL clinical details mentioned.
18
+ 2. Write "Ingen relevant information." ONLY if the category has ZERO mentions.
19
+ 3. Be specific (e.g. "Pain 3/10 at rest, 6/10 on movement" not just "pain reported").
20
+ 4. Never invent information not stated in the conversation."""
 
 
 
 
 
 
 
21
 
 
 
 
 
 
22
 
23
+ def build_prompt_zero_shot(english_text: str) -> str:
24
+ return f"""You are a Swedish clinical documentation specialist generating nursing notes in VIPS format.
25
 
26
+ {_VIPS_DEFINITIONS}
 
 
27
 
28
+ {_RULES}
 
 
 
 
 
29
 
30
+ Patient-nurse conversation to document:
31
  \"\"\"{english_text}\"\"\"
32
 
33
+ Extract all relevant clinical information and write the VIPS note now.
34
+ Use this exact format — one line per category:
 
 
 
35
 
36
+ V (Välbefinnande):
37
+ I (Integritet):
38
+ P (Prevention):
39
+ S (Säkerhet):"""
40
 
 
 
 
41
 
42
  def build_prompt_few_shot(english_text: str) -> str:
43
+ return f"""You are a Swedish clinical documentation specialist generating nursing notes in VIPS format.
44
+
45
+ {_VIPS_DEFINITIONS}
46
+
47
+ {_RULES}
48
 
49
  --- EXAMPLE 1 ---
50
  Conversation: "I have a headache and feel very tired. I haven't slept well in three days."
51
+ V (Välbefinnande): Headache reported. Severe fatigue. Sleep disturbance for three days.
52
+ I (Integritet): Ingen relevant information.
53
+ P (Prevention): Ingen relevant information.
54
+ S (Säkerhet): Ingen relevant information.
55
 
56
  --- EXAMPLE 2 ---
57
+ Conversation: "I take Metoprolol every day. I smoke about ten cigarettes a day. I'm allergic to penicillin. I feel dizzy when I stand up."
58
+ V (Välbefinnande): Dizziness on standing reported.
59
+ I (Integritet): Active smoker (approx. 10 cigarettes/day).
60
+ P (Prevention): Smoking cessation recommended. Daily medication management ongoing.
61
+ S (Säkerhet): Known allergy to penicillin. Daily medication: Metoprolol. Fall risk due to dizziness on standing.
62
 
63
  --- EXAMPLE 3 ---
64
+ Conversation: "I have chest pain and difficulty breathing. I live alone. The doctor said I need to start walking tomorrow."
65
+ V (Välbefinnande): Chest pain and dyspnea reported.
66
+ I (Integritet): Lives alone. No social support available at home.
67
+ P (Prevention): Mobilization plan initiated — walking planned from tomorrow per physician order.
68
+ S (Säkerhet): Acute cardiopulmonary symptoms require assessment. Elevated fall risk. Lives alone increases safety concern.
69
 
70
+ --- YOUR TURN ---
71
  Conversation: \"\"\"{english_text}\"\"\"
72
 
73
+ V (Välbefinnande):
74
+ I (Integritet):
75
+ P (Prevention):
76
+ S (Säkerhet):"""
 
77
 
 
 
 
78
 
79
  def build_prompt_chain_of_thought(english_text: str) -> str:
80
+ return f"""You are a Swedish clinical documentation specialist generating nursing notes in VIPS format.
 
 
 
 
81
 
82
+ {_VIPS_DEFINITIONS}
83
 
84
+ {_RULES}
 
 
 
85
 
86
  Patient-nurse conversation:
87
  \"\"\"{english_text}\"\"\"
88
 
89
+ Follow these steps carefully:
 
90
 
91
+ STEP 1List EVERY clinical detail from the conversation (pain, symptoms, medications, plans, risks, living situation, etc.):
92
+ -
93
 
94
+ STEP 2Assign each detail to the correct VIPS category:
95
+ V (Wellbeing) items:
96
+ I (Integrity) items:
97
+ P (Prevention) items:
98
+ S (Safety) items:
99
 
100
+ STEP 3Check: Is every item above directly from the conversation? Remove anything invented.
 
 
 
 
101
 
102
+ STEP 4 — Write the final VIPS note:
103
+ V (Välbefinnande):
104
+ I (Integritet):
105
+ P (Prevention):
106
+ S (Säkerhet):"""
107
 
 
 
 
108
 
109
  def parse_vips_response(response: str) -> dict:
 
 
 
 
 
 
 
110
  default = "Ingen relevant information."
111
  vips = {"V": default, "I": default, "P": default, "S": default}
112
 
 
113
  if "STEP 4" in response:
114
  response = response.split("STEP 4")[-1]
115
 
116
  for line in response.strip().splitlines():
117
  line = line.strip()
118
  for key in ["V", "I", "P", "S"]:
119
+ if line.startswith(f"{key} (") or line.startswith(f"{key}:"):
 
120
  content = line.split(":", 1)[-1].strip()
121
  if content:
122
  vips[key] = content
 
125
  return vips
126
 
127
 
128
+ def format_vips_for_display(vips: dict) -> str:
129
+ labels = {
130
+ "V": "V (Välbefinnande)",
131
+ "I": "I (Integritet)",
132
+ "P": "P (Prevention)",
133
+ "S": "S (Säkerhet)",
134
+ }
135
+ return "\n".join(f"{labels[k]}: {vips.get(k, 'Ingen relevant information.')}"
136
+ for k in ["V", "I", "P", "S"])
137
+
138
 
139
  def classify_all(english_text: str, mistral_client) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  strategies = {
141
  "zero_shot": (build_prompt_zero_shot, Config.LLM_MAX_TOKENS_ZERO_SHOT),
142
  "few_shot": (build_prompt_few_shot, Config.LLM_MAX_TOKENS_FEW_SHOT),