File size: 7,670 Bytes
345077a aa1688d 106cfd6 345077a 6a2d781 d6c4269 345077a 106cfd6 aa1688d 106cfd6 345077a 5bc3d37 106cfd6 5bc3d37 106cfd6 1fe6ec4 5bc3d37 106cfd6 5bc3d37 6a2d781 345077a c3d84fa 106cfd6 aa1688d 106cfd6 d6c4269 aa1688d 6a2d781 5bc3d37 c3d84fa 1fe6ec4 6a2d781 106cfd6 6a2d781 106cfd6 6a2d781 5bc3d37 106cfd6 345077a 106cfd6 aa1688d 106cfd6 aa1688d 106cfd6 aa1688d 106cfd6 aa1688d 106cfd6 aa1688d 106cfd6 aa1688d 106cfd6 345077a 106cfd6 5bc3d37 106cfd6 aa1688d 0be7cdb aa1688d 0be7cdb aa1688d 106cfd6 aa1688d 0be7cdb 106cfd6 345077a 0be7cdb 106cfd6 c3d84fa 106cfd6 345077a 106cfd6 6a2d781 106cfd6 5bc3d37 d6c4269 106cfd6 d6c4269 106cfd6 6a2d781 0be7cdb 106cfd6 6a2d781 106cfd6 6a2d781 106cfd6 6a2d781 106cfd6 6a2d781 106cfd6 6a2d781 d6c4269 6a2d781 345077a 5bc3d37 106cfd6 c3d84fa d6c4269 106cfd6 5bc3d37 d6c4269 106cfd6 aa1688d 106cfd6 aa1688d 0be7cdb 6a2d781 aa1688d 106cfd6 aa1688d 106cfd6 aa1688d 6a2d781 106cfd6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | """
VoiceNote AI - VIPS Classifier
Three SEPARATE prompt strategies for proper experimental comparison.
Methodology note: Earlier versions combined all three strategies into a single
API call to reduce latency. This caused output priming - the model's first
classification influenced the subsequent ones, masking real differences between
strategies. Three separate API calls now ensure independent evaluation.
"""
import logging
import re
from config import Config
logger = logging.getLogger(__name__)
# ==========================================================
# SHARED COMPONENTS
# ==========================================================
# Description of the four VIPS categories (V, I, P, S). Interpolated verbatim
# into every prompt built in this module, so any edit here changes all three
# prompt strategies at once.
_VIPS_DEFINITIONS = """VIPS categories:
V (Valbefinnande): pain, fatigue, nausea, dizziness, sleep, mood, anxiety, appetite, physical symptoms
I (Integritet): living situation, mobility needs, habits, social support, preferences
P (Prevention): mobilization plans, lifestyle factors, follow-up, physiotherapy
S (Sakerhet): fall risk, allergies, medications, postoperative risks, infection risk"""
# Output rules shared by every prompt strategy. The exact sentence
# "Ingen relevant information." requested here is also the default value the
# response parser uses for a category it could not find.
_RULES = """RULES:
- Write output in Swedish only
- Do NOT quote the conversation - reformulate as professional clinical documentation
- Write "Ingen relevant information." if a category has zero relevant content
- Never invent information not stated in the conversation
- Output the four VIPS lines in plain text, no markdown, no numbering"""
# ==========================================================
# THREE INDEPENDENT PROMPT STRATEGIES
# ==========================================================
def build_prompt_zero_shot(text: str) -> str:
    """Strategy 1: build the zero-shot prompt.

    The prompt holds the shared VIPS definitions and rules, an explicit
    four-line output format, the conversation, and a trailing
    "V (Valbefinnande):" anchor line. No worked examples are included.

    Args:
        text: Conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    template = (
        "You are a Swedish clinical documentation specialist.\n"
        "{definitions}\n"
        "{rules}\n"
        "Output format (exactly these 4 lines, no preamble, no markdown):\n"
        "V (Valbefinnande): [Swedish content]\n"
        "I (Integritet): [Swedish content]\n"
        "P (Prevention): [Swedish content]\n"
        "S (Sakerhet): [Swedish content]\n"
        "Conversation:\n"
        "{conversation}\n"
        "V (Valbefinnande):"
    )
    return template.format(
        definitions=_VIPS_DEFINITIONS,
        rules=_RULES,
        conversation=text,
    )
def build_prompt_few_shot(text: str) -> str:
    """Strategy 2: build the few-shot prompt with two worked examples.

    The prompt holds the shared VIPS definitions and rules, two complete
    conversation-to-note examples, the conversation to classify, and the
    four empty category labels.

    Args:
        text: Conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    # NOTE: the example notes must contain only facts stated in their own
    # example conversation. Example 1 previously said "hoger knaled" (right
    # knee) although the conversation gives no side, which demonstrated exactly
    # the kind of invention the RULES section forbids.
    return f"""You are a Swedish clinical documentation specialist.
{_VIPS_DEFINITIONS}
{_RULES}
Here are two complete examples to learn from:
Example 1:
Conversation: Patient reports knee pain 7/10 since yesterday. Lives alone in apartment, no family nearby. Allergic to penicillin. No mobility aids.
V (Valbefinnande): Smarta 7/10 i knaled sedan igar.
I (Integritet): Bor ensam i lagenhet utan nara familj.
P (Prevention): Ingen relevant information.
S (Sakerhet): Kand penicillinallergi. Inga hjalpmedel.
Example 2:
Conversation: Patient anxious, hasn't slept in a week. Daughter helps with daily tasks. Takes 6 medications but cannot name them. Fell once last week.
V (Valbefinnande): Angest och somnsvarigheter sedan en vecka.
I (Integritet): Dotter assisterar med dagliga aktiviteter.
P (Prevention): Behov av lakemedelsgenomgang och fallpreventionsbedomning.
S (Sakerhet): Tar 6 lakemedel utan kannedom om namn. Tidigare fall senaste veckan.
Now classify this conversation in the same format:
Conversation: {text}
V (Valbefinnande):
I (Integritet):
P (Prevention):
S (Sakerhet):"""
def build_prompt_chain_of_thought(text: str) -> str:
    """Strategy 3: build the chain-of-thought prompt.

    The prompt asks for a short REASONING section followed by the final note,
    which is introduced by the literal marker "===FINAL NOTE===" and the four
    empty category labels.

    Args:
        text: Conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    template = (
        "You are a Swedish clinical documentation specialist.\n"
        "{definitions}\n"
        "{rules}\n"
        "Reason BRIEFLY (max 3-4 short bullet points per step), then write the final note.\n"
        "Conversation:\n"
        "{conversation}\n"
        "REASONING:\n"
        "- Clinical details mentioned: (list briefly)\n"
        "- Category assignments (V/I/P/S): (one short sentence per detail)\n"
        "- Verification - only stated info, nothing invented: (yes/no)\n"
        "===FINAL NOTE===\n"
        "V (Valbefinnande):\n"
        "I (Integritet):\n"
        "P (Prevention):\n"
        "S (Sakerhet):"
    )
    return template.format(
        definitions=_VIPS_DEFINITIONS,
        rules=_RULES,
        conversation=text,
    )
# ==========================================================
# ROBUST PARSER
# ==========================================================
def parse_vips_response(response: str) -> dict:
    """Parse a model response into the four VIPS categories.

    Tolerates markdown emphasis, list markers, numbering and three label
    spellings per category ("V (Valbefinnande):", "Valbefinnande:", "V:").
    If a final-note marker is present, only the text after its last
    occurrence is parsed.

    Args:
        response: Raw text returned by the model.

    Returns:
        A dict with keys "V", "I", "P" and "S". A category that cannot be
        found, is empty, or only holds placeholder text keeps the default
        value "Ingen relevant information.".
    """
    default = "Ingen relevant information."
    vips = {"V": default, "I": default, "P": default, "S": default}
    placeholders = {"[swedish content]", "content", "..."}

    # For CoT: keep only the section after the final-note marker so that
    # category letters mentioned in the reasoning are not parsed as the note.
    for marker in ("===FINAL NOTE===", "FINAL NOTE", "Final Note",
                   "STEP 4", "===FINAL===", "FINAL:"):
        if marker in response:
            response = response.split(marker)[-1]
            break

    # Match Swedish category names written with either "a" or "ä".
    full_names = {
        "V": r"V[aä]lbefinnande",
        "I": r"Integritet",
        "P": r"Prevention",
        "S": r"S[aä]kerhet",
    }
    any_name = "|".join(full_names.values())

    # Recognises text that is itself the start of a labelled category line.
    next_label = re.compile(
        rf'(?i)[\s\*\-\d\.]*(?:[VIPS]\s*\((?:{any_name})\)\s*[:\-]'
        rf'|(?:{any_name})\s*[:\-]'
        rf'|[VIPS]\s*:)'
    )

    line_prefix = r'(?im)^[\s\*\-\d\.]*'
    # "gap" is whatever separates the label from its content. \s also matches
    # newlines, so the content may legitimately start on the following line.
    line_tail = r'\s*[:\-](?P<gap>\s*\**\s*)(?P<content>.+?)\s*\**\s*$'

    for key, full in full_names.items():
        # Label spellings, tried from most specific to most lenient.
        labels = (rf'{key}\s*\({full}\)', full, key)
        for label in labels:
            found = None
            # Look at every occurrence, not just the first one: an echoed
            # format line such as "[Swedish content]" must not hide a real
            # answer further down.
            for match in re.finditer(line_prefix + label + line_tail, response):
                content = match.group("content").strip().strip('*').strip()
                if not content or content.lower() in placeholders:
                    continue
                # An empty category line makes the pattern run on into the
                # next line; if that line is another category's label, it is
                # not content for this category.
                if "\n" in match.group("gap") and next_label.match(content):
                    continue
                found = content
                break
            if found is not None:
                vips[key] = found
                break
    return vips
def format_vips_for_display(vips: dict) -> str:
    """Render a VIPS mapping as four "label: content" lines.

    Lines are always emitted in V, I, P, S order. A category missing from
    ``vips`` is shown as "Ingen relevant information.".
    """
    fallback = 'Ingen relevant information.'
    headings = (
        ("V", "V (Valbefinnande)"),
        ("I", "I (Integritet)"),
        ("P", "P (Prevention)"),
        ("S", "S (Sakerhet)"),
    )
    rows = []
    for code, heading in headings:
        rows.append("{}: {}".format(heading, vips.get(code, fallback)))
    return "\n".join(rows)
# ==========================================================
# MAIN - three SEPARATE API calls + DEBUG LOGGING
# ==========================================================
def classify_all(english_text: str, mistral_client) -> dict:
    """Run three independent VIPS classifications via separate API calls.

    Why separate calls instead of one combined prompt:
    - Avoids output priming (model copying its own previous output)
    - Each strategy gets a fresh context
    - Results become scientifically comparable
    Trade-off: ~3x latency, but methodologically sound.

    Args:
        english_text: Conversation text passed to every prompt builder.
        mistral_client: Object exposing
            ``generate(prompt=..., max_tokens=..., temperature=...)``. The
            return value is sliced, measured with ``len`` and parsed as text,
            so it is presumably a ``str`` - confirm against the client.

    Returns:
        A dict keyed by strategy name ("zero_shot", "few_shot",
        "chain_of_thought"). Each value is a dict with keys "V", "I", "P" and
        "S". If a strategy raises, all four of its values are set to
        "[FEL: <error>]" and the remaining strategies still run.
    """
    empty_text = "Ingen relevant information."
    categories = ("V", "I", "P", "S")
    # Chain-of-thought gets a larger token budget because it writes its
    # reasoning before the final note.
    strategies = {
        "zero_shot": (build_prompt_zero_shot, 500),
        "few_shot": (build_prompt_few_shot, 500),
        "chain_of_thought": (build_prompt_chain_of_thought, 1200),
    }

    logger.info("Running 3 independent prompt strategies...")
    results = {}
    for name, (builder, max_tok) in strategies.items():
        try:
            logger.info(" -> %s (max_tokens=%s)...", name, max_tok)
            raw = mistral_client.generate(
                prompt=builder(english_text),
                max_tokens=max_tok,
                temperature=0.15,
            )
            # DEBUG: log first 300 chars of raw response
            logger.info(" [RAW] %s (first 300 chars): %s", name, raw[:300])
            parsed = parse_vips_response(raw)
            # Sanity check: warn if all categories are empty
            if all(value == empty_text for value in parsed.values()):
                logger.warning(
                    " [WARN] %s parsed empty - check raw output above", name
                )
            results[name] = parsed
            logger.info(" [OK] %s done (%s chars)", name, len(raw))
        except Exception as e:
            # Deliberately broad: one failing strategy must not abort the
            # others. logger.exception keeps the traceback, which the plain
            # error message alone would lose.
            logger.exception(" [ERR] %s failed: %s", name, e)
            results[name] = {k: f"[FEL: {e}]" for k in categories}
    return results
|