| """ |
| VoiceNote AI - VIPS Classifier |
| Three SEPARATE prompt strategies for proper experimental comparison. |
| |
| Methodology note: Earlier versions combined all three strategies into a single |
| API call to reduce latency. This caused output priming - the model's first |
| classification influenced the subsequent ones, masking real differences between |
| strategies. Three separate API calls now ensure independent evaluation. |
| """ |
| import logging |
| import re |
| from config import Config |
|
|
logger = logging.getLogger(__name__)


# Glossary of the four VIPS categories, embedded verbatim in every prompt.
# The category names are spelled without Swedish diacritics.
_VIPS_DEFINITIONS = "\n".join((
    "VIPS categories:",
    "V (Valbefinnande): pain, fatigue, nausea, dizziness, sleep, mood, anxiety, appetite, physical symptoms",
    "I (Integritet): living situation, mobility needs, habits, social support, preferences",
    "P (Prevention): mobilization plans, lifestyle factors, follow-up, physiotherapy",
    "S (Sakerhet): fall risk, allergies, medications, postoperative risks, infection risk",
))


# Output constraints shared by every prompt strategy. The literal sentence
# "Ingen relevant information." is the placeholder for an empty category.
_RULES = "\n".join((
    "RULES:",
    "- Write output in Swedish only",
    "- Do NOT quote the conversation - reformulate as professional clinical documentation",
    '- Write "Ingen relevant information." if a category has zero relevant content',
    "- Never invent information not stated in the conversation",
    "- Output the four VIPS lines in plain text, no markdown, no numbering",
))
|
|
|
|
| |
| |
| |
|
|
def build_prompt_zero_shot(text: str) -> str:
    """Build the zero-shot prompt (strategy 1).

    The prompt consists of the shared category definitions and rules, an
    explicit four-line output template, the conversation, and finally the
    opening ``V (Valbefinnande):`` label as a format anchor.

    Args:
        text: Conversation to classify; inserted into the prompt verbatim.

    Returns:
        The complete prompt string.
    """
    prompt = f"""You are a Swedish clinical documentation specialist.
{_VIPS_DEFINITIONS}
{_RULES}

Output format (exactly these 4 lines, no preamble, no markdown):
V (Valbefinnande): [Swedish content]
I (Integritet): [Swedish content]
P (Prevention): [Swedish content]
S (Sakerhet): [Swedish content]

Conversation:
{text}

V (Valbefinnande):"""
    return prompt
|
|
|
|
def build_prompt_few_shot(text: str) -> str:
    """Build the few-shot prompt (strategy 2).

    The prompt consists of the shared category definitions and rules, two
    worked conversation-to-note examples, the conversation to classify, and an
    empty four-line V/I/P/S template for the model to fill in.

    Args:
        text: Conversation to classify; inserted into the prompt verbatim.

    Returns:
        The complete prompt string.
    """
    # The exemplars must obey the prompt's own rules. Example 1 only mentions
    # "knee pain", so its V line must not name a side of the body - an
    # exemplar that adds details teaches the model to do the same.
    # NOTE(review): Example 2's P line recommends a medication review and a
    # fall-prevention assessment that its conversation does not state; confirm
    # whether that inference is intended given the "never invent" rule.
    return f"""You are a Swedish clinical documentation specialist.
{_VIPS_DEFINITIONS}
{_RULES}

Here are two complete examples to learn from:

Example 1:
Conversation: Patient reports knee pain 7/10 since yesterday. Lives alone in apartment, no family nearby. Allergic to penicillin. No mobility aids.
V (Valbefinnande): Knasmarta 7/10 sedan igar.
I (Integritet): Bor ensam i lagenhet utan nara familj.
P (Prevention): Ingen relevant information.
S (Sakerhet): Kand penicillinallergi. Inga hjalpmedel.

Example 2:
Conversation: Patient anxious, hasn't slept in a week. Daughter helps with daily tasks. Takes 6 medications but cannot name them. Fell once last week.
V (Valbefinnande): Angest och somnsvarigheter sedan en vecka.
I (Integritet): Dotter assisterar med dagliga aktiviteter.
P (Prevention): Behov av lakemedelsgenomgang och fallpreventionsbedomning.
S (Sakerhet): Tar 6 lakemedel utan kannedom om namn. Tidigare fall senaste veckan.

Now classify this conversation in the same format:
Conversation: {text}

V (Valbefinnande):
I (Integritet):
P (Prevention):
S (Sakerhet):"""
|
|
|
|
def build_prompt_chain_of_thought(text: str) -> str:
    """Build the chain-of-thought prompt (strategy 3).

    The model is asked for a short bulleted reasoning section followed by the
    ``===FINAL NOTE===`` marker and the four V/I/P/S lines. The marker gives
    the parser a fixed point from which to read the final note.

    Args:
        text: Conversation to classify; inserted into the prompt verbatim.

    Returns:
        The complete prompt string.
    """
    prompt = f"""You are a Swedish clinical documentation specialist.
{_VIPS_DEFINITIONS}
{_RULES}

Reason BRIEFLY (max 3-4 short bullet points per step), then write the final note.

Conversation:
{text}

REASONING:
- Clinical details mentioned: (list briefly)
- Category assignments (V/I/P/S): (one short sentence per detail)
- Verification - only stated info, nothing invented: (yes/no)

===FINAL NOTE===
V (Valbefinnande):
I (Integritet):
P (Prevention):
S (Sakerhet):"""
    return prompt
|
|
|
|
| |
| |
| |
|
|
# Regex fragments for each category name, accepting the spelling with or
# without the Swedish diacritic.
_VIPS_FULL_NAMES = {
    "V": r"V[aä]lbefinnande",
    "I": r"Integritet",
    "P": r"Prevention",
    "S": r"S[aä]kerhet",
}
_VIPS_EMPTY = "Ingen relevant information."
# Template text a model may echo back instead of real content.
_VIPS_PLACEHOLDERS = frozenset({"[swedish content]", "content", "..."})
# Markers that separate reasoning from the final note, tried in this order.
_FINAL_NOTE_MARKERS = (
    "===FINAL NOTE===", "FINAL NOTE", "Final Note",
    "STEP 4", "===FINAL===", "FINAL:",
)
# List bullets, numbering and markdown emphasis that may precede a label.
_VIPS_LINE_PREFIX = r"[\s\*\-\d\.]*"
# Optional closing markdown emphasis, then the ":" or "-" separator.
_VIPS_LABEL_END = r"\s*\**\s*[:\-]"


def _vips_label_forms(key, full):
    """Return the accepted label spellings for one category, strictest first."""
    return (rf"{key}\s*\({full}\)", full, key)


# Per category: one compiled line pattern per label spelling. "gap" is the
# text between separator and content; it may contain a line break when the
# model put the content on the line below the label.
_VIPS_LINE_PATTERNS = {
    key: tuple(
        re.compile(
            rf"(?im)^{_VIPS_LINE_PREFIX}{form}{_VIPS_LABEL_END}"
            rf"(?P<gap>\s*\**\s*)(?P<content>.+?)\s*\**\s*$"
        )
        for form in _vips_label_forms(key, full)
    )
    for key, full in _VIPS_FULL_NAMES.items()
}

# Matches text that itself starts with a label of ANY category.
_ANY_VIPS_LABEL = re.compile(
    rf"(?i){_VIPS_LINE_PREFIX}(?:"
    + "|".join(
        form
        for key, full in _VIPS_FULL_NAMES.items()
        for form in _vips_label_forms(key, full)
    )
    + rf"){_VIPS_LABEL_END}"
)


def _first_usable_vips_content(patterns, response):
    """Return the first non-placeholder content found by *patterns*, or None."""
    for pattern in patterns:
        # Look at every match, not just the first: an echoed template line
        # must not hide a real answer further down.
        for match in pattern.finditer(response):
            content = match.group("content").strip().strip("*").strip()
            if not content or content.lower() in _VIPS_PLACEHOLDERS:
                continue
            # Content taken from the line below the label is only valid if
            # that line is not the next category's label; otherwise an empty
            # "V (...):" would swallow "I (...): ..." as its content.
            if "\n" in match.group("gap") and _ANY_VIPS_LABEL.match(content):
                continue
            return content
    return None


def parse_vips_response(response: str) -> dict:
    """Extract the four VIPS fields from a model response.

    Tolerates markdown emphasis around labels (``**V (...):**`` and
    ``**V (...)**:``), list bullets or numbering before a label, labels
    written as ``V (Valbefinnande)``, ``Valbefinnande`` or ``V``, and content
    placed on the line after its label. If the response contains a final-note
    marker, only the text after its last occurrence is parsed.

    Args:
        response: Raw model output. ``None`` or an empty string is accepted.

    Returns:
        Dict with the keys V, I, P and S. A category that is absent, empty or
        only an echoed template placeholder holds
        "Ingen relevant information.".
    """
    vips = dict.fromkeys(_VIPS_FULL_NAMES, _VIPS_EMPTY)
    if not response:
        return vips

    # Drop any reasoning that precedes the final note.
    for marker in _FINAL_NOTE_MARKERS:
        if marker in response:
            response = response.split(marker)[-1]
            break

    for key, patterns in _VIPS_LINE_PATTERNS.items():
        content = _first_usable_vips_content(patterns, response)
        if content is not None:
            vips[key] = content
    return vips
|
|
|
|
def format_vips_for_display(vips: dict) -> str:
    """Render a VIPS result as four ``label: text`` lines in V, I, P, S order.

    Args:
        vips: Mapping from category key (V, I, P, S) to note text. Keys may be
            missing.

    Returns:
        Four newline-joined lines. A category that is missing, ``None`` or
        blank is shown as "Ingen relevant information." so the output never
        contains the literal text "None" or an empty field.
    """
    default = "Ingen relevant information."
    labels = {
        "V": "V (Valbefinnande)",
        "I": "I (Integritet)",
        "P": "P (Prevention)",
        "S": "S (Sakerhet)",
    }
    lines = []
    for key, label in labels.items():
        value = vips.get(key)
        if value is None or not str(value).strip():
            value = default
        lines.append(f"{label}: {value}")
    return "\n".join(lines)
|
|
|
|
| |
| |
| |
|
|
def classify_all(english_text: str, mistral_client) -> dict:
    """Classify one conversation with all three prompt strategies.

    Each strategy is sent as its own ``mistral_client.generate`` call, so the
    model never sees its answer to another strategy (no output priming) and
    the results stay comparable. The cost is roughly three times the latency
    of a single combined prompt.

    Args:
        english_text: Conversation text handed to each prompt builder.
        mistral_client: Object exposing ``generate(prompt=..., max_tokens=...,
            temperature=...)``. Its result is sliced and measured with
            ``len``, so it is presumably a string.

    Returns:
        Dict keyed by strategy name (``zero_shot``, ``few_shot``,
        ``chain_of_thought``); each value is a dict with the keys V, I, P, S.
        If a strategy raises, all four of its fields hold ``[FEL: <error>]``
        and the remaining strategies still run.
    """
    logger.info("Running 3 independent prompt strategies...")

    # (strategy name, prompt builder, token budget). Chain-of-thought has the
    # largest budget, presumably because it writes reasoning before the note.
    plan = (
        ("zero_shot", build_prompt_zero_shot, 500),
        ("few_shot", build_prompt_few_shot, 500),
        ("chain_of_thought", build_prompt_chain_of_thought, 1200),
    )
    nothing_found = "Ingen relevant information."
    outcomes = {}

    for name, make_prompt, max_tok in plan:
        try:
            logger.info(f" -> {name} (max_tokens={max_tok})...")
            prompt = make_prompt(english_text)
            raw = mistral_client.generate(
                prompt=prompt,
                max_tokens=max_tok,
                temperature=0.15,
            )

            # NOTE(review): this writes model output, which may contain
            # patient details, to the log at INFO level - confirm that is
            # acceptable for the deployment.
            logger.info(f" [RAW] {name} (first 300 chars): {raw[:300]}")

            fields = parse_vips_response(raw)

            # Four placeholder fields usually mean the parser found no labels.
            if not any(value != nothing_found for value in fields.values()):
                logger.warning(f" [WARN] {name} parsed empty - check raw output above")

            outcomes[name] = fields
            logger.info(f" [OK] {name} done ({len(raw)} chars)")
        except Exception as e:
            # Per-strategy boundary: one failing call must not abort the rest.
            logger.error(f" [ERR] {name} failed: {e}")
            outcomes[name] = dict.fromkeys(["V", "I", "P", "S"], f"[FEL: {e}]")

    return outcomes
| |