Spaces:

SsebaA
/

x

Running on Zero

File size: 7,670 Bytes

345077a
 
aa1688d
106cfd6
 
 
 
 
345077a
 
6a2d781
d6c4269
345077a
 
 
 
106cfd6
aa1688d
106cfd6
345077a
5bc3d37
106cfd6
5bc3d37
 
106cfd6
1fe6ec4
5bc3d37
 
106cfd6
5bc3d37
 
6a2d781
345077a
c3d84fa
106cfd6
aa1688d
106cfd6
d6c4269
aa1688d
6a2d781
5bc3d37
c3d84fa
 
1fe6ec4
6a2d781
106cfd6
6a2d781
 
106cfd6
6a2d781
5bc3d37
106cfd6
345077a
106cfd6
aa1688d
 
 
106cfd6
aa1688d
 
 
 
 
 
 
106cfd6
 
 
aa1688d
106cfd6
aa1688d
 
106cfd6
 
aa1688d
106cfd6
 
aa1688d
 
106cfd6
345077a
106cfd6
5bc3d37
 
106cfd6
aa1688d
 
 
0be7cdb
aa1688d
 
 
 
0be7cdb
aa1688d
 
106cfd6
aa1688d
0be7cdb
 
 
106cfd6
345077a
0be7cdb
106cfd6
c3d84fa
 
106cfd6
345077a
 
106cfd6
6a2d781
106cfd6
5bc3d37
d6c4269
106cfd6
d6c4269
 
106cfd6
6a2d781
0be7cdb
 
 
 
 
106cfd6
 
6a2d781
106cfd6
 
 
 
6a2d781
106cfd6
6a2d781
106cfd6
6a2d781
 
 
 
 
 
 
 
 
106cfd6
6a2d781
d6c4269
6a2d781
345077a
 
 
5bc3d37
106cfd6
 
c3d84fa
 
 
d6c4269
106cfd6
 
 
5bc3d37
d6c4269
106cfd6
 
 
 
 
 
 
 
 
 
aa1688d
106cfd6
aa1688d
 
0be7cdb
 
6a2d781
aa1688d
106cfd6
aa1688d
 
106cfd6
aa1688d
 
 
 
 
6a2d781
106cfd6

"""
VoiceNote AI - VIPS Classifier
Three SEPARATE prompt strategies for proper experimental comparison.

Methodology note: Earlier versions combined all three strategies into a single
API call to reduce latency. This caused output priming - the model's first
classification influenced the subsequent ones, masking real differences between
strategies. Three separate API calls now ensure independent evaluation.
"""
import logging
import re
from config import Config

logger = logging.getLogger(__name__)


# ==========================================================
# SHARED COMPONENTS
# ==========================================================

# Reference text describing the four VIPS categories (V, I, P, S).
# Interpolated verbatim near the top of every build_prompt_* function, so all
# three strategies see the same category definitions.
_VIPS_DEFINITIONS = """VIPS categories:
V (Valbefinnande): pain, fatigue, nausea, dizziness, sleep, mood, anxiety, appetite, physical symptoms
I (Integritet): living situation, mobility needs, habits, social support, preferences
P (Prevention): mobilization plans, lifestyle factors, follow-up, physiotherapy
S (Sakerhet): fall risk, allergies, medications, postoperative risks, infection risk"""

# Output rules shared by every prompt strategy (interpolated right after the
# category definitions). The fallback sentence "Ingen relevant information."
# requested here is the same string parse_vips_response uses as its default
# value for a category it could not extract.
_RULES = """RULES:
- Write output in Swedish only
- Do NOT quote the conversation - reformulate as professional clinical documentation
- Write "Ingen relevant information." if a category has zero relevant content
- Never invent information not stated in the conversation
- Output the four VIPS lines in plain text, no markdown, no numbering"""


# ==========================================================
# THREE INDEPENDENT PROMPT STRATEGIES
# ==========================================================

def build_prompt_zero_shot(text: str) -> str:
    """Strategy 1 (zero-shot): task instructions plus an explicit output template.

    The prompt consists of, in order: the role line, the shared VIPS
    definitions and rules, the four-line output format, the conversation,
    and a trailing bare "V (Valbefinnande):" line that anchors the answer.

    Args:
        text: The conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    output_template = (
        "Output format (exactly these 4 lines, no preamble, no markdown):\n"
        "V (Valbefinnande): [Swedish content]\n"
        "I (Integritet): [Swedish content]\n"
        "P (Prevention): [Swedish content]\n"
        "S (Sakerhet): [Swedish content]"
    )
    # Empty strings become the blank separator lines once joined with "\n".
    pieces = [
        "You are a Swedish clinical documentation specialist.",
        f"{_VIPS_DEFINITIONS}",
        f"{_RULES}",
        "",
        output_template,
        "",
        "Conversation:",
        f"{text}",
        "",
        "V (Valbefinnande):",
    ]
    return "\n".join(pieces)


def build_prompt_few_shot(text: str) -> str:
    """Strategy 2 (few-shot): two complete input-to-output examples.

    The prompt consists of the role line, the shared VIPS definitions and
    rules, two worked examples, and the conversation to classify followed by
    the four empty VIPS labels.

    The example outputs must themselves obey the shared rules, in particular
    "Never invent information not stated in the conversation": every detail
    in an example note is traceable to its example conversation. A few-shot
    example that adds unstated details (a body side, an unstated care need)
    demonstrates exactly the behaviour the rules forbid.

    Args:
        text: The conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    return f"""You are a Swedish clinical documentation specialist.
{_VIPS_DEFINITIONS}
{_RULES}

Here are two complete examples to learn from:

Example 1:
Conversation: Patient reports knee pain 7/10 since yesterday. Lives alone in apartment, no family nearby. Allergic to penicillin. No mobility aids.
V (Valbefinnande): Smarta 7/10 i knaled sedan igar.
I (Integritet): Bor ensam i lagenhet utan nara familj.
P (Prevention): Ingen relevant information.
S (Sakerhet): Kand penicillinallergi. Inga hjalpmedel.

Example 2:
Conversation: Patient anxious, hasn't slept in a week. Daughter helps with daily tasks. Takes 6 medications but cannot name them. Fell once last week. Nurse notes that a medication review and a fall-prevention assessment are needed.
V (Valbefinnande): Angest och somnsvarigheter sedan en vecka.
I (Integritet): Dotter assisterar med dagliga aktiviteter.
P (Prevention): Behov av lakemedelsgenomgang och fallpreventionsbedomning.
S (Sakerhet): Tar 6 lakemedel utan kannedom om namn. Tidigare fall senaste veckan.

Now classify this conversation in the same format:
Conversation: {text}

V (Valbefinnande):
I (Integritet):
P (Prevention):
S (Sakerhet):"""


def build_prompt_chain_of_thought(text: str) -> str:
    """Strategy 3 (chain-of-thought): short reasoning scaffold, then the note.

    The prompt consists of five paragraphs separated by blank lines: the
    role line with the shared VIPS definitions and rules, the brevity
    instruction, the conversation, a three-bullet REASONING scaffold, and
    the "===FINAL NOTE===" marker followed by the four empty VIPS labels.
    parse_vips_response splits on that marker to discard the reasoning.

    Args:
        text: The conversation to classify; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    preamble = "\n".join((
        "You are a Swedish clinical documentation specialist.",
        f"{_VIPS_DEFINITIONS}",
        f"{_RULES}",
    ))
    instruction = (
        "Reason BRIEFLY (max 3-4 short bullet points per step), "
        "then write the final note."
    )
    conversation = f"Conversation:\n{text}"
    reasoning_scaffold = "\n".join((
        "REASONING:",
        "- Clinical details mentioned: (list briefly)",
        "- Category assignments (V/I/P/S): (one short sentence per detail)",
        "- Verification - only stated info, nothing invented: (yes/no)",
    ))
    final_note_scaffold = "\n".join((
        "===FINAL NOTE===",
        "V (Valbefinnande):",
        "I (Integritet):",
        "P (Prevention):",
        "S (Sakerhet):",
    ))
    # Paragraphs are separated by exactly one blank line.
    return "\n\n".join((
        preamble,
        instruction,
        conversation,
        reasoning_scaffold,
        final_note_scaffold,
    ))


# ==========================================================
# ROBUST PARSER
# ==========================================================

def parse_vips_response(response: str) -> dict:
    """Extract the four VIPS category texts from a raw model response.

    Tolerates markdown bold, list markers/numbering before a label, the bare
    Swedish category name or bare letter as label, and content placed on the
    line after its label. For chain-of-thought output, only the text after
    the last final-note marker is parsed.

    Args:
        response: Raw model output. ``None`` or blank input is accepted.

    Returns:
        A dict with exactly the keys "V", "I", "P" and "S". A category that
        is missing, empty, or only contains template placeholder text maps
        to "Ingen relevant information.".
    """
    default = "Ingen relevant information."
    vips = {"V": default, "I": default, "P": default, "S": default}

    # Nothing to parse: return the defaults instead of failing on None.
    if not isinstance(response, str) or not response.strip():
        return vips

    # For CoT: keep only the section after the final-note marker.
    for marker in ("===FINAL NOTE===", "FINAL NOTE", "Final Note",
                   "STEP 4", "===FINAL===", "FINAL:"):
        if marker in response:
            response = response.split(marker)[-1]
            break

    # Swedish category names, accepting both the "a" and "ä" spellings.
    full_names = {
        "V": r"V[aä]lbefinnande",
        "I": r"Integritet",
        "P": r"Prevention",
        "S": r"S[aä]kerhet",
    }
    placeholders = {'[swedish content]', 'content', '...'}

    # Recognises captured "content" that is really the NEXT category's
    # label. The whitespace after a label's separator may span a newline (so
    # content on the following line is supported), which means an empty
    # category would otherwise swallow the next labelled line as its value.
    any_name = "|".join(full_names.values())
    next_label = re.compile(
        rf'(?i)[\s\*\-\d\.]*(?:'
        rf'(?:[VIPS]\s*\((?:{any_name})\)|(?:{any_name}))\s*\**\s*[:\-]'
        rf'|[VIPS]\s*\**\s*:)'
    )

    for key, full in full_names.items():
        # Most specific to most lenient. `\**` before the separator accepts
        # bold labels closed before the colon, e.g. "**V (Välbefinnande)**:".
        patterns = (
            rf'(?im)^[\s\*\-\d\.]*{key}\s*\({full}\)\s*\**\s*[:\-]\s*\**\s*(.+?)\s*\**\s*$',
            rf'(?im)^[\s\*\-\d\.]*{full}\s*\**\s*[:\-]\s*\**\s*(.+?)\s*\**\s*$',
            rf'(?im)^[\s\*\-\d\.]*{key}\s*\**\s*[:\-]\s*\**\s*(.+?)\s*\**\s*$',
        )
        found = None
        for pattern in patterns:
            # Scan every occurrence: an echoed template line such as
            # "V (...): [Swedish content]" must not hide a later real answer.
            for match in re.finditer(pattern, response):
                content = match.group(1).strip().strip('*').strip()
                if not content or content.lower() in placeholders:
                    continue
                if next_label.match(content):
                    # This category was left empty by the model.
                    continue
                found = content
                break
            if found is not None:
                break
        if found is not None:
            vips[key] = found
    return vips


def format_vips_for_display(vips: dict) -> str:
    """Render a VIPS dict as four "label: content" lines in V, I, P, S order.

    Categories missing from ``vips`` are shown as
    "Ingen relevant information."; keys other than V/I/P/S are ignored.
    """
    headings = (
        ("V", "V (Valbefinnande)"),
        ("I", "I (Integritet)"),
        ("P", "P (Prevention)"),
        ("S", "S (Sakerhet)"),
    )
    rendered = []
    for code, heading in headings:
        body = vips.get(code, 'Ingen relevant information.')
        rendered.append(f"{heading}: {body}")
    return "\n".join(rendered)


# ==========================================================
# MAIN - three SEPARATE API calls + DEBUG LOGGING
# ==========================================================

def classify_all(english_text: str, mistral_client) -> dict:
    """
    Classify one conversation with each prompt strategy, one API call each.

    The three strategies (zero_shot, few_shot, chain_of_thought) are run in
    that order through ``mistral_client.generate`` with temperature 0.15.
    Keeping the calls separate gives every strategy a fresh context, so no
    strategy's output can prime another's and the results stay comparable.
    The price is roughly three times the latency of a combined call.

    Args:
        english_text: Conversation text handed to every prompt builder.
        mistral_client: Object exposing
            ``generate(prompt=..., max_tokens=..., temperature=...)`` and
            returning the raw response text.

    Returns:
        Dict mapping strategy name to a V/I/P/S dict. If a strategy raises,
        all four of its values are the string "[FEL: <error>]" instead.
    """
    logger.info("Running 3 independent prompt strategies...")

    # (strategy name, prompt builder, max_tokens). Chain-of-thought gets a
    # larger budget because it emits reasoning before the note.
    plan = (
        ("zero_shot", build_prompt_zero_shot, 500),
        ("few_shot", build_prompt_few_shot, 500),
        ("chain_of_thought", build_prompt_chain_of_thought, 1200),
    )

    outcome = {}
    for name, make_prompt, max_tok in plan:
        try:
            logger.info(f"  -> {name} (max_tokens={max_tok})...")
            raw = mistral_client.generate(
                prompt=make_prompt(english_text),
                max_tokens=max_tok,
                temperature=0.15,
            )
            # DEBUG aid: show the start of the raw response.
            logger.info(f"  [RAW] {name} (first 300 chars): {raw[:300]}")

            note = parse_vips_response(raw)

            # Sanity check: every category still at the fallback text means
            # the parser most likely found nothing usable.
            has_content = any(
                field != "Ingen relevant information." for field in note.values()
            )
            if not has_content:
                logger.warning(f"  [WARN] {name} parsed empty - check raw output above")

            outcome[name] = note
            logger.info(f"  [OK] {name} done ({len(raw)} chars)")
        except Exception as e:
            # One failing strategy must not abort the remaining ones.
            logger.error(f"  [ERR] {name} failed: {e}")
            outcome[name] = dict.fromkeys(["V", "I", "P", "S"], f"[FEL: {e}]")

    return outcome