Spaces:

SsebaA
/

x

Running on Zero

App Files Files Community

SsebaA committed on Apr 22

Commit

bdb791b

verified ·

1 Parent(s): b03f59f

Update vips_classifier.py

Browse files

Files changed (1) hide show

vips_classifier.py +92 -34

vips_classifier.py CHANGED Viewed

@@ -103,27 +103,65 @@ def parse_vips_response(response: str) -> dict:
 def parse_combined_response(response: str) -> dict:
-    """Split combined response by section headers and parse each block."""
-    headers = {
-        "zero_shot":        "===ZERO-SHOT===",
-        "few_shot":         "===FEW-SHOT===",
-        "chain_of_thought": "===CHAIN-OF-THOUGHT===",
     }
-    results = {}
-    header_positions = {k: response.find(h) for k, h in headers.items()}
-    for key, header in headers.items():
-        pos = header_positions[key]
-        if pos == -1:
-            results[key] = {k: "Ingen relevant information." for k in ["V", "I", "P", "S"]}
-            continue
-        # Section ends at next header
-        next_pos = [p for p in header_positions.values() if p > pos]
-        end = min(next_pos) if next_pos else len(response)
-        section = response[pos:end]
-        results[key] = parse_vips_response(section)
-    return results
 def format_vips_for_display(vips: dict) -> str:
@@ -139,18 +177,38 @@ def format_vips_for_display(vips: dict) -> str:
 def classify_all(english_text: str, mistral_client) -> dict:
     """
-    Run all three VIPS strategies in ONE Mistral API call.
-    3x faster than sequential calls. Retry handles rate limits automatically.
     """
-    logger.info("Classifying with all 3 strategies in single API call...")
-    try:
-        raw = mistral_client.generate(
-            prompt=build_prompt_combined(english_text),
-            max_tokens=1000,
-            temperature=Config.LLM_TEMPERATURE,
-        )
-        return parse_combined_response(raw)
-    except Exception as e:
-        logger.error(f"Combined classification failed: {e}")
-        empty = {k: f"[FEL: {e}]" for k in ["V", "I", "P", "S"]}
-        return {"zero_shot": empty, "few_shot": empty, "chain_of_thought": empty}

 def parse_combined_response(response: str) -> dict:
     """
     Split a combined model response into three VIPS sections and parse each.

     Three approaches are tried in order:

     1. Locate one header per strategy (case-insensitive; accepts forms such
        as ``===ZERO-SHOT===``, ``### Few shot``, ``Method 3`` or ``CoT``) and
        cut the response at the first occurrence of each header.
     2. Otherwise, if at least three lines start with ``V:`` or ``V (``, treat
        the first three such blocks as zero-shot, few-shot and
        chain-of-thought, in that order.
     3. Otherwise parse the whole response once and give every strategy its
        own copy of that result.

     Args:
         response: Raw text returned by the model.

     Returns:
         A dict with the keys ``"zero_shot"``, ``"few_shot"`` and
         ``"chain_of_thought"``; each value is what ``parse_vips_response``
         returns for the corresponding slice of ``response``.
     """
     import re
     keys = ["zero_shot", "few_shot", "chain_of_thought"]
     # The lookarounds reject a letter or digit directly next to the keyword,
     # so "cot" no longer matches inside "cotton"/"Scotland" and "method 1"
     # no longer matches "Method 10". Underscores and punctuation are allowed.
     not_after_alnum = r'(?<![^\W_])'
     not_before_alnum = r'(?![^\W_])'
     keywords = {
         "zero_shot":        r'method\s*1|zero[\s\-]?shot',
         "few_shot":         r'method\s*2|few[\s\-]?shot',
         "chain_of_thought": r'method\s*3|chain[\s\-]?of[\s\-]?thought|cot',
     }
     # Position of the first header-like occurrence for each strategy.
     positions = {}
     for key in keys:
         pattern = (
             r'(?:=+\s*)?(?:###\s*)?'
             + not_after_alnum + '(?:' + keywords[key] + ')' + not_before_alnum
             + r'(?:\s*=+)?'
         )
         match = re.search(pattern, response, re.IGNORECASE)
         if match:
             positions[key] = match.start()
     # All three headers found: each section runs up to the next header.
     if len(positions) == len(keys):
         ordered = sorted(positions, key=positions.get)
         bounds = [positions[key] for key in ordered] + [len(response)]
         return {
             key: parse_vips_response(response[bounds[i]:bounds[i + 1]])
             for i, key in enumerate(ordered)
         }
     # Fallback: split on lines that open a VIPS block ("V (...)" or "V:").
     v_starts = [
         m.start()
         for m in re.finditer(r'^V\s*(?:\(|:)', response, re.MULTILINE)
     ]
     if len(v_starts) >= len(keys):
         # A section ends where the next V-block starts; a fourth V-block, if
         # present, terminates the third section.
         v_ends = v_starts[1:len(keys) + 1] + [len(response)]
         return {
             key: parse_vips_response(response[start:end])
             for key, start, end in zip(keys, v_starts, v_ends)
         }
     # Final fallback: reuse one parse for all strategies, but hand out
     # separate copies so mutating one result cannot affect the others.
     single = parse_vips_response(response)
     return {key: dict(single) for key in keys}
 def format_vips_for_display(vips: dict) -> str:
 def classify_all(english_text: str, mistral_client) -> dict:
     """
     Run the three prompt strategies concurrently, one API call per strategy.

     Each strategy builds its own prompt from ``english_text``, calls
     ``mistral_client.generate`` in a worker thread and parses the reply with
     ``parse_vips_response``. A failure in one strategy does not affect the
     others: its result becomes a dict whose V/I/P/S values carry the error
     text as ``"[FEL: ...]"``.

     Args:
         english_text: Text passed to every prompt builder.
         mistral_client: Object exposing ``generate(prompt=..., max_tokens=...,
             temperature=...)``.

     Returns:
         A dict keyed ``"zero_shot"``, ``"few_shot"``, ``"chain_of_thought"``,
         always in that order, mapping to each strategy's parsed result.
     """
     from concurrent.futures import ThreadPoolExecutor
     # strategy name -> (prompt builder, max_tokens for that call)
     strategies = {
         "zero_shot":        (build_prompt_zero_shot,        300),
         "few_shot":         (build_prompt_few_shot,         300),
         "chain_of_thought": (build_prompt_chain_of_thought, 500),
     }
     def run_one(name, prompt_fn, max_tokens):
         # Broad catch is deliberate: one failed strategy must not abort the
         # others, so the error is logged and reported in the result instead.
         try:
             raw = mistral_client.generate(
                 prompt=prompt_fn(english_text),
                 max_tokens=max_tokens,
                 temperature=Config.LLM_TEMPERATURE,
             )
             logger.info("%s: %d chars", name, len(raw))
             return parse_vips_response(raw)
         except Exception as e:
             logger.error("%s failed: %s", name, e)
             return {k: f"[FEL: {e}]" for k in ["V", "I", "P", "S"]}
     with ThreadPoolExecutor(max_workers=len(strategies)) as executor:
         futures = {
             name: executor.submit(run_one, name, fn, tokens)
             for name, (fn, tokens) in strategies.items()
         }
         # Collect in submission order (not completion order) so the key order
         # of the returned dict is the same on every run.
         return {name: future.result() for name, future in futures.items()}