maxime-antoine-dev committed on
Commit
afd3da3
·
1 Parent(s): 992feaf

refactored code

Browse files
Files changed (5) hide show
  1. logger_utils.py +29 -0
  2. main.py +96 -497
  3. model_runtime.py +129 -0
  4. prompts.py +113 -0
  5. utils.py +171 -0
logger_utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from contextlib import contextmanager
3
+
4
def log(rid: str, msg: str) -> None:
    """Write one log line to stdout, prefixed with the request id."""
    print(f"[{rid}] {msg}", flush=True)


class StepLogger:
    """
    Lightweight structured step logger for server logs.

    Tags every line with the request id and route, and times named steps
    via the `step()` context manager.
    """

    def __init__(self, rid: str, route: str):
        # Request id and route are prepended to every emitted line.
        self.rid = rid
        self.route = route

    def info(self, message: str) -> None:
        """Emit a single route-tagged log line for this request."""
        log(self.rid, f"{self.route} {message}")

    @contextmanager
    def step(self, name: str):
        """Log start/ok/fail (with elapsed seconds) around a code block.

        Exceptions are logged with their repr and re-raised unchanged.
        """
        started = time.time()
        self.info(f"step={name} start")
        try:
            yield
        except Exception as e:
            elapsed = time.time() - started
            self.info(f"step={name} fail ({elapsed:.3f}s) err={repr(e)}")
            raise
        else:
            elapsed = time.time() - started
            self.info(f"step={name} ok ({elapsed:.3f}s)")
main.py CHANGED
@@ -1,18 +1,17 @@
1
- # main.py
2
  import os
3
  import json
4
  import time
5
  import uuid
6
  import asyncio
7
- import re
8
- from typing import Any, Dict, Optional, List
9
- from functools import lru_cache
10
 
11
  from fastapi import FastAPI
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from pydantic import BaseModel, Field
14
- from huggingface_hub import hf_hub_download
15
- from llama_cpp import Llama
 
 
16
 
17
 
18
  # ============================
@@ -67,11 +66,9 @@ class GenParams(BaseModel):
67
  temperature: Optional[float] = None
68
  top_p: Optional[float] = None
69
 
70
-
71
  class AnalyzeRequest(GenParams):
72
  text: str
73
 
74
-
75
  class RewriteRequest(GenParams):
76
  text: str
77
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
@@ -81,251 +78,17 @@ class RewriteRequest(GenParams):
81
 
82
 
83
  # ============================
84
- # Labels & Prompts
85
- # ============================
86
- ALLOWED_LABELS = [
87
- "none",
88
- "faulty generalization",
89
- "false causality",
90
- "circular reasoning",
91
- "ad populum",
92
- "ad hominem",
93
- "fallacy of logic",
94
- "appeal to emotion",
95
- "false dilemma",
96
- "equivocation",
97
- "fallacy of extension",
98
- "fallacy of relevance",
99
- "fallacy of credibility",
100
- "miscellaneous",
101
- "intentional",
102
- ]
103
-
104
- LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
105
-
106
- ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
107
-
108
- You MUST choose labels ONLY from this list (exact string):
109
- {LABELS_STR}
110
-
111
- You MUST return ONLY valid JSON with this schema:
112
- {{
113
- "has_fallacy": boolean,
114
- "fallacies": [
115
- {{
116
- "type": string,
117
- "confidence": number,
118
- "evidence_quotes": [string],
119
- "rationale": string
120
- }}
121
- ],
122
- "overall_explanation": string
123
- }}
124
-
125
- Hard rules:
126
- - Output ONLY JSON. No markdown. No extra text.
127
- - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
128
- - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
129
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals) and MUST vary when appropriate.
130
- Calibrate it:
131
- * 0.90–1.00: very explicit, unambiguous match, clear cue words.
132
- * 0.70–0.89: strong match but some ambiguity or missing premise.
133
- * 0.40–0.69: plausible but weak/partial evidence.
134
- * 0.10–0.39: very uncertain.
135
-
136
- About rationale vs overall_explanation:
137
- - Each fallacy.rationale MUST be QUOTE-LOCAL (2–4 sentences):
138
- (1) restate what the quote is asserting,
139
- (2) identify the missing/invalid inference step,
140
- (3) explain why that matches the selected fallacy label.
141
- Mention at least one concrete cue from the quote (e.g., escalation, popularity claim, personal attack, etc.).
142
- - overall_explanation MUST be GLOBAL and MUST NOT restate rationales sentence-by-sentence.
143
- Instead (2–5 sentences):
144
- (a) summarize the overall reasoning pattern(s),
145
- (b) explain why that pattern is harmful,
146
- (c) give plausible consequences (bad decisions, distorted debate, polarization, unjustified fear, scapegoating).
147
-
148
- Anti-template rule:
149
- - DO NOT use generic filler or stock phrases.
150
- - You MUST NOT output this sentence (or close variants):
151
- "The input contains fallacious reasoning consistent with the predicted type(s)."
152
-
153
- If no fallacy:
154
- - has_fallacy=false
155
- - fallacies=[]
156
- - overall_explanation briefly explains why the reasoning is acceptable.
157
-
158
- INPUT:
159
- {{text}}
160
-
161
- OUTPUT:"""
162
-
163
- # IMPORTANT: do NOT use .format() on a template containing JSON braces.
164
- # Use custom tokens and .replace() to avoid KeyError.
165
- REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
166
-
167
- Goal:
168
- - You MUST propose a replacement for the QUOTE only.
169
- - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
170
- - The replacement MUST be plausible in the surrounding context and similar length (roughly +/- 40%).
171
- - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
172
- - Do NOT introduce new fallacies.
173
-
174
- Return ONLY valid JSON with this schema:
175
- {
176
- "replacement_quote": string,
177
- "why_this_fix": string
178
- }
179
-
180
- Hard rules:
181
- - Output ONLY JSON. No markdown. No extra text.
182
- - replacement_quote should be standalone text (no surrounding quotes).
183
- - why_this_fix: 1–3 sentences, specific.
184
-
185
- INPUT_TEXT:
186
- <<TEXT>>
187
-
188
- QUOTE_TO_REWRITE:
189
- <<QUOTE>>
190
-
191
- FALLACY_TYPE:
192
- <<FALLACY_TYPE>>
193
-
194
- WHY_FALLACIOUS:
195
- <<RATIONALE>>
196
-
197
- OUTPUT:"""
198
-
199
-
200
- def build_analyze_messages(text: str) -> List[Dict[str, str]]:
201
- return [
202
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
203
- {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)},
204
- ]
205
-
206
-
207
- def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
208
- prompt = (
209
- REWRITE_PROMPT
210
- .replace("<<TEXT>>", text)
211
- .replace("<<QUOTE>>", quote)
212
- .replace("<<FALLACY_TYPE>>", fallacy_type)
213
- .replace("<<RATIONALE>>", rationale)
214
- )
215
- return [
216
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
217
- {"role": "user", "content": prompt},
218
- ]
219
-
220
-
221
- # ============================
222
- # Logging
223
- # ============================
224
- def _log(rid: str, msg: str):
225
- print(f"[{rid}] {msg}", flush=True)
226
-
227
-
228
- # ============================
229
- # Robust JSON extraction
230
- # ============================
231
- def stop_at_complete_json(text: str) -> Optional[str]:
232
- start = text.find("{")
233
- if start == -1:
234
- return None
235
-
236
- depth = 0
237
- in_str = False
238
- esc = False
239
-
240
- for i in range(start, len(text)):
241
- ch = text[i]
242
- if in_str:
243
- if esc:
244
- esc = False
245
- elif ch == "\\":
246
- esc = True
247
- elif ch == '"':
248
- in_str = False
249
- continue
250
-
251
- if ch == '"':
252
- in_str = True
253
- continue
254
- if ch == "{":
255
- depth += 1
256
- elif ch == "}":
257
- depth -= 1
258
- if depth == 0:
259
- return text[start : i + 1]
260
- return None
261
-
262
-
263
- def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
264
- cut = stop_at_complete_json(s) or s
265
- start = cut.find("{")
266
- end = cut.rfind("}")
267
- if start == -1 or end == -1 or end <= start:
268
- return None
269
- cand = cut[start : end + 1].strip()
270
- try:
271
- return json.loads(cand)
272
- except Exception:
273
- return None
274
-
275
-
276
- # ============================
277
- # Model load
278
  # ============================
279
- llm: Optional[Llama] = None
280
- model_path: Optional[str] = None
281
- load_error: Optional[str] = None
282
- loaded_at_ts: Optional[float] = None
283
-
284
-
285
- def load_llama() -> None:
286
- global llm, model_path, load_error, loaded_at_ts
287
-
288
- print("=== FADES startup ===", flush=True)
289
- print(f"GGUF_REPO_ID={GGUF_REPO_ID}", flush=True)
290
- print(f"GGUF_FILENAME={GGUF_FILENAME}", flush=True)
291
- print(f"N_CTX={N_CTX} N_THREADS={N_THREADS} N_BATCH={N_BATCH}", flush=True)
292
-
293
- try:
294
- t0 = time.time()
295
- mp = hf_hub_download(
296
- repo_id=GGUF_REPO_ID,
297
- filename=GGUF_FILENAME,
298
- token=os.getenv("HF_TOKEN"),
299
- )
300
- t1 = time.time()
301
- print(f"✅ GGUF downloaded: {mp} ({t1 - t0:.1f}s)", flush=True)
302
-
303
- t2 = time.time()
304
- llm_local = Llama(
305
- model_path=mp,
306
- n_ctx=N_CTX,
307
- n_threads=N_THREADS,
308
- n_batch=N_BATCH,
309
- n_gpu_layers=0,
310
- verbose=False,
311
- )
312
- t3 = time.time()
313
- print(f"✅ Model loaded: ({t3 - t2:.1f}s) n_ctx={N_CTX} threads={N_THREADS} batch={N_BATCH}", flush=True)
314
-
315
- llm = llm_local
316
- model_path = mp
317
- load_error = None
318
- loaded_at_ts = time.time()
319
- print("=== Startup OK ===", flush=True)
320
-
321
- except Exception as e:
322
- load_error = repr(e)
323
- print(f"❌ Startup FAILED: {load_error}", flush=True)
324
-
325
-
326
  @app.on_event("startup")
327
  def _startup():
328
- load_llama()
 
 
 
 
 
 
329
 
330
 
331
  @app.get("/")
@@ -335,22 +98,17 @@ def root():
335
 
336
  @app.get("/health")
337
  def health():
338
- return {
339
- "ok": llm is not None and load_error is None,
340
- "model_loaded": llm is not None,
341
- "load_error": load_error,
342
- "gguf_repo": GGUF_REPO_ID,
343
- "gguf_filename": GGUF_FILENAME,
344
- "model_path": model_path,
345
- "n_ctx": N_CTX,
346
- "n_threads": N_THREADS,
347
- "n_batch": N_BATCH,
348
- "loaded_at_ts": loaded_at_ts,
349
- }
350
 
351
 
352
  # ============================
353
- # Param selection
354
  # ============================
355
  def pick_params(req: GenParams) -> Dict[str, Any]:
356
  if req.light:
@@ -382,219 +140,47 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
382
  return params
383
 
384
 
385
- # ============================
386
- # Post-processing: remove template sentence
387
- # ============================
388
- # This catches the exact sentence + small punctuation variations (case-insensitive).
389
- # Also works if the model prefixes rationales with it.
390
- _TEMPLATE_RE = re.compile(
391
- r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\b\.?",
392
- flags=re.IGNORECASE,
393
- )
394
-
395
- def strip_template_sentence(text: str) -> str:
396
- if not isinstance(text, str):
397
- return ""
398
- out = _TEMPLATE_RE.sub("", text)
399
-
400
- # Cleanup common leftovers (double spaces, leading punctuation)
401
- out = out.replace("..", ".").strip()
402
- out = re.sub(r"\s{2,}", " ", out)
403
- out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
404
- return out
405
-
406
-
407
- # ============================
408
- # Output sanitation / validation
409
- # ============================
410
- def _clamp01(x: Any, default: float = 0.5) -> float:
411
- try:
412
- v = float(x)
413
- except Exception:
414
- return default
415
- return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
416
-
417
-
418
- def _is_allowed_label(lbl: Any) -> bool:
419
- return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"
420
-
421
-
422
- def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
423
- has_fallacy = bool(obj.get("has_fallacy", False))
424
- fallacies_in = obj.get("fallacies", [])
425
- if not isinstance(fallacies_in, list):
426
- fallacies_in = []
427
-
428
- fallacies_out = []
429
- for f in fallacies_in:
430
- if not isinstance(f, dict):
431
- continue
432
- f_type = f.get("type")
433
- if not _is_allowed_label(f_type):
434
- continue
435
-
436
- conf = _clamp01(f.get("confidence", 0.5))
437
- conf = float(f"{conf:.2f}")
438
-
439
- ev = f.get("evidence_quotes", [])
440
- if not isinstance(ev, list):
441
- ev = []
442
-
443
- ev_clean: List[str] = []
444
- for q in ev:
445
- if not isinstance(q, str):
446
- continue
447
- qq = q.strip()
448
- if not qq:
449
- continue
450
- if qq in input_text:
451
- if len(qq) <= 240:
452
- ev_clean.append(qq)
453
- else:
454
- short = qq[:240]
455
- ev_clean.append(short if short in input_text else qq)
456
-
457
- rationale = f.get("rationale", "")
458
- rationale = strip_template_sentence(rationale.strip())
459
-
460
- fallacies_out.append(
461
- {
462
- "type": f_type,
463
- "confidence": conf,
464
- "evidence_quotes": ev_clean[:3],
465
- "rationale": rationale,
466
- }
467
- )
468
-
469
- overall = obj.get("overall_explanation", "")
470
- overall = strip_template_sentence(overall.strip())
471
-
472
- if len(fallacies_out) == 0:
473
- has_fallacy = False
474
-
475
- return {
476
- "has_fallacy": has_fallacy,
477
- "fallacies": fallacies_out,
478
- "overall_explanation": overall,
479
- }
480
-
481
-
482
- # ============================
483
- # Cached generation (task-aware)
484
- # ============================
485
- @lru_cache(maxsize=512)
486
- def _cached_chat_completion(
487
- task: str,
488
- payload: str,
489
- light: bool,
490
- max_new_tokens: int,
491
- temperature: float,
492
- top_p: float,
493
- n_batch: int,
494
- ) -> Dict[str, Any]:
495
- if llm is None:
496
- return {"ok": False, "error": "model_not_loaded", "detail": load_error}
497
-
498
- try:
499
- llm.n_batch = int(n_batch) # type: ignore[attr-defined]
500
- except Exception:
501
- pass
502
-
503
- try:
504
- data = json.loads(payload)
505
- except Exception:
506
- return {"ok": False, "error": "bad_payload"}
507
-
508
- if task == "analyze":
509
- messages = build_analyze_messages(data["text"])
510
- elif task == "rewrite":
511
- messages = build_rewrite_messages(
512
- data["text"],
513
- data["quote"],
514
- data["fallacy_type"],
515
- data["rationale"],
516
- )
517
- else:
518
- return {"ok": False, "error": "unknown_task"}
519
-
520
- out = llm.create_chat_completion(
521
- messages=messages,
522
- max_tokens=int(max_new_tokens),
523
- temperature=float(temperature),
524
- top_p=float(top_p),
525
- stream=False,
526
- )
527
-
528
- raw = out["choices"][0]["message"]["content"]
529
- obj = extract_first_json_obj(raw)
530
- if obj is None:
531
- return {"ok": False, "error": "json_parse_error", "raw": raw}
532
-
533
- return {"ok": True, "result": obj}
534
-
535
-
536
- def _occurrence_index(text: str, sub: str, occurrence: int) -> int:
537
- if occurrence < 0:
538
- return -1
539
- start = 0
540
- for _ in range(occurrence + 1):
541
- idx = text.find(sub, start)
542
- if idx == -1:
543
- return -1
544
- start = idx + max(1, len(sub))
545
- return idx
546
-
547
-
548
- def _replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
549
- idx = _occurrence_index(text, old, occurrence)
550
- if idx == -1:
551
- return {"ok": False, "error": "quote_not_found"}
552
- return {
553
- "ok": True,
554
- "rewritten_text": text[:idx] + new + text[idx + len(old) :],
555
- "start_char": idx,
556
- "end_char": idx + len(new),
557
- "old_start_char": idx,
558
- "old_end_char": idx + len(old),
559
- }
560
-
561
-
562
  # ============================
563
  # Routes
564
  # ============================
565
  @app.post("/analyze")
566
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
567
  rid = uuid.uuid4().hex[:10]
 
568
  t0 = time.time()
569
 
570
- _log(rid, f"📩 /analyze received (light={req.light}) chars={len(req.text) if req.text else 0}")
 
 
 
 
571
 
572
- if not req.text or not req.text.strip():
573
- return {"ok": False, "error": "empty_text"}
574
 
575
- params = pick_params(req)
576
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
577
 
578
- async with GEN_LOCK:
579
- t_lock = time.time()
580
- t_gen0 = time.time()
 
581
 
582
- res = _cached_chat_completion(
583
- "analyze",
584
- payload,
585
- bool(req.light),
586
- int(params["max_new_tokens"]),
587
- float(params["temperature"]),
588
- float(params["top_p"]),
589
- int(params["n_batch"]),
590
- )
591
 
592
- t_gen1 = time.time()
 
593
 
594
  elapsed_total = time.time() - t0
595
- elapsed_lock = time.time() - t_lock
596
 
597
  if not res.get("ok"):
 
598
  return {
599
  **res,
600
  "meta": {
@@ -610,8 +196,10 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
610
  },
611
  }
612
 
613
- clean = sanitize_analyze_output(res["result"], req.text)
 
614
 
 
615
  return {
616
  "ok": True,
617
  "result": clean,
@@ -636,22 +224,30 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
636
  @app.post("/rewrite")
637
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
638
  rid = uuid.uuid4().hex[:10]
 
639
  t0 = time.time()
640
 
641
- if not req.text or not req.text.strip():
642
- return {"ok": False, "error": "empty_text"}
643
- if not req.quote or not req.quote.strip():
644
- return {"ok": False, "error": "empty_quote"}
 
 
 
 
 
645
 
646
  quote = req.quote.strip()
647
  occurrence = int(req.occurrence or 0)
648
 
649
- if _occurrence_index(req.text, quote, occurrence) == -1:
650
- return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
 
651
 
652
- params = pick_params(req)
653
- if req.light and req.max_new_tokens is None:
654
- params["max_new_tokens"] = max(params["max_new_tokens"], 80)
 
655
 
656
  payload = json.dumps(
657
  {
@@ -663,26 +259,27 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
663
  ensure_ascii=False,
664
  )
665
 
666
- async with GEN_LOCK:
667
- t_lock = time.time()
668
- t_gen0 = time.time()
 
669
 
670
- res = _cached_chat_completion(
671
- "rewrite",
672
- payload,
673
- bool(req.light),
674
- int(params["max_new_tokens"]),
675
- float(params["temperature"]),
676
- float(params["top_p"]),
677
- int(params["n_batch"]),
678
- )
679
 
680
- t_gen1 = time.time()
 
681
 
682
  elapsed_total = time.time() - t0
683
- elapsed_lock = time.time() - t_lock
684
 
685
  if not res.get("ok"):
 
686
  return {
687
  **res,
688
  "meta": {
@@ -698,25 +295,27 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
698
  },
699
  }
700
 
701
- obj = res["result"]
702
- if not isinstance(obj, dict):
703
- return {"ok": False, "error": "bad_rewrite_output"}
704
-
705
- replacement = obj.get("replacement_quote")
706
- if not isinstance(replacement, str):
707
- return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
708
 
709
- replacement = replacement.strip()
710
- if not replacement:
711
- return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
 
 
 
712
 
713
- why = obj.get("why_this_fix", "")
714
- why = strip_template_sentence(why.strip())
715
 
716
- rep = _replace_nth(req.text, quote, replacement, occurrence)
717
- if not rep.get("ok"):
718
- return {"ok": False, "error": rep.get("error", "replace_failed")}
 
719
 
 
720
  return {
721
  "ok": True,
722
  "result": {
 
 
1
  import os
2
  import json
3
  import time
4
  import uuid
5
  import asyncio
6
+ from typing import Any, Dict, Optional
 
 
7
 
8
  from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel, Field
11
+
12
+ from logger_utils import StepLogger
13
+ from utils import sanitize_analyze_output, occurrence_index, replace_nth, strip_template_sentence
14
+ from model_runtime import load_llama, get_health, cached_chat_completion
15
 
16
 
17
  # ============================
 
66
  temperature: Optional[float] = None
67
  top_p: Optional[float] = None
68
 
 
69
  class AnalyzeRequest(GenParams):
70
  text: str
71
 
 
72
  class RewriteRequest(GenParams):
73
  text: str
74
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
 
78
 
79
 
80
  # ============================
81
+ # Startup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # ============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
@app.on_event("startup")
def _startup():
    """Download and initialize the GGUF model when the app boots.

    Blocking on purpose: the app should not serve /analyze or /rewrite
    before load_llama() has had a chance to run (failures are recorded by
    model_runtime and surfaced via /health rather than crashing startup).
    """
    load_llama(
        gguf_repo_id=GGUF_REPO_ID,
        gguf_filename=GGUF_FILENAME,
        n_ctx=N_CTX,
        n_threads=N_THREADS,
        n_batch=N_BATCH,
    )
92
 
93
 
94
  @app.get("/")
 
98
 
99
  @app.get("/health")
100
  def health():
101
+ return get_health(
102
+ gguf_repo_id=GGUF_REPO_ID,
103
+ gguf_filename=GGUF_FILENAME,
104
+ n_ctx=N_CTX,
105
+ n_threads=N_THREADS,
106
+ n_batch=N_BATCH,
107
+ )
 
 
 
 
 
108
 
109
 
110
  # ============================
111
+ # Params selection
112
  # ============================
113
  def pick_params(req: GenParams) -> Dict[str, Any]:
114
  if req.light:
 
140
  return params
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # ============================
144
  # Routes
145
  # ============================
146
  @app.post("/analyze")
147
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
148
  rid = uuid.uuid4().hex[:10]
149
+ L = StepLogger(rid, "/analyze")
150
  t0 = time.time()
151
 
152
+ L.info(f"received light={req.light} chars={len(req.text) if req.text else 0}")
153
+
154
+ with L.step("validate"):
155
+ if not req.text or not req.text.strip():
156
+ return {"ok": False, "error": "empty_text"}
157
 
158
+ with L.step("pick_params"):
159
+ params = pick_params(req)
160
 
 
161
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
162
 
163
+ with L.step("generate_under_lock"):
164
+ async with GEN_LOCK:
165
+ t_lock = time.time()
166
+ t_gen0 = time.time()
167
 
168
+ res = cached_chat_completion(
169
+ "analyze",
170
+ payload,
171
+ int(params["max_new_tokens"]),
172
+ float(params["temperature"]),
173
+ float(params["top_p"]),
174
+ int(params["n_batch"]),
175
+ )
 
176
 
177
+ t_gen1 = time.time()
178
+ elapsed_lock = time.time() - t_lock
179
 
180
  elapsed_total = time.time() - t0
 
181
 
182
  if not res.get("ok"):
183
+ L.info(f"failed err={res.get('error')}")
184
  return {
185
  **res,
186
  "meta": {
 
196
  },
197
  }
198
 
199
+ with L.step("sanitize"):
200
+ clean = sanitize_analyze_output(res["result"], req.text)
201
 
202
+ L.info(f"ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
203
  return {
204
  "ok": True,
205
  "result": clean,
 
224
  @app.post("/rewrite")
225
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
226
  rid = uuid.uuid4().hex[:10]
227
+ L = StepLogger(rid, "/rewrite")
228
  t0 = time.time()
229
 
230
+ L.info(
231
+ f"received light={req.light} text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}"
232
+ )
233
+
234
+ with L.step("validate"):
235
+ if not req.text or not req.text.strip():
236
+ return {"ok": False, "error": "empty_text"}
237
+ if not req.quote or not req.quote.strip():
238
+ return {"ok": False, "error": "empty_quote"}
239
 
240
  quote = req.quote.strip()
241
  occurrence = int(req.occurrence or 0)
242
 
243
+ with L.step("quote_check"):
244
+ if occurrence_index(req.text, quote, occurrence) == -1:
245
+ return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
246
 
247
+ with L.step("pick_params"):
248
+ params = pick_params(req)
249
+ if req.light and req.max_new_tokens is None:
250
+ params["max_new_tokens"] = max(params["max_new_tokens"], 80)
251
 
252
  payload = json.dumps(
253
  {
 
259
  ensure_ascii=False,
260
  )
261
 
262
+ with L.step("generate_under_lock"):
263
+ async with GEN_LOCK:
264
+ t_lock = time.time()
265
+ t_gen0 = time.time()
266
 
267
+ res = cached_chat_completion(
268
+ "rewrite",
269
+ payload,
270
+ int(params["max_new_tokens"]),
271
+ float(params["temperature"]),
272
+ float(params["top_p"]),
273
+ int(params["n_batch"]),
274
+ )
 
275
 
276
+ t_gen1 = time.time()
277
+ elapsed_lock = time.time() - t_lock
278
 
279
  elapsed_total = time.time() - t0
 
280
 
281
  if not res.get("ok"):
282
+ L.info(f"failed err={res.get('error')}")
283
  return {
284
  **res,
285
  "meta": {
 
295
  },
296
  }
297
 
298
+ with L.step("validate_model_output"):
299
+ obj = res["result"]
300
+ if not isinstance(obj, dict):
301
+ return {"ok": False, "error": "bad_rewrite_output"}
 
 
 
302
 
303
+ replacement = obj.get("replacement_quote")
304
+ if not isinstance(replacement, str):
305
+ return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
306
+ replacement = replacement.strip()
307
+ if not replacement:
308
+ return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
309
 
310
+ why = obj.get("why_this_fix", "")
311
+ why = strip_template_sentence(str(why).strip())
312
 
313
+ with L.step("replace"):
314
+ rep = replace_nth(req.text, quote, replacement, occurrence)
315
+ if not rep.get("ok"):
316
+ return {"ok": False, "error": rep.get("error", "replace_failed")}
317
 
318
+ L.info(f"ok total={elapsed_total:.2f}s")
319
  return {
320
  "ok": True,
321
  "result": {
model_runtime.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ from functools import lru_cache
5
+ from typing import Any, Dict, Optional
6
+
7
+ from huggingface_hub import hf_hub_download
8
+ from llama_cpp import Llama
9
+
10
+ from prompts import build_analyze_messages, build_rewrite_messages
11
+ from utils import extract_first_json_obj
12
+
13
+ llm: Optional[Llama] = None
14
+ model_path: Optional[str] = None
15
+ load_error: Optional[str] = None
16
+ loaded_at_ts: Optional[float] = None
17
+
18
def load_llama(
    gguf_repo_id: str,
    gguf_filename: str,
    n_ctx: int,
    n_threads: int,
    n_batch: int,
) -> None:
    """Download the GGUF weights and initialize the module-global Llama model.

    On success sets ``llm``/``model_path``/``loaded_at_ts`` and clears
    ``load_error``; on any failure clears ``llm`` and records the error in
    ``load_error`` so /health can report it instead of crashing startup.
    """
    global llm, model_path, load_error, loaded_at_ts

    print("=== FADES startup ===", flush=True)
    print(f"GGUF_REPO_ID={gguf_repo_id}", flush=True)
    print(f"GGUF_FILENAME={gguf_filename}", flush=True)
    print(f"N_CTX={n_ctx} N_THREADS={n_threads} N_BATCH={n_batch}", flush=True)

    try:
        download_started = time.time()
        resolved_path = hf_hub_download(
            repo_id=gguf_repo_id,
            filename=gguf_filename,
            token=os.getenv("HF_TOKEN"),  # optional; needed for gated repos
        )
        download_elapsed = time.time() - download_started
        print(f"✅ GGUF downloaded: {resolved_path} ({download_elapsed:.1f}s)", flush=True)

        load_started = time.time()
        model = Llama(
            model_path=resolved_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_batch=n_batch,
            n_gpu_layers=0,  # CPU-only deployment
            verbose=False,
        )
        load_elapsed = time.time() - load_started
        print(f"✅ Model loaded: ({load_elapsed:.1f}s) n_ctx={n_ctx} threads={n_threads} batch={n_batch}", flush=True)

        # Publish the fully-initialized model only after everything succeeded.
        llm = model
        model_path = resolved_path
        load_error = None
        loaded_at_ts = time.time()
        print("=== Startup OK ===", flush=True)
    except Exception as e:
        # Broad catch is deliberate at this boundary: record and report via
        # /health rather than taking the whole app down at startup.
        load_error = repr(e)
        llm = None
        print(f"❌ Startup FAILED: {load_error}", flush=True)
63
+
64
def get_health(gguf_repo_id: str, gguf_filename: str, n_ctx: int, n_threads: int, n_batch: int) -> Dict[str, Any]:
    """Build the /health payload from module-level runtime state plus config."""
    model_ready = llm is not None
    return {
        "ok": model_ready and load_error is None,
        "model_loaded": model_ready,
        "load_error": load_error,
        "gguf_repo": gguf_repo_id,
        "gguf_filename": gguf_filename,
        "model_path": model_path,
        "n_ctx": n_ctx,
        "n_threads": n_threads,
        "n_batch": n_batch,
        "loaded_at_ts": loaded_at_ts,
    }
77
+
78
@lru_cache(maxsize=512)
def _cached_chat_completion_impl(
    task: str,
    payload: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Cached core of cached_chat_completion; assumes the model is loaded.

    NOTE: GEN_LOCK is managed by the FastAPI routes (outside).
    """
    try:
        # Best-effort: llama-cpp exposes n_batch as a plain attribute.
        llm.n_batch = int(n_batch)  # type: ignore[attr-defined]
    except Exception:
        pass

    try:
        data = json.loads(payload)
    except Exception:
        return {"ok": False, "error": "bad_payload"}

    if task == "analyze":
        messages = build_analyze_messages(data["text"])
    elif task == "rewrite":
        messages = build_rewrite_messages(
            data["text"],
            data["quote"],
            data["fallacy_type"],
            data["rationale"],
        )
    else:
        return {"ok": False, "error": "unknown_task"}

    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        stream=False,
    )

    raw = out["choices"][0]["message"]["content"]
    obj = extract_first_json_obj(raw)
    if obj is None:
        return {"ok": False, "error": "json_parse_error", "raw": raw}

    return {"ok": True, "result": obj}


def cached_chat_completion(
    task: str,
    payload: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Run (and memoize) a chat completion for *task* with *payload*.

    Bug fix: the model-loaded check lives OUTSIDE the lru_cache'd helper.
    Previously a request arriving before startup finished produced
    {"ok": False, "error": "model_not_loaded"}, and lru_cache replayed that
    failure forever for the same arguments — even after the model loaded.
    Now only actual completion attempts are cached.
    """
    if llm is None:
        return {"ok": False, "error": "model_not_loaded", "detail": load_error}
    return _cached_chat_completion_impl(task, payload, max_new_tokens, temperature, top_p, n_batch)
prompts.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
# Closed label set the model may emit; "none" is only valid with has_fallacy=false.
ALLOWED_LABELS = [
    "none",
    "faulty generalization",
    "false causality",
    "circular reasoning",
    "ad populum",
    "ad hominem",
    "fallacy of logic",
    "appeal to emotion",
    "false dilemma",
    "equivocation",
    "fallacy of extension",
    "fallacy of relevance",
    "fallacy of credibility",
    "miscellaneous",
    "intentional",
]

LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])

# Stronger /analyze prompt: forces specificity and forbids the "template" sentence.
# f-string: JSON braces are escaped as {{ }}; the single {text} slot is filled
# by build_analyze_messages() via str.replace.
ANALYZE_PROMPT = f"""You are a fallacy detection assistant.

You MUST choose labels ONLY from this list (exact string):
{LABELS_STR}

You MUST return ONLY valid JSON with this schema:
{{
  "has_fallacy": boolean,
  "fallacies": [
    {{
      "type": string,
      "confidence": number,
      "evidence_quotes": [string],
      "rationale": string
    }}
  ],
  "overall_explanation": string
}}

Hard rules:
- Output ONLY JSON. No markdown. No extra text.
- evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
- Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
- confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
  It MUST NOT be always the same across examples. Calibrate it.
- The rationale MUST be specific to the evidence (2–4 sentences):
  Explain (1) what the quote claims, (2) why that matches the fallacy label,
  (3) what logical step is invalid or missing.
  DO NOT use generic filler. Do NOT reuse stock phrases.
- If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
INPUT:
{{text}}

OUTPUT:"""

# /rewrite prompt: returns ONLY a replacement substring for the quote (server
# does the replacement).
# IMPORTANT: this template contains literal JSON braces, so it must NEVER go
# through str.format(). build_rewrite_messages() fills the <<...>> tokens with
# str.replace(); the tokens below MUST match the ones it replaces.
# (Bug fix: this template previously used {text}/{quote}/{fallacy_type}/{rationale}
# placeholders, which build_rewrite_messages never substitutes — the literal
# placeholders were sent to the model.)
REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.

Goal:
- You MUST propose a replacement for the QUOTE only.
- The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
- The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
- Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
- Do NOT introduce new fallacies.

Return ONLY valid JSON with this schema:
{
  "replacement_quote": string,
  "why_this_fix": string
}

Hard rules:
- Output ONLY JSON. No markdown. No extra text.
- replacement_quote should be standalone text (no surrounding quotes).
- why_this_fix: 1–3 sentences, specific.

INPUT_TEXT:
<<TEXT>>

QUOTE_TO_REWRITE:
<<QUOTE>>

FALLACY_TYPE:
<<FALLACY_TYPE>>

WHY_FALLACIOUS:
<<RATIONALE>>

OUTPUT:"""
93
+
94
+
95
def build_analyze_messages(text: str) -> List[Dict[str, str]]:
    """Chat messages for the /analyze task, with the input text spliced in.

    Uses str.replace (not .format) because the template contains JSON braces.
    """
    system_msg = {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."}
    user_msg = {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)}
    return [system_msg, user_msg]
100
+
101
+
102
def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
    """Chat messages for the /rewrite task.

    Fills the template's <<...>> tokens with str.replace (never .format —
    the template contains literal JSON braces).
    """
    filled = REWRITE_PROMPT
    for token, value in (
        ("<<TEXT>>", text),
        ("<<QUOTE>>", quote),
        ("<<FALLACY_TYPE>>", fallacy_type),
        ("<<RATIONALE>>", rationale),
    ):
        filled = filled.replace(token, value)
    return [
        {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
        {"role": "user", "content": filled},
    ]
utils.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, Optional, List
4
+ from prompts import ALLOWED_LABELS
5
+
6
# ----------------------------
# Robust JSON extraction
# ----------------------------
def stop_at_complete_json(text: str) -> Optional[str]:
    """Return the first balanced ``{...}`` span found in *text*, or None.

    Walks the text one character at a time, tracking JSON string literals
    and backslash escapes so that braces appearing inside strings do not
    disturb the nesting count.
    """
    first = text.find("{")
    if first < 0:
        return None

    nesting = 0
    inside_string = False
    escaped = False

    for pos in range(first, len(text)):
        ch = text[pos]
        if inside_string:
            # Within a string literal: only an unescaped quote ends it.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
            continue

        if ch == '"':
            inside_string = True
        elif ch == "{":
            nesting += 1
        elif ch == "}":
            nesting -= 1
            if nesting == 0:
                return text[first : pos + 1]

    # Ran off the end without closing the outermost brace.
    return None
39
+
40
+
41
def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
    """Parse and return the first JSON object embedded in *s*, or None.

    Narrows *s* to the first balanced object when one can be found, then
    takes the outermost ``{...}`` slice of that window and attempts a
    strict parse. Any parse failure yields None rather than raising.
    """
    window = stop_at_complete_json(s) or s
    lo = window.find("{")
    hi = window.rfind("}")
    if lo == -1 or hi <= lo:
        return None
    candidate = window[lo : hi + 1].strip()
    try:
        return json.loads(candidate)
    except Exception:
        return None
52
+
53
+
54
# ----------------------------
# Post-processing: remove template sentence
# ----------------------------
# FIX: the previous pattern ended with ``\)\b`` — ")" is a non-word character,
# so a word boundary there can never match before ".", a space, or the end of
# the string, and the sentence was never actually stripped. The trailing \b
# is removed; the optional period still absorbs the sentence terminator.
_TEMPLATE_RE = re.compile(
    r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\.?",
    flags=re.IGNORECASE,
)

def strip_template_sentence(text: str) -> str:
    """Remove the boilerplate template sentence from a rationale string.

    Also tidies the residue the removal leaves behind: collapses double
    periods and runs of whitespace, and strips leading punctuation/dashes.
    Non-string input yields "".
    """
    if not isinstance(text, str):
        return ""
    out = _TEMPLATE_RE.sub("", text)
    out = out.replace("..", ".").strip()
    out = re.sub(r"\s{2,}", " ", out)
    out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
    return out
70
+
71
+
72
+ # ----------------------------
73
+ # Output sanitation / validation
74
+ # ----------------------------
75
+ def _clamp01(x: Any, default: float = 0.5) -> float:
76
+ try:
77
+ v = float(x)
78
+ except Exception:
79
+ return default
80
+ return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
81
+
82
+
83
def _is_allowed_label(lbl: Any) -> bool:
    """True if *lbl* is a recognized fallacy label other than the 'none' sentinel."""
    if not isinstance(lbl, str) or lbl == "none":
        return False
    return lbl in ALLOWED_LABELS
85
+
86
+
87
def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
    """Validate and normalize the raw /analyze JSON produced by the model.

    Filtering applied per fallacy entry:
    - drops non-dict entries and entries whose ``type`` is not an allowed label;
    - clamps ``confidence`` to [0, 1] and rounds to 2 decimals;
    - keeps at most 3 ``evidence_quotes`` that occur verbatim in *input_text*,
      truncated to 240 characters;
    - strips the boilerplate template sentence from ``rationale``.

    Returns a dict with keys ``has_fallacy``, ``fallacies`` and
    ``overall_explanation``. ``has_fallacy`` is forced to False when no entry
    survives filtering, keeping the flag consistent with the list.
    """
    has_fallacy = bool(obj.get("has_fallacy", False))
    fallacies_in = obj.get("fallacies", [])
    if not isinstance(fallacies_in, list):
        fallacies_in = []

    fallacies_out: List[Dict[str, Any]] = []
    for f in fallacies_in:
        if not isinstance(f, dict):
            continue
        f_type = f.get("type")
        if not _is_allowed_label(f_type):
            continue

        # Round the clamped confidence to 2 decimals for stable display.
        conf = _clamp01(f.get("confidence", 0.5))
        conf = float(f"{conf:.2f}")

        ev = f.get("evidence_quotes", [])
        if not isinstance(ev, list):
            ev = []

        # Keep only quotes that appear verbatim in the input, capped at 240
        # chars. (A prefix of a verbatim substring is itself verbatim, so the
        # truncation never invalidates the quote — the previous fallback
        # re-check was unreachable and has been removed.)
        ev_clean: List[str] = []
        for q in ev:
            if not isinstance(q, str):
                continue
            qq = q.strip()
            if qq and qq in input_text:
                ev_clean.append(qq[:240])

        rationale = strip_template_sentence(str(f.get("rationale", "")).strip())

        fallacies_out.append(
            {
                "type": f_type,
                "confidence": conf,
                "evidence_quotes": ev_clean[:3],
                "rationale": rationale,
            }
        )

    overall = strip_template_sentence(str(obj.get("overall_explanation", "")).strip())

    # The model sometimes claims has_fallacy=true with no usable entries.
    if not fallacies_out:
        has_fallacy = False

    return {
        "has_fallacy": has_fallacy,
        "fallacies": fallacies_out,
        "overall_explanation": overall,
    }
143
+
144
+
145
# ----------------------------
# Replace helpers
# ----------------------------
def occurrence_index(text: str, sub: str, occurrence: int) -> int:
    """Start index of the N-th (0-based) non-overlapping occurrence of *sub*.

    Returns -1 when *occurrence* is negative or *text* holds fewer than
    ``occurrence + 1`` non-overlapping matches.
    """
    if occurrence < 0:
        return -1
    idx = text.find(sub)
    remaining = occurrence
    while idx != -1 and remaining > 0:
        # max(1, ...) still advances when sub is empty.
        idx = text.find(sub, idx + max(1, len(sub)))
        remaining -= 1
    return idx
158
+
159
+
160
def replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
    """Replace the N-th (0-based) occurrence of *old* in *text* with *new*.

    On success returns ``{"ok": True, ...}`` carrying the rewritten text plus
    the character spans of both the new and the original segment; when the
    occurrence cannot be located, returns ``{"ok": False, "error": "quote_not_found"}``.
    """
    pos = occurrence_index(text, old, occurrence)
    if pos < 0:
        return {"ok": False, "error": "quote_not_found"}
    old_end = pos + len(old)
    return {
        "ok": True,
        "rewritten_text": text[:pos] + new + text[old_end:],
        "start_char": pos,
        "end_char": pos + len(new),
        "old_start_char": pos,
        "old_end_char": old_end,
    }