maxime-antoine-dev committed on
Commit
81e2856
·
1 Parent(s): afd3da3
Files changed (5) hide show
  1. logger_utils.py +0 -29
  2. main.py +538 -99
  3. model_runtime.py +0 -129
  4. prompts.py +0 -113
  5. utils.py +0 -171
logger_utils.py DELETED
@@ -1,29 +0,0 @@
1
- import time
2
- from contextlib import contextmanager
3
-
4
- def log(rid: str, msg: str) -> None:
5
- print(f"[{rid}] {msg}", flush=True)
6
-
7
- class StepLogger:
8
- """
9
- Lightweight structured step logger for server logs.
10
- """
11
- def __init__(self, rid: str, route: str):
12
- self.rid = rid
13
- self.route = route
14
-
15
- def info(self, message: str) -> None:
16
- log(self.rid, f"{self.route} {message}")
17
-
18
- @contextmanager
19
- def step(self, name: str):
20
- t0 = time.time()
21
- self.info(f"step={name} start")
22
- try:
23
- yield
24
- dt = time.time() - t0
25
- self.info(f"step={name} ok ({dt:.3f}s)")
26
- except Exception as e:
27
- dt = time.time() - t0
28
- self.info(f"step={name} fail ({dt:.3f}s) err={repr(e)}")
29
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py CHANGED
@@ -1,17 +1,17 @@
 
1
  import os
2
  import json
3
  import time
4
  import uuid
5
  import asyncio
6
- from typing import Any, Dict, Optional
 
7
 
8
  from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel, Field
11
-
12
- from logger_utils import StepLogger
13
- from utils import sanitize_analyze_output, occurrence_index, replace_nth, strip_template_sentence
14
- from model_runtime import load_llama, get_health, cached_chat_completion
15
 
16
 
17
  # ============================
@@ -20,27 +20,33 @@ from model_runtime import load_llama, get_health, cached_chat_completion
20
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
21
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
22
 
 
23
  N_CTX = int(os.getenv("N_CTX", "1536"))
24
  CPU_COUNT = os.cpu_count() or 4
25
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
26
  N_BATCH = int(os.getenv("N_BATCH", "256"))
27
 
28
- MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "300"))
 
29
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
30
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
31
 
 
32
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
33
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
34
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
 
 
35
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
36
 
 
37
  GEN_LOCK = asyncio.Lock()
38
 
39
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
40
 
41
 
42
  # ============================
43
- # CORS
44
  # ============================
45
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
46
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
@@ -61,14 +67,18 @@ app.add_middleware(
61
  # Schemas
62
  # ============================
63
  class GenParams(BaseModel):
 
64
  light: bool = False
 
65
  max_new_tokens: Optional[int] = None
66
  temperature: Optional[float] = None
67
  top_p: Optional[float] = None
68
 
 
69
  class AnalyzeRequest(GenParams):
70
  text: str
71
 
 
72
  class RewriteRequest(GenParams):
73
  text: str
74
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
@@ -78,17 +88,236 @@ class RewriteRequest(GenParams):
78
 
79
 
80
  # ============================
81
- # Startup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # ============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  @app.on_event("startup")
84
  def _startup():
85
- load_llama(
86
- gguf_repo_id=GGUF_REPO_ID,
87
- gguf_filename=GGUF_FILENAME,
88
- n_ctx=N_CTX,
89
- n_threads=N_THREADS,
90
- n_batch=N_BATCH,
91
- )
92
 
93
 
94
  @app.get("/")
@@ -98,17 +327,22 @@ def root():
98
 
99
  @app.get("/health")
100
  def health():
101
- return get_health(
102
- gguf_repo_id=GGUF_REPO_ID,
103
- gguf_filename=GGUF_FILENAME,
104
- n_ctx=N_CTX,
105
- n_threads=N_THREADS,
106
- n_batch=N_BATCH,
107
- )
 
 
 
 
 
108
 
109
 
110
  # ============================
111
- # Params selection
112
  # ============================
113
  def pick_params(req: GenParams) -> Dict[str, Any]:
114
  if req.light:
@@ -133,12 +367,209 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
133
  if req.top_p is not None:
134
  params["top_p"] = float(req.top_p)
135
 
 
136
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
137
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
138
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
139
  params["n_batch"] = max(16, min(int(params["n_batch"]), 512))
140
  return params
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
  # ============================
144
  # Routes
@@ -146,41 +577,42 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
146
  @app.post("/analyze")
147
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
148
  rid = uuid.uuid4().hex[:10]
149
- L = StepLogger(rid, "/analyze")
150
  t0 = time.time()
151
 
152
- L.info(f"received light={req.light} chars={len(req.text) if req.text else 0}")
153
 
154
- with L.step("validate"):
155
- if not req.text or not req.text.strip():
156
- return {"ok": False, "error": "empty_text"}
157
 
158
- with L.step("pick_params"):
159
- params = pick_params(req)
 
 
 
160
 
161
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
162
 
163
- with L.step("generate_under_lock"):
164
- async with GEN_LOCK:
165
- t_lock = time.time()
166
- t_gen0 = time.time()
167
-
168
- res = cached_chat_completion(
169
- "analyze",
170
- payload,
171
- int(params["max_new_tokens"]),
172
- float(params["temperature"]),
173
- float(params["top_p"]),
174
- int(params["n_batch"]),
175
- )
176
-
177
- t_gen1 = time.time()
178
- elapsed_lock = time.time() - t_lock
179
 
180
  elapsed_total = time.time() - t0
 
181
 
182
  if not res.get("ok"):
183
- L.info(f"failed err={res.get('error')}")
184
  return {
185
  **res,
186
  "meta": {
@@ -196,10 +628,10 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
196
  },
197
  }
198
 
199
- with L.step("sanitize"):
200
- clean = sanitize_analyze_output(res["result"], req.text)
201
 
202
- L.info(f"ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
203
  return {
204
  "ok": True,
205
  "result": clean,
@@ -224,30 +656,35 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
224
  @app.post("/rewrite")
225
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
226
  rid = uuid.uuid4().hex[:10]
227
- L = StepLogger(rid, "/rewrite")
228
  t0 = time.time()
229
 
230
- L.info(
231
- f"received light={req.light} text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}"
 
232
  )
233
 
234
- with L.step("validate"):
235
- if not req.text or not req.text.strip():
236
- return {"ok": False, "error": "empty_text"}
237
- if not req.quote or not req.quote.strip():
238
- return {"ok": False, "error": "empty_quote"}
239
 
240
  quote = req.quote.strip()
241
  occurrence = int(req.occurrence or 0)
242
 
243
- with L.step("quote_check"):
244
- if occurrence_index(req.text, quote, occurrence) == -1:
245
- return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
246
 
247
- with L.step("pick_params"):
248
- params = pick_params(req)
249
- if req.light and req.max_new_tokens is None:
250
- params["max_new_tokens"] = max(params["max_new_tokens"], 80)
 
 
 
 
 
 
251
 
252
  payload = json.dumps(
253
  {
@@ -259,27 +696,27 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
259
  ensure_ascii=False,
260
  )
261
 
262
- with L.step("generate_under_lock"):
263
- async with GEN_LOCK:
264
- t_lock = time.time()
265
- t_gen0 = time.time()
266
-
267
- res = cached_chat_completion(
268
- "rewrite",
269
- payload,
270
- int(params["max_new_tokens"]),
271
- float(params["temperature"]),
272
- float(params["top_p"]),
273
- int(params["n_batch"]),
274
- )
275
-
276
- t_gen1 = time.time()
277
- elapsed_lock = time.time() - t_lock
278
 
279
  elapsed_total = time.time() - t0
 
280
 
281
  if not res.get("ok"):
282
- L.info(f"failed err={res.get('error')}")
283
  return {
284
  **res,
285
  "meta": {
@@ -295,27 +732,29 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
295
  },
296
  }
297
 
298
- with L.step("validate_model_output"):
299
- obj = res["result"]
300
- if not isinstance(obj, dict):
301
- return {"ok": False, "error": "bad_rewrite_output"}
 
 
 
302
 
303
- replacement = obj.get("replacement_quote")
304
- if not isinstance(replacement, str):
305
- return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
306
- replacement = replacement.strip()
307
- if not replacement:
308
- return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
309
 
310
- why = obj.get("why_this_fix", "")
311
- why = strip_template_sentence(str(why).strip())
 
 
312
 
313
- with L.step("replace"):
314
- rep = replace_nth(req.text, quote, replacement, occurrence)
315
- if not rep.get("ok"):
316
- return {"ok": False, "error": rep.get("error", "replace_failed")}
317
 
318
- L.info(f"ok total={elapsed_total:.2f}s")
319
  return {
320
  "ok": True,
321
  "result": {
 
1
+ # main.py
2
  import os
3
  import json
4
  import time
5
  import uuid
6
  import asyncio
7
+ from typing import Any, Dict, Optional, List
8
+ from functools import lru_cache
9
 
10
  from fastapi import FastAPI
11
  from fastapi.middleware.cors import CORSMiddleware
12
  from pydantic import BaseModel, Field
13
+ from huggingface_hub import hf_hub_download
14
+ from llama_cpp import Llama
 
 
15
 
16
 
17
  # ============================
 
20
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
21
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
22
 
23
+ # Model load params (fixed once at startup)
24
  N_CTX = int(os.getenv("N_CTX", "1536"))
25
  CPU_COUNT = os.cpu_count() or 4
26
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
  N_BATCH = int(os.getenv("N_BATCH", "256"))
28
 
29
+ # Default generation params ("normal")
30
+ MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
31
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
32
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
33
 
34
+ # "Light" generation params
35
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
36
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
37
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
38
+
39
+ # "Light" runtime knobs
40
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
41
 
42
+ # One request at a time on CPU
43
  GEN_LOCK = asyncio.Lock()
44
 
45
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
46
 
47
 
48
  # ============================
49
+ # CORS (for browser front-ends)
50
  # ============================
51
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
52
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
 
67
  # Schemas
68
  # ============================
69
  class GenParams(BaseModel):
70
+ # if True => use "light" parameters
71
  light: bool = False
72
+ # optional overrides (applied after picking light/normal defaults)
73
  max_new_tokens: Optional[int] = None
74
  temperature: Optional[float] = None
75
  top_p: Optional[float] = None
76
 
77
+
78
  class AnalyzeRequest(GenParams):
79
  text: str
80
 
81
+
82
  class RewriteRequest(GenParams):
83
  text: str
84
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
 
88
 
89
 
90
  # ============================
91
+ # Labels & Prompts
92
+ # ============================
93
+ ALLOWED_LABELS = [
94
+ "none",
95
+ "faulty generalization",
96
+ "false causality",
97
+ "circular reasoning",
98
+ "ad populum",
99
+ "ad hominem",
100
+ "fallacy of logic",
101
+ "appeal to emotion",
102
+ "false dilemma",
103
+ "equivocation",
104
+ "fallacy of extension",
105
+ "fallacy of relevance",
106
+ "fallacy of credibility",
107
+ "miscellaneous",
108
+ "intentional",
109
+ ]
110
+
111
+ LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
112
+
113
+ # Stronger /analyze prompt: forces specificity and forbids the "template" sentence
114
+ ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
115
+
116
+ You MUST choose labels ONLY from this list (exact string):
117
+ {LABELS_STR}
118
+
119
+ You MUST return ONLY valid JSON with this schema:
120
+ {{
121
+ "has_fallacy": boolean,
122
+ "fallacies": [
123
+ {{
124
+ "type": string,
125
+ "confidence": number,
126
+ "evidence_quotes": [string],
127
+ "rationale": string
128
+ }}
129
+ ],
130
+ "overall_explanation": string
131
+ }}
132
+
133
+ Hard rules:
134
+ - Output ONLY JSON. No markdown. No extra text.
135
+ - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
136
+ - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
137
+ - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
138
+ It MUST NOT be always the same across examples. Calibrate it:
139
+ * 0.90–1.00: very explicit, unambiguous match, clear cue words.
140
+ * 0.70–0.89: strong match but some ambiguity or missing premise.
141
+ * 0.40–0.69: plausible but weak/partial evidence.
142
+ * 0.10–0.39: very uncertain.
143
+ - The rationale MUST be specific to the evidence (2–4 sentences):
144
+ Explain (1) what the quote claims, (2) why that matches the fallacy label,
145
+ (3) what logical step is invalid or missing.
146
+ DO NOT use generic filler. Do NOT reuse stock phrases.
147
+ In particular, you MUST NOT output this sentence:
148
+ "The input contains fallacious reasoning consistent with the predicted type(s)."
149
+ - overall_explanation MUST also be specific (2–5 sentences): summarize the reasoning issues and reference the key cue(s).
150
+ - If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
151
+
152
+ INPUT:
153
+ {{text}}
154
+
155
+ OUTPUT:"""
156
+
157
+ # /rewrite prompt: returns ONLY a replacement substring for the quote (server does the replacement)
158
+ REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
159
+
160
+ Goal:
161
+ - You MUST propose a replacement for the QUOTE only.
162
+ - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
163
+ - The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
164
+ - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
165
+ - Do NOT introduce new fallacies.
166
+
167
+ Return ONLY valid JSON with this schema:
168
+ {
169
+ "replacement_quote": string,
170
+ "why_this_fix": string
171
+ }
172
+
173
+ Hard rules:
174
+ - Output ONLY JSON. No markdown. No extra text.
175
+ - replacement_quote should be standalone text (no surrounding quotes).
176
+ - why_this_fix: 1–3 sentences, specific.
177
+
178
+ INPUT_TEXT:
179
+ {text}
180
+
181
+ QUOTE_TO_REWRITE:
182
+ {quote}
183
+
184
+ FALLACY_TYPE:
185
+ {fallacy_type}
186
+
187
+ WHY_FALLACIOUS:
188
+ {rationale}
189
+
190
+ OUTPUT:"""
191
+
192
+
193
def build_analyze_messages(text: str) -> List[Dict[str, str]]:
    """Build the two-message chat prompt for the /analyze task.

    Substitutes the input via str.replace rather than str.format because
    ANALYZE_PROMPT contains literal JSON braces (emitted as {{ }} in the
    f-string above) that .format() would try to parse as fields.
    """
    return [
        {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
        {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)},
    ]
198
+
199
+
200
def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
    """Build the two-message chat prompt for the /rewrite task.

    Bug fix: REWRITE_PROMPT contains literal, un-doubled JSON braces in its
    schema section ('{' ... '}'), so calling REWRITE_PROMPT.format(...) makes
    str.format parse that brace pair as a replacement field and raise
    KeyError at runtime, crashing every /rewrite request. Substitute each
    placeholder with str.replace instead, mirroring build_analyze_messages.

    NOTE: replacements happen sequentially, so a literal "{quote}" inside
    *text* would also be substituted — acceptable for this prompt format.
    """
    prompt = (
        REWRITE_PROMPT
        .replace("{text}", text)
        .replace("{quote}", quote)
        .replace("{fallacy_type}", fallacy_type)
        .replace("{rationale}", rationale)
    )
    return [
        {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
        {"role": "user", "content": prompt},
    ]
211
+
212
+
213
  # ============================
214
+ # Logging
215
+ # ============================
216
def _log(rid: str, msg: str) -> None:
    """Print one request-scoped log line tagged with the short request id.

    flush=True so the line appears immediately in container/Space logs.
    """
    print(f"[{rid}] {msg}", flush=True)
218
+
219
+
220
+ # ============================
221
+ # Robust JSON extraction
222
+ # ============================
223
def stop_at_complete_json(text: str) -> Optional[str]:
    """Return the first brace-balanced candidate JSON object in *text*.

    Scans from the first '{' while tracking string/escape state so braces
    inside string literals do not affect the depth count. Returns None when
    no '{' exists or the object never closes.
    """
    first = text.find("{")
    if first < 0:
        return None

    depth = 0
    inside_string = False
    escaped = False

    for pos in range(first, len(text)):
        ch = text[pos]
        if inside_string:
            # Inside a string literal: only escape handling and the closing
            # quote matter; braces here are plain characters.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
        elif ch == '"':
            inside_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[first : pos + 1]

    # Ran off the end without balancing the braces.
    return None


def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
    """Parse and return the first JSON object found in *s*, or None.

    Prefers the brace-balanced span from stop_at_complete_json; falls back
    to the outermost '{' ... '}' slice of the raw string, then attempts a
    single json.loads. Any parse failure yields None rather than raising.
    """
    window = stop_at_complete_json(s) or s
    lo = window.find("{")
    hi = window.rfind("}")
    if lo == -1 or hi <= lo:
        return None
    candidate = window[lo : hi + 1].strip()
    try:
        return json.loads(candidate)
    except Exception:
        return None
266
+
267
+
268
+ # ============================
269
+ # Model load
270
+ # ============================
271
+ llm: Optional[Llama] = None
272
+ model_path: Optional[str] = None
273
+ load_error: Optional[str] = None
274
+ loaded_at_ts: Optional[float] = None
275
+
276
+
277
def load_llama() -> None:
    """Download the GGUF file from the Hub and load it with llama.cpp.

    Sets the module globals llm / model_path / loaded_at_ts on success, or
    load_error (repr of the exception) on failure; /health reports these.
    Never raises: failures are caught and logged so the server still boots.

    NOTE(review): on failure, `llm` is NOT reset to None (the deleted
    model_runtime.py version did reset it) — on a failed *re*load a stale
    model would remain live. Confirm whether that is intended.
    """
    global llm, model_path, load_error, loaded_at_ts

    print("=== FADES startup ===", flush=True)
    print(f"GGUF_REPO_ID={GGUF_REPO_ID}", flush=True)
    print(f"GGUF_FILENAME={GGUF_FILENAME}", flush=True)
    print(f"N_CTX={N_CTX} N_THREADS={N_THREADS} N_BATCH={N_BATCH}", flush=True)

    try:
        t0 = time.time()
        # Download (or reuse the local HF cache copy of) the GGUF weights.
        mp = hf_hub_download(
            repo_id=GGUF_REPO_ID,
            filename=GGUF_FILENAME,
            token=os.getenv("HF_TOKEN"),
        )
        t1 = time.time()
        print(f"✅ GGUF downloaded: {mp} ({t1 - t0:.1f}s)", flush=True)

        t2 = time.time()
        # CPU-only inference: n_gpu_layers=0; verbose=False keeps logs clean.
        llm_local = Llama(
            model_path=mp,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_batch=N_BATCH,
            n_gpu_layers=0,
            verbose=False,
        )
        t3 = time.time()
        print(f"✅ Model loaded: ({t3 - t2:.1f}s) n_ctx={N_CTX} threads={N_THREADS} batch={N_BATCH}", flush=True)

        # Publish globals only after the model constructed successfully.
        llm = llm_local
        model_path = mp
        load_error = None
        loaded_at_ts = time.time()
        print("=== Startup OK ===", flush=True)

    except Exception as e:
        load_error = repr(e)
        print(f"❌ Startup FAILED: {load_error}", flush=True)
316
+
317
+
318
@app.on_event("startup")
def _startup():
    """FastAPI startup hook: download and load the GGUF model once."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
    # favor of lifespan handlers — confirm the pinned FastAPI version.
    load_llama()
 
 
 
 
 
 
321
 
322
 
323
  @app.get("/")
 
327
 
328
@app.get("/health")
def health():
    """Readiness probe: model/load status plus the effective runtime config.

    "ok" is True only when the model object exists and the last load attempt
    recorded no error.
    """
    model_ready = llm is not None
    return dict(
        ok=model_ready and load_error is None,
        model_loaded=model_ready,
        load_error=load_error,
        gguf_repo=GGUF_REPO_ID,
        gguf_filename=GGUF_FILENAME,
        model_path=model_path,
        n_ctx=N_CTX,
        n_threads=N_THREADS,
        n_batch=N_BATCH,
        loaded_at_ts=loaded_at_ts,
    )
342
 
343
 
344
  # ============================
345
+ # Param selection
346
  # ============================
347
  def pick_params(req: GenParams) -> Dict[str, Any]:
348
  if req.light:
 
367
  if req.top_p is not None:
368
  params["top_p"] = float(req.top_p)
369
 
370
+ # Safety caps
371
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
372
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
373
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
374
  params["n_batch"] = max(16, min(int(params["n_batch"]), 512))
375
  return params
376
 
377
+ # # ============================
378
+ # # Post-processing: remove template sentence
379
+ # # ============================
380
+ # # This catches the exact sentence + small punctuation variations (case-insensitive).
381
+ # # Also works if the model prefixes rationales with it.
382
+ # _TEMPLATE_RE = re.compile(
383
+ # r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\b\.?",
384
+ # flags=re.IGNORECASE,
385
+ # )
386
+
387
+ # def strip_template_sentence(text: str) -> str:
388
+ # if not isinstance(text, str):
389
+ # return ""
390
+ # out = _TEMPLATE_RE.sub("", text)
391
+
392
+ # # Cleanup common leftovers (double spaces, leading punctuation)
393
+ # out = out.replace("..", ".").strip()
394
+ # out = re.sub(r"\s{2,}", " ", out)
395
+ # out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
396
+ # return out
397
+
398
+
399
+
400
+ # ============================
401
+ # Output sanitation / validation
402
+ # ============================
403
+ def _clamp01(x: Any, default: float = 0.5) -> float:
404
+ try:
405
+ v = float(x)
406
+ except Exception:
407
+ return default
408
+ if v < 0.0:
409
+ return 0.0
410
+ if v > 1.0:
411
+ return 1.0
412
+ return v
413
+
414
+
415
def _is_allowed_label(lbl: Any) -> bool:
    """True iff *lbl* is a string from ALLOWED_LABELS other than "none"."""
    if not isinstance(lbl, str):
        return False
    return lbl != "none" and lbl in ALLOWED_LABELS
417
+
418
+
419
def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
    """
    Normalize the model's /analyze JSON before returning it to clients:
    enforce the response shape, clamp confidence into [0, 1] (2 decimals),
    drop fallacy entries whose label is not in ALLOWED_LABELS (or is "none"),
    and keep only evidence quotes that are verbatim substrings of the input.
    If no fallacy entry survives, has_fallacy is forced to False.
    """
    has_fallacy = bool(obj.get("has_fallacy", False))
    fallacies_in = obj.get("fallacies", [])
    if not isinstance(fallacies_in, list):
        fallacies_in = []

    fallacies_out = []
    for f in fallacies_in:
        # Skip malformed entries entirely rather than guessing at fields.
        if not isinstance(f, dict):
            continue
        f_type = f.get("type")
        if not _is_allowed_label(f_type):
            continue

        conf = _clamp01(f.get("confidence", 0.5))
        # keep 2 decimals for nicer UI
        conf = float(f"{conf:.2f}")

        ev = f.get("evidence_quotes", [])
        if not isinstance(ev, list):
            ev = []
        ev_clean: List[str] = []
        for q in ev:
            if not isinstance(q, str):
                continue
            qq = q.strip()
            if not qq:
                continue
            # Evidence MUST be a verbatim substring of the input; quotes that
            # are not are silently dropped (no else branch).
            if qq in input_text:
                # keep short, but don't hard-cut if it breaks substring matching
                if len(qq) <= 240:
                    ev_clean.append(qq)
                else:
                    # if too long, try to keep first 240 if still substring (rare); else keep as-is
                    # NOTE(review): since qq is a substring of input_text, any
                    # prefix of it is too — the else branch below is unreachable.
                    short = qq[:240]
                    if short in input_text:
                        ev_clean.append(short)
                    else:
                        ev_clean.append(qq)

        rationale = f.get("rationale")
        if not isinstance(rationale, str):
            rationale = ""
        rationale = rationale.strip()

        fallacies_out.append(
            {
                "type": f_type,
                "confidence": conf,
                # At most 3 evidence quotes per fallacy entry.
                "evidence_quotes": ev_clean[:3],
                "rationale": rationale,
            }
        )

    overall = obj.get("overall_explanation")
    if not isinstance(overall, str):
        overall = ""
    overall = overall.strip()

    # If no fallacies survived sanitation, force no-fallacy state
    if len(fallacies_out) == 0:
        has_fallacy = False

    return {
        "has_fallacy": has_fallacy,
        "fallacies": fallacies_out,
        "overall_explanation": overall,
    }
492
+
493
+
494
+ # ============================
495
+ # Cached generation (task-aware)
496
+ # ============================
497
@lru_cache(maxsize=512)
def _generate_chat_completion(
    task: str,
    payload: str,
    light: bool,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Run one chat completion and parse its JSON output (memoized).

    Safe to cache because generation is deterministic at the defaults used
    here; all parameters participate in the cache key. Only called once the
    model is known to be loaded (see _cached_chat_completion).
    """
    try:
        # Best-effort runtime knob; llama-cpp-python may not expose it.
        llm.n_batch = int(n_batch)  # type: ignore[attr-defined]
    except Exception:
        pass

    try:
        data = json.loads(payload)
    except Exception:
        return {"ok": False, "error": "bad_payload"}

    if task == "analyze":
        messages = build_analyze_messages(data["text"])
    elif task == "rewrite":
        messages = build_rewrite_messages(
            data["text"],
            data["quote"],
            data["fallacy_type"],
            data["rationale"],
        )
    else:
        return {"ok": False, "error": "unknown_task"}

    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        stream=False,
    )

    raw = out["choices"][0]["message"]["content"]
    obj = extract_first_json_obj(raw)
    if obj is None:
        return {"ok": False, "error": "json_parse_error", "raw": raw}

    return {"ok": True, "result": obj}


def _cached_chat_completion(
    task: str,
    payload: str,
    light: bool,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Task-aware cached generation entry point (same interface as before).

    Bug fix: the previous version performed the `llm is None` check *inside*
    the lru_cache'd function, so a request that arrived before the model
    finished loading permanently cached a "model_not_loaded" failure for its
    payload and kept replaying it after startup completed. The readiness
    check now lives outside the cache.
    """
    if llm is None:
        return {"ok": False, "error": "model_not_loaded", "detail": load_error}
    return _generate_chat_completion(
        task,
        payload,
        bool(light),
        int(max_new_tokens),
        float(temperature),
        float(top_p),
        int(n_batch),
    )
546
+
547
+
548
+ def _occurrence_index(text: str, sub: str, occurrence: int) -> int:
549
+ if occurrence < 0:
550
+ return -1
551
+ start = 0
552
+ for _ in range(occurrence + 1):
553
+ idx = text.find(sub, start)
554
+ if idx == -1:
555
+ return -1
556
+ start = idx + max(1, len(sub))
557
+ return idx
558
+
559
+
560
+ def _replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
561
+ idx = _occurrence_index(text, old, occurrence)
562
+ if idx == -1:
563
+ return {"ok": False, "error": "quote_not_found"}
564
+ return {
565
+ "ok": True,
566
+ "rewritten_text": text[:idx] + new + text[idx + len(old) :],
567
+ "start_char": idx,
568
+ "end_char": idx + len(new),
569
+ "old_start_char": idx,
570
+ "old_end_char": idx + len(old),
571
+ }
572
+
573
 
574
  # ============================
575
  # Routes
 
577
  @app.post("/analyze")
578
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
579
  rid = uuid.uuid4().hex[:10]
 
580
  t0 = time.time()
581
 
582
+ _log(rid, f"📩 /analyze received (light={req.light}) chars={len(req.text) if req.text else 0}")
583
 
584
+ if not req.text or not req.text.strip():
585
+ return {"ok": False, "error": "empty_text"}
 
586
 
587
+ params = pick_params(req)
588
+ _log(
589
+ rid,
590
+ f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
591
+ )
592
 
593
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
594
 
595
+ async with GEN_LOCK:
596
+ t_lock = time.time()
597
+
598
+ _log(rid, "🧠 Generating analyze...")
599
+ t_gen0 = time.time()
600
+ res = _cached_chat_completion(
601
+ "analyze",
602
+ payload,
603
+ bool(req.light),
604
+ int(params["max_new_tokens"]),
605
+ float(params["temperature"]),
606
+ float(params["top_p"]),
607
+ int(params["n_batch"]),
608
+ )
609
+ t_gen1 = time.time()
 
610
 
611
  elapsed_total = time.time() - t0
612
+ elapsed_lock = time.time() - t_lock
613
 
614
  if not res.get("ok"):
615
+ _log(rid, f"❌ /analyze failed: {res.get('error')}")
616
  return {
617
  **res,
618
  "meta": {
 
628
  },
629
  }
630
 
631
+ # sanitize output for stability (substrings, labels, confidence clamp)
632
+ clean = sanitize_analyze_output(res["result"], req.text)
633
 
634
+ _log(rid, f"✅ /analyze ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
635
  return {
636
  "ok": True,
637
  "result": clean,
 
656
  @app.post("/rewrite")
657
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
658
  rid = uuid.uuid4().hex[:10]
 
659
  t0 = time.time()
660
 
661
+ _log(
662
+ rid,
663
+ f"📩 /rewrite received (light={req.light}) text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}",
664
  )
665
 
666
+ if not req.text or not req.text.strip():
667
+ return {"ok": False, "error": "empty_text"}
668
+ if not req.quote or not req.quote.strip():
669
+ return {"ok": False, "error": "empty_quote"}
 
670
 
671
  quote = req.quote.strip()
672
  occurrence = int(req.occurrence or 0)
673
 
674
+ # validate quote existence early
675
+ if _occurrence_index(req.text, quote, occurrence) == -1:
676
+ return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
677
 
678
+ params = pick_params(req)
679
+ # rewrite generally needs a bit more room than light analyze if you want fluent replacements
680
+ # (still controllable by request overrides)
681
+ if req.light and req.max_new_tokens is None:
682
+ params["max_new_tokens"] = max(params["max_new_tokens"], 80)
683
+
684
+ _log(
685
+ rid,
686
+ f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
687
+ )
688
 
689
  payload = json.dumps(
690
  {
 
696
  ensure_ascii=False,
697
  )
698
 
699
+ async with GEN_LOCK:
700
+ t_lock = time.time()
701
+
702
+ _log(rid, "🧠 Generating rewrite replacement_quote...")
703
+ t_gen0 = time.time()
704
+ res = _cached_chat_completion(
705
+ "rewrite",
706
+ payload,
707
+ bool(req.light),
708
+ int(params["max_new_tokens"]),
709
+ float(params["temperature"]),
710
+ float(params["top_p"]),
711
+ int(params["n_batch"]),
712
+ )
713
+ t_gen1 = time.time()
 
714
 
715
  elapsed_total = time.time() - t0
716
+ elapsed_lock = time.time() - t_lock
717
 
718
  if not res.get("ok"):
719
+ _log(rid, f"❌ /rewrite failed: {res.get('error')}")
720
  return {
721
  **res,
722
  "meta": {
 
732
  },
733
  }
734
 
735
+ obj = res["result"]
736
+ if not isinstance(obj, dict):
737
+ return {"ok": False, "error": "bad_rewrite_output"}
738
+
739
+ replacement = obj.get("replacement_quote")
740
+ if not isinstance(replacement, str):
741
+ return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
742
 
743
+ replacement = replacement.strip()
744
+ if not replacement:
745
+ return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
 
 
 
746
 
747
+ why = obj.get("why_this_fix")
748
+ if not isinstance(why, str):
749
+ why = ""
750
+ why = why.strip()
751
 
752
+ # server-side enforced: ONLY the quote is changed
753
+ rep = _replace_nth(req.text, quote, replacement, occurrence)
754
+ if not rep.get("ok"):
755
+ return {"ok": False, "error": rep.get("error", "replace_failed")}
756
 
757
+ _log(rid, f"✅ /rewrite ok total={elapsed_total:.2f}s")
758
  return {
759
  "ok": True,
760
  "result": {
model_runtime.py DELETED
@@ -1,129 +0,0 @@
1
- import os
2
- import time
3
- import json
4
- from functools import lru_cache
5
- from typing import Any, Dict, Optional
6
-
7
- from huggingface_hub import hf_hub_download
8
- from llama_cpp import Llama
9
-
10
- from prompts import build_analyze_messages, build_rewrite_messages
11
- from utils import extract_first_json_obj
12
-
13
- llm: Optional[Llama] = None
14
- model_path: Optional[str] = None
15
- load_error: Optional[str] = None
16
- loaded_at_ts: Optional[float] = None
17
-
18
- def load_llama(
19
- gguf_repo_id: str,
20
- gguf_filename: str,
21
- n_ctx: int,
22
- n_threads: int,
23
- n_batch: int,
24
- ) -> None:
25
- global llm, model_path, load_error, loaded_at_ts
26
-
27
- print("=== FADES startup ===", flush=True)
28
- print(f"GGUF_REPO_ID={gguf_repo_id}", flush=True)
29
- print(f"GGUF_FILENAME={gguf_filename}", flush=True)
30
- print(f"N_CTX={n_ctx} N_THREADS={n_threads} N_BATCH={n_batch}", flush=True)
31
-
32
- try:
33
- t0 = time.time()
34
- mp = hf_hub_download(
35
- repo_id=gguf_repo_id,
36
- filename=gguf_filename,
37
- token=os.getenv("HF_TOKEN"),
38
- )
39
- t1 = time.time()
40
- print(f"✅ GGUF downloaded: {mp} ({t1 - t0:.1f}s)", flush=True)
41
-
42
- t2 = time.time()
43
- llm_local = Llama(
44
- model_path=mp,
45
- n_ctx=n_ctx,
46
- n_threads=n_threads,
47
- n_batch=n_batch,
48
- n_gpu_layers=0,
49
- verbose=False,
50
- )
51
- t3 = time.time()
52
- print(f"✅ Model loaded: ({t3 - t2:.1f}s) n_ctx={n_ctx} threads={n_threads} batch={n_batch}", flush=True)
53
-
54
- llm = llm_local
55
- model_path = mp
56
- load_error = None
57
- loaded_at_ts = time.time()
58
- print("=== Startup OK ===", flush=True)
59
- except Exception as e:
60
- load_error = repr(e)
61
- llm = None
62
- print(f"❌ Startup FAILED: {load_error}", flush=True)
63
-
64
- def get_health(gguf_repo_id: str, gguf_filename: str, n_ctx: int, n_threads: int, n_batch: int) -> Dict[str, Any]:
65
- return {
66
- "ok": llm is not None and load_error is None,
67
- "model_loaded": llm is not None,
68
- "load_error": load_error,
69
- "gguf_repo": gguf_repo_id,
70
- "gguf_filename": gguf_filename,
71
- "model_path": model_path,
72
- "n_ctx": n_ctx,
73
- "n_threads": n_threads,
74
- "n_batch": n_batch,
75
- "loaded_at_ts": loaded_at_ts,
76
- }
77
-
78
- @lru_cache(maxsize=512)
79
- def cached_chat_completion(
80
- task: str,
81
- payload: str,
82
- max_new_tokens: int,
83
- temperature: float,
84
- top_p: float,
85
- n_batch: int,
86
- ) -> Dict[str, Any]:
87
- """
88
- Cached llama chat completion.
89
- NOTE: GEN_LOCK is managed by FastAPI routes (outside).
90
- """
91
- if llm is None:
92
- return {"ok": False, "error": "model_not_loaded", "detail": load_error}
93
-
94
- try:
95
- llm.n_batch = int(n_batch) # type: ignore[attr-defined]
96
- except Exception:
97
- pass
98
-
99
- try:
100
- data = json.loads(payload)
101
- except Exception:
102
- return {"ok": False, "error": "bad_payload"}
103
-
104
- if task == "analyze":
105
- messages = build_analyze_messages(data["text"])
106
- elif task == "rewrite":
107
- messages = build_rewrite_messages(
108
- data["text"],
109
- data["quote"],
110
- data["fallacy_type"],
111
- data["rationale"],
112
- )
113
- else:
114
- return {"ok": False, "error": "unknown_task"}
115
-
116
- out = llm.create_chat_completion(
117
- messages=messages,
118
- max_tokens=int(max_new_tokens),
119
- temperature=float(temperature),
120
- top_p=float(top_p),
121
- stream=False,
122
- )
123
-
124
- raw = out["choices"][0]["message"]["content"]
125
- obj = extract_first_json_obj(raw)
126
- if obj is None:
127
- return {"ok": False, "error": "json_parse_error", "raw": raw}
128
-
129
- return {"ok": True, "result": obj}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prompts.py DELETED
@@ -1,113 +0,0 @@
1
- from typing import Dict, List
2
-
3
- ALLOWED_LABELS = [
4
- "none",
5
- "faulty generalization",
6
- "false causality",
7
- "circular reasoning",
8
- "ad populum",
9
- "ad hominem",
10
- "fallacy of logic",
11
- "appeal to emotion",
12
- "false dilemma",
13
- "equivocation",
14
- "fallacy of extension",
15
- "fallacy of relevance",
16
- "fallacy of credibility",
17
- "miscellaneous",
18
- "intentional",
19
- ]
20
-
21
- LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
22
-
23
- # Stronger /analyze prompt: forces specificity and forbids the "template" sentence
24
- ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
25
-
26
- You MUST choose labels ONLY from this list (exact string):
27
- {LABELS_STR}
28
-
29
- You MUST return ONLY valid JSON with this schema:
30
- {{
31
- "has_fallacy": boolean,
32
- "fallacies": [
33
- {{
34
- "type": string,
35
- "confidence": number,
36
- "evidence_quotes": [string],
37
- "rationale": string
38
- }}
39
- ],
40
- "overall_explanation": string
41
- }}
42
-
43
- Hard rules:
44
- - Output ONLY JSON. No markdown. No extra text.
45
- - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
46
- - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
47
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
48
- It MUST NOT be always the same across examples. Calibrate it.
49
- - The rationale MUST be specific to the evidence (2–4 sentences):
50
- Explain (1) what the quote claims, (2) why that matches the fallacy label,
51
- (3) what logical step is invalid or missing.
52
- DO NOT use generic filler. Do NOT reuse stock phrases.
53
- - If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
54
- INPUT:
55
- {{text}}
56
-
57
- OUTPUT:"""
58
-
59
- # /rewrite prompt: returns ONLY a replacement substring for the quote (server does the replacement)
60
- REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
61
-
62
- Goal:
63
- - You MUST propose a replacement for the QUOTE only.
64
- - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
65
- - The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
66
- - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
67
- - Do NOT introduce new fallacies.
68
-
69
- Return ONLY valid JSON with this schema:
70
- {
71
- "replacement_quote": string,
72
- "why_this_fix": string
73
- }
74
-
75
- Hard rules:
76
- - Output ONLY JSON. No markdown. No extra text.
77
- - replacement_quote should be standalone text (no surrounding quotes).
78
- - why_this_fix: 1–3 sentences, specific.
79
-
80
- INPUT_TEXT:
81
- {text}
82
-
83
- QUOTE_TO_REWRITE:
84
- {quote}
85
-
86
- FALLACY_TYPE:
87
- {fallacy_type}
88
-
89
- WHY_FALLACIOUS:
90
- {rationale}
91
-
92
- OUTPUT:"""
93
-
94
-
95
- def build_analyze_messages(text: str) -> List[Dict[str, str]]:
96
- return [
97
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
98
- {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)},
99
- ]
100
-
101
-
102
- def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
103
- prompt = (
104
- REWRITE_PROMPT
105
- .replace("<<TEXT>>", text)
106
- .replace("<<QUOTE>>", quote)
107
- .replace("<<FALLACY_TYPE>>", fallacy_type)
108
- .replace("<<RATIONALE>>", rationale)
109
- )
110
- return [
111
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
112
- {"role": "user", "content": prompt},
113
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py DELETED
@@ -1,171 +0,0 @@
1
- import json
2
- import re
3
- from typing import Any, Dict, Optional, List
4
- from prompts import ALLOWED_LABELS
5
-
6
- # ----------------------------
7
- # Robust JSON extraction
8
- # ----------------------------
9
- def stop_at_complete_json(text: str) -> Optional[str]:
10
- start = text.find("{")
11
- if start == -1:
12
- return None
13
-
14
- depth = 0
15
- in_str = False
16
- esc = False
17
-
18
- for i in range(start, len(text)):
19
- ch = text[i]
20
- if in_str:
21
- if esc:
22
- esc = False
23
- elif ch == "\\":
24
- esc = True
25
- elif ch == '"':
26
- in_str = False
27
- continue
28
-
29
- if ch == '"':
30
- in_str = True
31
- continue
32
- if ch == "{":
33
- depth += 1
34
- elif ch == "}":
35
- depth -= 1
36
- if depth == 0:
37
- return text[start : i + 1]
38
- return None
39
-
40
-
41
- def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
42
- cut = stop_at_complete_json(s) or s
43
- start = cut.find("{")
44
- end = cut.rfind("}")
45
- if start == -1 or end == -1 or end <= start:
46
- return None
47
- cand = cut[start : end + 1].strip()
48
- try:
49
- return json.loads(cand)
50
- except Exception:
51
- return None
52
-
53
-
54
- # ----------------------------
55
- # Post-processing: remove template sentence
56
- # ----------------------------
57
- _TEMPLATE_RE = re.compile(
58
- r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\b\.?",
59
- flags=re.IGNORECASE,
60
- )
61
-
62
- def strip_template_sentence(text: str) -> str:
63
- if not isinstance(text, str):
64
- return ""
65
- out = _TEMPLATE_RE.sub("", text)
66
- out = out.replace("..", ".").strip()
67
- out = re.sub(r"\s{2,}", " ", out)
68
- out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
69
- return out
70
-
71
-
72
- # ----------------------------
73
- # Output sanitation / validation
74
- # ----------------------------
75
- def _clamp01(x: Any, default: float = 0.5) -> float:
76
- try:
77
- v = float(x)
78
- except Exception:
79
- return default
80
- return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
81
-
82
-
83
- def _is_allowed_label(lbl: Any) -> bool:
84
- return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"
85
-
86
-
87
- def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
88
- has_fallacy = bool(obj.get("has_fallacy", False))
89
- fallacies_in = obj.get("fallacies", [])
90
- if not isinstance(fallacies_in, list):
91
- fallacies_in = []
92
-
93
- fallacies_out = []
94
- for f in fallacies_in:
95
- if not isinstance(f, dict):
96
- continue
97
- f_type = f.get("type")
98
- if not _is_allowed_label(f_type):
99
- continue
100
-
101
- conf = _clamp01(f.get("confidence", 0.5))
102
- conf = float(f"{conf:.2f}")
103
-
104
- ev = f.get("evidence_quotes", [])
105
- if not isinstance(ev, list):
106
- ev = []
107
-
108
- ev_clean: List[str] = []
109
- for q in ev:
110
- if not isinstance(q, str):
111
- continue
112
- qq = q.strip()
113
- if not qq:
114
- continue
115
- if qq in input_text:
116
- if len(qq) <= 240:
117
- ev_clean.append(qq)
118
- else:
119
- short = qq[:240]
120
- ev_clean.append(short if short in input_text else qq)
121
-
122
- rationale = strip_template_sentence(str(f.get("rationale", "")).strip())
123
-
124
- fallacies_out.append(
125
- {
126
- "type": f_type,
127
- "confidence": conf,
128
- "evidence_quotes": ev_clean[:3],
129
- "rationale": rationale,
130
- }
131
- )
132
-
133
- overall = strip_template_sentence(str(obj.get("overall_explanation", "")).strip())
134
-
135
- if len(fallacies_out) == 0:
136
- has_fallacy = False
137
-
138
- return {
139
- "has_fallacy": has_fallacy,
140
- "fallacies": fallacies_out,
141
- "overall_explanation": overall,
142
- }
143
-
144
-
145
- # ----------------------------
146
- # Replace helpers
147
- # ----------------------------
148
- def occurrence_index(text: str, sub: str, occurrence: int) -> int:
149
- if occurrence < 0:
150
- return -1
151
- start = 0
152
- for _ in range(occurrence + 1):
153
- idx = text.find(sub, start)
154
- if idx == -1:
155
- return -1
156
- start = idx + max(1, len(sub))
157
- return idx
158
-
159
-
160
- def replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
161
- idx = occurrence_index(text, old, occurrence)
162
- if idx == -1:
163
- return {"ok": False, "error": "quote_not_found"}
164
- return {
165
- "ok": True,
166
- "rewritten_text": text[:idx] + new + text[idx + len(old) :],
167
- "start_char": idx,
168
- "end_char": idx + len(new),
169
- "old_start_char": idx,
170
- "old_end_char": idx + len(old),
171
- }