maxime-antoine-dev committed on
Commit
df0ce09
·
1 Parent(s): 406b25f

added technical confidence

Browse files
Files changed (4) hide show
  1. README.md +47 -0
  2. main.py +246 -855
  3. requirements.txt +1 -0
  4. utils.py +171 -0
README.md CHANGED
@@ -10,3 +10,50 @@ short_description: API for fades LLM
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ ## Build
15
+ ```
16
+ docker build -t fades-api .
17
+ ```
18
+
19
+ ## Start
20
+ ```
21
+ docker run --rm -it -p 7860:7860 -v "${PWD}/data:/data" fades-api
22
+ ```
23
+
24
+ ## Switch to another version of the model
25
+
26
+ Quantized model (4 GB): ideal for CPU
27
+ ```python
28
+ from huggingface_hub import hf_hub_download
29
+
30
+ REPO_ID = "maxime-antoine-dev/maxime-antoine-dev/fades-mistral-v02-gguf
31
+ REPO_TYPE = "model"
32
+
33
+ # v1
34
+ p1 = hf_hub_download(repo_id=REPO_ID, filename=HF_FILENAME, revision="v1")
35
+ print("v1 path:", p1)
36
+
37
+ # v2
38
+ p2 = hf_hub_download(repo_id=REPO_ID, filename=HF_FILENAME, revision="v2")
39
+ print("v2 path:", p2)
40
+ ```
41
+
42
+
43
+
44
+ Full model (13 GB): ideal for GPU
45
+ ```python
46
+ from huggingface_hub import hf_hub_download
47
+
48
+ REPO_ID = "maxime-antoine-dev/maxime-antoine-dev/fades
49
+ REPO_TYPE = "model"
50
+
51
+ # v1
52
+ p1 = hf_hub_download(repo_id=REPO_ID, filename=HF_FILENAME, revision="v1")
53
+ print("v1 path:", p1)
54
+
55
+ # v2
56
+ p2 = hf_hub_download(repo_id=REPO_ID, filename=HF_FILENAME, revision="v2")
57
+ print("v2 path:", p2)
58
+ ```
59
+
main.py CHANGED
@@ -1,56 +1,34 @@
1
- # main.py
2
  import os
3
  import json
4
  import time
5
- import uuid
6
  import asyncio
7
  import re
8
- from typing import Any, Dict, Optional, List, Tuple
9
-
10
- from fastapi import FastAPI
11
  from fastapi.middleware.cors import CORSMiddleware
12
- from pydantic import BaseModel, Field
 
 
 
13
  from huggingface_hub import hf_hub_download
14
  from llama_cpp import Llama
15
 
 
 
 
 
 
16
 
17
- # ============================
18
- # Config (model)
19
- # ============================
20
- GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
21
- GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
22
-
23
- # Model load params (fixed once at startup)
24
- N_CTX = int(os.getenv("N_CTX", "1536"))
25
- CPU_COUNT = os.cpu_count() or 4
26
- N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
- N_BATCH = int(os.getenv("N_BATCH", "256"))
28
-
29
- # Default generation params ("normal")
30
- MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
31
- TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
32
- TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
33
-
34
- # "Light" generation params
35
- LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
36
- LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
37
- LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
38
-
39
- # "Light" runtime knobs
40
- LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
41
-
42
- # Anti-loop defaults
43
- REPEAT_PENALTY_DEFAULT = float(os.getenv("REPEAT_PENALTY", "1.15"))
44
 
45
- # Cache only SUCCESSFUL generations (TTL)
46
- CACHE_TTL_S = int(os.getenv("CACHE_TTL_S", "300")) # 5 minutes
47
- CACHE_MAX_ITEMS = int(os.getenv("CACHE_MAX_ITEMS", "512"))
48
-
49
- # One request at a time on CPU
50
  GEN_LOCK = asyncio.Lock()
51
-
52
- app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
53
-
54
 
55
  # ============================
56
  # CORS (for browser front-ends)
@@ -69,61 +47,63 @@ app.add_middleware(
69
  allow_headers=["*"],
70
  )
71
 
72
-
73
- # ============================
74
- # Schemas
75
- # ============================
76
- class GenParams(BaseModel):
77
- light: bool = False
78
- max_new_tokens: Optional[int] = None
79
- temperature: Optional[float] = None
80
- top_p: Optional[float] = None
81
- repeat_penalty: Optional[float] = None
82
-
83
-
84
- class AnalyzeRequest(GenParams):
85
- text: str
86
-
87
-
88
- class RewriteRequest(GenParams):
89
- text: str
90
- quote: str = Field(..., description="Verbatim substring that must be replaced.")
91
- fallacy_type: str = Field(..., description="Fallacy type of the quote.")
92
- rationale: str = Field(..., description="Why the quote is fallacious.")
93
- occurrence: int = Field(0, description="Which occurrence of quote to replace (0-based).")
94
-
95
-
96
- # ============================
97
- # Labels & Prompts
98
- # ============================
99
  ALLOWED_LABELS = [
100
- "none",
101
- "faulty generalization",
102
- "false causality",
103
- "circular reasoning",
104
- "ad populum",
105
- "ad hominem",
106
- "fallacy of logic",
107
- "appeal to emotion",
108
- "false dilemma",
109
- "equivocation",
110
- "fallacy of extension",
111
- "fallacy of relevance",
112
- "fallacy of credibility",
113
- "miscellaneous",
114
- "intentional",
115
  ]
116
- LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
117
 
118
- END_SENTINEL = "<END_JSON>"
119
- STOP_SEQS = [END_SENTINEL]
120
-
121
- ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- You MUST choose labels ONLY from this list (exact string):
124
- {LABELS_STR}
 
 
 
 
 
 
 
 
 
125
 
126
- You MUST return ONLY valid JSON with this schema:
127
  {{
128
  "has_fallacy": boolean,
129
  "fallacies": [
@@ -136,818 +116,229 @@ You MUST return ONLY valid JSON with this schema:
136
  ],
137
  "overall_explanation": string
138
  }}
139
-
140
- Hard rules:
141
- - Output ONLY JSON. No markdown. No extra text.
142
- - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
143
- - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
144
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals). It MUST NOT be always the same.
145
- - The rationale MUST be specific (2–4 sentences). DO NOT use generic filler.
146
- - You MUST NOT output this sentence anywhere:
147
- "The input contains fallacious reasoning consistent with the predicted type(s)."
148
- - overall_explanation MUST be specific (2–5 sentences).
149
-
150
- IMPORTANT TERMINATION:
151
- - After the JSON object, output the token {END_SENTINEL} and stop.
152
-
153
- INPUT:
154
- {{text}}
155
-
156
- OUTPUT (JSON then {END_SENTINEL}):"""
157
-
158
- REWRITE_PROMPT = f"""You are rewriting a small quoted span inside a larger text.
159
-
160
- Goal:
161
- - You MUST propose a replacement for the QUOTE only.
162
- - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
163
- - The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
164
- - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
165
- - Do NOT introduce new fallacies.
166
-
167
- Return ONLY valid JSON with this schema:
168
  {{
169
- "replacement_quote": string,
170
  "why_this_fix": string
171
  }}
 
172
 
173
- Hard rules:
174
- - Output ONLY JSON. No markdown. No extra text.
175
- - replacement_quote should be standalone text (no surrounding quotes).
176
- - why_this_fix: 1–3 sentences, specific.
177
-
178
- IMPORTANT TERMINATION:
179
- - After the JSON object, output the token {END_SENTINEL} and stop.
180
-
181
- INPUT_TEXT:
182
- {{text}}
183
-
184
- QUOTE_TO_REWRITE:
185
- {{quote}}
186
-
187
- FALLACY_TYPE:
188
- {{fallacy_type}}
189
-
190
- WHY_FALLACIOUS:
191
- {{rationale}}
192
-
193
- OUTPUT (JSON then {END_SENTINEL}):"""
194
-
195
-
196
- def build_analyze_messages(text: str) -> List[Dict[str, str]]:
197
- return [
198
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
199
- {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)},
200
- ]
201
-
202
-
203
- def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
204
- prompt = (
205
- REWRITE_PROMPT
206
- .replace("{text}", text)
207
- .replace("{quote}", quote)
208
- .replace("{fallacy_type}", fallacy_type)
209
- .replace("{rationale}", rationale)
210
- )
211
- return [
212
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
213
- {"role": "user", "content": prompt},
214
- ]
215
-
216
-
217
- # ============================
218
- # Logging
219
- # ============================
220
- def _log(rid: str, msg: str):
221
- print(f"[{rid}] {msg}", flush=True)
222
-
223
-
224
- # ============================
225
- # Robust JSON extraction + repair
226
- # ============================
227
- def _strip_sentinel(s: str) -> str:
228
- if not isinstance(s, str):
229
- return ""
230
- idx = s.find(END_SENTINEL)
231
- if idx != -1:
232
- return s[:idx]
233
- return s
234
-
235
-
236
- def stop_at_complete_json(text: str) -> Optional[str]:
237
  start = text.find("{")
238
- if start == -1:
239
- return None
240
 
241
  depth = 0
242
- in_str = False
243
- esc = False
244
-
245
- for i in range(start, len(text)):
246
- ch = text[i]
247
- if in_str:
248
- if esc:
249
- esc = False
250
- elif ch == "\\":
251
- esc = True
252
- elif ch == '"':
253
- in_str = False
254
- continue
255
-
256
- if ch == '"':
257
- in_str = True
258
- continue
259
- if ch == "{":
260
  depth += 1
261
- elif ch == "}":
262
  depth -= 1
263
  if depth == 0:
264
- return text[start : i + 1]
265
- return None
266
-
267
-
268
- def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
269
- s = _strip_sentinel(s)
270
- cut = stop_at_complete_json(s) or s
271
- start = cut.find("{")
272
- end = cut.rfind("}")
273
- if start == -1 or end == -1 or end <= start:
274
- return None
275
- cand = cut[start : end + 1].strip()
276
- try:
277
- return json.loads(cand)
278
- except Exception:
279
- return None
280
-
281
-
282
- def _count_unescaped_quotes(s: str) -> int:
283
- in_str = False
284
- esc = False
285
- count = 0
286
- for ch in s:
287
- if esc:
288
- esc = False
289
- continue
290
- if ch == "\\":
291
- esc = True
292
- continue
293
- if ch == '"':
294
- count += 1
295
- in_str = not in_str
296
- return count
297
-
298
-
299
- def _balance_braces_outside_strings(s: str) -> Tuple[int, int]:
300
- opens = 0
301
- closes = 0
302
- in_str = False
303
- esc = False
304
- for ch in s:
305
- if in_str:
306
- if esc:
307
- esc = False
308
- elif ch == "\\":
309
- esc = True
310
- elif ch == '"':
311
- in_str = False
312
- continue
313
- else:
314
- if ch == '"':
315
- in_str = True
316
- continue
317
- if ch == "{":
318
- opens += 1
319
- elif ch == "}":
320
- closes += 1
321
- return opens, closes
322
-
323
-
324
- def try_repair_and_parse_json(raw: str) -> Optional[Dict[str, Any]]:
325
  """
326
- Best-effort repair when model got stuck/repetitive and didn't close JSON.
327
- Strategy:
328
- - take from first '{'
329
- - if quotes count odd => append '"'
330
- - balance braces outside strings by appending missing '}'
331
- - try json.loads
332
  """
333
- if not isinstance(raw, str):
334
- return None
335
- s = _strip_sentinel(raw)
336
- start = s.find("{")
337
- if start == -1:
338
- return None
339
- cand = s[start:].strip()
340
-
341
- # If it contains huge repetition, hard-trim after some chars to avoid pathological payloads.
342
- # (Keeps server responsive.)
343
- MAX_CAND = 50_000
344
- if len(cand) > MAX_CAND:
345
- cand = cand[:MAX_CAND]
346
-
347
- # Close open string if needed
348
- if _count_unescaped_quotes(cand) % 2 == 1:
349
- cand += '"'
350
-
351
- opens, closes = _balance_braces_outside_strings(cand)
352
- if closes > opens:
353
- # can't safely repair this
354
- return None
355
- if opens > closes:
356
- cand += "}" * (opens - closes)
357
-
358
- cand = cand.strip()
359
 
360
- try:
361
- return json.loads(cand)
362
- except Exception:
363
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
 
 
365
 
366
- # ============================
367
- # Model load
368
- # ============================
369
- llm: Optional[Llama] = None
370
- model_path: Optional[str] = None
371
- load_error: Optional[str] = None
372
- loaded_at_ts: Optional[float] = None
373
 
 
 
374
 
375
- def load_llama() -> None:
376
- global llm, model_path, load_error, loaded_at_ts
377
 
378
- print("=== FADES startup ===", flush=True)
379
- print(f"GGUF_REPO_ID={GGUF_REPO_ID}", flush=True)
380
- print(f"GGUF_FILENAME={GGUF_FILENAME}", flush=True)
381
- print(f"N_CTX={N_CTX} N_THREADS={N_THREADS} N_BATCH={N_BATCH}", flush=True)
382
 
 
 
 
383
  try:
384
- t0 = time.time()
385
- mp = hf_hub_download(
386
- repo_id=GGUF_REPO_ID,
387
- filename=GGUF_FILENAME,
388
- token=os.getenv("HF_TOKEN"),
389
- )
390
- t1 = time.time()
391
- print(f"✅ GGUF downloaded: {mp} ({t1 - t0:.1f}s)", flush=True)
392
-
393
- t2 = time.time()
394
- llm_local = Llama(
395
- model_path=mp,
396
- n_ctx=N_CTX,
397
- n_threads=N_THREADS,
398
- n_batch=N_BATCH,
399
- n_gpu_layers=0,
400
- verbose=False,
401
  )
402
- t3 = time.time()
403
- print(f"✅ Model loaded: ({t3 - t2:.1f}s) n_ctx={N_CTX} threads={N_THREADS} batch={N_BATCH}", flush=True)
404
-
405
- llm = llm_local
406
- model_path = mp
407
- load_error = None
408
- loaded_at_ts = time.time()
409
- print("=== Startup OK ===", flush=True)
410
-
411
  except Exception as e:
412
- load_error = repr(e)
413
- print(f"❌ Startup FAILED: {load_error}", flush=True)
414
-
415
 
416
- @app.on_event("startup")
417
- def _startup():
418
- load_llama()
419
-
420
-
421
- @app.get("/")
422
- def root():
423
- return {"ok": True, "hint": "Use GET /health, POST /analyze, POST /rewrite"}
424
 
 
 
 
 
 
425
 
426
  @app.get("/health")
427
  def health():
428
- return {
429
- "ok": llm is not None and load_error is None,
430
- "model_loaded": llm is not None,
431
- "load_error": load_error,
432
- "gguf_repo": GGUF_REPO_ID,
433
- "gguf_filename": GGUF_FILENAME,
434
- "model_path": model_path,
435
- "n_ctx": N_CTX,
436
- "n_threads": N_THREADS,
437
- "n_batch": N_BATCH,
438
- "loaded_at_ts": loaded_at_ts,
439
- }
440
-
441
-
442
- # ============================
443
- # Param selection
444
- # ============================
445
- def pick_params(req: GenParams) -> Dict[str, Any]:
446
- if req.light:
447
- params = {
448
- "max_new_tokens": LIGHT_MAX_NEW_TOKENS,
449
- "temperature": LIGHT_TEMPERATURE,
450
- "top_p": LIGHT_TOP_P,
451
- "n_batch": LIGHT_N_BATCH,
452
- "repeat_penalty": REPEAT_PENALTY_DEFAULT,
453
- }
454
- else:
455
- params = {
456
- "max_new_tokens": MAX_NEW_TOKENS_DEFAULT,
457
- "temperature": TEMPERATURE_DEFAULT,
458
- "top_p": TOP_P_DEFAULT,
459
- "n_batch": N_BATCH,
460
- "repeat_penalty": REPEAT_PENALTY_DEFAULT,
461
- }
462
-
463
- if req.max_new_tokens is not None:
464
- params["max_new_tokens"] = int(req.max_new_tokens)
465
- if req.temperature is not None:
466
- params["temperature"] = float(req.temperature)
467
- if req.top_p is not None:
468
- params["top_p"] = float(req.top_p)
469
- if req.repeat_penalty is not None:
470
- params["repeat_penalty"] = float(req.repeat_penalty)
471
-
472
- # Safety caps
473
- params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
474
- params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
475
- params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
476
- params["n_batch"] = max(16, min(int(params["n_batch"]), 512))
477
- params["repeat_penalty"] = max(1.0, min(float(params["repeat_penalty"]), 1.5))
478
- return params
479
 
 
 
 
 
 
480
 
481
- # ============================
482
- # Post-processing helpers
483
- # ============================
484
- _TEMPLATE_SENTENCE = "The input contains fallacious reasoning consistent with the predicted type(s)."
485
- _TEMPLATE_RE = re.compile(
486
- r"(?is)\bThe input contains fallacious reasoning consistent with the predicted type\(s\)\.\s*"
487
- )
488
-
489
-
490
- def strip_template_sentence(text: Any) -> str:
491
- if not isinstance(text, str):
492
- return ""
493
- out = _TEMPLATE_RE.sub("", text)
494
- out = out.replace(_TEMPLATE_SENTENCE, "")
495
- out = re.sub(r"\s{2,}", " ", out).strip()
496
- out = re.sub(r"^[\s\-–—:;,\.\u2022]+", "", out).strip()
497
- out = out.replace("..", ".").replace(" ,", ",").strip()
498
- return out
499
-
500
-
501
- # ============================
502
- # Output sanitation / validation
503
- # ============================
504
- def _clamp01(x: Any, default: float = 0.5) -> float:
505
- try:
506
- v = float(x)
507
- except Exception:
508
- return default
509
- if v < 0.0:
510
- return 0.0
511
- if v > 1.0:
512
- return 1.0
513
- return v
514
-
515
-
516
- def _is_allowed_label(lbl: Any) -> bool:
517
- return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"
518
-
519
-
520
- def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
521
- has_fallacy = bool(obj.get("has_fallacy", False))
522
- fallacies_in = obj.get("fallacies", [])
523
- if not isinstance(fallacies_in, list):
524
- fallacies_in = []
525
-
526
- fallacies_out = []
527
- for f in fallacies_in:
528
- if not isinstance(f, dict):
529
- continue
530
- f_type = f.get("type")
531
- if not _is_allowed_label(f_type):
532
- continue
533
-
534
- conf = _clamp01(f.get("confidence", 0.5))
535
- conf = float(f"{conf:.2f}")
536
-
537
- ev = f.get("evidence_quotes", [])
538
- if not isinstance(ev, list):
539
- ev = []
540
-
541
- ev_clean: List[str] = []
542
- for q in ev:
543
- if not isinstance(q, str):
544
- continue
545
- qq = q.strip()
546
- if not qq:
547
- continue
548
- if qq in input_text:
549
- ev_clean.append(qq if len(qq) <= 240 else qq[:240])
550
-
551
- rationale = strip_template_sentence(f.get("rationale", ""))
552
-
553
- fallacies_out.append(
554
- {
555
- "type": f_type,
556
- "confidence": conf,
557
- "evidence_quotes": ev_clean[:3],
558
- "rationale": rationale,
559
- }
560
- )
561
-
562
- overall = strip_template_sentence(obj.get("overall_explanation", ""))
563
-
564
- if len(fallacies_out) == 0:
565
- has_fallacy = False
566
-
567
- return {
568
- "has_fallacy": has_fallacy,
569
- "fallacies": fallacies_out,
570
- "overall_explanation": overall,
571
- }
572
-
573
 
574
- def generate_overall_explanation(clean: Dict[str, Any]) -> str:
575
- has_fallacy = bool(clean.get("has_fallacy"))
576
- fallacies = clean.get("fallacies") or []
577
- if not has_fallacy or not fallacies:
578
- return (
579
- "No clear fallacious reasoning was detected in the text. "
580
- "The argument appears broadly consistent as written, though it may still rely on unstated assumptions."
581
  )
 
582
 
583
- # unique types
584
- types: List[str] = []
585
- for f in fallacies:
586
- if isinstance(f, dict):
587
- t = f.get("type")
588
- if isinstance(t, str) and t not in types:
589
- types.append(t)
590
-
591
- # example
592
- example = ""
593
- for f in fallacies:
594
- if isinstance(f, dict):
595
- ev = f.get("evidence_quotes")
596
- if isinstance(ev, list) and ev and isinstance(ev[0], str) and ev[0].strip():
597
- example = ev[0].strip()
598
- break
599
- if example and len(example) > 160:
600
- example = example[:160].rstrip() + "…"
601
-
602
- risk_map = {
603
- "faulty generalization": "It can cause you to over-apply a conclusion from too little evidence.",
604
- "false causality": "It can lead to incorrect cause-and-effect beliefs and bad decisions based on them.",
605
- "circular reasoning": "It can make a claim look proven while it is actually assumed from the start.",
606
- "ad populum": "It can make popularity feel like proof, which can spread or entrench misinformation.",
607
- "ad hominem": "It can shift focus from evidence to personal attacks, increasing polarization.",
608
- "fallacy of logic": "It can make the argument sound coherent while a key logical step does not follow.",
609
- "appeal to emotion": "It can push decisions through fear/anger rather than evidence.",
610
- "false dilemma": "It can hide reasonable alternatives by framing the situation as only two options.",
611
- "equivocation": "It can create confusion by changing the meaning of key terms mid-argument.",
612
- "fallacy of extension": "It can exaggerate consequences by leaping from a modest premise to an extreme outcome.",
613
- "fallacy of relevance": "It can distract from the real issue with points that do not support the conclusion.",
614
- "fallacy of credibility": "It can replace evidence with perceived authority or social credibility.",
615
- "miscellaneous": "It can still mislead by making the conclusion feel stronger than the support provided.",
616
- "intentional": "It can be persuasive while bypassing careful reasoning, increasing the chance of manipulation.",
617
- }
618
-
619
- risks: List[str] = []
620
- for t in types:
621
- rs = risk_map.get(t)
622
- if rs and rs not in risks:
623
- risks.append(rs)
624
- if len(risks) >= 2:
625
- break
626
 
627
- types_str = ", ".join(types) if len(types) <= 3 else ", ".join(types[:3]) + "…"
628
- out = (
629
- f"The text contains fallacious reasoning ({types_str}) that can make the conclusion seem stronger than the evidence supports."
630
- )
631
- if example:
632
- out += f' For example: "{example}".'
633
- out += " Risk: " + (" ".join(risks) if risks else "it may mislead readers by presenting weak support as if it were decisive.")
634
- return out.strip()
635
-
636
-
637
- # ============================
638
- # Success-only cache
639
- # ============================
640
- _SUCCESS_CACHE: Dict[Tuple[Any, ...], Tuple[float, Dict[str, Any]]] = {}
641
 
 
 
 
 
 
642
 
643
- def _cache_get(key: Tuple[Any, ...]) -> Optional[Dict[str, Any]]:
644
- item = _SUCCESS_CACHE.get(key)
645
- if not item:
646
- return None
647
- ts, val = item
648
- if (time.time() - ts) > CACHE_TTL_S:
649
- _SUCCESS_CACHE.pop(key, None)
650
- return None
651
- return val
652
-
653
-
654
- def _cache_put(key: Tuple[Any, ...], val: Dict[str, Any]) -> None:
655
- # naive eviction if too big
656
- if len(_SUCCESS_CACHE) >= CACHE_MAX_ITEMS:
657
- # drop oldest
658
- oldest_key = min(_SUCCESS_CACHE.items(), key=lambda kv: kv[1][0])[0]
659
- _SUCCESS_CACHE.pop(oldest_key, None)
660
- _SUCCESS_CACHE[key] = (time.time(), val)
661
-
662
-
663
- # ============================
664
- # Completion (task-aware)
665
- # ============================
666
- def _chat_completion(
667
- task: str,
668
- payload: str,
669
- light: bool,
670
- max_new_tokens: int,
671
- temperature: float,
672
- top_p: float,
673
- n_batch: int,
674
- repeat_penalty: float,
675
- ) -> Dict[str, Any]:
676
- if llm is None:
677
- return {"ok": False, "error": "model_not_loaded", "detail": load_error}
678
-
679
- key = (task, payload, light, max_new_tokens, temperature, top_p, n_batch, repeat_penalty)
680
- cached = _cache_get(key)
681
- if cached is not None:
682
- return {"ok": True, "result": cached, "cached": True}
683
-
684
- try:
685
- llm.n_batch = int(n_batch) # type: ignore[attr-defined]
686
- except Exception:
687
- pass
688
 
689
  try:
690
- data = json.loads(payload)
691
- except Exception:
692
- return {"ok": False, "error": "bad_payload"}
693
-
694
- if task == "analyze":
695
- messages = build_analyze_messages(data["text"])
696
- elif task == "rewrite":
697
- messages = build_rewrite_messages(
698
- data["text"],
699
- data["quote"],
700
- data["fallacy_type"],
701
- data["rationale"],
702
- )
703
- else:
704
- return {"ok": False, "error": "unknown_task"}
705
-
706
- t0 = time.time()
707
- out = llm.create_chat_completion(
708
- messages=messages,
709
- max_tokens=int(max_new_tokens),
710
- temperature=float(temperature),
711
- top_p=float(top_p),
712
- repeat_penalty=float(repeat_penalty),
713
- stop=STOP_SEQS,
714
- stream=False,
715
- )
716
- t1 = time.time()
717
-
718
- raw = out["choices"][0]["message"]["content"]
719
- raw = _strip_sentinel(raw)
720
-
721
- obj = extract_first_json_obj(raw)
722
- if obj is None:
723
- # attempt repair (close quote/braces) to avoid unusable responses
724
- obj = try_repair_and_parse_json(raw)
725
-
726
- if obj is None:
727
- return {"ok": False, "error": "json_parse_error", "raw": raw, "gen_s": round(t1 - t0, 3)}
728
-
729
- # success only: store in cache
730
- _cache_put(key, obj)
731
-
732
- return {"ok": True, "result": obj, "gen_s": round(t1 - t0, 3)}
733
-
734
-
735
- def _occurrence_index(text: str, sub: str, occurrence: int) -> int:
736
- if occurrence < 0:
737
- return -1
738
- start = 0
739
- for _ in range(occurrence + 1):
740
- idx = text.find(sub, start)
741
- if idx == -1:
742
- return -1
743
- start = idx + max(1, len(sub))
744
- return idx
745
-
746
-
747
- def _replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
748
- idx = _occurrence_index(text, old, occurrence)
749
- if idx == -1:
750
- return {"ok": False, "error": "quote_not_found"}
751
- return {
752
- "ok": True,
753
- "rewritten_text": text[:idx] + new + text[idx + len(old) :],
754
- "start_char": idx,
755
- "end_char": idx + len(new),
756
- "old_start_char": idx,
757
- "old_end_char": idx + len(old),
758
- }
759
 
 
 
 
760
 
761
- # ============================
762
- # Routes
763
- # ============================
764
- @app.post("/analyze")
765
- async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
766
- rid = uuid.uuid4().hex[:10]
767
- t0 = time.time()
768
 
769
- _log(rid, f"📩 /analyze received (light={req.light}) chars={len(req.text) if req.text else 0}")
 
770
 
771
- if not req.text or not req.text.strip():
772
- return {"ok": False, "error": "empty_text"}
773
 
774
- params = pick_params(req)
775
- _log(
776
- rid,
777
- f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']} repeat_penalty={params['repeat_penalty']}",
778
- )
779
-
780
- payload = json.dumps({"text": req.text}, ensure_ascii=False)
781
-
782
- async with GEN_LOCK:
783
- _log(rid, "🧠 Generating analyze...")
784
- res = _chat_completion(
785
- "analyze",
786
- payload,
787
- bool(req.light),
788
- int(params["max_new_tokens"]),
789
- float(params["temperature"]),
790
- float(params["top_p"]),
791
- int(params["n_batch"]),
792
- float(params["repeat_penalty"]),
793
- )
794
 
795
- elapsed_total = time.time() - t0
796
-
797
- if not res.get("ok"):
798
- _log(rid, f"❌ /analyze failed: {res.get('error')}")
799
- return {
800
- **res,
801
- "meta": {
802
- "request_id": rid,
803
- "light": bool(req.light),
804
- "params": {
805
- "max_new_tokens": int(params["max_new_tokens"]),
806
- "temperature": float(params["temperature"]),
807
- "top_p": float(params["top_p"]),
808
- "n_batch": int(params["n_batch"]),
809
- "repeat_penalty": float(params["repeat_penalty"]),
810
- },
811
- "timings_s": {"total": round(elapsed_total, 3), "gen": res.get("gen_s", None)},
812
- },
813
- }
814
 
815
- clean = sanitize_analyze_output(res["result"], req.text)
816
- # ensure overall explanation is always a useful summary + risk
817
- # clean["overall_explanation"] = generate_overall_explanation(clean)
818
 
819
- _log(rid, f"✅ /analyze ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
820
  return {
821
- "ok": True,
822
- "result": clean,
823
  "meta": {
824
- "request_id": rid,
825
- "light": bool(req.light),
826
- "params": {
827
- "max_new_tokens": int(params["max_new_tokens"]),
828
- "temperature": float(params["temperature"]),
829
- "top_p": float(params["top_p"]),
830
- "n_batch": int(params["n_batch"]),
831
- "repeat_penalty": float(params["repeat_penalty"]),
832
- },
833
- "timings_s": {"total": round(elapsed_total, 3), "gen": res.get("gen_s", None)},
834
- },
835
  }
836
 
837
-
838
  @app.post("/rewrite")
839
- async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
840
- rid = uuid.uuid4().hex[:10]
841
- t0 = time.time()
842
-
843
- _log(
844
- rid,
845
- f"📩 /rewrite received (light={req.light}) text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}",
846
- )
847
-
848
- if not req.text or not req.text.strip():
849
- return {"ok": False, "error": "empty_text"}
850
- if not req.quote or not req.quote.strip():
851
- return {"ok": False, "error": "empty_quote"}
852
-
853
- quote = req.quote.strip()
854
- occurrence = int(req.occurrence or 0)
855
-
856
- if _occurrence_index(req.text, quote, occurrence) == -1:
857
- return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
858
-
859
- params = pick_params(req)
860
- if req.light and req.max_new_tokens is None:
861
- params["max_new_tokens"] = max(params["max_new_tokens"], 80)
862
-
863
- payload = json.dumps(
864
- {
865
- "text": req.text,
866
- "quote": quote,
867
- "fallacy_type": req.fallacy_type,
868
- "rationale": req.rationale,
869
- },
870
- ensure_ascii=False,
871
- )
872
-
873
  async with GEN_LOCK:
874
- _log(rid, "🧠 Generating rewrite replacement_quote...")
875
- res = _chat_completion(
876
- "rewrite",
877
- payload,
878
- bool(req.light),
879
- int(params["max_new_tokens"]),
880
- float(params["temperature"]),
881
- float(params["top_p"]),
882
- int(params["n_batch"]),
883
- float(params["repeat_penalty"]),
884
- )
885
-
886
- elapsed_total = time.time() - t0
887
-
888
- if not res.get("ok"):
889
- _log(rid, f"❌ /rewrite failed: {res.get('error')}")
890
- return {
891
- **res,
892
- "meta": {
893
- "request_id": rid,
894
- "light": bool(req.light),
895
- "params": {
896
- "max_new_tokens": int(params["max_new_tokens"]),
897
- "temperature": float(params["temperature"]),
898
- "top_p": float(params["top_p"]),
899
- "n_batch": int(params["n_batch"]),
900
- "repeat_penalty": float(params["repeat_penalty"]),
901
- },
902
- "timings_s": {"total": round(elapsed_total, 3), "gen": res.get("gen_s", None)},
903
- },
904
- }
905
-
906
- obj = res["result"]
907
- if not isinstance(obj, dict):
908
- return {"ok": False, "error": "bad_rewrite_output"}
909
-
910
- replacement = obj.get("replacement_quote")
911
- if not isinstance(replacement, str):
912
- return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
913
-
914
- replacement = replacement.strip()
915
- if not replacement:
916
- return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
917
-
918
- why = obj.get("why_this_fix")
919
- why = strip_template_sentence(why)
920
-
921
- rep = _replace_nth(req.text, quote, replacement, occurrence)
922
- if not rep.get("ok"):
923
- return {"ok": False, "error": rep.get("error", "replace_failed")}
924
-
925
- _log(rid, f"✅ /rewrite ok total={elapsed_total:.2f}s")
926
- return {
927
- "ok": True,
928
- "result": {
929
- "rewritten_text": rep["rewritten_text"],
930
- "old_quote": quote,
931
- "replacement_quote": replacement,
932
- "why_this_fix": why,
933
- "occurrence": occurrence,
934
- "span": {
935
- "old_start_char": rep["old_start_char"],
936
- "old_end_char": rep["old_end_char"],
937
- "new_start_char": rep["start_char"],
938
- "new_end_char": rep["end_char"],
939
- },
940
- },
941
- "meta": {
942
- "request_id": rid,
943
- "light": bool(req.light),
944
- "params": {
945
- "max_new_tokens": int(params["max_new_tokens"]),
946
- "temperature": float(params["temperature"]),
947
- "top_p": float(params["top_p"]),
948
- "n_batch": int(params["n_batch"]),
949
- "repeat_penalty": float(params["repeat_penalty"]),
950
- },
951
- "timings_s": {"total": round(elapsed_total, 3), "gen": res.get("gen_s", None)},
952
- },
953
- }
 
 
1
  import os
2
  import json
3
  import time
4
+ import math
5
  import asyncio
6
  import re
7
+ from functools import lru_cache
8
+ from typing import Any, Dict, List, Optional
 
9
  from fastapi.middleware.cors import CORSMiddleware
10
+ import nest_asyncio
11
+ import uvicorn
12
+ from fastapi import FastAPI
13
+ from pydantic import BaseModel
14
  from huggingface_hub import hf_hub_download
15
  from llama_cpp import Llama
16
 
17
# ----------------------------
# Runtime configuration
# ----------------------------
ENABLE_FULL_CONFIDENCE = True   # request per-token logprobs to compute technical confidence
USE_FLASH_ATTN = True           # enable flash attention in llama.cpp (GPU builds)
N_BATCH = 1024                  # prompt-processing batch size
N_THREADS = 6                   # CPU threads for llama.cpp
N_CTX = 1024                    # context window, in tokens

# Optional model cache on Google Drive when running inside Colab.
DRIVE_CACHE_DIR = "/content/drive/MyDrive/FADES_Models_Cache"
if os.path.exists("/content/drive"):
    try:
        # exist_ok=True replaces the racy exists()-then-makedirs() check.
        os.makedirs(DRIVE_CACHE_DIR, exist_ok=True)
    except OSError:
        # Best-effort: fall back to the default HF cache if Drive is read-only.
        pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Quantized GGUF weights hosted on the Hugging Face Hub.
GGUF_REPO_ID = "maxime-antoine-dev/fades-mistral-v02-gguf"
GGUF_FILENAME = "mistral_v02_fades.Q4_K_M.gguf"
 
 
# Single lock serialising generations: a llama.cpp context is not re-entrant.
GEN_LOCK = asyncio.Lock()
app = FastAPI(title="FADES Fallacy Detector API (Final)")
 
 
32
 
33
  # ============================
34
  # CORS (for browser front-ends)
 
47
  allow_headers=["*"],
48
  )
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Closed set of labels the model may emit ("none" means no fallacy found).
ALLOWED_LABELS = [
    "none",
    "faulty generalization",
    "false causality",
    "circular reasoning",
    "ad populum",
    "ad hominem",
    "fallacy of logic",
    "appeal to emotion",
    "false dilemma",
    "equivocation",
    "fallacy of extension",
    "fallacy of relevance",
    "fallacy of credibility",
    "miscellaneous",
    "intentional",
]
 
56
 
57
+ # mapping des premiers mots vers les labels (pour regrouper les probas)
58
+ LABEL_MAPPING = {
59
+ "none": ["none"],
60
+ "faulty": ["faulty generalization"],
61
+ "false": ["false causality", "false dilemma"],
62
+ "circular": ["circular reasoning"],
63
+ "ad": ["ad populum", "ad hominem"],
64
+ "fallacy": ["fallacy of logic", "extension", "relevance", "credibility"],
65
+ "appeal": ["appeal to emotion"],
66
+ "equivocation": ["equivocation"],
67
+ "miscellaneous": ["miscellaneous"],
68
+ "intentional": ["intentional"]
69
+ }
70
+
71
+ # On ajoute des exemples (Few-Shot) pour guider le modèle
72
+ ANALYZE_SYS_PROMPT = """You are a logic expert. Detect logical fallacies.
73
+ OUTPUT JSON ONLY.
74
+
75
+ RULES:
76
+ 1. Use ONLY these labels: {labels}
77
+ 2. "rationale": Explain WHY.
78
+ 3. "confidence": 0.0 to 1.0.
79
+
80
+ EXAMPLES (Follow this logic):
81
+
82
+ Input: "You are stupid, so your opinion is wrong."
83
+ Output: {{
84
+ "has_fallacy": true,
85
+ "fallacies": [{{
86
+ "type": "ad hominem",
87
+ "confidence": 0.95,
88
+ "evidence_quotes": ["You are stupid"],
89
+ "rationale": "Direct attack on the person rather than the argument."
90
+ }}],
91
+ "overall_explanation": "Ad Hominem attack."
92
+ }}
93
 
94
+ Input: "Think of the children! We must ban this immediately or they will suffer!"
95
+ Output: {{
96
+ "has_fallacy": true,
97
+ "fallacies": [{{
98
+ "type": "appeal to emotion",
99
+ "confidence": 0.90,
100
+ "evidence_quotes": ["Think of the children", "they will suffer"],
101
+ "rationale": "Uses fear and pity to manipulate opinion without logical proof."
102
+ }}],
103
+ "overall_explanation": "Manipulative emotional appeal."
104
+ }}
105
 
106
+ JSON SCHEMA:
107
  {{
108
  "has_fallacy": boolean,
109
  "fallacies": [
 
116
  ],
117
  "overall_explanation": string
118
  }}
119
+ """
120
# Rewrite prompt.  Filled via .format(fallacy_type=..., rationale=...) — the
# placeholders below were previously missing, so str.format silently dropped
# both kwargs and the model never saw which fallacy it had to fix.  Literal
# braces of the JSON schema are escaped as {{ }}.
REWRITE_SYS_PROMPT = """You are a text editor. Rewrite to remove the fallacy.
Fallacy to remove: {fallacy_type}
Why it is fallacious: {rationale}
Output Format (JSON):
{{
    "rewritten_text": string,
    "why_this_fix": string
}}
"""
127
 
128
def clean_and_repair_json(text: str) -> str:
    """Extract the most plausible JSON object from raw LLM output.

    Strips markdown code fences, then scans for the first balanced ``{...}``
    span that actually parses as JSON.  If no balanced span parses, falls
    back to the widest span between the first ``{`` and the last ``}``; if
    there are no braces at all, returns the cleaned text unchanged (the
    caller handles the resulting parse failure).
    """
    text = text.replace("```json", "").replace("```", "").strip()

    start = text.find("{")
    if start == -1:
        return text

    # Scan for balanced candidates; return the first one that parses.
    depth = 0
    for i, char in enumerate(text[start:], start=start):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                candidate = text[start:i + 1]
                try:
                    json.loads(candidate)
                    return candidate
                except json.JSONDecodeError:
                    # Keep scanning: a later closing brace may yield a
                    # larger span that parses.
                    pass

    # Fallback: widest span between the first '{' and the last '}'.
    end = text.rfind("}")
    if end != -1:
        return text[start:end + 1]
    return text
153
+
154
def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, float]]) -> Dict[str, float]:
    """Group the top-k alternative tokens at ``start_index`` by label family.

    Inspects the candidate tokens the model considered at the position where
    the label started being written, converts their logprobs to probabilities
    and aggregates them per LABEL_MAPPING family, e.g.
    ``{"Ad (populum/hominem)": 0.8, "None": 0.05}``.  Tokens matching no
    family are pooled under ``"_other_"``; entries below 0.1% are dropped.
    """
    if start_index < 0 or start_index >= len(top_logprobs_list):
        return {}
    candidates = top_logprobs_list[start_index]

    distribution: Dict[str, float] = {}
    for token, logprob in candidates.items():
        clean_tok = str(token).replace(" ", "").lower().strip()
        prob = math.exp(logprob)

        matched = False
        for key, group in LABEL_MAPPING.items():
            if clean_tok.startswith(key):
                # Multi-label families get a combined display name built from
                # the last word of each member label.
                if len(group) > 1:
                    group_name = f"{key.capitalize()} ({'/'.join([g.split()[-1] for g in group])})"
                else:
                    group_name = group[0].title()
                distribution[group_name] = distribution.get(group_name, 0.0) + prob
                matched = True
                break

        if not matched:
            distribution["_other_"] = distribution.get("_other_", 0.0) + prob

    # NOTE: the previous version also accumulated an unused total_prob.
    return {k: round(v, 4) for k, v in distribution.items() if v > 0.001}
185
+
186
def extract_label_info(target_label: str, tokens: List[str], logprobs: List[float], top_logprobs: List[Dict]) -> Dict:
    """Locate ``target_label`` in the generated tokens and score it.

    Returns ``{"conf": float, "dist": dict}`` where ``conf`` is the mean
    probability of (up to) the first three tokens of the label and ``dist``
    is the distribution of alternative label families at that position.
    Both are empty/zero when the label is not found or logprobs are missing.
    """
    if not target_label:
        return {"conf": 0.0, "dist": {}}

    def _as_text(tok) -> str:
        # llama.cpp may yield tokens as str or bytes depending on version;
        # decode consistently (the old refine loop used str(bytes) -> "b'..'").
        return tok.decode("utf-8", errors="ignore") if isinstance(tok, bytes) else str(tok)

    target_clean = target_label.lower().strip()
    current_text = ""
    start_index = -1

    # Find where the label starts being written in the token stream.
    for i, token in enumerate(tokens):
        current_text += _as_text(token)
        if target_clean in current_text.lower() and start_index == -1:
            # Back up a few tokens, then refine to the first token that could
            # begin the label (labels are often preceded by a quote token).
            # Heuristic and approximate, but enough to hit the right token.
            start_index = max(0, i - 5)
            for j in range(start_index, i + 1):
                if target_clean[0] in _as_text(tokens[j]).lower():
                    start_index = j
                    break
            break

    conf = 0.0
    dist: Dict[str, float] = {}

    if start_index != -1:
        # Mean probability over the first (up to) 3 tokens of the label.
        window = range(start_index, min(len(logprobs), start_index + 3))
        valid = [math.exp(logprobs[k]) for k in window if logprobs[k] is not None]
        conf = round(sum(valid) / len(valid), 4) if valid else 0.0

        if top_logprobs:
            dist = analyze_alternatives(start_index, top_logprobs)

    return {"conf": conf, "dist": dist}
 
 
 
223
 
224
@lru_cache(maxsize=1)
def get_model():
    """Download (once) and load the GGUF model; cached for the process lifetime."""
    print("📦 Loading Model...")
    try:
        model_path = hf_hub_download(repo_id=GGUF_REPO_ID, filename=GGUF_FILENAME, cache_dir=DRIVE_CACHE_DIR)
        return Llama(
            model_path=model_path,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_batch=N_BATCH,
            verbose=False,
            n_gpu_layers=-1,                    # offload all layers when a GPU build is available
            flash_attn=USE_FLASH_ATTN,
            logits_all=ENABLE_FULL_CONFIDENCE,  # required to read per-token logprobs
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        raise  # bare raise preserves the original traceback (was `raise e`)
 
237
 
238
class AnalyzeRequest(BaseModel):
    """Payload for the /analyze endpoint."""
    text: str                  # text to inspect for fallacies
    max_new_tokens: int = 300  # generation budget
    temperature: float = 0.1   # low temperature keeps the JSON output stable
 
 
 
 
242
 
243
class RewriteRequest(BaseModel):
    """Payload for the /rewrite endpoint."""
    text: str                  # original text containing the fallacy
    fallacy_type: str          # label reported by /analyze
    rationale: str             # why the text is fallacious
    max_new_tokens: int = 300  # generation budget
 
249
@app.get("/health")
def health():
    """Liveness probe; also forces the model to load so the first real call is warm."""
    get_model()
    return {"status": "ok"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
@app.post("/analyze")
async def analyze(req: AnalyzeRequest):
    """Run fallacy detection on ``req.text``.

    Returns ``{"ok", "result", "meta"}`` where ``result`` is the model's JSON
    verdict enriched (when ENABLE_FULL_CONFIDENCE) with a token-level
    ``technical_confidence`` per fallacy and the distribution of alternative
    labels the model hesitated between.
    """
    llm = get_model()
    system_prompt = ANALYZE_SYS_PROMPT.format(labels=", ".join(ALLOWED_LABELS))
    prompt = f"[INST] {system_prompt}\n\nINPUT TEXT:\n{req.text} [/INST]"

    # Ask for the top-20 alternatives per token so the label can be scored.
    req_logprobs = 20 if ENABLE_FULL_CONFIDENCE else None

    # Serialise generations: the llama.cpp context is not re-entrant.
    async with GEN_LOCK:
        start_time = time.time()
        output = llm(
            prompt, max_tokens=req.max_new_tokens, temperature=req.temperature, top_p=0.95,
            repeat_penalty=1.15, stop=["</s>", "```"], echo=False, logprobs=req_logprobs
        )
        gen_time = time.time() - start_time

    raw_text = output['choices'][0]['text']

    tokens = []
    logprobs = []
    top_logprobs = []

    # 'logprobs' can be absent OR present-but-None depending on the
    # llama-cpp-python version; the old `in` check crashed on None.
    lp_data = output['choices'][0].get('logprobs') if ENABLE_FULL_CONFIDENCE else None
    if lp_data:
        tokens = lp_data.get('tokens', [])
        logprobs = lp_data.get('token_logprobs', [])
        top_logprobs = lp_data.get('top_logprobs', [])

    cleaned_text = clean_and_repair_json(raw_text)
    result_json = {}
    success = False
    technical_confidence = 0.0
    label_distribution = {}

    try:
        parsed = json.loads(cleaned_text)
        # The model can emit a bare list/string; only an object is a valid
        # verdict — route anything else to the error branch instead of
        # crashing on .get() with an AttributeError.
        if not isinstance(parsed, dict):
            raise json.JSONDecodeError("expected a JSON object", cleaned_text, 0)
        result_json = parsed
        success = True

        if result_json.get("has_fallacy") and result_json.get("fallacies"):
            for fallacy in result_json["fallacies"]:
                if not isinstance(fallacy, dict):
                    continue
                d_type = fallacy.get("type", "")

                if ENABLE_FULL_CONFIDENCE:
                    info = extract_label_info(d_type, tokens, logprobs, top_logprobs)
                    spec_conf = info["conf"]
                    label_distribution = info["dist"]

                    fallacy["technical_confidence"] = spec_conf
                    fallacy["alternatives"] = label_distribution

                    # Blend the model's self-declared confidence with the
                    # token-level (technical) one; tolerate non-numeric values.
                    try:
                        declared = float(fallacy.get("confidence", 0.8))
                    except (TypeError, ValueError):
                        declared = 0.8
                    fallacy["confidence"] = round((declared + spec_conf) / 2, 2)

                    # Keep the first fallacy's technical confidence for meta.
                    if technical_confidence == 0.0:
                        technical_confidence = spec_conf
        else:
            if ENABLE_FULL_CONFIDENCE:
                # No fallacy reported: still expose what the model hesitated
                # between at the decision point.
                info = extract_label_info("has_fallacy", tokens, logprobs, top_logprobs)
                label_distribution = info["dist"]

    except json.JSONDecodeError:
        result_json = {"error": "JSON Error", "raw": raw_text}
        success = False

    return {
        "ok": success,
        "result": result_json,
        "meta": {
            "tech_conf": technical_confidence,
            "distribution": label_distribution,
            "time": round(gen_time, 2)
        }
    }
327
 
 
328
@app.post("/rewrite")
async def rewrite(req: RewriteRequest):
    """Rewrite ``req.text`` to remove the reported fallacy.

    Returns ``{"ok": bool, "result": dict}``; on parse failure the raw model
    output is returned under ``result["raw"]``.
    """
    llm = get_model()
    system_prompt = REWRITE_SYS_PROMPT.format(fallacy_type=req.fallacy_type, rationale=req.rationale)
    prompt = f"[INST] {system_prompt}\n\nTEXT TO FIX:\n{req.text} [/INST]"

    async with GEN_LOCK:
        # BUGFIX: do not stop on "}" — the stop string is excluded from the
        # completion, so the JSON object lost its closing brace and parsing
        # failed on every call.
        output = llm(prompt, max_tokens=req.max_new_tokens, temperature=0.7, repeat_penalty=1.1, stop=["</s>"])

    raw = output['choices'][0]['text']
    try:
        res = json.loads(clean_and_repair_json(raw))
        ok = isinstance(res, dict)
        if not ok:
            # A bare list/string is not a usable rewrite payload.
            res = {"raw": raw}
    except json.JSONDecodeError:
        res = {"raw": raw}
        ok = False
    return {"ok": ok, "result": res}
342
+
343
if __name__ == "__main__":
    # PORT can be overridden via the environment (e.g. HF Spaces uses 7860);
    # defaults to the previous hard-coded 8000 for backward compatibility.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -2,3 +2,4 @@ fastapi>=0.110
2
  uvicorn[standard]>=0.27
3
  huggingface_hub>=0.23
4
  llama-cpp-python==0.2.90
 
 
2
  uvicorn[standard]>=0.27
3
  huggingface_hub>=0.23
4
  llama-cpp-python==0.2.90
5
+ nest-asyncio
utils.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, Optional, List
4
+ from prompts import ALLOWED_LABELS
5
+
6
# ----------------------------
# Robust JSON extraction
# ----------------------------
def stop_at_complete_json(text: str) -> Optional[str]:
    """Return the first balanced ``{...}`` span of *text*, or None.

    String-aware: braces inside JSON string literals (including escaped
    quotes) do not affect the depth count.
    """
    start = text.find("{")
    if start == -1:
        return None

    depth = 0
    in_str = False
    esc = False

    for i in range(start, len(text)):
        ch = text[i]
        if in_str:
            # Inside a string literal: track escapes and the closing quote only.
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            continue

        if ch == '"':
            in_str = True
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return None


def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
    """Parse the first JSON object found in *s*; None if absent or invalid."""
    cut = stop_at_complete_json(s) or s
    start = cut.find("{")
    end = cut.rfind("}")
    if start == -1 or end == -1 or end <= start:
        return None
    cand = cut[start : end + 1].strip()
    try:
        return json.loads(cand)
    except json.JSONDecodeError:  # narrowed from a bare `except Exception`
        return None
52
+
53
+
54
+ # ----------------------------
55
+ # Post-processing: remove template sentence
56
+ # ----------------------------
57
+ _TEMPLATE_RE = re.compile(
58
+ r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\b\.?",
59
+ flags=re.IGNORECASE,
60
+ )
61
+
62
+ def strip_template_sentence(text: str) -> str:
63
+ if not isinstance(text, str):
64
+ return ""
65
+ out = _TEMPLATE_RE.sub("", text)
66
+ out = out.replace("..", ".").strip()
67
+ out = re.sub(r"\s{2,}", " ", out)
68
+ out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
69
+ return out
70
+
71
+
72
+ # ----------------------------
73
+ # Output sanitation / validation
74
+ # ----------------------------
75
+ def _clamp01(x: Any, default: float = 0.5) -> float:
76
+ try:
77
+ v = float(x)
78
+ except Exception:
79
+ return default
80
+ return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
81
+
82
+
83
+ def _is_allowed_label(lbl: Any) -> bool:
84
+ return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"
85
+
86
+
87
def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
    """Validate and normalise a raw model verdict into the public shape.

    Drops unknown labels, clamps confidences to [0, 1] (two decimals), keeps
    at most three evidence quotes (each verified to occur in *input_text* and
    capped at 240 chars), and strips the template sentence from explanations.
    ``has_fallacy`` is forced to False when no fallacy survives filtering.
    """
    raw_fallacies = obj.get("fallacies", [])
    if not isinstance(raw_fallacies, list):
        raw_fallacies = []

    cleaned_fallacies = []
    for item in raw_fallacies:
        if not isinstance(item, dict):
            continue
        label = item.get("type")
        if not _is_allowed_label(label):
            continue

        confidence = float(f"{_clamp01(item.get('confidence', 0.5)):.2f}")

        quotes = item.get("evidence_quotes", [])
        if not isinstance(quotes, list):
            quotes = []

        kept_quotes: List[str] = []
        for quote in quotes:
            if not isinstance(quote, str):
                continue
            stripped = quote.strip()
            if not stripped or stripped not in input_text:
                continue
            if len(stripped) <= 240:
                kept_quotes.append(stripped)
            else:
                head = stripped[:240]
                kept_quotes.append(head if head in input_text else stripped)

        cleaned_fallacies.append(
            {
                "type": label,
                "confidence": confidence,
                "evidence_quotes": kept_quotes[:3],
                "rationale": strip_template_sentence(str(item.get("rationale", "")).strip()),
            }
        )

    overall = strip_template_sentence(str(obj.get("overall_explanation", "")).strip())
    has_fallacy = bool(obj.get("has_fallacy", False)) and bool(cleaned_fallacies)

    return {
        "has_fallacy": has_fallacy,
        "fallacies": cleaned_fallacies,
        "overall_explanation": overall,
    }
143
+
144
+
145
+ # ----------------------------
146
+ # Replace helpers
147
+ # ----------------------------
148
+ def occurrence_index(text: str, sub: str, occurrence: int) -> int:
149
+ if occurrence < 0:
150
+ return -1
151
+ start = 0
152
+ for _ in range(occurrence + 1):
153
+ idx = text.find(sub, start)
154
+ if idx == -1:
155
+ return -1
156
+ start = idx + max(1, len(sub))
157
+ return idx
158
+
159
+
160
+ def replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
161
+ idx = occurrence_index(text, old, occurrence)
162
+ if idx == -1:
163
+ return {"ok": False, "error": "quote_not_found"}
164
+ return {
165
+ "ok": True,
166
+ "rewritten_text": text[:idx] + new + text[idx + len(old) :],
167
+ "start_char": idx,
168
+ "end_char": idx + len(new),
169
+ "old_start_char": idx,
170
+ "old_end_char": idx + len(old),
171
+ }