maxime-antoine-dev committed on
Commit
1f2a732
·
1 Parent(s): 66ca5c9

increased max tokens

Browse files
Files changed (1) hide show
  1. main.py +73 -89
main.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import time
5
  import uuid
6
  import asyncio
 
7
  from typing import Any, Dict, Optional, List
8
  from functools import lru_cache
9
 
@@ -20,33 +21,27 @@ from llama_cpp import Llama
20
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
21
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
22
 
23
- # Model load params (fixed once at startup)
24
  N_CTX = int(os.getenv("N_CTX", "1536"))
25
  CPU_COUNT = os.cpu_count() or 4
26
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
  N_BATCH = int(os.getenv("N_BATCH", "256"))
28
 
29
- # Default generation params ("normal")
30
  MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
31
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
32
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
33
 
34
- # "Light" generation params
35
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
36
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
37
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
38
-
39
- # "Light" runtime knobs
40
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
41
 
42
- # One request at a time on CPU
43
  GEN_LOCK = asyncio.Lock()
44
 
45
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
46
 
47
 
48
  # ============================
49
- # CORS (for browser front-ends)
50
  # ============================
51
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
52
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
@@ -67,9 +62,7 @@ app.add_middleware(
67
  # Schemas
68
  # ============================
69
  class GenParams(BaseModel):
70
- # if True => use "light" parameters
71
  light: bool = False
72
- # optional overrides (applied after picking light/normal defaults)
73
  max_new_tokens: Optional[int] = None
74
  temperature: Optional[float] = None
75
  top_p: Optional[float] = None
@@ -110,7 +103,6 @@ ALLOWED_LABELS = [
110
 
111
  LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
112
 
113
- # Stronger /analyze prompt: forces specificity and forbids the "template" sentence
114
  ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
115
 
116
  You MUST choose labels ONLY from this list (exact string):
@@ -134,33 +126,48 @@ Hard rules:
134
  - Output ONLY JSON. No markdown. No extra text.
135
  - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
136
  - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
137
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
138
- It MUST NOT be always the same across examples. Calibrate it:
139
  * 0.90–1.00: very explicit, unambiguous match, clear cue words.
140
  * 0.70–0.89: strong match but some ambiguity or missing premise.
141
  * 0.40–0.69: plausible but weak/partial evidence.
142
  * 0.10–0.39: very uncertain.
143
- - The rationale MUST be specific to the evidence (2–4 sentences):
144
- Explain (1) what the quote claims, (2) why that matches the fallacy label,
145
- (3) what logical step is invalid or missing.
146
- DO NOT use generic filler. Do NOT reuse stock phrases.
147
- In particular, you MUST NOT output this sentence:
 
 
 
 
 
 
 
 
 
 
 
148
  "The input contains fallacious reasoning consistent with the predicted type(s)."
149
- - overall_explanation MUST also be specific (2–5 sentences): summarize the reasoning issues and reference the key cue(s).
150
- - If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
 
 
 
151
 
152
  INPUT:
153
  {{text}}
154
 
155
  OUTPUT:"""
156
 
157
- # /rewrite prompt: returns ONLY a replacement substring for the quote (server does the replacement)
 
158
  REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
159
 
160
  Goal:
161
  - You MUST propose a replacement for the QUOTE only.
162
  - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
163
- - The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
164
  - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
165
  - Do NOT introduce new fallacies.
166
 
@@ -176,16 +183,16 @@ Hard rules:
176
  - why_this_fix: 1–3 sentences, specific.
177
 
178
  INPUT_TEXT:
179
- {text}
180
 
181
  QUOTE_TO_REWRITE:
182
- {quote}
183
 
184
  FALLACY_TYPE:
185
- {fallacy_type}
186
 
187
  WHY_FALLACIOUS:
188
- {rationale}
189
 
190
  OUTPUT:"""
191
 
@@ -198,11 +205,12 @@ def build_analyze_messages(text: str) -> List[Dict[str, str]]:
198
 
199
 
200
  def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
201
- prompt = REWRITE_PROMPT.format(
202
- text=text,
203
- quote=quote,
204
- fallacy_type=fallacy_type,
205
- rationale=rationale,
 
206
  )
207
  return [
208
  {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
@@ -367,7 +375,6 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
367
  if req.top_p is not None:
368
  params["top_p"] = float(req.top_p)
369
 
370
- # Safety caps
371
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
372
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
373
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
@@ -375,6 +382,28 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
375
  return params
376
 
377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  # ============================
379
  # Output sanitation / validation
380
  # ============================
@@ -383,11 +412,7 @@ def _clamp01(x: Any, default: float = 0.5) -> float:
383
  v = float(x)
384
  except Exception:
385
  return default
386
- if v < 0.0:
387
- return 0.0
388
- if v > 1.0:
389
- return 1.0
390
- return v
391
 
392
 
393
  def _is_allowed_label(lbl: Any) -> bool:
@@ -395,10 +420,6 @@ def _is_allowed_label(lbl: Any) -> bool:
395
 
396
 
397
  def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
398
- """
399
- Enforce shape, clamp confidence, drop invalid labels,
400
- enforce evidence_quotes being substrings.
401
- """
402
  has_fallacy = bool(obj.get("has_fallacy", False))
403
  fallacies_in = obj.get("fallacies", [])
404
  if not isinstance(fallacies_in, list):
@@ -413,12 +434,12 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
413
  continue
414
 
415
  conf = _clamp01(f.get("confidence", 0.5))
416
- # keep 2 decimals for nicer UI
417
  conf = float(f"{conf:.2f}")
418
 
419
  ev = f.get("evidence_quotes", [])
420
  if not isinstance(ev, list):
421
  ev = []
 
422
  ev_clean: List[str] = []
423
  for q in ev:
424
  if not isinstance(q, str):
@@ -426,23 +447,15 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
426
  qq = q.strip()
427
  if not qq:
428
  continue
429
- # evidence MUST be substring
430
  if qq in input_text:
431
- # keep short, but don't hard-cut if it breaks substring matching
432
  if len(qq) <= 240:
433
  ev_clean.append(qq)
434
  else:
435
- # if too long, try to keep first 240 if still substring (rare); else keep as-is
436
  short = qq[:240]
437
- if short in input_text:
438
- ev_clean.append(short)
439
- else:
440
- ev_clean.append(qq)
441
 
442
- rationale = f.get("rationale")
443
- if not isinstance(rationale, str):
444
- rationale = ""
445
- rationale = rationale.strip()
446
 
447
  fallacies_out.append(
448
  {
@@ -453,12 +466,9 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
453
  }
454
  )
455
 
456
- overall = obj.get("overall_explanation")
457
- if not isinstance(overall, str):
458
- overall = ""
459
- overall = overall.strip()
460
 
461
- # If no fallacies survived sanitation, force no-fallacy state
462
  if len(fallacies_out) == 0:
463
  has_fallacy = False
464
 
@@ -563,18 +573,12 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
563
  return {"ok": False, "error": "empty_text"}
564
 
565
  params = pick_params(req)
566
- _log(
567
- rid,
568
- f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
569
- )
570
-
571
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
572
 
573
  async with GEN_LOCK:
574
  t_lock = time.time()
575
-
576
- _log(rid, "🧠 Generating analyze...")
577
  t_gen0 = time.time()
 
578
  res = _cached_chat_completion(
579
  "analyze",
580
  payload,
@@ -584,13 +588,13 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
584
  float(params["top_p"]),
585
  int(params["n_batch"]),
586
  )
 
587
  t_gen1 = time.time()
588
 
589
  elapsed_total = time.time() - t0
590
  elapsed_lock = time.time() - t_lock
591
 
592
  if not res.get("ok"):
593
- _log(rid, f"❌ /analyze failed: {res.get('error')}")
594
  return {
595
  **res,
596
  "meta": {
@@ -606,10 +610,8 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
606
  },
607
  }
608
 
609
- # sanitize output for stability (substrings, labels, confidence clamp)
610
  clean = sanitize_analyze_output(res["result"], req.text)
611
 
612
- _log(rid, f"✅ /analyze ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
613
  return {
614
  "ok": True,
615
  "result": clean,
@@ -636,11 +638,6 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
636
  rid = uuid.uuid4().hex[:10]
637
  t0 = time.time()
638
 
639
- _log(
640
- rid,
641
- f"📩 /rewrite received (light={req.light}) text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}",
642
- )
643
-
644
  if not req.text or not req.text.strip():
645
  return {"ok": False, "error": "empty_text"}
646
  if not req.quote or not req.quote.strip():
@@ -649,21 +646,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
649
  quote = req.quote.strip()
650
  occurrence = int(req.occurrence or 0)
651
 
652
- # validate quote existence early
653
  if _occurrence_index(req.text, quote, occurrence) == -1:
654
  return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
655
 
656
  params = pick_params(req)
657
- # rewrite generally needs a bit more room than light analyze if you want fluent replacements
658
- # (still controllable by request overrides)
659
  if req.light and req.max_new_tokens is None:
660
  params["max_new_tokens"] = max(params["max_new_tokens"], 80)
661
 
662
- _log(
663
- rid,
664
- f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
665
- )
666
-
667
  payload = json.dumps(
668
  {
669
  "text": req.text,
@@ -676,9 +665,8 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
676
 
677
  async with GEN_LOCK:
678
  t_lock = time.time()
679
-
680
- _log(rid, "🧠 Generating rewrite replacement_quote...")
681
  t_gen0 = time.time()
 
682
  res = _cached_chat_completion(
683
  "rewrite",
684
  payload,
@@ -688,13 +676,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
688
  float(params["top_p"]),
689
  int(params["n_batch"]),
690
  )
 
691
  t_gen1 = time.time()
692
 
693
  elapsed_total = time.time() - t0
694
  elapsed_lock = time.time() - t_lock
695
 
696
  if not res.get("ok"):
697
- _log(rid, f"❌ /rewrite failed: {res.get('error')}")
698
  return {
699
  **res,
700
  "meta": {
@@ -722,17 +710,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
722
  if not replacement:
723
  return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
724
 
725
- why = obj.get("why_this_fix")
726
- if not isinstance(why, str):
727
- why = ""
728
- why = why.strip()
729
 
730
- # server-side enforced: ONLY the quote is changed
731
  rep = _replace_nth(req.text, quote, replacement, occurrence)
732
  if not rep.get("ok"):
733
  return {"ok": False, "error": rep.get("error", "replace_failed")}
734
 
735
- _log(rid, f"✅ /rewrite ok total={elapsed_total:.2f}s")
736
  return {
737
  "ok": True,
738
  "result": {
 
4
  import time
5
  import uuid
6
  import asyncio
7
+ import re
8
  from typing import Any, Dict, Optional, List
9
  from functools import lru_cache
10
 
 
21
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
22
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
23
 
 
24
  N_CTX = int(os.getenv("N_CTX", "1536"))
25
  CPU_COUNT = os.cpu_count() or 4
26
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
  N_BATCH = int(os.getenv("N_BATCH", "256"))
28
 
 
29
  MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
30
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
31
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
32
 
 
33
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
34
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
35
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
 
 
36
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
37
 
 
38
  GEN_LOCK = asyncio.Lock()
39
 
40
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
41
 
42
 
43
  # ============================
44
+ # CORS
45
  # ============================
46
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
47
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
 
62
  # Schemas
63
  # ============================
64
  class GenParams(BaseModel):
 
65
  light: bool = False
 
66
  max_new_tokens: Optional[int] = None
67
  temperature: Optional[float] = None
68
  top_p: Optional[float] = None
 
103
 
104
  LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
105
 
 
106
  ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
107
 
108
  You MUST choose labels ONLY from this list (exact string):
 
126
  - Output ONLY JSON. No markdown. No extra text.
127
  - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
128
  - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
129
+ - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals) and MUST vary when appropriate.
130
+ Calibrate it:
131
  * 0.90–1.00: very explicit, unambiguous match, clear cue words.
132
  * 0.70–0.89: strong match but some ambiguity or missing premise.
133
  * 0.40–0.69: plausible but weak/partial evidence.
134
  * 0.10–0.39: very uncertain.
135
+
136
+ About rationale vs overall_explanation:
137
+ - Each fallacy.rationale MUST be QUOTE-LOCAL (2–4 sentences):
138
+ (1) restate what the quote is asserting,
139
+ (2) identify the missing/invalid inference step,
140
+ (3) explain why that matches the selected fallacy label.
141
+ Mention at least one concrete cue from the quote (e.g., escalation, popularity claim, personal attack, etc.).
142
+ - overall_explanation MUST be GLOBAL and MUST NOT restate rationales sentence-by-sentence.
143
+ Instead (2–5 sentences):
144
+ (a) summarize the overall reasoning pattern(s),
145
+ (b) explain why that pattern is harmful,
146
+ (c) give plausible consequences (bad decisions, distorted debate, polarization, unjustified fear, scapegoating).
147
+
148
+ Anti-template rule:
149
+ - DO NOT use generic filler or stock phrases.
150
+ - You MUST NOT output this sentence (or close variants):
151
  "The input contains fallacious reasoning consistent with the predicted type(s)."
152
+
153
+ If no fallacy:
154
+ - has_fallacy=false
155
+ - fallacies=[]
156
+ - overall_explanation briefly explains why the reasoning is acceptable.
157
 
158
  INPUT:
159
  {{text}}
160
 
161
  OUTPUT:"""
162
 
163
+ # IMPORTANT: do NOT use .format() on a template containing JSON braces.
164
+ # Use custom tokens and .replace() to avoid KeyError.
165
  REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
166
 
167
  Goal:
168
  - You MUST propose a replacement for the QUOTE only.
169
  - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
170
+ - The replacement MUST be plausible in the surrounding context and similar length (roughly +/- 40%).
171
  - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
172
  - Do NOT introduce new fallacies.
173
 
 
183
  - why_this_fix: 1–3 sentences, specific.
184
 
185
  INPUT_TEXT:
186
+ <<TEXT>>
187
 
188
  QUOTE_TO_REWRITE:
189
+ <<QUOTE>>
190
 
191
  FALLACY_TYPE:
192
+ <<FALLACY_TYPE>>
193
 
194
  WHY_FALLACIOUS:
195
+ <<RATIONALE>>
196
 
197
  OUTPUT:"""
198
 
 
205
 
206
 
207
  def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
208
+ prompt = (
209
+ REWRITE_PROMPT
210
+ .replace("<<TEXT>>", text)
211
+ .replace("<<QUOTE>>", quote)
212
+ .replace("<<FALLACY_TYPE>>", fallacy_type)
213
+ .replace("<<RATIONALE>>", rationale)
214
  )
215
  return [
216
  {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
 
375
  if req.top_p is not None:
376
  params["top_p"] = float(req.top_p)
377
 
 
378
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
379
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
380
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
 
382
  return params
383
 
384
 
385
# ============================
# Post-processing: remove template sentence
# ============================
# The model sometimes emits this exact stock sentence despite the prompt's
# anti-template rule; strip it (case-insensitive, optional trailing period)
# from rationales / overall explanations.
#
# BUGFIX: the previous pattern ended in r"type\(s\)\b\.?".  A word boundary
# cannot match between ")" and "." (both are non-word characters), so the
# regex never matched the sentence at all and the template survived.  The
# stray \b after the closing parenthesis has been removed.
_TEMPLATE_RE = re.compile(
    r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\.?",
    flags=re.IGNORECASE,
)


def strip_template_sentence(text: str) -> str:
    """Remove the forbidden template sentence from *text* and tidy leftovers.

    Returns "" for any non-string input.  After removal, collapses doubled
    periods and runs of whitespace, and strips leading punctuation the
    deletion may have exposed.
    """
    if not isinstance(text, str):
        return ""
    out = _TEMPLATE_RE.sub("", text)

    # Cleanup common leftovers (double spaces, leading punctuation).
    # NOTE: the ".." -> "." replace is a crude heuristic; it also shortens a
    # legitimate ellipsis ("...") to "..", but that matches prior behavior.
    out = out.replace("..", ".").strip()
    out = re.sub(r"\s{2,}", " ", out)
    out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
    return out
405
+
406
+
407
  # ============================
408
  # Output sanitation / validation
409
  # ============================
 
412
  v = float(x)
413
  except Exception:
414
  return default
415
+ return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
 
 
 
 
416
 
417
 
418
  def _is_allowed_label(lbl: Any) -> bool:
 
420
 
421
 
422
  def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
 
 
 
 
423
  has_fallacy = bool(obj.get("has_fallacy", False))
424
  fallacies_in = obj.get("fallacies", [])
425
  if not isinstance(fallacies_in, list):
 
434
  continue
435
 
436
  conf = _clamp01(f.get("confidence", 0.5))
 
437
  conf = float(f"{conf:.2f}")
438
 
439
  ev = f.get("evidence_quotes", [])
440
  if not isinstance(ev, list):
441
  ev = []
442
+
443
  ev_clean: List[str] = []
444
  for q in ev:
445
  if not isinstance(q, str):
 
447
  qq = q.strip()
448
  if not qq:
449
  continue
 
450
  if qq in input_text:
 
451
  if len(qq) <= 240:
452
  ev_clean.append(qq)
453
  else:
 
454
  short = qq[:240]
455
+ ev_clean.append(short if short in input_text else qq)
 
 
 
456
 
457
+ rationale = f.get("rationale", "")
458
+ rationale = strip_template_sentence(rationale.strip())
 
 
459
 
460
  fallacies_out.append(
461
  {
 
466
  }
467
  )
468
 
469
+ overall = obj.get("overall_explanation", "")
470
+ overall = strip_template_sentence(overall.strip())
 
 
471
 
 
472
  if len(fallacies_out) == 0:
473
  has_fallacy = False
474
 
 
573
  return {"ok": False, "error": "empty_text"}
574
 
575
  params = pick_params(req)
 
 
 
 
 
576
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
577
 
578
  async with GEN_LOCK:
579
  t_lock = time.time()
 
 
580
  t_gen0 = time.time()
581
+
582
  res = _cached_chat_completion(
583
  "analyze",
584
  payload,
 
588
  float(params["top_p"]),
589
  int(params["n_batch"]),
590
  )
591
+
592
  t_gen1 = time.time()
593
 
594
  elapsed_total = time.time() - t0
595
  elapsed_lock = time.time() - t_lock
596
 
597
  if not res.get("ok"):
 
598
  return {
599
  **res,
600
  "meta": {
 
610
  },
611
  }
612
 
 
613
  clean = sanitize_analyze_output(res["result"], req.text)
614
 
 
615
  return {
616
  "ok": True,
617
  "result": clean,
 
638
  rid = uuid.uuid4().hex[:10]
639
  t0 = time.time()
640
 
 
 
 
 
 
641
  if not req.text or not req.text.strip():
642
  return {"ok": False, "error": "empty_text"}
643
  if not req.quote or not req.quote.strip():
 
646
  quote = req.quote.strip()
647
  occurrence = int(req.occurrence or 0)
648
 
 
649
  if _occurrence_index(req.text, quote, occurrence) == -1:
650
  return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
651
 
652
  params = pick_params(req)
 
 
653
  if req.light and req.max_new_tokens is None:
654
  params["max_new_tokens"] = max(params["max_new_tokens"], 80)
655
 
 
 
 
 
 
656
  payload = json.dumps(
657
  {
658
  "text": req.text,
 
665
 
666
  async with GEN_LOCK:
667
  t_lock = time.time()
 
 
668
  t_gen0 = time.time()
669
+
670
  res = _cached_chat_completion(
671
  "rewrite",
672
  payload,
 
676
  float(params["top_p"]),
677
  int(params["n_batch"]),
678
  )
679
+
680
  t_gen1 = time.time()
681
 
682
  elapsed_total = time.time() - t0
683
  elapsed_lock = time.time() - t_lock
684
 
685
  if not res.get("ok"):
 
686
  return {
687
  **res,
688
  "meta": {
 
710
  if not replacement:
711
  return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
712
 
713
+ why = obj.get("why_this_fix", "")
714
+ why = strip_template_sentence(why.strip())
 
 
715
 
 
716
  rep = _replace_nth(req.text, quote, replacement, occurrence)
717
  if not rep.get("ok"):
718
  return {"ok": False, "error": rep.get("error", "replace_failed")}
719
 
 
720
  return {
721
  "ok": True,
722
  "result": {