maxime-antoine-dev committed on
Commit
afd3da3
·
1 Parent(s): 992feaf

refactored code

Browse files
Files changed (5) hide show
  1. logger_utils.py +29 -0
  2. main.py +96 -497
  3. model_runtime.py +129 -0
  4. prompts.py +113 -0
  5. utils.py +171 -0
logger_utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from contextlib import contextmanager
3
+
4
def log(rid: str, msg: str) -> None:
    """Write one log line to stdout, prefixed with the request id."""
    print(f"[{rid}] {msg}", flush=True)


class StepLogger:
    """
    Lightweight structured step logger for server logs.

    Tags every line with the request id and route, and times named steps
    via the `step()` context manager.
    """

    def __init__(self, rid: str, route: str):
        # Request id and route are prepended to every emitted line.
        self.rid = rid
        self.route = route

    def info(self, message: str) -> None:
        """Emit a single route-tagged log line for this request."""
        log(self.rid, f"{self.route} {message}")

    @contextmanager
    def step(self, name: str):
        """Log start/ok/fail (with elapsed seconds) around a code block.

        Exceptions are logged with their repr and re-raised unchanged.
        """
        started = time.time()
        self.info(f"step={name} start")
        try:
            yield
        except Exception as e:
            elapsed = time.time() - started
            self.info(f"step={name} fail ({elapsed:.3f}s) err={repr(e)}")
            raise
        else:
            elapsed = time.time() - started
            self.info(f"step={name} ok ({elapsed:.3f}s)")
main.py CHANGED
@@ -1,18 +1,17 @@
1
- # main.py
2
  import os
3
  import json
4
  import time
5
  import uuid
6
  import asyncio
7
- import re
8
- from typing import Any, Dict, Optional, List
9
- from functools import lru_cache
10
 
11
  from fastapi import FastAPI
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from pydantic import BaseModel, Field
14
- from huggingface_hub import hf_hub_download
15
- from llama_cpp import Llama
 
 
16
 
17
 
18
  # ============================
@@ -67,11 +66,9 @@ class GenParams(BaseModel):
67
  temperature: Optional[float] = None
68
  top_p: Optional[float] = None
69
 
70
-
71
  class AnalyzeRequest(GenParams):
72
  text: str
73
 
74
-
75
  class RewriteRequest(GenParams):
76
  text: str
77
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
@@ -81,251 +78,17 @@ class RewriteRequest(GenParams):
81
 
82
 
83
  # ============================
84
- # Labels & Prompts
85
- # ============================
86
- ALLOWED_LABELS = [
87
- "none",
88
- "faulty generalization",
89
- "false causality",
90
- "circular reasoning",
91
- "ad populum",
92
- "ad hominem",
93
- "fallacy of logic",
94
- "appeal to emotion",
95
- "false dilemma",
96
- "equivocation",
97
- "fallacy of extension",
98
- "fallacy of relevance",
99
- "fallacy of credibility",
100
- "miscellaneous",
101
- "intentional",
102
- ]
103
-
104
- LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
105
-
106
- ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
107
-
108
- You MUST choose labels ONLY from this list (exact string):
109
- {LABELS_STR}
110
-
111
- You MUST return ONLY valid JSON with this schema:
112
- {{
113
- "has_fallacy": boolean,
114
- "fallacies": [
115
- {{
116
- "type": string,
117
- "confidence": number,
118
- "evidence_quotes": [string],
119
- "rationale": string
120
- }}
121
- ],
122
- "overall_explanation": string
123
- }}
124
-
125
- Hard rules:
126
- - Output ONLY JSON. No markdown. No extra text.
127
- - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
128
- - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
129
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals) and MUST vary when appropriate.
130
- Calibrate it:
131
- * 0.90–1.00: very explicit, unambiguous match, clear cue words.
132
- * 0.70–0.89: strong match but some ambiguity or missing premise.
133
- * 0.40–0.69: plausible but weak/partial evidence.
134
- * 0.10–0.39: very uncertain.
135
-
136
- About rationale vs overall_explanation:
137
- - Each fallacy.rationale MUST be QUOTE-LOCAL (2–4 sentences):
138
- (1) restate what the quote is asserting,
139
- (2) identify the missing/invalid inference step,
140
- (3) explain why that matches the selected fallacy label.
141
- Mention at least one concrete cue from the quote (e.g., escalation, popularity claim, personal attack, etc.).
142
- - overall_explanation MUST be GLOBAL and MUST NOT restate rationales sentence-by-sentence.
143
- Instead (2–5 sentences):
144
- (a) summarize the overall reasoning pattern(s),
145
- (b) explain why that pattern is harmful,
146
- (c) give plausible consequences (bad decisions, distorted debate, polarization, unjustified fear, scapegoating).
147
-
148
- Anti-template rule:
149
- - DO NOT use generic filler or stock phrases.
150
- - You MUST NOT output this sentence (or close variants):
151
- "The input contains fallacious reasoning consistent with the predicted type(s)."
152
-
153
- If no fallacy:
154
- - has_fallacy=false
155
- - fallacies=[]
156
- - overall_explanation briefly explains why the reasoning is acceptable.
157
-
158
- INPUT:
159
- {{text}}
160
-
161
- OUTPUT:"""
162
-
163
- # IMPORTANT: do NOT use .format() on a template containing JSON braces.
164
- # Use custom tokens and .replace() to avoid KeyError.
165
- REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
166
-
167
- Goal:
168
- - You MUST propose a replacement for the QUOTE only.
169
- - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
170
- - The replacement MUST be plausible in the surrounding context and similar length (roughly +/- 40%).
171
- - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
172
- - Do NOT introduce new fallacies.
173
-
174
- Return ONLY valid JSON with this schema:
175
- {
176
- "replacement_quote": string,
177
- "why_this_fix": string
178
- }
179
-
180
- Hard rules:
181
- - Output ONLY JSON. No markdown. No extra text.
182
- - replacement_quote should be standalone text (no surrounding quotes).
183
- - why_this_fix: 1–3 sentences, specific.
184
-
185
- INPUT_TEXT:
186
- <<TEXT>>
187
-
188
- QUOTE_TO_REWRITE:
189
- <<QUOTE>>
190
-
191
- FALLACY_TYPE:
192
- <<FALLACY_TYPE>>
193
-
194
- WHY_FALLACIOUS:
195
- <<RATIONALE>>
196
-
197
- OUTPUT:"""
198
-
199
-
200
- def build_analyze_messages(text: str) -> List[Dict[str, str]]:
201
- return [
202
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
203
- {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)},
204
- ]
205
-
206
-
207
- def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
208
- prompt = (
209
- REWRITE_PROMPT
210
- .replace("<<TEXT>>", text)
211
- .replace("<<QUOTE>>", quote)
212
- .replace("<<FALLACY_TYPE>>", fallacy_type)
213
- .replace("<<RATIONALE>>", rationale)
214
- )
215
- return [
216
- {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
217
- {"role": "user", "content": prompt},
218
- ]
219
-
220
-
221
- # ============================
222
- # Logging
223
- # ============================
224
- def _log(rid: str, msg: str):
225
- print(f"[{rid}] {msg}", flush=True)
226
-
227
-
228
- # ============================
229
- # Robust JSON extraction
230
- # ============================
231
- def stop_at_complete_json(text: str) -> Optional[str]:
232
- start = text.find("{")
233
- if start == -1:
234
- return None
235
-
236
- depth = 0
237
- in_str = False
238
- esc = False
239
-
240
- for i in range(start, len(text)):
241
- ch = text[i]
242
- if in_str:
243
- if esc:
244
- esc = False
245
- elif ch == "\\":
246
- esc = True
247
- elif ch == '"':
248
- in_str = False
249
- continue
250
-
251
- if ch == '"':
252
- in_str = True
253
- continue
254
- if ch == "{":
255
- depth += 1
256
- elif ch == "}":
257
- depth -= 1
258
- if depth == 0:
259
- return text[start : i + 1]
260
- return None
261
-
262
-
263
- def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
264
- cut = stop_at_complete_json(s) or s
265
- start = cut.find("{")
266
- end = cut.rfind("}")
267
- if start == -1 or end == -1 or end <= start:
268
- return None
269
- cand = cut[start : end + 1].strip()
270
- try:
271
- return json.loads(cand)
272
- except Exception:
273
- return None
274
-
275
-
276
- # ============================
277
- # Model load
278
  # ============================
279
- llm: Optional[Llama] = None
280
- model_path: Optional[str] = None
281
- load_error: Optional[str] = None
282
- loaded_at_ts: Optional[float] = None
283
-
284
-
285
- def load_llama() -> None:
286
- global llm, model_path, load_error, loaded_at_ts
287
-
288
- print("=== FADES startup ===", flush=True)
289
- print(f"GGUF_REPO_ID={GGUF_REPO_ID}", flush=True)
290
- print(f"GGUF_FILENAME={GGUF_FILENAME}", flush=True)
291
- print(f"N_CTX={N_CTX} N_THREADS={N_THREADS} N_BATCH={N_BATCH}", flush=True)
292
-
293
- try:
294
- t0 = time.time()
295
- mp = hf_hub_download(
296
- repo_id=GGUF_REPO_ID,
297
- filename=GGUF_FILENAME,
298
- token=os.getenv("HF_TOKEN"),
299
- )
300
- t1 = time.time()
301
- print(f"✅ GGUF downloaded: {mp} ({t1 - t0:.1f}s)", flush=True)
302
-
303
- t2 = time.time()
304
- llm_local = Llama(
305
- model_path=mp,
306
- n_ctx=N_CTX,
307
- n_threads=N_THREADS,
308
- n_batch=N_BATCH,
309
- n_gpu_layers=0,
310
- verbose=False,
311
- )
312
- t3 = time.time()
313
- print(f"✅ Model loaded: ({t3 - t2:.1f}s) n_ctx={N_CTX} threads={N_THREADS} batch={N_BATCH}", flush=True)
314
-
315
- llm = llm_local
316
- model_path = mp
317
- load_error = None
318
- loaded_at_ts = time.time()
319
- print("=== Startup OK ===", flush=True)
320
-
321
- except Exception as e:
322
- load_error = repr(e)
323
- print(f"❌ Startup FAILED: {load_error}", flush=True)
324
-
325
-
326
  @app.on_event("startup")
327
  def _startup():
328
- load_llama()
 
 
 
 
 
 
329
 
330
 
331
  @app.get("/")
@@ -335,22 +98,17 @@ def root():
335
 
336
  @app.get("/health")
337
  def health():
338
- return {
339
- "ok": llm is not None and load_error is None,
340
- "model_loaded": llm is not None,
341
- "load_error": load_error,
342
- "gguf_repo": GGUF_REPO_ID,
343
- "gguf_filename": GGUF_FILENAME,
344
- "model_path": model_path,
345
- "n_ctx": N_CTX,
346
- "n_threads": N_THREADS,
347
- "n_batch": N_BATCH,
348
- "loaded_at_ts": loaded_at_ts,
349
- }
350
 
351
 
352
  # ============================
353
- # Param selection
354
  # ============================
355
  def pick_params(req: GenParams) -> Dict[str, Any]:
356
  if req.light:
@@ -382,219 +140,47 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
382
  return params
383
 
384
 
385
- # ============================
386
- # Post-processing: remove template sentence
387
- # ============================
388
- # This catches the exact sentence + small punctuation variations (case-insensitive).
389
- # Also works if the model prefixes rationales with it.
390
- _TEMPLATE_RE = re.compile(
391
- r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\b\.?",
392
- flags=re.IGNORECASE,
393
- )
394
-
395
- def strip_template_sentence(text: str) -> str:
396
- if not isinstance(text, str):
397
- return ""
398
- out = _TEMPLATE_RE.sub("", text)
399
-
400
- # Cleanup common leftovers (double spaces, leading punctuation)
401
- out = out.replace("..", ".").strip()
402
- out = re.sub(r"\s{2,}", " ", out)
403
- out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
404
- return out
405
-
406
-
407
- # ============================
408
- # Output sanitation / validation
409
- # ============================
410
- def _clamp01(x: Any, default: float = 0.5) -> float:
411
- try:
412
- v = float(x)
413
- except Exception:
414
- return default
415
- return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
416
-
417
-
418
- def _is_allowed_label(lbl: Any) -> bool:
419
- return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"
420
-
421
-
422
- def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
423
- has_fallacy = bool(obj.get("has_fallacy", False))
424
- fallacies_in = obj.get("fallacies", [])
425
- if not isinstance(fallacies_in, list):
426
- fallacies_in = []
427
-
428
- fallacies_out = []
429
- for f in fallacies_in:
430
- if not isinstance(f, dict):
431
- continue
432
- f_type = f.get("type")
433
- if not _is_allowed_label(f_type):
434
- continue
435
-
436
- conf = _clamp01(f.get("confidence", 0.5))
437
- conf = float(f"{conf:.2f}")
438
-
439
- ev = f.get("evidence_quotes", [])
440
- if not isinstance(ev, list):
441
- ev = []
442
-
443
- ev_clean: List[str] = []
444
- for q in ev:
445
- if not isinstance(q, str):
446
- continue
447
- qq = q.strip()
448
- if not qq:
449
- continue
450
- if qq in input_text:
451
- if len(qq) <= 240:
452
- ev_clean.append(qq)
453
- else:
454
- short = qq[:240]
455
- ev_clean.append(short if short in input_text else qq)
456
-
457
- rationale = f.get("rationale", "")
458
- rationale = strip_template_sentence(rationale.strip())
459
-
460
- fallacies_out.append(
461
- {
462
- "type": f_type,
463
- "confidence": conf,
464
- "evidence_quotes": ev_clean[:3],
465
- "rationale": rationale,
466
- }
467
- )
468
-
469
- overall = obj.get("overall_explanation", "")
470
- overall = strip_template_sentence(overall.strip())
471
-
472
- if len(fallacies_out) == 0:
473
- has_fallacy = False
474
-
475
- return {
476
- "has_fallacy": has_fallacy,
477
- "fallacies": fallacies_out,
478
- "overall_explanation": overall,
479
- }
480
-
481
-
482
- # ============================
483
- # Cached generation (task-aware)
484
- # ============================
485
- @lru_cache(maxsize=512)
486
- def _cached_chat_completion(
487
- task: str,
488
- payload: str,
489
- light: bool,
490
- max_new_tokens: int,
491
- temperature: float,
492
- top_p: float,
493
- n_batch: int,
494
- ) -> Dict[str, Any]:
495
- if llm is None:
496
- return {"ok": False, "error": "model_not_loaded", "detail": load_error}
497
-
498
- try:
499
- llm.n_batch = int(n_batch) # type: ignore[attr-defined]
500
- except Exception:
501
- pass
502
-
503
- try:
504
- data = json.loads(payload)
505
- except Exception:
506
- return {"ok": False, "error": "bad_payload"}
507
-
508
- if task == "analyze":
509
- messages = build_analyze_messages(data["text"])
510
- elif task == "rewrite":
511
- messages = build_rewrite_messages(
512
- data["text"],
513
- data["quote"],
514
- data["fallacy_type"],
515
- data["rationale"],
516
- )
517
- else:
518
- return {"ok": False, "error": "unknown_task"}
519
-
520
- out = llm.create_chat_completion(
521
- messages=messages,
522
- max_tokens=int(max_new_tokens),
523
- temperature=float(temperature),
524
- top_p=float(top_p),
525
- stream=False,
526
- )
527
-
528
- raw = out["choices"][0]["message"]["content"]
529
- obj = extract_first_json_obj(raw)
530
- if obj is None:
531
- return {"ok": False, "error": "json_parse_error", "raw": raw}
532
-
533
- return {"ok": True, "result": obj}
534
-
535
-
536
- def _occurrence_index(text: str, sub: str, occurrence: int) -> int:
537
- if occurrence < 0:
538
- return -1
539
- start = 0
540
- for _ in range(occurrence + 1):
541
- idx = text.find(sub, start)
542
- if idx == -1:
543
- return -1
544
- start = idx + max(1, len(sub))
545
- return idx
546
-
547
-
548
- def _replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
549
- idx = _occurrence_index(text, old, occurrence)
550
- if idx == -1:
551
- return {"ok": False, "error": "quote_not_found"}
552
- return {
553
- "ok": True,
554
- "rewritten_text": text[:idx] + new + text[idx + len(old) :],
555
- "start_char": idx,
556
- "end_char": idx + len(new),
557
- "old_start_char": idx,
558
- "old_end_char": idx + len(old),
559
- }
560
-
561
-
562
  # ============================
563
  # Routes
564
  # ============================
565
  @app.post("/analyze")
566
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
567
  rid = uuid.uuid4().hex[:10]
 
568
  t0 = time.time()
569
 
570
- _log(rid, f"📩 /analyze received (light={req.light}) chars={len(req.text) if req.text else 0}")
 
 
 
 
571
 
572
- if not req.text or not req.text.strip():
573
- return {"ok": False, "error": "empty_text"}
574
 
575
- params = pick_params(req)
576
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
577
 
578
- async with GEN_LOCK:
579
- t_lock = time.time()
580
- t_gen0 = time.time()
 
581
 
582
- res = _cached_chat_completion(
583
- "analyze",
584
- payload,
585
- bool(req.light),
586
- int(params["max_new_tokens"]),
587
- float(params["temperature"]),
588
- float(params["top_p"]),
589
- int(params["n_batch"]),
590
- )
591
 
592
- t_gen1 = time.time()
 
593
 
594
  elapsed_total = time.time() - t0
595
- elapsed_lock = time.time() - t_lock
596
 
597
  if not res.get("ok"):
 
598
  return {
599
  **res,
600
  "meta": {
@@ -610,8 +196,10 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
610
  },
611
  }
612
 
613
- clean = sanitize_analyze_output(res["result"], req.text)
 
614
 
 
615
  return {
616
  "ok": True,
617
  "result": clean,
@@ -636,22 +224,30 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
636
  @app.post("/rewrite")
637
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
638
  rid = uuid.uuid4().hex[:10]
 
639
  t0 = time.time()
640
 
641
- if not req.text or not req.text.strip():
642
- return {"ok": False, "error": "empty_text"}
643
- if not req.quote or not req.quote.strip():
644
- return {"ok": False, "error": "empty_quote"}
 
 
 
 
 
645
 
646
  quote = req.quote.strip()
647
  occurrence = int(req.occurrence or 0)
648
 
649
- if _occurrence_index(req.text, quote, occurrence) == -1:
650
- return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
 
651
 
652
- params = pick_params(req)
653
- if req.light and req.max_new_tokens is None:
654
- params["max_new_tokens"] = max(params["max_new_tokens"], 80)
 
655
 
656
  payload = json.dumps(
657
  {
@@ -663,26 +259,27 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
663
  ensure_ascii=False,
664
  )
665
 
666
- async with GEN_LOCK:
667
- t_lock = time.time()
668
- t_gen0 = time.time()
 
669
 
670
- res = _cached_chat_completion(
671
- "rewrite",
672
- payload,
673
- bool(req.light),
674
- int(params["max_new_tokens"]),
675
- float(params["temperature"]),
676
- float(params["top_p"]),
677
- int(params["n_batch"]),
678
- )
679
 
680
- t_gen1 = time.time()
 
681
 
682
  elapsed_total = time.time() - t0
683
- elapsed_lock = time.time() - t_lock
684
 
685
  if not res.get("ok"):
 
686
  return {
687
  **res,
688
  "meta": {
@@ -698,25 +295,27 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
698
  },
699
  }
700
 
701
- obj = res["result"]
702
- if not isinstance(obj, dict):
703
- return {"ok": False, "error": "bad_rewrite_output"}
704
-
705
- replacement = obj.get("replacement_quote")
706
- if not isinstance(replacement, str):
707
- return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
708
 
709
- replacement = replacement.strip()
710
- if not replacement:
711
- return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
 
 
 
712
 
713
- why = obj.get("why_this_fix", "")
714
- why = strip_template_sentence(why.strip())
715
 
716
- rep = _replace_nth(req.text, quote, replacement, occurrence)
717
- if not rep.get("ok"):
718
- return {"ok": False, "error": rep.get("error", "replace_failed")}
 
719
 
 
720
  return {
721
  "ok": True,
722
  "result": {
 
 
1
  import os
2
  import json
3
  import time
4
  import uuid
5
  import asyncio
6
+ from typing import Any, Dict, Optional
 
 
7
 
8
  from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel, Field
11
+
12
+ from logger_utils import StepLogger
13
+ from utils import sanitize_analyze_output, occurrence_index, replace_nth, strip_template_sentence
14
+ from model_runtime import load_llama, get_health, cached_chat_completion
15
 
16
 
17
  # ============================
 
66
  temperature: Optional[float] = None
67
  top_p: Optional[float] = None
68
 
 
69
  class AnalyzeRequest(GenParams):
70
  text: str
71
 
 
72
  class RewriteRequest(GenParams):
73
  text: str
74
  quote: str = Field(..., description="Verbatim substring that must be replaced.")
 
78
 
79
 
80
  # ============================
81
+ # Startup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # ============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
@app.on_event("startup")
def _startup():
    """Download and initialize the GGUF model when the app boots.

    Blocking on purpose: the app should not serve /analyze or /rewrite
    before load_llama() has had a chance to run (failures are recorded by
    model_runtime and surfaced via /health rather than crashing startup).
    """
    load_llama(
        gguf_repo_id=GGUF_REPO_ID,
        gguf_filename=GGUF_FILENAME,
        n_ctx=N_CTX,
        n_threads=N_THREADS,
        n_batch=N_BATCH,
    )
92
 
93
 
94
  @app.get("/")
 
98
 
99
  @app.get("/health")
100
  def health():
101
+ return get_health(
102
+ gguf_repo_id=GGUF_REPO_ID,
103
+ gguf_filename=GGUF_FILENAME,
104
+ n_ctx=N_CTX,
105
+ n_threads=N_THREADS,
106
+ n_batch=N_BATCH,
107
+ )
 
 
 
 
 
108
 
109
 
110
  # ============================
111
+ # Params selection
112
  # ============================
113
  def pick_params(req: GenParams) -> Dict[str, Any]:
114
  if req.light:
 
140
  return params
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # ============================
144
  # Routes
145
  # ============================
146
  @app.post("/analyze")
147
  async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
148
  rid = uuid.uuid4().hex[:10]
149
+ L = StepLogger(rid, "/analyze")
150
  t0 = time.time()
151
 
152
+ L.info(f"received light={req.light} chars={len(req.text) if req.text else 0}")
153
+
154
+ with L.step("validate"):
155
+ if not req.text or not req.text.strip():
156
+ return {"ok": False, "error": "empty_text"}
157
 
158
+ with L.step("pick_params"):
159
+ params = pick_params(req)
160
 
 
161
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
162
 
163
+ with L.step("generate_under_lock"):
164
+ async with GEN_LOCK:
165
+ t_lock = time.time()
166
+ t_gen0 = time.time()
167
 
168
+ res = cached_chat_completion(
169
+ "analyze",
170
+ payload,
171
+ int(params["max_new_tokens"]),
172
+ float(params["temperature"]),
173
+ float(params["top_p"]),
174
+ int(params["n_batch"]),
175
+ )
 
176
 
177
+ t_gen1 = time.time()
178
+ elapsed_lock = time.time() - t_lock
179
 
180
  elapsed_total = time.time() - t0
 
181
 
182
  if not res.get("ok"):
183
+ L.info(f"failed err={res.get('error')}")
184
  return {
185
  **res,
186
  "meta": {
 
196
  },
197
  }
198
 
199
+ with L.step("sanitize"):
200
+ clean = sanitize_analyze_output(res["result"], req.text)
201
 
202
+ L.info(f"ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
203
  return {
204
  "ok": True,
205
  "result": clean,
 
224
  @app.post("/rewrite")
225
  async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
226
  rid = uuid.uuid4().hex[:10]
227
+ L = StepLogger(rid, "/rewrite")
228
  t0 = time.time()
229
 
230
+ L.info(
231
+ f"received light={req.light} text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}"
232
+ )
233
+
234
+ with L.step("validate"):
235
+ if not req.text or not req.text.strip():
236
+ return {"ok": False, "error": "empty_text"}
237
+ if not req.quote or not req.quote.strip():
238
+ return {"ok": False, "error": "empty_quote"}
239
 
240
  quote = req.quote.strip()
241
  occurrence = int(req.occurrence or 0)
242
 
243
+ with L.step("quote_check"):
244
+ if occurrence_index(req.text, quote, occurrence) == -1:
245
+ return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
246
 
247
+ with L.step("pick_params"):
248
+ params = pick_params(req)
249
+ if req.light and req.max_new_tokens is None:
250
+ params["max_new_tokens"] = max(params["max_new_tokens"], 80)
251
 
252
  payload = json.dumps(
253
  {
 
259
  ensure_ascii=False,
260
  )
261
 
262
+ with L.step("generate_under_lock"):
263
+ async with GEN_LOCK:
264
+ t_lock = time.time()
265
+ t_gen0 = time.time()
266
 
267
+ res = cached_chat_completion(
268
+ "rewrite",
269
+ payload,
270
+ int(params["max_new_tokens"]),
271
+ float(params["temperature"]),
272
+ float(params["top_p"]),
273
+ int(params["n_batch"]),
274
+ )
 
275
 
276
+ t_gen1 = time.time()
277
+ elapsed_lock = time.time() - t_lock
278
 
279
  elapsed_total = time.time() - t0
 
280
 
281
  if not res.get("ok"):
282
+ L.info(f"failed err={res.get('error')}")
283
  return {
284
  **res,
285
  "meta": {
 
295
  },
296
  }
297
 
298
+ with L.step("validate_model_output"):
299
+ obj = res["result"]
300
+ if not isinstance(obj, dict):
301
+ return {"ok": False, "error": "bad_rewrite_output"}
 
 
 
302
 
303
+ replacement = obj.get("replacement_quote")
304
+ if not isinstance(replacement, str):
305
+ return {"ok": False, "error": "missing_replacement_quote", "raw": obj}
306
+ replacement = replacement.strip()
307
+ if not replacement:
308
+ return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
309
 
310
+ why = obj.get("why_this_fix", "")
311
+ why = strip_template_sentence(str(why).strip())
312
 
313
+ with L.step("replace"):
314
+ rep = replace_nth(req.text, quote, replacement, occurrence)
315
+ if not rep.get("ok"):
316
+ return {"ok": False, "error": rep.get("error", "replace_failed")}
317
 
318
+ L.info(f"ok total={elapsed_total:.2f}s")
319
  return {
320
  "ok": True,
321
  "result": {
model_runtime.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ from functools import lru_cache
5
+ from typing import Any, Dict, Optional
6
+
7
+ from huggingface_hub import hf_hub_download
8
+ from llama_cpp import Llama
9
+
10
+ from prompts import build_analyze_messages, build_rewrite_messages
11
+ from utils import extract_first_json_obj
12
+
13
+ llm: Optional[Llama] = None
14
+ model_path: Optional[str] = None
15
+ load_error: Optional[str] = None
16
+ loaded_at_ts: Optional[float] = None
17
+
18
def load_llama(
    gguf_repo_id: str,
    gguf_filename: str,
    n_ctx: int,
    n_threads: int,
    n_batch: int,
) -> None:
    """Download the GGUF weights and initialize the module-global Llama model.

    On success sets ``llm``/``model_path``/``loaded_at_ts`` and clears
    ``load_error``; on any failure clears ``llm`` and records the error in
    ``load_error`` so /health can report it instead of crashing startup.
    """
    global llm, model_path, load_error, loaded_at_ts

    print("=== FADES startup ===", flush=True)
    print(f"GGUF_REPO_ID={gguf_repo_id}", flush=True)
    print(f"GGUF_FILENAME={gguf_filename}", flush=True)
    print(f"N_CTX={n_ctx} N_THREADS={n_threads} N_BATCH={n_batch}", flush=True)

    try:
        download_started = time.time()
        resolved_path = hf_hub_download(
            repo_id=gguf_repo_id,
            filename=gguf_filename,
            token=os.getenv("HF_TOKEN"),  # optional; needed for gated repos
        )
        download_elapsed = time.time() - download_started
        print(f"✅ GGUF downloaded: {resolved_path} ({download_elapsed:.1f}s)", flush=True)

        load_started = time.time()
        model = Llama(
            model_path=resolved_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_batch=n_batch,
            n_gpu_layers=0,  # CPU-only deployment
            verbose=False,
        )
        load_elapsed = time.time() - load_started
        print(f"✅ Model loaded: ({load_elapsed:.1f}s) n_ctx={n_ctx} threads={n_threads} batch={n_batch}", flush=True)

        # Publish the fully-initialized model only after everything succeeded.
        llm = model
        model_path = resolved_path
        load_error = None
        loaded_at_ts = time.time()
        print("=== Startup OK ===", flush=True)
    except Exception as e:
        # Broad catch is deliberate at this boundary: record and report via
        # /health rather than taking the whole app down at startup.
        load_error = repr(e)
        llm = None
        print(f"❌ Startup FAILED: {load_error}", flush=True)
63
+
64
def get_health(gguf_repo_id: str, gguf_filename: str, n_ctx: int, n_threads: int, n_batch: int) -> Dict[str, Any]:
    """Build the /health payload from module-level runtime state plus config."""
    model_ready = llm is not None
    return {
        "ok": model_ready and load_error is None,
        "model_loaded": model_ready,
        "load_error": load_error,
        "gguf_repo": gguf_repo_id,
        "gguf_filename": gguf_filename,
        "model_path": model_path,
        "n_ctx": n_ctx,
        "n_threads": n_threads,
        "n_batch": n_batch,
        "loaded_at_ts": loaded_at_ts,
    }
77
+
78
@lru_cache(maxsize=512)
def _cached_chat_completion_impl(
    task: str,
    payload: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Cached core of cached_chat_completion; assumes the model is loaded.

    NOTE: GEN_LOCK is managed by the FastAPI routes (outside).
    """
    try:
        # Best-effort: llama-cpp exposes n_batch as a plain attribute.
        llm.n_batch = int(n_batch)  # type: ignore[attr-defined]
    except Exception:
        pass

    try:
        data = json.loads(payload)
    except Exception:
        return {"ok": False, "error": "bad_payload"}

    if task == "analyze":
        messages = build_analyze_messages(data["text"])
    elif task == "rewrite":
        messages = build_rewrite_messages(
            data["text"],
            data["quote"],
            data["fallacy_type"],
            data["rationale"],
        )
    else:
        return {"ok": False, "error": "unknown_task"}

    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        stream=False,
    )

    raw = out["choices"][0]["message"]["content"]
    obj = extract_first_json_obj(raw)
    if obj is None:
        return {"ok": False, "error": "json_parse_error", "raw": raw}

    return {"ok": True, "result": obj}


def cached_chat_completion(
    task: str,
    payload: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    n_batch: int,
) -> Dict[str, Any]:
    """Run (and memoize) a chat completion for *task* with *payload*.

    Bug fix: the model-loaded check lives OUTSIDE the lru_cache'd helper.
    Previously a request arriving before startup finished produced
    {"ok": False, "error": "model_not_loaded"}, and lru_cache replayed that
    failure forever for the same arguments — even after the model loaded.
    Now only actual completion attempts are cached.
    """
    if llm is None:
        return {"ok": False, "error": "model_not_loaded", "detail": load_error}
    return _cached_chat_completion_impl(task, payload, max_new_tokens, temperature, top_p, n_batch)
prompts.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
# Closed label set the model may emit; "none" is only valid with has_fallacy=false.
ALLOWED_LABELS = [
    "none",
    "faulty generalization",
    "false causality",
    "circular reasoning",
    "ad populum",
    "ad hominem",
    "fallacy of logic",
    "appeal to emotion",
    "false dilemma",
    "equivocation",
    "fallacy of extension",
    "fallacy of relevance",
    "fallacy of credibility",
    "miscellaneous",
    "intentional",
]

LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])

# Stronger /analyze prompt: forces specificity and forbids the "template" sentence.
# f-string: JSON braces are escaped as {{ }}; the single {text} slot is filled
# by build_analyze_messages() via str.replace.
ANALYZE_PROMPT = f"""You are a fallacy detection assistant.

You MUST choose labels ONLY from this list (exact string):
{LABELS_STR}

You MUST return ONLY valid JSON with this schema:
{{
  "has_fallacy": boolean,
  "fallacies": [
    {{
      "type": string,
      "confidence": number,
      "evidence_quotes": [string],
      "rationale": string
    }}
  ],
  "overall_explanation": string
}}

Hard rules:
- Output ONLY JSON. No markdown. No extra text.
- evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
- Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
- confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
  It MUST NOT be always the same across examples. Calibrate it.
- The rationale MUST be specific to the evidence (2–4 sentences):
  Explain (1) what the quote claims, (2) why that matches the fallacy label,
  (3) what logical step is invalid or missing.
  DO NOT use generic filler. Do NOT reuse stock phrases.
- If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
INPUT:
{{text}}

OUTPUT:"""

# /rewrite prompt: returns ONLY a replacement substring for the quote (server
# does the replacement).
# IMPORTANT: this template contains literal JSON braces, so it must NEVER go
# through str.format(). build_rewrite_messages() fills the <<...>> tokens with
# str.replace(); the tokens below MUST match the ones it replaces.
# (Bug fix: this template previously used {text}/{quote}/{fallacy_type}/{rationale}
# placeholders, which build_rewrite_messages never substitutes — the literal
# placeholders were sent to the model.)
REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.

Goal:
- You MUST propose a replacement for the QUOTE only.
- The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
- The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
- Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
- Do NOT introduce new fallacies.

Return ONLY valid JSON with this schema:
{
  "replacement_quote": string,
  "why_this_fix": string
}

Hard rules:
- Output ONLY JSON. No markdown. No extra text.
- replacement_quote should be standalone text (no surrounding quotes).
- why_this_fix: 1–3 sentences, specific.

INPUT_TEXT:
<<TEXT>>

QUOTE_TO_REWRITE:
<<QUOTE>>

FALLACY_TYPE:
<<FALLACY_TYPE>>

WHY_FALLACIOUS:
<<RATIONALE>>

OUTPUT:"""
93
+
94
+
95
def build_analyze_messages(text: str) -> List[Dict[str, str]]:
    """Chat messages for the /analyze task, with the input text spliced in.

    Uses str.replace (not .format) because the template contains JSON braces.
    """
    system_msg = {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."}
    user_msg = {"role": "user", "content": ANALYZE_PROMPT.replace("{text}", text)}
    return [system_msg, user_msg]
100
+
101
+
102
def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
    """Chat messages for the /rewrite task.

    Fills the template's <<...>> tokens with str.replace (never .format —
    the template contains literal JSON braces).
    """
    filled = REWRITE_PROMPT
    for token, value in (
        ("<<TEXT>>", text),
        ("<<QUOTE>>", quote),
        ("<<FALLACY_TYPE>>", fallacy_type),
        ("<<RATIONALE>>", rationale),
    ):
        filled = filled.replace(token, value)
    return [
        {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
        {"role": "user", "content": filled},
    ]
utils.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, Optional, List
4
+ from prompts import ALLOWED_LABELS
5
+
6
# ----------------------------
# Robust JSON extraction
# ----------------------------
def stop_at_complete_json(text: str) -> Optional[str]:
    """Return the first balanced ``{...}`` span found in *text*, or None.

    Walks the text one character at a time, tracking JSON string literals
    and backslash escapes so that braces appearing inside strings do not
    disturb the nesting count.
    """
    first = text.find("{")
    if first < 0:
        return None

    nesting = 0
    inside_string = False
    escaped = False

    for pos in range(first, len(text)):
        ch = text[pos]
        if inside_string:
            # Within a string literal: only an unescaped quote ends it.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
            continue

        if ch == '"':
            inside_string = True
        elif ch == "{":
            nesting += 1
        elif ch == "}":
            nesting -= 1
            if nesting == 0:
                return text[first : pos + 1]

    # Ran off the end without closing the outermost brace.
    return None
39
+
40
+
41
def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
    """Parse and return the first JSON object embedded in *s*, or None.

    Narrows *s* to the first balanced object when one can be found, then
    takes the outermost ``{...}`` slice of that window and attempts a
    strict parse. Any parse failure yields None rather than raising.
    """
    window = stop_at_complete_json(s) or s
    lo = window.find("{")
    hi = window.rfind("}")
    if lo == -1 or hi <= lo:
        return None
    candidate = window[lo : hi + 1].strip()
    try:
        return json.loads(candidate)
    except Exception:
        return None
52
+
53
+
54
# ----------------------------
# Post-processing: remove template sentence
# ----------------------------
# FIX: the previous pattern ended with ``\)\b`` — ")" is a non-word character,
# so a word boundary there can never match before ".", a space, or the end of
# the string, and the sentence was never actually stripped. The trailing \b
# is removed; the optional period still absorbs the sentence terminator.
_TEMPLATE_RE = re.compile(
    r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\.?",
    flags=re.IGNORECASE,
)

def strip_template_sentence(text: str) -> str:
    """Remove the boilerplate template sentence from a rationale string.

    Also tidies the residue the removal leaves behind: collapses double
    periods and runs of whitespace, and strips leading punctuation/dashes.
    Non-string input yields "".
    """
    if not isinstance(text, str):
        return ""
    out = _TEMPLATE_RE.sub("", text)
    out = out.replace("..", ".").strip()
    out = re.sub(r"\s{2,}", " ", out)
    out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
    return out
70
+
71
+
72
+ # ----------------------------
73
+ # Output sanitation / validation
74
+ # ----------------------------
75
+ def _clamp01(x: Any, default: float = 0.5) -> float:
76
+ try:
77
+ v = float(x)
78
+ except Exception:
79
+ return default
80
+ return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
81
+
82
+
83
def _is_allowed_label(lbl: Any) -> bool:
    """True if *lbl* is a recognized fallacy label other than the 'none' sentinel."""
    if not isinstance(lbl, str) or lbl == "none":
        return False
    return lbl in ALLOWED_LABELS
85
+
86
+
87
def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
    """Validate and normalize the raw /analyze JSON produced by the model.

    Filtering applied per fallacy entry:
    - drops non-dict entries and entries whose ``type`` is not an allowed label;
    - clamps ``confidence`` to [0, 1] and rounds to 2 decimals;
    - keeps at most 3 ``evidence_quotes`` that occur verbatim in *input_text*,
      truncated to 240 characters;
    - strips the boilerplate template sentence from ``rationale``.

    Returns a dict with keys ``has_fallacy``, ``fallacies`` and
    ``overall_explanation``. ``has_fallacy`` is forced to False when no entry
    survives filtering, keeping the flag consistent with the list.
    """
    has_fallacy = bool(obj.get("has_fallacy", False))
    fallacies_in = obj.get("fallacies", [])
    if not isinstance(fallacies_in, list):
        fallacies_in = []

    fallacies_out: List[Dict[str, Any]] = []
    for f in fallacies_in:
        if not isinstance(f, dict):
            continue
        f_type = f.get("type")
        if not _is_allowed_label(f_type):
            continue

        # Round the clamped confidence to 2 decimals for stable display.
        conf = _clamp01(f.get("confidence", 0.5))
        conf = float(f"{conf:.2f}")

        ev = f.get("evidence_quotes", [])
        if not isinstance(ev, list):
            ev = []

        # Keep only quotes that appear verbatim in the input, capped at 240
        # chars. (A prefix of a verbatim substring is itself verbatim, so the
        # truncation never invalidates the quote — the previous fallback
        # re-check was unreachable and has been removed.)
        ev_clean: List[str] = []
        for q in ev:
            if not isinstance(q, str):
                continue
            qq = q.strip()
            if qq and qq in input_text:
                ev_clean.append(qq[:240])

        rationale = strip_template_sentence(str(f.get("rationale", "")).strip())

        fallacies_out.append(
            {
                "type": f_type,
                "confidence": conf,
                "evidence_quotes": ev_clean[:3],
                "rationale": rationale,
            }
        )

    overall = strip_template_sentence(str(obj.get("overall_explanation", "")).strip())

    # The model sometimes claims has_fallacy=true with no usable entries.
    if not fallacies_out:
        has_fallacy = False

    return {
        "has_fallacy": has_fallacy,
        "fallacies": fallacies_out,
        "overall_explanation": overall,
    }
143
+
144
+
145
# ----------------------------
# Replace helpers
# ----------------------------
def occurrence_index(text: str, sub: str, occurrence: int) -> int:
    """Start index of the N-th (0-based) non-overlapping occurrence of *sub*.

    Returns -1 when *occurrence* is negative or *text* holds fewer than
    ``occurrence + 1`` non-overlapping matches.
    """
    if occurrence < 0:
        return -1
    idx = text.find(sub)
    remaining = occurrence
    while idx != -1 and remaining > 0:
        # max(1, ...) still advances when sub is empty.
        idx = text.find(sub, idx + max(1, len(sub)))
        remaining -= 1
    return idx
158
+
159
+
160
def replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
    """Replace the N-th (0-based) occurrence of *old* in *text* with *new*.

    On success returns ``{"ok": True, ...}`` carrying the rewritten text plus
    the character spans of both the new and the original segment; when the
    occurrence cannot be located, returns ``{"ok": False, "error": "quote_not_found"}``.
    """
    pos = occurrence_index(text, old, occurrence)
    if pos < 0:
        return {"ok": False, "error": "quote_not_found"}
    old_end = pos + len(old)
    return {
        "ok": True,
        "rewritten_text": text[:pos] + new + text[old_end:],
        "start_char": pos,
        "end_char": pos + len(new),
        "old_start_char": pos,
        "old_end_char": old_end,
    }