maxime-antoine-dev committed on
Commit
1f2a732
·
1 Parent(s): 66ca5c9

increased max tokens

Browse files
Files changed (1) hide show
  1. main.py +73 -89
main.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import time
5
  import uuid
6
  import asyncio
 
7
  from typing import Any, Dict, Optional, List
8
  from functools import lru_cache
9
 
@@ -20,33 +21,27 @@ from llama_cpp import Llama
20
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
21
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
22
 
23
- # Model load params (fixed once at startup)
24
  N_CTX = int(os.getenv("N_CTX", "1536"))
25
  CPU_COUNT = os.cpu_count() or 4
26
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
  N_BATCH = int(os.getenv("N_BATCH", "256"))
28
 
29
- # Default generation params ("normal")
30
  MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
31
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
32
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
33
 
34
- # "Light" generation params
35
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
36
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
37
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
38
-
39
- # "Light" runtime knobs
40
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
41
 
42
- # One request at a time on CPU
43
  GEN_LOCK = asyncio.Lock()
44
 
45
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
46
 
47
 
48
  # ============================
49
- # CORS (for browser front-ends)
50
  # ============================
51
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
52
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
@@ -67,9 +62,7 @@ app.add_middleware(
67
  # Schemas
68
  # ============================
69
  class GenParams(BaseModel):
70
- # if True => use "light" parameters
71
  light: bool = False
72
- # optional overrides (applied after picking light/normal defaults)
73
  max_new_tokens: Optional[int] = None
74
  temperature: Optional[float] = None
75
  top_p: Optional[float] = None
@@ -110,7 +103,6 @@ ALLOWED_LABELS = [
110
 
111
  LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
112
 
113
- # Stronger /analyze prompt: forces specificity and forbids the "template" sentence
114
  ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
115
 
116
  You MUST choose labels ONLY from this list (exact string):
@@ -134,33 +126,48 @@ Hard rules:
134
  - Output ONLY JSON. No markdown. No extra text.
135
  - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
136
  - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
137
- - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals).
138
- It MUST NOT be always the same across examples. Calibrate it:
139
  * 0.90–1.00: very explicit, unambiguous match, clear cue words.
140
  * 0.70–0.89: strong match but some ambiguity or missing premise.
141
  * 0.40–0.69: plausible but weak/partial evidence.
142
  * 0.10–0.39: very uncertain.
143
- - The rationale MUST be specific to the evidence (2–4 sentences):
144
- Explain (1) what the quote claims, (2) why that matches the fallacy label,
145
- (3) what logical step is invalid or missing.
146
- DO NOT use generic filler. Do NOT reuse stock phrases.
147
- In particular, you MUST NOT output this sentence:
 
 
 
 
 
 
 
 
 
 
 
148
  "The input contains fallacious reasoning consistent with the predicted type(s)."
149
- - overall_explanation MUST also be specific (2–5 sentences): summarize the reasoning issues and reference the key cue(s).
150
- - If no fallacy: has_fallacy=false and fallacies=[] and overall_explanation explains briefly why.
 
 
 
151
 
152
  INPUT:
153
  {{text}}
154
 
155
  OUTPUT:"""
156
 
157
- # /rewrite prompt: returns ONLY a replacement substring for the quote (server does the replacement)
 
158
  REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
159
 
160
  Goal:
161
  - You MUST propose a replacement for the QUOTE only.
162
  - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
163
- - The replacement MUST be plausible in the surrounding context and should be similar length (roughly +/- 40%).
164
  - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
165
  - Do NOT introduce new fallacies.
166
 
@@ -176,16 +183,16 @@ Hard rules:
176
  - why_this_fix: 1–3 sentences, specific.
177
 
178
  INPUT_TEXT:
179
- {text}
180
 
181
  QUOTE_TO_REWRITE:
182
- {quote}
183
 
184
  FALLACY_TYPE:
185
- {fallacy_type}
186
 
187
  WHY_FALLACIOUS:
188
- {rationale}
189
 
190
  OUTPUT:"""
191
 
@@ -198,11 +205,12 @@ def build_analyze_messages(text: str) -> List[Dict[str, str]]:
198
 
199
 
200
  def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
201
- prompt = REWRITE_PROMPT.format(
202
- text=text,
203
- quote=quote,
204
- fallacy_type=fallacy_type,
205
- rationale=rationale,
 
206
  )
207
  return [
208
  {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
@@ -367,7 +375,6 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
367
  if req.top_p is not None:
368
  params["top_p"] = float(req.top_p)
369
 
370
- # Safety caps
371
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
372
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
373
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
@@ -375,6 +382,28 @@ def pick_params(req: GenParams) -> Dict[str, Any]:
375
  return params
376
 
377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  # ============================
379
  # Output sanitation / validation
380
  # ============================
@@ -383,11 +412,7 @@ def _clamp01(x: Any, default: float = 0.5) -> float:
383
  v = float(x)
384
  except Exception:
385
  return default
386
- if v < 0.0:
387
- return 0.0
388
- if v > 1.0:
389
- return 1.0
390
- return v
391
 
392
 
393
  def _is_allowed_label(lbl: Any) -> bool:
@@ -395,10 +420,6 @@ def _is_allowed_label(lbl: Any) -> bool:
395
 
396
 
397
  def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
398
- """
399
- Enforce shape, clamp confidence, drop invalid labels,
400
- enforce evidence_quotes being substrings.
401
- """
402
  has_fallacy = bool(obj.get("has_fallacy", False))
403
  fallacies_in = obj.get("fallacies", [])
404
  if not isinstance(fallacies_in, list):
@@ -413,12 +434,12 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
413
  continue
414
 
415
  conf = _clamp01(f.get("confidence", 0.5))
416
- # keep 2 decimals for nicer UI
417
  conf = float(f"{conf:.2f}")
418
 
419
  ev = f.get("evidence_quotes", [])
420
  if not isinstance(ev, list):
421
  ev = []
 
422
  ev_clean: List[str] = []
423
  for q in ev:
424
  if not isinstance(q, str):
@@ -426,23 +447,15 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
426
  qq = q.strip()
427
  if not qq:
428
  continue
429
- # evidence MUST be substring
430
  if qq in input_text:
431
- # keep short, but don't hard-cut if it breaks substring matching
432
  if len(qq) <= 240:
433
  ev_clean.append(qq)
434
  else:
435
- # if too long, try to keep first 240 if still substring (rare); else keep as-is
436
  short = qq[:240]
437
- if short in input_text:
438
- ev_clean.append(short)
439
- else:
440
- ev_clean.append(qq)
441
 
442
- rationale = f.get("rationale")
443
- if not isinstance(rationale, str):
444
- rationale = ""
445
- rationale = rationale.strip()
446
 
447
  fallacies_out.append(
448
  {
@@ -453,12 +466,9 @@ def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, A
453
  }
454
  )
455
 
456
- overall = obj.get("overall_explanation")
457
- if not isinstance(overall, str):
458
- overall = ""
459
- overall = overall.strip()
460
 
461
- # If no fallacies survived sanitation, force no-fallacy state
462
  if len(fallacies_out) == 0:
463
  has_fallacy = False
464
 
@@ -563,18 +573,12 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
563
  return {"ok": False, "error": "empty_text"}
564
 
565
  params = pick_params(req)
566
- _log(
567
- rid,
568
- f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
569
- )
570
-
571
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
572
 
573
  async with GEN_LOCK:
574
  t_lock = time.time()
575
-
576
- _log(rid, "🧠 Generating analyze...")
577
  t_gen0 = time.time()
 
578
  res = _cached_chat_completion(
579
  "analyze",
580
  payload,
@@ -584,13 +588,13 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
584
  float(params["top_p"]),
585
  int(params["n_batch"]),
586
  )
 
587
  t_gen1 = time.time()
588
 
589
  elapsed_total = time.time() - t0
590
  elapsed_lock = time.time() - t_lock
591
 
592
  if not res.get("ok"):
593
- _log(rid, f"❌ /analyze failed: {res.get('error')}")
594
  return {
595
  **res,
596
  "meta": {
@@ -606,10 +610,8 @@ async def analyze(req: AnalyzeRequest) -> Dict[str, Any]:
606
  },
607
  }
608
 
609
- # sanitize output for stability (substrings, labels, confidence clamp)
610
  clean = sanitize_analyze_output(res["result"], req.text)
611
 
612
- _log(rid, f"✅ /analyze ok fallacies={len(clean.get('fallacies', []))} total={elapsed_total:.2f}s")
613
  return {
614
  "ok": True,
615
  "result": clean,
@@ -636,11 +638,6 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
636
  rid = uuid.uuid4().hex[:10]
637
  t0 = time.time()
638
 
639
- _log(
640
- rid,
641
- f"📩 /rewrite received (light={req.light}) text_chars={len(req.text) if req.text else 0} quote_chars={len(req.quote) if req.quote else 0}",
642
- )
643
-
644
  if not req.text or not req.text.strip():
645
  return {"ok": False, "error": "empty_text"}
646
  if not req.quote or not req.quote.strip():
@@ -649,21 +646,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
649
  quote = req.quote.strip()
650
  occurrence = int(req.occurrence or 0)
651
 
652
- # validate quote existence early
653
  if _occurrence_index(req.text, quote, occurrence) == -1:
654
  return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
655
 
656
  params = pick_params(req)
657
- # rewrite generally needs a bit more room than light analyze if you want fluent replacements
658
- # (still controllable by request overrides)
659
  if req.light and req.max_new_tokens is None:
660
  params["max_new_tokens"] = max(params["max_new_tokens"], 80)
661
 
662
- _log(
663
- rid,
664
- f"⚙️ Params: max_new_tokens={params['max_new_tokens']} temp={params['temperature']} top_p={params['top_p']} n_batch={params['n_batch']}",
665
- )
666
-
667
  payload = json.dumps(
668
  {
669
  "text": req.text,
@@ -676,9 +665,8 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
676
 
677
  async with GEN_LOCK:
678
  t_lock = time.time()
679
-
680
- _log(rid, "🧠 Generating rewrite replacement_quote...")
681
  t_gen0 = time.time()
 
682
  res = _cached_chat_completion(
683
  "rewrite",
684
  payload,
@@ -688,13 +676,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
688
  float(params["top_p"]),
689
  int(params["n_batch"]),
690
  )
 
691
  t_gen1 = time.time()
692
 
693
  elapsed_total = time.time() - t0
694
  elapsed_lock = time.time() - t_lock
695
 
696
  if not res.get("ok"):
697
- _log(rid, f"❌ /rewrite failed: {res.get('error')}")
698
  return {
699
  **res,
700
  "meta": {
@@ -722,17 +710,13 @@ async def rewrite(req: RewriteRequest) -> Dict[str, Any]:
722
  if not replacement:
723
  return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
724
 
725
- why = obj.get("why_this_fix")
726
- if not isinstance(why, str):
727
- why = ""
728
- why = why.strip()
729
 
730
- # server-side enforced: ONLY the quote is changed
731
  rep = _replace_nth(req.text, quote, replacement, occurrence)
732
  if not rep.get("ok"):
733
  return {"ok": False, "error": rep.get("error", "replace_failed")}
734
 
735
- _log(rid, f"✅ /rewrite ok total={elapsed_total:.2f}s")
736
  return {
737
  "ok": True,
738
  "result": {
 
4
  import time
5
  import uuid
6
  import asyncio
7
+ import re
8
  from typing import Any, Dict, Optional, List
9
  from functools import lru_cache
10
 
 
21
  GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
22
  GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
23
 
 
24
  N_CTX = int(os.getenv("N_CTX", "1536"))
25
  CPU_COUNT = os.cpu_count() or 4
26
  N_THREADS = int(os.getenv("N_THREADS", str(min(8, max(1, CPU_COUNT - 1)))))
27
  N_BATCH = int(os.getenv("N_BATCH", "256"))
28
 
 
29
  MAX_NEW_TOKENS_DEFAULT = int(os.getenv("MAX_NEW_TOKENS", "180"))
30
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE", "0.0"))
31
  TOP_P_DEFAULT = float(os.getenv("TOP_P", "0.95"))
32
 
 
33
  LIGHT_MAX_NEW_TOKENS = int(os.getenv("LIGHT_MAX_NEW_TOKENS", "60"))
34
  LIGHT_TEMPERATURE = float(os.getenv("LIGHT_TEMPERATURE", "0.0"))
35
  LIGHT_TOP_P = float(os.getenv("LIGHT_TOP_P", "0.9"))
 
 
36
  LIGHT_N_BATCH = int(os.getenv("LIGHT_N_BATCH", "64"))
37
 
 
38
  GEN_LOCK = asyncio.Lock()
39
 
40
  app = FastAPI(title="FADES Fallacy Detector (GGUF / llama.cpp)")
41
 
42
 
43
  # ============================
44
+ # CORS
45
  # ============================
46
  _CORS_ORIGINS = os.getenv("CORS_ALLOW_ORIGINS", "*").strip()
47
  if _CORS_ORIGINS == "*" or not _CORS_ORIGINS:
 
62
  # Schemas
63
  # ============================
64
  class GenParams(BaseModel):
 
65
  light: bool = False
 
66
  max_new_tokens: Optional[int] = None
67
  temperature: Optional[float] = None
68
  top_p: Optional[float] = None
 
103
 
104
  LABELS_STR = ", ".join([f'"{x}"' for x in ALLOWED_LABELS])
105
 
 
106
  ANALYZE_PROMPT = f"""You are a fallacy detection assistant.
107
 
108
  You MUST choose labels ONLY from this list (exact string):
 
126
  - Output ONLY JSON. No markdown. No extra text.
127
  - evidence_quotes MUST be verbatim substrings copied from the input text (no paraphrase).
128
  - Keep each evidence quote short (prefer 1–2 sentences; max 240 chars).
129
+ - confidence MUST be a real probability between 0.0 and 1.0 (use 2 decimals) and MUST vary when appropriate.
130
+ Calibrate it:
131
  * 0.90–1.00: very explicit, unambiguous match, clear cue words.
132
  * 0.70–0.89: strong match but some ambiguity or missing premise.
133
  * 0.40–0.69: plausible but weak/partial evidence.
134
  * 0.10–0.39: very uncertain.
135
+
136
+ About rationale vs overall_explanation:
137
+ - Each fallacy.rationale MUST be QUOTE-LOCAL (2–4 sentences):
138
+ (1) restate what the quote is asserting,
139
+ (2) identify the missing/invalid inference step,
140
+ (3) explain why that matches the selected fallacy label.
141
+ Mention at least one concrete cue from the quote (e.g., escalation, popularity claim, personal attack, etc.).
142
+ - overall_explanation MUST be GLOBAL and MUST NOT restate rationales sentence-by-sentence.
143
+ Instead (2–5 sentences):
144
+ (a) summarize the overall reasoning pattern(s),
145
+ (b) explain why that pattern is harmful,
146
+ (c) give plausible consequences (bad decisions, distorted debate, polarization, unjustified fear, scapegoating).
147
+
148
+ Anti-template rule:
149
+ - DO NOT use generic filler or stock phrases.
150
+ - You MUST NOT output this sentence (or close variants):
151
  "The input contains fallacious reasoning consistent with the predicted type(s)."
152
+
153
+ If no fallacy:
154
+ - has_fallacy=false
155
+ - fallacies=[]
156
+ - overall_explanation briefly explains why the reasoning is acceptable.
157
 
158
  INPUT:
159
  {{text}}
160
 
161
  OUTPUT:"""
162
 
163
+ # IMPORTANT: do NOT use .format() on a template containing JSON braces.
164
+ # Use custom tokens and .replace() to avoid KeyError.
165
  REWRITE_PROMPT = """You are rewriting a small quoted span inside a larger text.
166
 
167
  Goal:
168
  - You MUST propose a replacement for the QUOTE only.
169
  - The replacement should remove the fallacious reasoning described, while keeping the same tone/style/tense/entities.
170
+ - The replacement MUST be plausible in the surrounding context and similar length (roughly +/- 40%).
171
  - Do NOT change anything outside the quote. Do NOT add new facts not implied by the original.
172
  - Do NOT introduce new fallacies.
173
 
 
183
  - why_this_fix: 1–3 sentences, specific.
184
 
185
  INPUT_TEXT:
186
+ <<TEXT>>
187
 
188
  QUOTE_TO_REWRITE:
189
+ <<QUOTE>>
190
 
191
  FALLACY_TYPE:
192
+ <<FALLACY_TYPE>>
193
 
194
  WHY_FALLACIOUS:
195
+ <<RATIONALE>>
196
 
197
  OUTPUT:"""
198
 
 
205
 
206
 
207
  def build_rewrite_messages(text: str, quote: str, fallacy_type: str, rationale: str) -> List[Dict[str, str]]:
208
+ prompt = (
209
+ REWRITE_PROMPT
210
+ .replace("<<TEXT>>", text)
211
+ .replace("<<QUOTE>>", quote)
212
+ .replace("<<FALLACY_TYPE>>", fallacy_type)
213
+ .replace("<<RATIONALE>>", rationale)
214
  )
215
  return [
216
  {"role": "system", "content": "Return only JSON. Exactly one JSON object. No extra text."},
 
375
  if req.top_p is not None:
376
  params["top_p"] = float(req.top_p)
377
 
 
378
  params["max_new_tokens"] = max(1, min(int(params["max_new_tokens"]), 400))
379
  params["temperature"] = max(0.0, min(float(params["temperature"]), 1.5))
380
  params["top_p"] = max(0.05, min(float(params["top_p"]), 1.0))
 
382
  return params
383
 
384
 
385
# ============================
# Post-processing: remove template sentence
# ============================
# The model sometimes emits this exact stock sentence despite the prompt's
# anti-template rule; strip it (case-insensitive, optional trailing period)
# from rationales / overall explanations.
#
# BUGFIX: the previous pattern ended in r"type\(s\)\b\.?".  A word boundary
# cannot match between ")" and "." (both are non-word characters), so the
# regex never matched the sentence at all and the template survived.  The
# stray \b after the closing parenthesis has been removed.
_TEMPLATE_RE = re.compile(
    r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\.?",
    flags=re.IGNORECASE,
)


def strip_template_sentence(text: str) -> str:
    """Remove the forbidden template sentence from *text* and tidy leftovers.

    Returns "" for any non-string input.  After removal, collapses doubled
    periods and runs of whitespace, and strips leading punctuation the
    deletion may have exposed.
    """
    if not isinstance(text, str):
        return ""
    out = _TEMPLATE_RE.sub("", text)

    # Cleanup common leftovers (double spaces, leading punctuation).
    # NOTE: the ".." -> "." replace is a crude heuristic; it also shortens a
    # legitimate ellipsis ("...") to "..", but that matches prior behavior.
    out = out.replace("..", ".").strip()
    out = re.sub(r"\s{2,}", " ", out)
    out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
    return out
405
+
406
+
407
  # ============================
408
  # Output sanitation / validation
409
  # ============================
 
412
  v = float(x)
413
  except Exception:
414
  return default
415
+ return 0.0 if v < 0.0 else (1.0 if v > 1.0 else v)
 
 
 
 
416
 
417
 
418
  def _is_allowed_label(lbl: Any) -> bool:
 
420
 
421
 
422
  def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
 
 
 
 
423
  has_fallacy = bool(obj.get("has_fallacy", False))
424
  fallacies_in = obj.get("fallacies", [])
425
  if not isinstance(fallacies_in, list):
 
434
  continue
435
 
436
  conf = _clamp01(f.get("confidence", 0.5))
 
437
  conf = float(f"{conf:.2f}")
438
 
439
  ev = f.get("evidence_quotes", [])
440
  if not isinstance(ev, list):
441
  ev = []
442
+
443
  ev_clean: List[str] = []
444
  for q in ev:
445
  if not isinstance(q, str):
 
447
  qq = q.strip()
448
  if not qq:
449
  continue
 
450
  if qq in input_text:
 
451
  if len(qq) <= 240:
452
  ev_clean.append(qq)
453
  else:
 
454
  short = qq[:240]
455
+ ev_clean.append(short if short in input_text else qq)
 
 
 
456
 
457
+ rationale = f.get("rationale", "")
458
+ rationale = strip_template_sentence(rationale.strip())
 
 
459
 
460
  fallacies_out.append(
461
  {
 
466
  }
467
  )
468
 
469
+ overall = obj.get("overall_explanation", "")
470
+ overall = strip_template_sentence(overall.strip())
 
 
471
 
 
472
  if len(fallacies_out) == 0:
473
  has_fallacy = False
474
 
 
573
  return {"ok": False, "error": "empty_text"}
574
 
575
  params = pick_params(req)
 
 
 
 
 
576
  payload = json.dumps({"text": req.text}, ensure_ascii=False)
577
 
578
  async with GEN_LOCK:
579
  t_lock = time.time()
 
 
580
  t_gen0 = time.time()
581
+
582
  res = _cached_chat_completion(
583
  "analyze",
584
  payload,
 
588
  float(params["top_p"]),
589
  int(params["n_batch"]),
590
  )
591
+
592
  t_gen1 = time.time()
593
 
594
  elapsed_total = time.time() - t0
595
  elapsed_lock = time.time() - t_lock
596
 
597
  if not res.get("ok"):
 
598
  return {
599
  **res,
600
  "meta": {
 
610
  },
611
  }
612
 
 
613
  clean = sanitize_analyze_output(res["result"], req.text)
614
 
 
615
  return {
616
  "ok": True,
617
  "result": clean,
 
638
  rid = uuid.uuid4().hex[:10]
639
  t0 = time.time()
640
 
 
 
 
 
 
641
  if not req.text or not req.text.strip():
642
  return {"ok": False, "error": "empty_text"}
643
  if not req.quote or not req.quote.strip():
 
646
  quote = req.quote.strip()
647
  occurrence = int(req.occurrence or 0)
648
 
 
649
  if _occurrence_index(req.text, quote, occurrence) == -1:
650
  return {"ok": False, "error": "quote_not_found", "detail": {"occurrence": occurrence}}
651
 
652
  params = pick_params(req)
 
 
653
  if req.light and req.max_new_tokens is None:
654
  params["max_new_tokens"] = max(params["max_new_tokens"], 80)
655
 
 
 
 
 
 
656
  payload = json.dumps(
657
  {
658
  "text": req.text,
 
665
 
666
  async with GEN_LOCK:
667
  t_lock = time.time()
 
 
668
  t_gen0 = time.time()
669
+
670
  res = _cached_chat_completion(
671
  "rewrite",
672
  payload,
 
676
  float(params["top_p"]),
677
  int(params["n_batch"]),
678
  )
679
+
680
  t_gen1 = time.time()
681
 
682
  elapsed_total = time.time() - t0
683
  elapsed_lock = time.time() - t_lock
684
 
685
  if not res.get("ok"):
 
686
  return {
687
  **res,
688
  "meta": {
 
710
  if not replacement:
711
  return {"ok": False, "error": "empty_replacement_quote", "raw": obj}
712
 
713
+ why = obj.get("why_this_fix", "")
714
+ why = strip_template_sentence(why.strip())
 
 
715
 
 
716
  rep = _replace_nth(req.text, quote, replacement, occurrence)
717
  if not rep.get("ok"):
718
  return {"ok": False, "error": rep.get("error", "replace_failed")}
719
 
 
720
  return {
721
  "ok": True,
722
  "result": {