maxime-antoine-dev committed on
Commit
d0d7bc6
·
1 Parent(s): df0ce09

Fixed the Hugging Face Spaces build

Browse files
Dockerfile CHANGED
@@ -19,11 +19,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
19
 
20
  COPY requirements.txt /app/requirements.txt
21
 
22
- # pip tooling up-to-date helps a lot for pyproject builds
23
  RUN pip install --upgrade pip setuptools wheel \
24
  && pip install -r /app/requirements.txt
25
 
26
  COPY main.py /app/main.py
 
27
 
28
  EXPOSE 7860
29
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
19
 
20
  COPY requirements.txt /app/requirements.txt
21
 
 
22
  RUN pip install --upgrade pip setuptools wheel \
23
  && pip install -r /app/requirements.txt
24
 
25
  COPY main.py /app/main.py
26
+ COPY utils.py /app/utils.py
27
 
28
  EXPOSE 7860
29
+
30
+ # PORT is set by HF Spaces; default to 7860 locally
31
+ CMD ["bash", "-lc", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]
data/.cache/huggingface/hub/.locks/models--maxime-antoine-dev--fades-mistral-v02-gguf/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.lock ADDED
File without changes
data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/blobs/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.incomplete ADDED
File without changes
data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/refs/main ADDED
@@ -0,0 +1 @@
 
 
1
+ 18135d5f557c580cdb31f394dc47b11be2e2e09e
main.py CHANGED
@@ -3,30 +3,66 @@ import json
3
  import time
4
  import math
5
  import asyncio
6
- import re
7
  from functools import lru_cache
8
- from typing import Any, Dict, List, Optional
9
  from fastapi.middleware.cors import CORSMiddleware
10
- import nest_asyncio
11
  import uvicorn
12
  from fastapi import FastAPI
13
  from pydantic import BaseModel
14
  from huggingface_hub import hf_hub_download
15
  from llama_cpp import Llama
16
 
17
- ENABLE_FULL_CONFIDENCE = True
18
- USE_FLASH_ATTN = True
19
- N_BATCH = 1024
20
- N_THREADS = 6
21
- N_CTX = 1024
22
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  DRIVE_CACHE_DIR = "/content/drive/MyDrive/FADES_Models_Cache"
24
- if os.path.exists("/content/drive") and not os.path.exists(DRIVE_CACHE_DIR):
25
- try: os.makedirs(DRIVE_CACHE_DIR)
26
- except: pass
27
 
28
- GGUF_REPO_ID = "maxime-antoine-dev/fades-mistral-v02-gguf"
29
- GGUF_FILENAME = "mistral_v02_fades.Q4_K_M.gguf"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  GEN_LOCK = asyncio.Lock()
31
  app = FastAPI(title="FADES Fallacy Detector API (Final)")
32
 
@@ -54,7 +90,6 @@ ALLOWED_LABELS = [
54
  "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
55
  ]
56
 
57
- # mapping des premiers mots vers les labels (pour regrouper les probas)
58
  LABEL_MAPPING = {
59
  "none": ["none"],
60
  "faulty": ["faulty generalization"],
@@ -68,7 +103,6 @@ LABEL_MAPPING = {
68
  "intentional": ["intentional"]
69
  }
70
 
71
- # On ajoute des exemples (Few-Shot) pour guider le modèle
72
  ANALYZE_SYS_PROMPT = """You are a logic expert. Detect logical fallacies.
73
  OUTPUT JSON ONLY.
74
 
@@ -117,6 +151,7 @@ JSON SCHEMA:
117
  "overall_explanation": string
118
  }}
119
  """
 
120
  REWRITE_SYS_PROMPT = """You are a text editor. Rewrite to remove the fallacy.
121
  Output Format (JSON):
122
  {{
@@ -127,10 +162,9 @@ Output Format (JSON):
127
 
128
  def clean_and_repair_json(text: str) -> str:
129
  text = text.replace("```json", "").replace("```", "").strip()
130
-
131
- # 2. On cherche le premier '{'
132
  start = text.find("{")
133
- if start == -1: return text
 
134
 
135
  depth = 0
136
  for i, char in enumerate(text[start:], start=start):
@@ -139,31 +173,24 @@ def clean_and_repair_json(text: str) -> str:
139
  elif char == "}":
140
  depth -= 1
141
  if depth == 0:
142
- potential_json = text[start:i+1]
143
  try:
144
  json.loads(potential_json)
145
- return potential_json
146
- except:
147
  pass
 
148
  end = text.rfind("}")
149
  if start != -1 and end != -1:
150
- return text[start:end+1]
151
-
152
  return text
153
 
154
  def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, float]]) -> Dict[str, float]:
155
- """
156
- Regarde les 'top_logprobs' au moment où le label a commencé à être écrit.
157
- Retourne un dictionnaire des probabilités pour chaque FAMILLE de label.
158
- Ex: {"Ad ...": 0.8, "Faulty ...": 0.1, "None": 0.05}
159
- """
160
  if start_index < 0 or start_index >= len(top_logprobs_list):
161
  return {}
162
  candidates = top_logprobs_list[start_index]
163
 
164
- distribution = {}
165
- total_prob = 0.0
166
-
167
  for token, logprob in candidates.items():
168
  clean_tok = str(token).replace(" ", "").lower().strip()
169
  prob = math.exp(logprob)
@@ -171,7 +198,11 @@ def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, flo
171
  matched = False
172
  for key, group in LABEL_MAPPING.items():
173
  if clean_tok.startswith(key):
174
- group_name = f"{key.capitalize()} ({'/'.join([g.split()[-1] for g in group])})" if len(group) > 1 else group[0].title()
 
 
 
 
175
  distribution[group_name] = distribution.get(group_name, 0.0) + prob
176
  matched = True
177
  break
@@ -179,43 +210,38 @@ def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, flo
179
  if not matched:
180
  distribution["_other_"] = distribution.get("_other_", 0.0) + prob
181
 
182
- total_prob += prob
183
-
184
  return {k: round(v, 4) for k, v in distribution.items() if v > 0.001}
185
 
186
  def extract_label_info(target_label: str, tokens: List[str], logprobs: List[float], top_logprobs: List[Dict]) -> Dict:
187
- """Récupère la confiance spécifique ET la distribution des alternatives"""
188
- if not target_label: return {"conf": 0.0, "dist": {}}
189
 
190
  target_clean = target_label.lower().strip()
191
  current_text = ""
192
  start_index = -1
193
 
194
- # on chreche trouver où commence le label
195
  for i, token in enumerate(tokens):
196
- tok_str = str(token) if not isinstance(token, bytes) else token.decode('utf-8', errors='ignore')
197
  current_text += tok_str
198
- #oOn cherche le label s'il apparaît
199
  if target_clean in current_text.lower() and start_index == -1:
200
  start_index = max(0, i - 5)
201
- # on affine pour trouver le vrai début (souvent précédé de guillemets)
202
- # c'est approximatif mais suffisant pour choper le bon token
203
  for j in range(start_index, i + 1):
204
  t_s = str(tokens[j]).lower()
205
- # si le token commence par la première lettre du label
206
- if target_clean[0] in t_s:
207
  start_index = j
208
  break
209
  break
210
 
211
  conf = 0.0
212
- dist = {}
213
 
214
  if start_index != -1:
215
-
216
- valid = [math.exp(logprobs[k]) for k in range(start_index, min(len(logprobs), start_index+3)) if logprobs[k] is not None]
217
- conf = round(sum(valid)/len(valid), 4) if valid else 0.0
218
-
 
 
219
  if top_logprobs:
220
  dist = analyze_alternatives(start_index, top_logprobs)
221
 
@@ -223,17 +249,42 @@ def extract_label_info(target_label: str, tokens: List[str], logprobs: List[floa
223
 
224
  @lru_cache(maxsize=1)
225
  def get_model():
226
- print(f"📦 Loading Model...")
 
 
 
 
 
 
 
 
227
  try:
228
- model_path = hf_hub_download(repo_id=GGUF_REPO_ID, filename=GGUF_FILENAME, cache_dir=DRIVE_CACHE_DIR)
229
  llm = Llama(
230
- model_path=model_path, n_ctx=N_CTX, n_threads=N_THREADS, n_batch=N_BATCH, verbose=False,
231
- n_gpu_layers=-1, flash_attn=USE_FLASH_ATTN, logits_all=ENABLE_FULL_CONFIDENCE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  )
233
  return llm
234
  except Exception as e:
235
- print(f"❌ Error: {e}")
236
- raise e
237
 
238
  class AnalyzeRequest(BaseModel):
239
  text: str
@@ -262,28 +313,34 @@ async def analyze(req: AnalyzeRequest):
262
  async with GEN_LOCK:
263
  start_time = time.time()
264
  output = llm(
265
- prompt, max_tokens=req.max_new_tokens, temperature=req.temperature, top_p=0.95,
266
- repeat_penalty=1.15, stop=["</s>", "```"], echo=False, logprobs=req_logprobs
 
 
 
 
 
 
267
  )
268
  gen_time = time.time() - start_time
269
 
270
- raw_text = output['choices'][0]['text']
271
 
272
  tokens = []
273
  logprobs = []
274
  top_logprobs = []
275
 
276
- if ENABLE_FULL_CONFIDENCE and 'logprobs' in output['choices'][0]:
277
- lp_data = output['choices'][0]['logprobs']
278
- tokens = lp_data.get('tokens', [])
279
- logprobs = lp_data.get('token_logprobs', [])
280
- top_logprobs = lp_data.get('top_logprobs', [])
281
 
282
  cleaned_text = clean_and_repair_json(raw_text)
283
- result_json = {}
284
  success = False
285
  technical_confidence = 0.0
286
- label_distribution = {}
287
 
288
  try:
289
  result_json = json.loads(cleaned_text)
@@ -292,10 +349,8 @@ async def analyze(req: AnalyzeRequest):
292
  if result_json.get("has_fallacy") and result_json.get("fallacies"):
293
  for fallacy in result_json["fallacies"]:
294
  d_type = fallacy.get("type", "")
295
-
296
  if ENABLE_FULL_CONFIDENCE:
297
  info = extract_label_info(d_type, tokens, logprobs, top_logprobs)
298
-
299
  spec_conf = info["conf"]
300
  label_distribution = info["dist"]
301
 
@@ -305,11 +360,12 @@ async def analyze(req: AnalyzeRequest):
305
  declared = fallacy.get("confidence", 0.8)
306
  fallacy["confidence"] = round((declared + spec_conf) / 2, 2)
307
 
308
- if technical_confidence == 0.0: technical_confidence = spec_conf
 
309
  else:
310
- if ENABLE_FULL_CONFIDENCE:
311
- info = extract_label_info("has_fallacy", tokens, logprobs, top_logprobs)
312
- label_distribution = info["dist"]
313
 
314
  except json.JSONDecodeError:
315
  result_json = {"error": "JSON Error", "raw": raw_text}
@@ -321,8 +377,8 @@ async def analyze(req: AnalyzeRequest):
321
  "meta": {
322
  "tech_conf": technical_confidence,
323
  "distribution": label_distribution,
324
- "time": round(gen_time, 2)
325
- }
326
  }
327
 
328
  @app.post("/rewrite")
@@ -331,14 +387,22 @@ async def rewrite(req: RewriteRequest):
331
  system_prompt = REWRITE_SYS_PROMPT.format(fallacy_type=req.fallacy_type, rationale=req.rationale)
332
  prompt = f"[INST] {system_prompt}\n\nTEXT TO FIX:\n{req.text} [/INST]"
333
  async with GEN_LOCK:
334
- output = llm(prompt, max_tokens=req.max_new_tokens, temperature=0.7, repeat_penalty=1.1, stop=["</s>", "}"])
 
 
 
 
 
 
335
  try:
336
- res = json.loads(clean_and_repair_json(output['choices'][0]['text']))
337
  ok = True
338
- except:
339
- res = {"raw": output['choices'][0]['text']}
340
  ok = False
341
  return {"ok": ok, "result": res}
342
 
343
  if __name__ == "__main__":
344
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
3
  import time
4
  import math
5
  import asyncio
 
6
  from functools import lru_cache
7
+ from typing import Any, Dict, List
8
  from fastapi.middleware.cors import CORSMiddleware
 
9
  import uvicorn
10
  from fastapi import FastAPI
11
  from pydantic import BaseModel
12
  from huggingface_hub import hf_hub_download
13
  from llama_cpp import Llama
14
 
15
+ # ----------------------------
16
+ # Config (env overridable)
17
+ # ----------------------------
18
+ def _int_env(name: str, default: int) -> int:
19
+ try:
20
+ return int(os.getenv(name, str(default)))
21
+ except Exception:
22
+ return default
23
+
24
+ def _bool_env(name: str, default: bool) -> bool:
25
+ v = os.getenv(name, None)
26
+ if v is None:
27
+ return default
28
+ return v.strip().lower() in {"1", "true", "yes", "y", "on"}
29
+
30
+ ENABLE_FULL_CONFIDENCE = _bool_env("ENABLE_FULL_CONFIDENCE", True)
31
+ USE_FLASH_ATTN = _bool_env("USE_FLASH_ATTN", True)
32
+
33
+ N_BATCH = _int_env("N_BATCH", 1024)
34
+ N_THREADS = _int_env("N_THREADS", 6)
35
+ N_CTX = _int_env("N_CTX", 1024)
36
+
37
+ # For CPU builds, keep this at 0
38
+ N_GPU_LAYERS = _int_env("N_GPU_LAYERS", 0)
39
+
40
+ # ----------------------------
41
+ # Cache dir (portable)
42
+ # ----------------------------
43
+ # Colab Drive (optional)
44
  DRIVE_CACHE_DIR = "/content/drive/MyDrive/FADES_Models_Cache"
 
 
 
45
 
46
+ # HF Spaces / Docker-friendly cache (your Dockerfile sets these to /data/...)
47
+ HF_CACHE = (
48
+ os.getenv("HUGGINGFACE_HUB_CACHE")
49
+ or (os.path.join(os.getenv("HF_HOME", "/data"), ".cache", "huggingface", "hub"))
50
+ )
51
+
52
+ # Choose best available cache dir
53
+ if os.path.exists("/content/drive"):
54
+ CACHE_DIR = DRIVE_CACHE_DIR
55
+ else:
56
+ CACHE_DIR = HF_CACHE or "/tmp/hf_cache"
57
+
58
+ try:
59
+ os.makedirs(CACHE_DIR, exist_ok=True)
60
+ except Exception:
61
+ pass
62
+
63
+ GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
64
+ GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
65
+
66
  GEN_LOCK = asyncio.Lock()
67
  app = FastAPI(title="FADES Fallacy Detector API (Final)")
68
 
 
90
  "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
91
  ]
92
 
 
93
  LABEL_MAPPING = {
94
  "none": ["none"],
95
  "faulty": ["faulty generalization"],
 
103
  "intentional": ["intentional"]
104
  }
105
 
 
106
  ANALYZE_SYS_PROMPT = """You are a logic expert. Detect logical fallacies.
107
  OUTPUT JSON ONLY.
108
 
 
151
  "overall_explanation": string
152
  }}
153
  """
154
+
155
  REWRITE_SYS_PROMPT = """You are a text editor. Rewrite to remove the fallacy.
156
  Output Format (JSON):
157
  {{
 
162
 
163
  def clean_and_repair_json(text: str) -> str:
164
  text = text.replace("```json", "").replace("```", "").strip()
 
 
165
  start = text.find("{")
166
+ if start == -1:
167
+ return text
168
 
169
  depth = 0
170
  for i, char in enumerate(text[start:], start=start):
 
173
  elif char == "}":
174
  depth -= 1
175
  if depth == 0:
176
+ potential_json = text[start:i + 1]
177
  try:
178
  json.loads(potential_json)
179
+ return potential_json
180
+ except Exception:
181
  pass
182
+
183
  end = text.rfind("}")
184
  if start != -1 and end != -1:
185
+ return text[start:end + 1]
 
186
  return text
187
 
188
  def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, float]]) -> Dict[str, float]:
 
 
 
 
 
189
  if start_index < 0 or start_index >= len(top_logprobs_list):
190
  return {}
191
  candidates = top_logprobs_list[start_index]
192
 
193
+ distribution: Dict[str, float] = {}
 
 
194
  for token, logprob in candidates.items():
195
  clean_tok = str(token).replace(" ", "").lower().strip()
196
  prob = math.exp(logprob)
 
198
  matched = False
199
  for key, group in LABEL_MAPPING.items():
200
  if clean_tok.startswith(key):
201
+ group_name = (
202
+ f"{key.capitalize()} ({'/'.join([g.split()[-1] for g in group])})"
203
+ if len(group) > 1
204
+ else group[0].title()
205
+ )
206
  distribution[group_name] = distribution.get(group_name, 0.0) + prob
207
  matched = True
208
  break
 
210
  if not matched:
211
  distribution["_other_"] = distribution.get("_other_", 0.0) + prob
212
 
 
 
213
  return {k: round(v, 4) for k, v in distribution.items() if v > 0.001}
214
 
215
  def extract_label_info(target_label: str, tokens: List[str], logprobs: List[float], top_logprobs: List[Dict]) -> Dict:
216
+ if not target_label:
217
+ return {"conf": 0.0, "dist": {}}
218
 
219
  target_clean = target_label.lower().strip()
220
  current_text = ""
221
  start_index = -1
222
 
 
223
  for i, token in enumerate(tokens):
224
+ tok_str = str(token) if not isinstance(token, bytes) else token.decode("utf-8", errors="ignore")
225
  current_text += tok_str
 
226
  if target_clean in current_text.lower() and start_index == -1:
227
  start_index = max(0, i - 5)
 
 
228
  for j in range(start_index, i + 1):
229
  t_s = str(tokens[j]).lower()
230
+ if target_clean and target_clean[0] in t_s:
 
231
  start_index = j
232
  break
233
  break
234
 
235
  conf = 0.0
236
+ dist: Dict[str, float] = {}
237
 
238
  if start_index != -1:
239
+ valid = [
240
+ math.exp(logprobs[k])
241
+ for k in range(start_index, min(len(logprobs), start_index + 3))
242
+ if logprobs[k] is not None
243
+ ]
244
+ conf = round(sum(valid) / len(valid), 4) if valid else 0.0
245
  if top_logprobs:
246
  dist = analyze_alternatives(start_index, top_logprobs)
247
 
 
249
 
250
  @lru_cache(maxsize=1)
251
  def get_model():
252
+ print("📦 Loading Model...")
253
+ model_path = hf_hub_download(
254
+ repo_id=GGUF_REPO_ID,
255
+ filename=GGUF_FILENAME,
256
+ cache_dir=CACHE_DIR,
257
+ repo_type="model",
258
+ )
259
+
260
+ # Try with flash_attn + gpu layers (if supported), otherwise fallback safely (CPU)
261
  try:
 
262
  llm = Llama(
263
+ model_path=model_path,
264
+ n_ctx=N_CTX,
265
+ n_threads=N_THREADS,
266
+ n_batch=N_BATCH,
267
+ verbose=False,
268
+ n_gpu_layers=N_GPU_LAYERS,
269
+ flash_attn=USE_FLASH_ATTN,
270
+ logits_all=ENABLE_FULL_CONFIDENCE,
271
+ )
272
+ return llm
273
+ except TypeError:
274
+ # Older builds may not accept flash_attn
275
+ llm = Llama(
276
+ model_path=model_path,
277
+ n_ctx=N_CTX,
278
+ n_threads=N_THREADS,
279
+ n_batch=N_BATCH,
280
+ verbose=False,
281
+ n_gpu_layers=0,
282
+ logits_all=ENABLE_FULL_CONFIDENCE,
283
  )
284
  return llm
285
  except Exception as e:
286
+ print(f"❌ Error while loading model: {e}")
287
+ raise
288
 
289
  class AnalyzeRequest(BaseModel):
290
  text: str
 
313
  async with GEN_LOCK:
314
  start_time = time.time()
315
  output = llm(
316
+ prompt,
317
+ max_tokens=req.max_new_tokens,
318
+ temperature=req.temperature,
319
+ top_p=0.95,
320
+ repeat_penalty=1.15,
321
+ stop=["</s>", "```"],
322
+ echo=False,
323
+ logprobs=req_logprobs,
324
  )
325
  gen_time = time.time() - start_time
326
 
327
+ raw_text = output["choices"][0]["text"]
328
 
329
  tokens = []
330
  logprobs = []
331
  top_logprobs = []
332
 
333
+ if ENABLE_FULL_CONFIDENCE and "logprobs" in output["choices"][0]:
334
+ lp_data = output["choices"][0]["logprobs"]
335
+ tokens = lp_data.get("tokens", [])
336
+ logprobs = lp_data.get("token_logprobs", [])
337
+ top_logprobs = lp_data.get("top_logprobs", [])
338
 
339
  cleaned_text = clean_and_repair_json(raw_text)
340
+ result_json: Dict[str, Any] = {}
341
  success = False
342
  technical_confidence = 0.0
343
+ label_distribution: Dict[str, float] = {}
344
 
345
  try:
346
  result_json = json.loads(cleaned_text)
 
349
  if result_json.get("has_fallacy") and result_json.get("fallacies"):
350
  for fallacy in result_json["fallacies"]:
351
  d_type = fallacy.get("type", "")
 
352
  if ENABLE_FULL_CONFIDENCE:
353
  info = extract_label_info(d_type, tokens, logprobs, top_logprobs)
 
354
  spec_conf = info["conf"]
355
  label_distribution = info["dist"]
356
 
 
360
  declared = fallacy.get("confidence", 0.8)
361
  fallacy["confidence"] = round((declared + spec_conf) / 2, 2)
362
 
363
+ if technical_confidence == 0.0:
364
+ technical_confidence = spec_conf
365
  else:
366
+ if ENABLE_FULL_CONFIDENCE:
367
+ info = extract_label_info("has_fallacy", tokens, logprobs, top_logprobs)
368
+ label_distribution = info["dist"]
369
 
370
  except json.JSONDecodeError:
371
  result_json = {"error": "JSON Error", "raw": raw_text}
 
377
  "meta": {
378
  "tech_conf": technical_confidence,
379
  "distribution": label_distribution,
380
+ "time": round(gen_time, 2),
381
+ },
382
  }
383
 
384
  @app.post("/rewrite")
 
387
  system_prompt = REWRITE_SYS_PROMPT.format(fallacy_type=req.fallacy_type, rationale=req.rationale)
388
  prompt = f"[INST] {system_prompt}\n\nTEXT TO FIX:\n{req.text} [/INST]"
389
  async with GEN_LOCK:
390
+ output = llm(
391
+ prompt,
392
+ max_tokens=req.max_new_tokens,
393
+ temperature=0.7,
394
+ repeat_penalty=1.1,
395
+ stop=["</s>", "}"],
396
+ )
397
  try:
398
+ res = json.loads(clean_and_repair_json(output["choices"][0]["text"]))
399
  ok = True
400
+ except Exception:
401
+ res = {"raw": output["choices"][0]["text"]}
402
  ok = False
403
  return {"ok": ok, "result": res}
404
 
405
  if __name__ == "__main__":
406
+ # Works both locally + HF Spaces
407
+ port = _int_env("PORT", 7860)
408
+ uvicorn.run(app, host="0.0.0.0", port=port)
utils.py CHANGED
@@ -1,7 +1,17 @@
1
  import json
2
  import re
3
  from typing import Any, Dict, Optional, List
4
- from prompts import ALLOWED_LABELS
 
 
 
 
 
 
 
 
 
 
5
 
6
  # ----------------------------
7
  # Robust JSON extraction
@@ -65,7 +75,7 @@ def strip_template_sentence(text: str) -> str:
65
  out = _TEMPLATE_RE.sub("", text)
66
  out = out.replace("..", ".").strip()
67
  out = re.sub(r"\s{2,}", " ", out)
68
- out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
69
  return out
70
 
71
 
 
1
  import json
2
  import re
3
  from typing import Any, Dict, Optional, List
4
+
5
+ # If prompts.py doesn't exist, keep a safe fallback
6
+ try:
7
+ from prompts import ALLOWED_LABELS # type: ignore
8
+ except Exception:
9
+ ALLOWED_LABELS = [
10
+ "none", "faulty generalization", "false causality", "circular reasoning",
11
+ "ad populum", "ad hominem", "fallacy of logic", "appeal to emotion",
12
+ "false dilemma", "equivocation", "fallacy of extension",
13
+ "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
14
+ ]
15
 
16
  # ----------------------------
17
  # Robust JSON extraction
 
75
  out = _TEMPLATE_RE.sub("", text)
76
  out = out.replace("..", ".").strip()
77
  out = re.sub(r"\s{2,}", " ", out)
78
+ out = re.sub(r"^\s*[\-–—:;\.\s]+", "", out).strip()
79
  return out
80
 
81