Spaces:

maxime-antoine-dev
/

fades-api

Sleeping

App Files Files Community

maxime-antoine-dev committed 21 days ago

Commit

d0d7bc6

1 Parent(s): df0ce09

fixed build hf

Browse files

Files changed (6) hide show

Dockerfile +4 -2
data/.cache/huggingface/hub/.locks/models--maxime-antoine-dev--fades-mistral-v02-gguf/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.lock +0 -0
data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/blobs/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.incomplete +0 -0
data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/refs/main +1 -0
main.py +142 -78
utils.py +12 -2

Dockerfile CHANGED Viewed

@@ -19,11 +19,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 COPY requirements.txt /app/requirements.txt
-# pip tooling up-to-date helps a lot for pyproject builds
 RUN pip install --upgrade pip setuptools wheel \
  && pip install -r /app/requirements.txt
 COPY main.py /app/main.py
 EXPOSE 7860
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 COPY requirements.txt /app/requirements.txt
 RUN pip install --upgrade pip setuptools wheel \
  && pip install -r /app/requirements.txt
 COPY main.py /app/main.py
+COPY utils.py /app/utils.py
 EXPOSE 7860
+# PORT is set by HF Spaces; default to 7860 locally
+CMD ["bash", "-lc", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]

data/.cache/huggingface/hub/.locks/models--maxime-antoine-dev--fades-mistral-v02-gguf/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.lock ADDED Viewed

File without changes

data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/blobs/bb616db9af8e0a80a6e48d6848ebadc8cff7a20bdf21c4e752c1320ca60725f6.incomplete ADDED Viewed

File without changes

data/.cache/huggingface/hub/models--maxime-antoine-dev--fades-mistral-v02-gguf/refs/main ADDED Viewed

	@@ -0,0 +1 @@


1	+ 18135d5f557c580cdb31f394dc47b11be2e2e09e

main.py CHANGED Viewed

@@ -3,30 +3,66 @@ import json
 import time
 import math
 import asyncio
-import re
 from functools import lru_cache
-from typing import Any, Dict, List, Optional
 from fastapi.middleware.cors import CORSMiddleware
-import nest_asyncio
 import uvicorn
 from fastapi import FastAPI
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
-ENABLE_FULL_CONFIDENCE = True
-USE_FLASH_ATTN = True
-N_BATCH = 1024
-N_THREADS = 6
-N_CTX = 1024
 DRIVE_CACHE_DIR = "/content/drive/MyDrive/FADES_Models_Cache"
-if os.path.exists("/content/drive") and not os.path.exists(DRIVE_CACHE_DIR):
-    try: os.makedirs(DRIVE_CACHE_DIR)
-    except: pass
-GGUF_REPO_ID = "maxime-antoine-dev/fades-mistral-v02-gguf"
-GGUF_FILENAME = "mistral_v02_fades.Q4_K_M.gguf"
 GEN_LOCK = asyncio.Lock()
 app = FastAPI(title="FADES Fallacy Detector API (Final)")
@@ -54,7 +90,6 @@ ALLOWED_LABELS = [
     "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
 ]
-# mapping des premiers mots vers les labels (pour regrouper les probas)
 LABEL_MAPPING = {
     "none": ["none"],
     "faulty": ["faulty generalization"],
@@ -68,7 +103,6 @@ LABEL_MAPPING = {
     "intentional": ["intentional"]
 }
-# On ajoute des exemples (Few-Shot) pour guider le modèle
 ANALYZE_SYS_PROMPT = """You are a logic expert. Detect logical fallacies.
 OUTPUT JSON ONLY.
@@ -117,6 +151,7 @@ JSON SCHEMA:
   "overall_explanation": string
 }}
 """
 REWRITE_SYS_PROMPT = """You are a text editor. Rewrite to remove the fallacy.
 Output Format (JSON):
 {{
@@ -127,10 +162,9 @@ Output Format (JSON):
 def clean_and_repair_json(text: str) -> str:
     text = text.replace("```json", "").replace("```", "").strip()
-    # 2. On cherche le premier '{'
     start = text.find("{")
-    if start == -1: return text
     depth = 0
     for i, char in enumerate(text[start:], start=start):
@@ -139,31 +173,24 @@ def clean_and_repair_json(text: str) -> str:
         elif char == "}":
             depth -= 1
             if depth == 0:
-                potential_json = text[start:i+1]
                 try:
                     json.loads(potential_json)
-                    return potential_json
-                except:
                     pass
     end = text.rfind("}")
     if start != -1 and end != -1:
-        return text[start:end+1]
     return text
 def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, float]]) -> Dict[str, float]:
-    """
-    Regarde les 'top_logprobs' au moment où le label a commencé à être écrit.
-    Retourne un dictionnaire des probabilités pour chaque FAMILLE de label.
-    Ex: {"Ad ...": 0.8, "Faulty ...": 0.1, "None": 0.05}
-    """
     if start_index < 0 or start_index >= len(top_logprobs_list):
         return {}
     candidates = top_logprobs_list[start_index]
-    distribution = {}
-    total_prob = 0.0
     for token, logprob in candidates.items():
         clean_tok = str(token).replace(" ", "").lower().strip()
         prob = math.exp(logprob)
@@ -171,7 +198,11 @@ def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, flo
         matched = False
         for key, group in LABEL_MAPPING.items():
             if clean_tok.startswith(key):
-                group_name = f"{key.capitalize()} ({'/'.join([g.split()[-1] for g in group])})" if len(group) > 1 else group[0].title()
                 distribution[group_name] = distribution.get(group_name, 0.0) + prob
                 matched = True
                 break
@@ -179,43 +210,38 @@ def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, flo
         if not matched:
             distribution["_other_"] = distribution.get("_other_", 0.0) + prob
-        total_prob += prob
     return {k: round(v, 4) for k, v in distribution.items() if v > 0.001}
 def extract_label_info(target_label: str, tokens: List[str], logprobs: List[float], top_logprobs: List[Dict]) -> Dict:
-    """Récupère la confiance spécifique ET la distribution des alternatives"""
-    if not target_label: return {"conf": 0.0, "dist": {}}
     target_clean = target_label.lower().strip()
     current_text = ""
     start_index = -1
-    # on chreche trouver où commence le label
     for i, token in enumerate(tokens):
-        tok_str = str(token) if not isinstance(token, bytes) else token.decode('utf-8', errors='ignore')
         current_text += tok_str
-        #oOn cherche le label s'il apparaît
         if target_clean in current_text.lower() and start_index == -1:
             start_index = max(0, i - 5)
-            # on affine pour trouver le vrai début (souvent précédé de guillemets)
-            # c'est approximatif mais suffisant pour choper le bon token
             for j in range(start_index, i + 1):
                 t_s = str(tokens[j]).lower()
-                # si le token commence par la première lettre du label
-                if target_clean[0] in t_s:
                     start_index = j
                     break
             break
     conf = 0.0
-    dist = {}
     if start_index != -1:
-        valid = [math.exp(logprobs[k]) for k in range(start_index, min(len(logprobs), start_index+3)) if logprobs[k] is not None]
-        conf = round(sum(valid)/len(valid), 4) if valid else 0.0
         if top_logprobs:
             dist = analyze_alternatives(start_index, top_logprobs)
@@ -223,17 +249,42 @@ def extract_label_info(target_label: str, tokens: List[str], logprobs: List[floa
 @lru_cache(maxsize=1)
 def get_model():
-    print(f"📦 Loading Model...")
     try:
-        model_path = hf_hub_download(repo_id=GGUF_REPO_ID, filename=GGUF_FILENAME, cache_dir=DRIVE_CACHE_DIR)
         llm = Llama(
-            model_path=model_path, n_ctx=N_CTX, n_threads=N_THREADS, n_batch=N_BATCH, verbose=False,
-            n_gpu_layers=-1, flash_attn=USE_FLASH_ATTN, logits_all=ENABLE_FULL_CONFIDENCE
         )
         return llm
     except Exception as e:
-        print(f"❌ Error: {e}")
-        raise e
 class AnalyzeRequest(BaseModel):
     text: str
@@ -262,28 +313,34 @@ async def analyze(req: AnalyzeRequest):
     async with GEN_LOCK:
         start_time = time.time()
         output = llm(
-            prompt, max_tokens=req.max_new_tokens, temperature=req.temperature, top_p=0.95,
-            repeat_penalty=1.15, stop=["</s>", "```"], echo=False, logprobs=req_logprobs
         )
         gen_time = time.time() - start_time
-    raw_text = output['choices'][0]['text']
     tokens = []
     logprobs = []
     top_logprobs = []
-    if ENABLE_FULL_CONFIDENCE and 'logprobs' in output['choices'][0]:
-        lp_data = output['choices'][0]['logprobs']
-        tokens = lp_data.get('tokens', [])
-        logprobs = lp_data.get('token_logprobs', [])
-        top_logprobs = lp_data.get('top_logprobs', [])
     cleaned_text = clean_and_repair_json(raw_text)
-    result_json = {}
     success = False
     technical_confidence = 0.0
-    label_distribution = {}
     try:
         result_json = json.loads(cleaned_text)
@@ -292,10 +349,8 @@ async def analyze(req: AnalyzeRequest):
         if result_json.get("has_fallacy") and result_json.get("fallacies"):
             for fallacy in result_json["fallacies"]:
                 d_type = fallacy.get("type", "")
                 if ENABLE_FULL_CONFIDENCE:
                     info = extract_label_info(d_type, tokens, logprobs, top_logprobs)
                     spec_conf = info["conf"]
                     label_distribution = info["dist"]
@@ -305,11 +360,12 @@ async def analyze(req: AnalyzeRequest):
                     declared = fallacy.get("confidence", 0.8)
                     fallacy["confidence"] = round((declared + spec_conf) / 2, 2)
-                    if technical_confidence == 0.0: technical_confidence = spec_conf
         else:
-             if ENABLE_FULL_CONFIDENCE:
-                 info = extract_label_info("has_fallacy", tokens, logprobs, top_logprobs)
-                 label_distribution = info["dist"]
     except json.JSONDecodeError:
         result_json = {"error": "JSON Error", "raw": raw_text}
@@ -321,8 +377,8 @@ async def analyze(req: AnalyzeRequest):
         "meta": {
             "tech_conf": technical_confidence,
             "distribution": label_distribution,
-            "time": round(gen_time, 2)
-        }
     }
 @app.post("/rewrite")
@@ -331,14 +387,22 @@ async def rewrite(req: RewriteRequest):
     system_prompt = REWRITE_SYS_PROMPT.format(fallacy_type=req.fallacy_type, rationale=req.rationale)
     prompt = f"[INST] {system_prompt}\n\nTEXT TO FIX:\n{req.text} [/INST]"
     async with GEN_LOCK:
-        output = llm(prompt, max_tokens=req.max_new_tokens, temperature=0.7, repeat_penalty=1.1, stop=["</s>", "}"])
     try:
-        res = json.loads(clean_and_repair_json(output['choices'][0]['text']))
         ok = True
-    except:
-        res = {"raw": output['choices'][0]['text']}
         ok = False
     return {"ok": ok, "result": res}
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 import time
 import math
 import asyncio
 from functools import lru_cache
+from typing import Any, Dict, List
 from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
 from fastapi import FastAPI
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+# ----------------------------
+# Config (env overridable)
+# ----------------------------
+def _int_env(name: str, default: int) -> int:
+    # Read environment variable `name` and convert it to an int.
+    # Returns `default` when the variable is unset (the default is passed to
+    # os.getenv as a string) or when its value cannot be parsed by int(),
+    # e.g. an empty or non-numeric string.
+    try:
+        return int(os.getenv(name, str(default)))
+    except Exception:
+        # Best-effort: a malformed value must not prevent the app from starting.
+        return default
+def _bool_env(name: str, default: bool) -> bool:
+    # Read environment variable `name` as a boolean flag.
+    # Returns `default` only when the variable is unset. When it is set, the
+    # result is True for "1", "true", "yes", "y" or "on" (case-insensitive,
+    # surrounding whitespace ignored) and False for any other value,
+    # including the empty string.
+    v = os.getenv(name, None)
+    if v is None:
+        return default
+    return v.strip().lower() in {"1", "true", "yes", "y", "on"}
+# Runtime settings. Each one can be overridden by an environment variable of
+# the same name; the literal passed here is the value used when it is unset.
+# ENABLE_FULL_CONFIDENCE is forwarded to Llama as `logits_all` and also gates
+# the logprob-based confidence extraction in the analyze endpoint.
+ENABLE_FULL_CONFIDENCE = _bool_env("ENABLE_FULL_CONFIDENCE", True)
+# Forwarded to Llama as `flash_attn` (first load attempt only).
+USE_FLASH_ATTN = _bool_env("USE_FLASH_ATTN", True)
+# Forwarded to Llama as `n_batch`, `n_threads` and `n_ctx` respectively.
+N_BATCH = _int_env("N_BATCH", 1024)
+N_THREADS = _int_env("N_THREADS", 6)
+N_CTX = _int_env("N_CTX", 1024)
+# For CPU builds, keep this at 0
+# Forwarded to Llama as `n_gpu_layers`.
+N_GPU_LAYERS = _int_env("N_GPU_LAYERS", 0)
+# ----------------------------
+# Cache dir (portable)
+# ----------------------------
+# Colab Drive (optional)
 DRIVE_CACHE_DIR = "/content/drive/MyDrive/FADES_Models_Cache"
+# HF Spaces / Docker-friendly cache (your Dockerfile sets these to /data/...)
+# Resolution order: HUGGINGFACE_HUB_CACHE if set and non-empty, otherwise
+# "<HF_HOME>/.cache/huggingface/hub" with HF_HOME defaulting to "/data".
+HF_CACHE = (
+    os.getenv("HUGGINGFACE_HUB_CACHE")
+    or (os.path.join(os.getenv("HF_HOME", "/data"), ".cache", "huggingface", "hub"))
+)
+# Choose best available cache dir
+# The Colab Drive folder wins whenever /content/drive exists.
+if os.path.exists("/content/drive"):
+    CACHE_DIR = DRIVE_CACHE_DIR
+else:
+    # NOTE(review): HF_CACHE is never empty here (the os.path.join branch
+    # always yields a non-empty path), so the "/tmp/hf_cache" fallback below
+    # is effectively unreachable.
+    CACHE_DIR = HF_CACHE or "/tmp/hf_cache"
+try:
+    os.makedirs(CACHE_DIR, exist_ok=True)
+except Exception:
+    # Best-effort: a failure to create the directory is ignored, and CACHE_DIR
+    # is still handed to hf_hub_download later as its cache_dir.
+    pass
+GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "maxime-antoine-dev/fades-mistral-v02-gguf")
+GGUF_FILENAME = os.getenv("GGUF_FILENAME", "mistral_v02_fades.Q4_K_M.gguf")
 GEN_LOCK = asyncio.Lock()
 app = FastAPI(title="FADES Fallacy Detector API (Final)")
     "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
 ]
 LABEL_MAPPING = {
     "none": ["none"],
     "faulty": ["faulty generalization"],
     "intentional": ["intentional"]
 }
 ANALYZE_SYS_PROMPT = """You are a logic expert. Detect logical fallacies.
 OUTPUT JSON ONLY.
   "overall_explanation": string
 }}
 """
 REWRITE_SYS_PROMPT = """You are a text editor. Rewrite to remove the fallacy.
 Output Format (JSON):
 {{
 def clean_and_repair_json(text: str) -> str:
     text = text.replace("```json", "").replace("```", "").strip()
     start = text.find("{")
+    if start == -1:
+        return text
     depth = 0
     for i, char in enumerate(text[start:], start=start):
         elif char == "}":
             depth -= 1
             if depth == 0:
+                potential_json = text[start:i + 1]
                 try:
                     json.loads(potential_json)
+                    return potential_json
+                except Exception:
                     pass
     end = text.rfind("}")
     if start != -1 and end != -1:
+        return text[start:end + 1]
     return text
 def analyze_alternatives(start_index: int, top_logprobs_list: List[Dict[str, float]]) -> Dict[str, float]:
     if start_index < 0 or start_index >= len(top_logprobs_list):
         return {}
     candidates = top_logprobs_list[start_index]
+    distribution: Dict[str, float] = {}
     for token, logprob in candidates.items():
         clean_tok = str(token).replace(" ", "").lower().strip()
         prob = math.exp(logprob)
         matched = False
         for key, group in LABEL_MAPPING.items():
             if clean_tok.startswith(key):
+                group_name = (
+                    f"{key.capitalize()} ({'/'.join([g.split()[-1] for g in group])})"
+                    if len(group) > 1
+                    else group[0].title()
+                )
                 distribution[group_name] = distribution.get(group_name, 0.0) + prob
                 matched = True
                 break
         if not matched:
             distribution["_other_"] = distribution.get("_other_", 0.0) + prob
     return {k: round(v, 4) for k, v in distribution.items() if v > 0.001}
 def extract_label_info(target_label: str, tokens: List[str], logprobs: List[float], top_logprobs: List[Dict]) -> Dict:
+    if not target_label:
+        return {"conf": 0.0, "dist": {}}
     target_clean = target_label.lower().strip()
     current_text = ""
     start_index = -1
     for i, token in enumerate(tokens):
+        tok_str = str(token) if not isinstance(token, bytes) else token.decode("utf-8", errors="ignore")
         current_text += tok_str
         if target_clean in current_text.lower() and start_index == -1:
             start_index = max(0, i - 5)
             for j in range(start_index, i + 1):
                 t_s = str(tokens[j]).lower()
+                if target_clean and target_clean[0] in t_s:
                     start_index = j
                     break
             break
     conf = 0.0
+    dist: Dict[str, float] = {}
     if start_index != -1:
+        valid = [
+            math.exp(logprobs[k])
+            for k in range(start_index, min(len(logprobs), start_index + 3))
+            if logprobs[k] is not None
+        ]
+        conf = round(sum(valid) / len(valid), 4) if valid else 0.0
         if top_logprobs:
             dist = analyze_alternatives(start_index, top_logprobs)
 @lru_cache(maxsize=1)
 def get_model():
+    # Download the GGUF model file if needed and build the Llama instance.
+    # lru_cache on this zero-argument function means the instance returned by
+    # the first successful call is reused by every later call.
+    # Raises whatever hf_hub_download or Llama raise; only errors from the
+    # first Llama(...) attempt are printed before being re-raised.
+    print("📦 Loading Model...")
+    # The download happens outside the try block, so a download failure
+    # propagates without the "Error while loading model" message.
+    model_path = hf_hub_download(
+        repo_id=GGUF_REPO_ID,
+        filename=GGUF_FILENAME,
+        cache_dir=CACHE_DIR,
+        repo_type="model",
+    )
+    # Try with flash_attn + gpu layers (if supported), otherwise fallback safely (CPU)
     try:
         llm = Llama(
+            model_path=model_path,
+            n_ctx=N_CTX,
+            n_threads=N_THREADS,
+            n_batch=N_BATCH,
+            verbose=False,
+            n_gpu_layers=N_GPU_LAYERS,
+            flash_attn=USE_FLASH_ATTN,
+            logits_all=ENABLE_FULL_CONFIDENCE,
+        )
+        return llm
+    except TypeError:
+        # Older builds may not accept flash_attn
+        # The retry drops flash_attn and forces n_gpu_layers=0, ignoring
+        # N_GPU_LAYERS. An exception raised by this second call is not caught
+        # by the `except Exception` clause below; it propagates unlogged.
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=N_CTX,
+            n_threads=N_THREADS,
+            n_batch=N_BATCH,
+            verbose=False,
+            n_gpu_layers=0,
+            logits_all=ENABLE_FULL_CONFIDENCE,
         )
         return llm
     except Exception as e:
+        print(f"❌ Error while loading model: {e}")
+        # Bare raise keeps the original traceback.
+        raise
 class AnalyzeRequest(BaseModel):
     text: str
     async with GEN_LOCK:
         start_time = time.time()
         output = llm(
+            prompt,
+            max_tokens=req.max_new_tokens,
+            temperature=req.temperature,
+            top_p=0.95,
+            repeat_penalty=1.15,
+            stop=["</s>", "```"],
+            echo=False,
+            logprobs=req_logprobs,
         )
         gen_time = time.time() - start_time
+    raw_text = output["choices"][0]["text"]
     tokens = []
     logprobs = []
     top_logprobs = []
+    if ENABLE_FULL_CONFIDENCE and "logprobs" in output["choices"][0]:
+        lp_data = output["choices"][0]["logprobs"]
+        tokens = lp_data.get("tokens", [])
+        logprobs = lp_data.get("token_logprobs", [])
+        top_logprobs = lp_data.get("top_logprobs", [])
     cleaned_text = clean_and_repair_json(raw_text)
+    result_json: Dict[str, Any] = {}
     success = False
     technical_confidence = 0.0
+    label_distribution: Dict[str, float] = {}
     try:
         result_json = json.loads(cleaned_text)
         if result_json.get("has_fallacy") and result_json.get("fallacies"):
             for fallacy in result_json["fallacies"]:
                 d_type = fallacy.get("type", "")
                 if ENABLE_FULL_CONFIDENCE:
                     info = extract_label_info(d_type, tokens, logprobs, top_logprobs)
                     spec_conf = info["conf"]
                     label_distribution = info["dist"]
                     declared = fallacy.get("confidence", 0.8)
                     fallacy["confidence"] = round((declared + spec_conf) / 2, 2)
+                    if technical_confidence == 0.0:
+                        technical_confidence = spec_conf
         else:
+            if ENABLE_FULL_CONFIDENCE:
+                info = extract_label_info("has_fallacy", tokens, logprobs, top_logprobs)
+                label_distribution = info["dist"]
     except json.JSONDecodeError:
         result_json = {"error": "JSON Error", "raw": raw_text}
         "meta": {
             "tech_conf": technical_confidence,
             "distribution": label_distribution,
+            "time": round(gen_time, 2),
+        },
     }
 @app.post("/rewrite")
     system_prompt = REWRITE_SYS_PROMPT.format(fallacy_type=req.fallacy_type, rationale=req.rationale)
     prompt = f"[INST] {system_prompt}\n\nTEXT TO FIX:\n{req.text} [/INST]"
     async with GEN_LOCK:
+        output = llm(
+            prompt,
+            max_tokens=req.max_new_tokens,
+            temperature=0.7,
+            repeat_penalty=1.1,
+            stop=["</s>", "}"],
+        )
     try:
+        res = json.loads(clean_and_repair_json(output["choices"][0]["text"]))
         ok = True
+    except Exception:
+        res = {"raw": output["choices"][0]["text"]}
         ok = False
     return {"ok": ok, "result": res}
 if __name__ == "__main__":
+    # Works both locally + HF Spaces
+    port = _int_env("PORT", 7860)
+    uvicorn.run(app, host="0.0.0.0", port=port)

utils.py CHANGED Viewed

@@ -1,7 +1,17 @@
 import json
 import re
 from typing import Any, Dict, Optional, List
-from prompts import ALLOWED_LABELS
 # ----------------------------
 # Robust JSON extraction
@@ -65,7 +75,7 @@ def strip_template_sentence(text: str) -> str:
     out = _TEMPLATE_RE.sub("", text)
     out = out.replace("..", ".").strip()
     out = re.sub(r"\s{2,}", " ", out)
-    out = re.sub(r"^\s*[\-–—:;,\.\s]+", "", out).strip()
     return out

 import json
 import re
 from typing import Any, Dict, Optional, List
+# If prompts.py doesn't exist, keep a safe fallback
+# NOTE(review): `except Exception` also hides errors raised while importing
+# prompts.py itself, not only a missing module; confirm that is intended.
+# NOTE(review): this list presumably mirrors ALLOWED_LABELS in main.py;
+# verify and keep the two in sync.
+try:
+    from prompts import ALLOWED_LABELS  # type: ignore
+except Exception:
+    ALLOWED_LABELS = [
+        "none", "faulty generalization", "false causality", "circular reasoning",
+        "ad populum", "ad hominem", "fallacy of logic", "appeal to emotion",
+        "false dilemma", "equivocation", "fallacy of extension",
+        "fallacy of relevance", "fallacy of credibility", "miscellaneous", "intentional"
+    ]
 # ----------------------------
 # Robust JSON extraction
     out = _TEMPLATE_RE.sub("", text)
     out = out.replace("..", ".").strip()
     out = re.sub(r"\s{2,}", " ", out)
+    out = re.sub(r"^\s*[\-–—:;\.\s]+", "", out).strip()
     return out