Spaces:

quangbm12
/

simple_NER

Runtime error

quangbmk66dsai02 committed on Aug 26, 2025

Commit

6090e79

1 Parent(s): 0d2e9ac

init

Files changed (4) hide show

Dockerfile ADDED Viewed

+# Base image
+FROM python:3.10-slim
+# Docker Spaces run the container as a non-root user (see the Spaces Docker
+# docs), so keep the Hugging Face cache that datasets.load_dataset fills at
+# startup in a location any user can write to.
+ENV HF_HOME=/tmp/huggingface
+# Set working directory
+WORKDIR /app
+# Install system deps if needed (optional, but useful for datasets); drop the
+# apt package lists in the same layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first (better caching)
+COPY requirements.txt .
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy app code together with the pickled CRF model: app_NER.py calls
+# joblib.load("crf_model.pkl") at import time, and that relative path is
+# resolved against WORKDIR, so the model file must be present in /app.
+COPY app_NER.py crf_model.pkl ./
+# Expose port
+EXPOSE 7860
+# Start FastAPI with uvicorn
+CMD ["uvicorn", "app_NER:app", "--host", "0.0.0.0", "--port", "7860"]

app_NER.py ADDED Viewed

+from fastapi import FastAPI
+from pydantic import BaseModel
+import joblib
+from datasets import load_dataset
+# --- Load model and label map ---
+# These statements run once, when the module is imported.
+# NOTE(review): joblib.load unpickles the file, and unpickling can execute
+# arbitrary code -- only ship a crf_model.pkl that comes from a trusted source.
+# The path is relative, so it is resolved against the process working directory.
+crf = joblib.load("crf_model.pkl")
+# Load the "supervised" configuration of the DFKI-SLT/few-nerd dataset; the only
+# thing read from it below is the id-to-name lookup of its `ner_tags` feature.
+raw = load_dataset("DFKI-SLT/few-nerd", "supervised")
+# Callable that turns an integer tag id into its string name (presumably
+# ClassLabel.int2str -- confirm against the dataset schema).
+# NOTE(review): label_map is not referenced by any endpoint visible in this file.
+label_map = raw['train'].features['ner_tags'].feature.int2str
+def word2features(tokens, i):
+    w = tokens[i]
+    f = {
+        'word.lower()': w.lower(),
+        'word.isupper()': w.isupper(),
+        'word.istitle()': w.istitle(),
+        'word.isdigit()': w.isdigit(),
+        'bias': 1.0,
+    }
+    if i > 0:
+        f['prev.lower()'] = tokens[i - 1].lower()
+    else:
+        f['BOS'] = True
+    if i < len(tokens) - 1:
+        f['next.lower()'] = tokens[i + 1].lower()
+    else:
+        f['EOS'] = True
+    return f
+def sentence_to_features(tokens):
+    return [word2features(tokens, i) for i in range(len(tokens))]
+# --- API schema ---
+# Request model accepted by the /predict endpoint: the caller supplies the
+# sentence already split into tokens.
+class SentenceRequest(BaseModel):
+    # Tokens of one sentence, in order; /predict passes them to
+    # sentence_to_features and echoes them back in its response.
+    tokens: list[str]
+# --- Initialize app ---
+# Application object on which the @app.post route decorators below register
+# their endpoints.
+app = FastAPI(title="NER with CRF")
+@app.post("/predict")
+def predict(req: SentenceRequest):
+    features = [sentence_to_features(req.tokens)]
+    y_pred = crf.predict(features)[0]
+    # Convert to plain Python list
+    y_pred = list(map(str, y_pred))
+    # Return JSON-serializable dict
+    return {
+        "tokens": req.tokens,
+        "predicted_labels": y_pred
+    }
+@app.post("/split")
+def split(sent):
+    tokens = sent.split()
+    return {"tokens": tokens}

crf_model.pkl ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:6be71e2c46e92d9e05dc22ace52affb88da6088518cad7e3047240fc09d26e45
+size 23248083

requirements.txt ADDED Viewed

+fastapi
+uvicorn
+joblib
+datasets
+scikit-learn
+scikit-learn-crfsuite