Spaces:

ST-THOMAS-OF-AQUINAS
/

document_Authenification

Runtime error

App Files Files Community

ST-THOMAS-OF-AQUINAS committed on Sep 29, 2025

Commit

8e32a8a

verified ·

1 Parent(s): 613a048

Create app.py

Browse files

Files changed (1) hide show

app.py +58 -0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import os
+import joblib
+import torch
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import DistilBertTokenizerFast, DistilBertModel
+# 🔹 Config
+MODEL_DIR = "svm_models"
+# 🔹 FastAPI app
+app = FastAPI(title="Author Identification API")
+# 🔹 Load tokenizer & BERT
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
+bert_model = DistilBertModel.from_pretrained("distilbert-base-uncased").to(device)
+bert_model.eval()
+# 🔹 Load SVM models
+author_svms = {}
+for file in os.listdir(MODEL_DIR):
+    if file.endswith("_svm.pkl"):
+        author = file.replace("_svm.pkl", "")
+        clf = joblib.load(os.path.join(MODEL_DIR, file))
+        author_svms[author] = clf
+print(f"✅ Loaded {len(author_svms)} author models")
+# 🔹 Embedding function
+def embed_text(text):
+    enc = tokenizer([text], return_tensors="pt", truncation=True, padding=True, max_length=256)
+    enc = {k: v.to(device) for k, v in enc.items()}
+    with torch.no_grad():
+        outputs = bert_model(**enc)
+    pooled = outputs.last_hidden_state[:, 0, :].cpu().numpy()
+    return pooled
+# 🔹 Request schema
+# Request body model for POST /predict; predict_author() receives an
+# instance of it and reads only the `text` field.
+class InputText(BaseModel):
+    # Text whose author should be identified; passed as-is to embed_text().
+    text: str
+# 🔹 API route
+@app.post("/predict")
+def predict_author(input: InputText):
+    text = input.text
+    emb = embed_text(text)
+    scores = {}
+    for author, clf in author_svms.items():
+        pred = clf.predict(emb)[0]
+        score = clf.decision_function(emb)[0]
+        scores[author] = float(score) if pred == 1 else -9999
+    if all(s == -9999 for s in scores.values()):
+        return {"author": "Unknown", "score": None}
+    best_author = max(scores, key=scores.get)
+    return {"author": best_author, "score": round(scores[best_author], 4)}