Spaces:

ST-THOMAS-OF-AQUINAS
/

document_Authenification

Runtime error

ST-THOMAS-OF-AQUINAS committed on Sep 29, 2025

Commit

d89415b

verified ·

1 Parent(s): 1741dfb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,29 +7,33 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
-# 🔹 Set device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # 🔹 Load tokenizer & BERT model
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 bert_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
 bert_model.eval()
-# 🔹 Path where SVM models are stored (same directory or "models/" subfolder)
-MODEL_DIR = "./models"  # or "./models" if you put models in a subfolder
-# 🔹 Load SVM models dynamically from local directory
 author_svms = {}
-for file in os.listdir(MODEL_DIR):
-    if file.endswith("_svm.pkl"):
-        author = file.replace("_svm.pkl", "")
-        clf = joblib.load(os.path.join(MODEL_DIR, file))
-        author_svms[author] = clf
-print(f"✅ Loaded {len(author_svms)} author models")
 # 🔹 Text embedding
-def embed_text(text):
     enc = tokenizer([text], return_tensors="pt", truncation=True, padding=True, max_length=256)
     enc = {k: v.to(device) for k, v in enc.items()}
     with torch.no_grad():
@@ -38,7 +42,7 @@ def embed_text(text):
     return pooled
 # 🔹 Prediction function
-def predict_author(text):
     emb = embed_text(text)
     predictions = {author: clf.predict(emb)[0] for author, clf in author_svms.items()}

 from pydantic import BaseModel
 from typing import List
+# 🔹 Ensure Transformers cache is writable (optional)
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_HOME"] = "/tmp/hf_cache"
 # 🔹 Load tokenizer & BERT model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 bert_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
 bert_model.eval()
+# 🔹 Load SVM models from `models/` folder
+MODEL_DIR = "models"
+MODEL_FILES = ["Dean of students_svm.pkl", "Registra_svm.pkl"]
 author_svms = {}
+for file in MODEL_FILES:
+    path = os.path.join(MODEL_DIR, file)
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"Model file not found: {path}")
+    author = file.replace("_svm.pkl", "")
+    clf = joblib.load(path)
+    author_svms[author] = clf
+print(f"✅ Loaded {len(author_svms)} author models from {MODEL_DIR}")
 # 🔹 Text embedding
+def embed_text(text: str):
     enc = tokenizer([text], return_tensors="pt", truncation=True, padding=True, max_length=256)
     enc = {k: v.to(device) for k, v in enc.items()}
     with torch.no_grad():
     return pooled
 # 🔹 Prediction function
+def predict_author(text: str):
     emb = embed_text(text)
     predictions = {author: clf.predict(emb)[0] for author, clf in author_svms.items()}