ST-THOMAS-OF-AQUINAS committed on
Commit
47eba64
·
verified ·
1 Parent(s): b6bfd9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -12
app.py CHANGED
@@ -7,18 +7,24 @@ from fastapi import FastAPI
7
  from pydantic import BaseModel
8
  from typing import List
9
 
10
- # ๐Ÿ”น Ensure Transformers cache is writable (optional)
11
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
12
  os.environ["HF_HOME"] = "/tmp/hf_cache"
 
13
 
14
- # ๐Ÿ”น Load tokenizer & BERT model
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
17
- bert_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
18
- bert_model.eval()
 
 
 
 
 
19
 
20
  # ๐Ÿ”น Load SVM models from `models/` folder
21
- MODEL_DIR = "models"
22
  MODEL_FILES = ["Dean of students_svm.pkl", "Registra_svm.pkl"]
23
  author_svms = {}
24
 
@@ -27,14 +33,19 @@ for file in MODEL_FILES:
27
  if not os.path.exists(path):
28
  raise FileNotFoundError(f"Model file not found: {path}")
29
  author = file.replace("_svm.pkl", "")
30
- clf = joblib.load(path)
31
- author_svms[author] = clf
 
 
 
32
 
33
  print(f"โœ… Loaded {len(author_svms)} author models from {MODEL_DIR}")
34
 
35
- # ๐Ÿ”น Text embedding
36
  def embed_text(text: str):
37
- enc = tokenizer([text], return_tensors="pt", truncation=True, padding=True, max_length=256)
 
 
38
  enc = {k: v.to(device) for k, v in enc.items()}
39
  with torch.no_grad():
40
  outputs = bert_model(**enc)
@@ -44,13 +55,19 @@ def embed_text(text: str):
44
  # ๐Ÿ”น Prediction function
45
  def predict_author(text: str):
46
  emb = embed_text(text)
47
- predictions = {author: clf.predict(emb)[0] for author, clf in author_svms.items()}
 
 
 
 
 
 
48
 
49
  accepted = [author for author, pred in predictions.items() if pred == 1]
50
  if len(accepted) == 1:
51
  return accepted[0]
52
  elif len(accepted) > 1:
53
- return accepted[0]
54
  else:
55
  return "Unknown"
56
 
 
7
  from pydantic import BaseModel
8
  from typing import List
9
 
10
# Ensure the Transformers cache lives in a writable location (e.g. on Spaces
# the default home directory is read-only).
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"  # legacy var, kept for older transformers
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)

# Device setup: prefer GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer & BERT model, failing fast with a clear error on startup.
try:
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    bert_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
    bert_model.eval()  # inference only: disables dropout
except Exception as e:
    # Chain the cause (`from e`) so the original traceback is preserved
    # instead of being flattened into the message string alone.
    raise RuntimeError(f"Failed to load BERT model: {e}") from e
25
 
26
  # ๐Ÿ”น Load SVM models from `models/` folder
27
+ MODEL_DIR = "models/"
28
  MODEL_FILES = ["Dean of students_svm.pkl", "Registra_svm.pkl"]
29
  author_svms = {}
30
 
 
33
  if not os.path.exists(path):
34
  raise FileNotFoundError(f"Model file not found: {path}")
35
  author = file.replace("_svm.pkl", "")
36
+ try:
37
+ clf = joblib.load(path)
38
+ author_svms[author] = clf
39
+ except Exception as e:
40
+ raise RuntimeError(f"Failed to load SVM model {file}: {e}")
41
 
42
  print(f"โœ… Loaded {len(author_svms)} author models from {MODEL_DIR}")
43
 
44
+ # ๐Ÿ”น Text embedding function
45
  def embed_text(text: str):
46
+ enc = tokenizer(
47
+ [text], return_tensors="pt", truncation=True, padding=True, max_length=256
48
+ )
49
  enc = {k: v.to(device) for k, v in enc.items()}
50
  with torch.no_grad():
51
  outputs = bert_model(**enc)
 
55
  # ๐Ÿ”น Prediction function
56
  def predict_author(text: str):
57
  emb = embed_text(text)
58
+ predictions = {}
59
+ for author, clf in author_svms.items():
60
+ try:
61
+ predictions[author] = clf.predict(emb)[0]
62
+ except Exception as e:
63
+ predictions[author] = -1 # mark as failed
64
+ print(f"โš ๏ธ Prediction failed for {author}: {e}")
65
 
66
  accepted = [author for author, pred in predictions.items() if pred == 1]
67
  if len(accepted) == 1:
68
  return accepted[0]
69
  elif len(accepted) > 1:
70
+ return accepted[0] # pick first if multiple
71
  else:
72
  return "Unknown"
73