| import os |
| import re |
| import pickle |
| import warnings |
| from functools import lru_cache |
|
|
| import numpy as np |
| import torch |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| from deep_translator import GoogleTranslator |
|
|
# Install an "ignore" filter with no category or module restriction, so every
# warning raised after this module is imported is suppressed process-wide.
warnings.filterwarnings("ignore")


# Label names that fuse_scores() iterates over; both score dicts passed to it
# must contain each of these keys.
CLASSES = ["anxiety", "depression", "stress"]
|
|
|
|
@lru_cache(maxsize=1)
def load_xlmr():
    """Load the text classifier, its tokenizer and its label encoder.

    The result is memoised by ``lru_cache``, so the loading work runs once
    per process.

    Model source selection:
        * If the ``mental_xlmr_final`` directory next to this file contains
          ``pytorch_model.bin`` or ``model.safetensors``, that directory is
          passed to ``from_pretrained``.
        * Otherwise the identifier from the ``HF_MODEL_ID`` environment
          variable is used (default ``AliSakr9997/Mental-XLMR-Model``).

    A non-empty ``HF_TOKEN`` environment variable is forwarded to both
    ``from_pretrained`` calls as ``token=``.

    The label encoder is always read from
    ``mental_xlmr_final/label_encoder.pkl`` next to this file, whichever
    model source was selected.

    Returns:
        tuple: ``(tokenizer, model, label_encoder)`` with the model switched
        to eval mode.
    """
    bundle_dir = os.path.join(os.path.dirname(__file__), "mental_xlmr_final")

    hub_id = os.getenv("HF_MODEL_ID", "AliSakr9997/Mental-XLMR-Model")
    hf_token = os.getenv("HF_TOKEN")
    auth = {}
    if hf_token:
        auth["token"] = hf_token

    # Prefer the local bundle as soon as either weight file is found.
    source = hub_id
    for weight_file in ("pytorch_model.bin", "model.safetensors"):
        if os.path.exists(os.path.join(bundle_dir, weight_file)):
            source = bundle_dir
            break

    tokenizer = AutoTokenizer.from_pretrained(source, **auth)
    model = AutoModelForSequenceClassification.from_pretrained(source, **auth)

    # NOTE: unpickling runs arbitrary code from the file; this path must only
    # ever point at a trusted artefact.
    with open(os.path.join(bundle_dir, "label_encoder.pkl"), "rb") as fh:
        label_encoder = pickle.load(fh)

    model.eval()
    return tokenizer, model, label_encoder
|
|
|
|
@lru_cache(maxsize=1)
def load_survey():
    """Load the survey scaler and build the survey forward-pass function.

    Reads ``scaler.pkl`` and ``model_weights.pkl`` from the directory that
    contains this file. The result is memoised by ``lru_cache``, so the files
    are read once per process.

    Returns:
        tuple: ``(scaler, predict)`` where ``scaler`` is the unpickled object
        from ``scaler.pkl`` and ``predict(x)`` runs the stored layers on ``x``
        and returns softmax probabilities.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    base_dir = os.path.dirname(__file__)

    # NOTE(security): pickle.load executes arbitrary code from the file; these
    # paths must only ever point at trusted artefacts shipped with the app.
    # Context managers close the handles the original left open.
    with open(os.path.join(base_dir, "scaler.pkl"), "rb") as fh:
        scaler = pickle.load(fh)
    with open(os.path.join(base_dir, "model_weights.pkl"), "rb") as fh:
        weights = pickle.load(fh)

    def predict(x):
        """Apply each stored layer to ``x`` and return softmax probabilities.

        Args:
            x: Array-like input whose last axis matches the first layer's
                input size.

        Returns:
            numpy.ndarray: Probabilities normalised along the last axis.
        """
        for layer in weights:
            # Entries that are not a two-item (kernel, bias) pair are skipped.
            if len(layer) == 2:
                kernel, bias = layer
                # NOTE(review): ReLU is applied after every layer, including
                # the last one before softmax - confirm this matches the
                # architecture the weights were trained with.
                x = np.maximum(0, np.dot(x, kernel) + bias)

        logits = np.asarray(x)
        # Shift by the row maximum so np.exp cannot overflow to inf (which
        # would yield NaN probabilities), and normalise per row so a batch of
        # several samples gets one distribution per sample.
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    return scaler, predict
|
|
|
|
def clean_text(text: str) -> str:
    """Normalise raw user text before it is fed to the classifier.

    Three substitutions run in order:
        1. Any character (other than a newline) repeated three or more times
           in a row is reduced to exactly two occurrences.
        2. Every character that is not a word character, whitespace, in the
           range U+0600-U+06FF, or a square bracket becomes a space.
        3. Runs of whitespace collapse to a single space.

    Leading and trailing whitespace is stripped from the result.

    Args:
        text: The string to normalise.

    Returns:
        str: The normalised string.
    """
    substitutions = (
        (r"(.)\1{2,}", r"\1\1"),
        (r"[^\w\s\u0600-\u06FF\[\]]", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
|
|
|
|
def translate_to_en(text: str) -> str:
    """Translate ``text`` to English, returning ``""`` when that is not possible.

    Translation is best-effort: callers treat an empty result as "no
    translation available", so failures never propagate.

    Args:
        text: Text in any language; the source language is auto-detected.

    Returns:
        str: The English translation, or ``""`` if the input is empty or
        blank, the translator raises, or the translator returns a non-string.
    """
    # Nothing to translate: skip the remote call entirely.
    if not isinstance(text, str) or not text.strip():
        return ""
    try:
        translated = GoogleTranslator(source="auto", target="en").translate(text)
    except Exception:
        # Deliberate catch-all: any translator failure degrades to
        # "no translation" instead of failing the caller.
        return ""
    # Guard the declared return type in case the translator hands back None
    # or another non-string value.
    return translated if isinstance(translated, str) else ""
|
|
|
|
def predict_text(text: str) -> dict:
    """Score free text with the transformer classifier.

    The text is cleaned with ``clean_text`` and translated with
    ``translate_to_en``. When a translation is available the model input is
    ``"<translation> [SEP] <cleaned>"``; otherwise it is the cleaned text
    alone. The input is tokenised with truncation at 192 tokens.

    Args:
        text: Raw user text.

    Returns:
        dict: Maps each entry of the label encoder's ``classes_`` to its
        softmax probability rounded to four decimals.
    """
    tokenizer, model, le = load_xlmr()

    cleaned = clean_text(text)
    english = translate_to_en(cleaned)
    if english:
        model_input = english + " [SEP] " + cleaned
    else:
        model_input = cleaned

    encoded = tokenizer(
        model_input,
        return_tensors="pt",
        truncation=True,
        max_length=192,
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.softmax(logits, dim=-1).squeeze().numpy()

    scores = {}
    for label, prob in zip(le.classes_, probabilities):
        scores[label] = round(float(prob), 4)
    return scores
|
|
|
|
def predict_survey(answers: list) -> dict:
    """Score questionnaire answers with the survey model.

    The answers are reshaped into a single row, transformed by the scaler
    from ``load_survey`` and passed through its forward-pass function.

    Args:
        answers: The questionnaire answers for one respondent.

    Returns:
        dict: Keys ``"depression"``, ``"anxiety"`` and ``"stress"`` mapped to
        outputs 0, 1 and 2 of the model respectively, each rounded to four
        decimals.
    """
    scaler, survey_predict = load_survey()

    row = np.array(answers).reshape(1, -1)
    scaled = scaler.transform(row)
    pred = survey_predict(scaled)[0]

    # Position in the model output -> label name.
    output_labels = ("depression", "anxiety", "stress")
    return {
        label: round(float(pred[index]), 4)
        for index, label in enumerate(output_labels)
    }
|
|
|
|
def fuse_scores(text_s, survey_s, w_text=0.4, w_survey=0.6):
    """Blend text and survey scores into one weighted score per class.

    Args:
        text_s: Mapping from class name to the text-model score.
        survey_s: Mapping from class name to the survey-model score.
        w_text: Weight applied to the text score.
        w_survey: Weight applied to the survey score.

    Returns:
        dict: For every name in ``CLASSES``, the value
        ``w_text * text_s[name] + w_survey * survey_s[name]`` rounded to four
        decimals.

    Raises:
        KeyError: If either mapping lacks one of the names in ``CLASSES``.
    """
    fused = {}
    for label in CLASSES:
        blended = w_text * text_s[label] + w_survey * survey_s[label]
        fused[label] = round(blended, 4)
    return fused
|
|