DEPRESSION-DETECTION
Browse files- source_code/app_utilities.py +40 -18
- source_code/requirements.txt +1 -0
source_code/app_utilities.py
CHANGED
|
@@ -38,28 +38,50 @@ def tweet_prediction(tweet: str) -> int:
|
|
| 38 |
Returns:
|
| 39 |
int: 1 for Depressive, 0 for Non-depressive.
|
| 40 |
"""
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
# Step 2: Convert text to numbers using spaCy
|
| 47 |
nlp_engine = en_core_web_lg.load()
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
# Step 4: Load the pre-trained Support Vector Machine (SVM) model artifact
|
| 57 |
-
# The SVM was selected for its robust performance in high-dimensional text classification
|
| 58 |
-
model_path = "./assets/models/model_svm1.pkl"
|
| 59 |
with open(model_path, 'rb') as model_file:
|
| 60 |
classifier = pickle.load(model_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
# Step
|
|
|
|
| 63 |
prediction_result = classifier.predict(semantic_vectors)
|
| 64 |
|
| 65 |
return int(prediction_result[0])
|
|
|
|
| 38 |
Returns:
|
| 39 |
int: 1 for Depressive, 0 for Non-depressive.
|
| 40 |
"""
|
| 41 |
+
# Global initialization of heavy resources to optimize runtime performance.
# Loading these once at startup eliminates significant latency during
# individual requests (each call to tweet_prediction reuses them).

# 1. Load spaCy NLP engine
try:
    nlp_engine = en_core_web_lg.load()
except Exception as e:
    # Startup boundary: report and abort — the app cannot serve without the model.
    # Fix: diagnostics belong on stderr, not stdout.
    print(f"Error loading spaCy model: {e}", file=sys.stderr)
    sys.exit(1)

# 2. Load pre-trained SVM Classifier
model_path = "./assets/models/model_svm1.pkl"
try:
    with open(model_path, 'rb') as model_file:
        # SECURITY NOTE(review): pickle.load executes arbitrary code from the
        # file — only ever load trusted, project-shipped model artifacts here.
        classifier = pickle.load(model_file)
except Exception as e:
    print(f"Error loading SVM model from {model_path}: {e}", file=sys.stderr)
    sys.exit(1)
def tweet_prediction(tweet: str) -> int:
    """
    Classify a tweet as depressive (1) or non-depressive (0).

    Pipeline:
    1. Clean the raw text via the CU utility module.
    2. Embed the cleaned text as the mean (centroid) of its spaCy token
       vectors, using the globally pre-loaded 'nlp_engine'.
    3. Classify the 300-dim embedding with the globally pre-loaded SVM.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: 1 for Depressive, 0 for Non-depressive.
    """
    # Step 1: Clean the text
    cleaned_text = CU.tweets_cleaner(tweet)

    # Step 2: Compute the centroid word embedding.
    # The mean of all token vectors represents the tweet's semantic context.
    # Note: the global 'nlp_engine' is reused here, avoiding reload overhead.
    token_vectors = np.array([token.vector for token in nlp_engine(cleaned_text)])
    if token_vectors.size == 0:
        # Fix: cleaning can strip every token (e.g. URL/emoji-only tweets).
        # The original code then averaged an empty array, producing NaNs
        # (plus a RuntimeWarning) that were fed to the classifier.
        # Fall back to a zero vector instead.
        vector = np.zeros(300)
    else:
        # mean(axis=0) of an (n, 300) matrix is already a (300,) vector,
        # so the original's trailing '* np.ones((300))' identity multiply
        # is dropped.
        vector = token_vectors.mean(axis=0)
    semantic_vectors = np.array([vector])

    # Step 3: Perform binary classification with the global 'classifier'.
    prediction_result = classifier.predict(semantic_vectors)

    return int(prediction_result[0])
|
source_code/requirements.txt
CHANGED
|
@@ -35,3 +35,4 @@ Flask-Bootstrap==3.3.7.1
|
|
| 35 |
Jinja2==3.0.3
|
| 36 |
Werkzeug==2.0.3
|
| 37 |
itsdangerous==2.0.1
|
|
|
|
|
|
| 35 |
Jinja2==3.0.3
|
| 36 |
Werkzeug==2.0.3
|
| 37 |
itsdangerous==2.0.1
|
| 38 |
+
|