BlakeL committed on
Commit
d847e4c
·
verified ·
1 Parent(s): da6ceaa

Upload 6 files

Browse files
addicted_score_regressor_mlflow.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8027a13d359ce884bbc88d593dcd9cd26307b0c250e5cda15a9d853f376dbd0f
3
+ size 634536
conflicts_classifier_mlflow.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c46ae05b5a951a0f4cd956a9ea6716a25ee05664f3b50a8f4d234cde157a678d
3
+ size 304941
conflicts_feature_names.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71997bd85013804a658169b857804119deac30f01053012f50e7a4aebab692c1
3
+ size 74
conflicts_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c92fb6baa5a9c4cd4ce80d56995989ec282cc8671699639e4a1e26d783cdbfb8
3
+ size 935
info.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Information content for Social Sphere app
4
+ Contains disclaimer, dataset citation, and about app content
5
+ """
6
+
7
class SocialSphereInfo:
    """Static Markdown copy shown by the Social Sphere application.

    Each public method returns one Markdown document (about page,
    disclaimer, dataset citation). The texts are kept as class-level
    constants so the accessors stay one-liners.
    """

    # Markdown for the "about" page.
    _ABOUT_APP_MD = """
# 📱 Social Sphere

## Overview
Social Sphere is an interactive machine learning-powered platform designed to explore how social media habits impact students' well-being. It analyzes anonymized data from students aged 16 to 25 across multiple countries, offering insights into how digital behaviors correlate with:

* **Academic performance**
* **Mental health and sleep patterns**
* **Relationship dynamics and social conflicts**

## Features
- **Classification Task**: Predict conflict risk based on usage patterns
- **Regression Task**: Predict addiction scores from behavioral data
- **Clustering Task**: Identify distinct user segments and behavioral patterns
- **Personalized Recommendations**: Tailored advice for each user profile

## Technology Stack
- **Backend**: Python with scikit-learn, pandas, numpy
- **Frontend**: Gradio for interactive web interface
- **ML Pipeline**: MLflow for experiment tracking
- **Visualization**: Matplotlib and Seaborn

## Target Users
- **Students**: Self-assessment and awareness
- **Educators**: Understanding student behavior patterns
- **Researchers**: Data analysis and pattern identification
- **Counselors**: Risk assessment and intervention planning

## Data Privacy
All analysis is performed locally. No personal data is stored or transmitted.
"""

    # Markdown for the disclaimer page.
    _DISCLAIMER_MD = """
# ⚠️ Disclaimer

## Important Information

### Purpose and Scope
This application is designed for educational and research purposes only. It is not intended to provide medical, psychological, or clinical advice.

### Limitations
- **Not Medical Advice**: The analysis and recommendations provided are not substitutes for professional medical or psychological consultation
- **Educational Tool**: This app serves as an awareness and educational tool for understanding social media usage patterns
- **Research-Based**: Analysis is based on research data and may not apply to all individuals
- **Self-Assessment**: Results should be used for self-reflection and awareness, not clinical diagnosis

### Data Privacy
- **Local Processing**: All analysis is performed locally on your device
- **No Data Storage**: No personal information is stored or transmitted
- **Anonymous Analysis**: Results are based on anonymized research data
- **User Control**: You maintain full control over your data

### Accuracy and Reliability
- **Research Tool**: Results are based on statistical analysis of research data
- **Individual Variation**: Individual experiences may vary significantly
- **Context Dependent**: Results should be interpreted in the context of your specific situation
- **Professional Consultation**: For serious concerns, consult qualified professionals

### Responsible Use
- **Self-Awareness**: Use results to increase self-awareness about social media habits
- **Healthy Perspective**: Maintain a balanced perspective on technology use
- **Seek Help**: If you have concerns about social media addiction, seek professional help
- **Educational Value**: Use insights for educational and self-improvement purposes

### Contact Information
For questions about this application or concerns about social media usage:
- Consult with mental health professionals
- Contact educational counselors
- Reach out to addiction specialists if needed
"""

    # Markdown for the dataset citation page.
    _DATASET_CITATION_MD = """
# 📚 Dataset Citation

## Dataset Information

### Source
**Students Social Media Addiction Dataset**
- **Collection Method**: Survey-based research study
- **Target Population**: University students
- **Geographic Scope**: International (multiple countries)
- **Time Period**: Recent academic years

### Citation Format
```
Students Social Media Addiction Dataset
Research Study on Social Media Usage Patterns Among University Students
[Year] - [Institution/Research Team]
```

### Dataset Characteristics
- **Sample Size**: Multiple hundreds of students
- **Variables**: Demographics, usage patterns, behavioral indicators
- **Quality**: Research-grade data with proper validation
- **Anonymization**: Personally identifiable information removed

### Ethical Considerations
- **Informed Consent**: All participants provided informed consent
- **Anonymization**: Data has been anonymized for research use
- **IRB Approval**: Study conducted with appropriate institutional review
- **Educational Use**: Data used for educational and research purposes

### Research Context
This dataset was collected as part of a larger research initiative to understand:
- Social media usage patterns among university students
- Relationship between usage and academic performance
- Mental health implications of social media use
- Behavioral indicators of potential addiction

### Usage Guidelines
- **Educational Purpose**: Intended for educational and research use
- **Respectful Use**: Use data responsibly and respectfully
- **Attribution**: Proper citation required for any publications
- **Privacy**: Maintain participant privacy in all uses

### Contact for Dataset
For questions about the dataset or research methodology:
- Contact the original research team
- Reference the original research publication
- Follow institutional guidelines for data use
"""

    def about_app(self):
        """Return the Markdown overview of the app (features, stack, audience)."""
        return self._ABOUT_APP_MD

    def disclaimer(self):
        """Return the Markdown disclaimer (scope, limitations, responsible use)."""
        return self._DISCLAIMER_MD

    def dataset_citation(self):
        """Return the Markdown describing and citing the underlying dataset."""
        return self._DATASET_CITATION_MD
unified_prediction_service.py ADDED
@@ -0,0 +1,641 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Social Media Analysis Prediction Service
3
+
4
+ This module provides a production-ready service for making predictions
5
+ using all three MLflow-trained models:
6
+ 1. Conflicts Prediction (Notebook 07)
7
+ 2. Addicted Score Regression (Notebook 08)
8
+ 3. Clustering Analysis (Notebook 09)
9
+ """
10
+
11
+ import mlflow
12
+ import pandas as pd
13
+ import numpy as np
14
+ import json
15
+ import logging
16
+ import joblib
17
+ from typing import Dict, List, Union, Optional
18
+ from pathlib import Path
19
+ from datetime import datetime
20
+
21
# Configure logging: set the root logger to INFO at import time and create
# the module-level logger used by the prediction service below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
24
+
25
+
26
class UnifiedSocialMediaPredictionService:
    """
    Unified service for conflicts, addicted-score and cluster predictions.

    On construction the service tries to load three models and their helper
    artifacts (scalers, feature names, cluster labels). Loading is
    best-effort: a missing artifact leaves the matching attribute as ``None``
    and the corresponding ``predict_*`` method then returns an error
    dictionary instead of raising.
    """

    # Directories probed, in order, for every serialized artifact.
    # NOTE(review): artifacts stored next to this module rather than under a
    # ``models`` directory are not probed - confirm the deployment layout.
    _MODEL_DIRS = ('models', '../models', 'notebooks/models')

    def __init__(self):
        """
        Initialize the unified prediction service with all three models.

        Sets the MLflow tracking URI to the local ``./mlruns`` store and then
        attempts to load every artifact.
        """
        self.conflicts_model = None
        self.addicted_model = None
        self.clustering_model = None
        self.conflicts_scaler = None
        self.addicted_scaler = None
        self.clustering_scaler = None
        self.cluster_labels = None
        self.feature_names = {}

        # Set MLflow tracking URI
        mlflow.set_tracking_uri("file:./mlruns")

        # Load all models
        self._load_all_models()

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    @classmethod
    def _load_first_available(cls, filename: str, description: str):
        """
        Return the first artifact named *filename* that loads, else ``None``.

        Args:
            filename: File name looked up inside each directory of
                ``_MODEL_DIRS``.
            description: Human-readable artifact name used in log messages.

        Returns:
            The deserialized object, or ``None`` when no candidate loads.
        """
        for directory in cls._MODEL_DIRS:
            path = f"{directory}/{filename}"
            try:
                # NOTE(security): joblib.load unpickles arbitrary objects;
                # only point this service at trusted artifact files.
                artifact = joblib.load(path)
            except Exception as exc:
                # Missing file, corrupt pickle or version mismatch: record
                # why and try the next candidate instead of hiding it.
                logger.debug("Could not load %s from %s: %s", description, path, exc)
                continue
            logger.info(f"✅ Loaded {description} from: {path}")
            return artifact

        logger.warning(f"⚠️ No {description} found (looked for {filename} in {list(cls._MODEL_DIRS)})")
        return None

    def _load_all_models(self):
        """
        Load all three models and their associated files.

        Raises:
            Exception: re-raised after logging if a loader fails unexpectedly.
                Missing artifacts alone do not raise.
        """
        try:
            # Load Conflicts Prediction Model (Notebook 07)
            self._load_conflicts_model()

            # Load Addicted Score Model (Notebook 08)
            self._load_addicted_model()

            # Load Clustering Model (Notebook 09)
            self._load_clustering_model()
        except Exception as e:
            logger.error(f"❌ Failed to load models: {e}")
            raise

        # Only claim success when every artifact the predict_* guards need
        # is actually present.
        required = (
            ('conflicts model', self.conflicts_model),
            ('conflicts scaler', self.conflicts_scaler),
            ('addicted model', self.addicted_model),
            ('addicted scaler', self.addicted_scaler),
            ('clustering model', self.clustering_model),
            ('clustering scaler', self.clustering_scaler),
        )
        missing = [name for name, artifact in required if artifact is None]
        if missing:
            logger.warning(f"⚠️ Models loaded with missing artifacts: {', '.join(missing)}")
        else:
            logger.info("✅ All models loaded successfully!")

    def _load_conflicts_model(self):
        """Load the conflicts prediction model, scaler and feature names (Notebook 07)."""
        self.conflicts_model = self._load_first_available(
            'conflicts_classifier_rf.joblib', 'conflicts model')
        self.conflicts_scaler = self._load_first_available(
            'conflicts_scaler.joblib', 'conflicts scaler')

        names = self._load_first_available(
            'conflicts_feature_names.joblib', 'conflicts feature names')
        if names is not None:
            self.feature_names['conflicts'] = names

    def _load_addicted_model(self):
        """Load the addicted score regression model and scaler (Notebook 08)."""
        # Try the MLflow model registry first, then fall back to local files.
        model_uri = "models:/addicted_score_regressor/latest"
        try:
            self.addicted_model = mlflow.sklearn.load_model(model_uri)
            logger.info(f"✅ Loaded addicted model from MLflow: {model_uri}")
        except Exception as exc:
            logger.debug("Could not load addicted model from MLflow (%s): %s", model_uri, exc)
            self.addicted_model = self._load_first_available(
                'addicted_score_model.joblib', 'addicted model')

        self.addicted_scaler = self._load_first_available(
            'addicted_score_scaler.joblib', 'addicted scaler')

    def _load_clustering_model(self):
        """Load the clustering model, scaler, labels and feature names (Notebook 09)."""
        self.clustering_model = self._load_first_available(
            'clustering_model.joblib', 'clustering model')
        self.clustering_scaler = self._load_first_available(
            'clustering_scaler.joblib', 'clustering scaler')
        self.cluster_labels = self._load_first_available(
            'cluster_labels.joblib', 'cluster labels')

        names = self._load_first_available(
            'clustering_feature_names.joblib', 'clustering feature names')
        if names is not None:
            self.feature_names['clustering'] = names

    # ------------------------------------------------------------------
    # Feature helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _gender_flags(raw_gender) -> Dict:
        """
        One-hot encode a gender value into ``Gender_Male`` / ``Gender_Female``.

        Any value other than male/m/female/f (case-insensitive), including
        ``None``, yields both flags as 0.
        """
        gender = str(raw_gender).strip().lower()
        return {
            'Gender_Male': 1 if gender in ('male', 'm') else 0,
            'Gender_Female': 1 if gender in ('female', 'f') else 0,
        }

    @staticmethod
    def _academic_level_flags(raw_level) -> Dict:
        """One-hot encode an academic level; unknown values give all zeros."""
        level = str(raw_level).lower()
        flags = {'Is_Undergraduate': 0, 'Is_Graduate': 0, 'Is_High_School': 0}
        # 'undergraduate' must be tested first because it contains 'graduate'.
        if 'undergraduate' in level:
            flags['Is_Undergraduate'] = 1
        elif 'graduate' in level:
            flags['Is_Graduate'] = 1
        elif 'high school' in level:
            flags['Is_High_School'] = 1
        return flags

    @staticmethod
    def _error(message: str) -> Dict:
        """Build the error dictionary returned by every ``predict_*`` method."""
        return {'error': message, 'timestamp': datetime.now().isoformat()}

    # ------------------------------------------------------------------
    # Predictions
    # ------------------------------------------------------------------
    def predict_conflicts(self, data: Dict) -> Dict:
        """
        Predict conflicts over social media using Notebook 07 model.

        Args:
            data: Dictionary containing student data. Reads
                ``Mental_Health_Score``, ``Age`` and ``Gender``; missing
                keys are fed to the model as 0.

        Returns:
            Dictionary containing conflicts prediction results, or a
            dictionary with an ``error`` key when the model is not loaded or
            the prediction fails.
        """
        if self.conflicts_model is None or self.conflicts_scaler is None:
            return self._error("Conflicts model not loaded. Please run notebook 07 first.")

        try:
            features = {}
            for key in ('Mental_Health_Score', 'Age'):
                if key in data:
                    features[key] = float(data[key])
            if 'Gender' in data:
                features.update(self._gender_flags(data['Gender']))

            # Only the two numeric columns go through the scaler.
            numeric = [features.get(name, 0) for name in ('Mental_Health_Score', 'Age')]
            scaled = self.conflicts_scaler.transform([numeric])

            # Model input: scaled numeric columns followed by raw gender flags.
            final_vector = list(scaled[0]) + [
                features.get(name, 0) for name in ('Gender_Female', 'Gender_Male')
            ]

            prediction = self.conflicts_model.predict([final_vector])[0]
            probability = self.conflicts_model.predict_proba([final_vector])[0]

            if prediction == 1:
                conflict_level = 'High Risk'
                recommendation = 'Immediate intervention needed: Conflict resolution training, communication skills'
            else:
                conflict_level = 'Low Risk'
                recommendation = 'Monitor and provide resources: Healthy communication guidelines'

            return {
                'predicted_conflicts': int(prediction),
                'conflict_level': conflict_level,
                'recommendation': recommendation,
                # Confidence is the highest class probability.
                'confidence': float(max(probability)),
                'timestamp': datetime.now().isoformat(),
                'model_type': 'conflicts_prediction'
            }

        except Exception as e:
            # Service boundary: callers expect an error dict, not an exception.
            return self._error(str(e))

    def predict_addicted_score(self, data: Dict) -> Dict:
        """
        Predict addicted score using Notebook 08 model.

        Args:
            data: Dictionary containing student data. Reads ``Age``,
                ``Mental_Health_Score``, ``Conflicts_Over_Social_Media`` and
                ``Gender``; missing keys are fed to the model as 0.

        Returns:
            Dictionary containing addicted score prediction results, or a
            dictionary with an ``error`` key when the model is not loaded or
            the prediction fails.
        """
        if self.addicted_model is None or self.addicted_scaler is None:
            return self._error("Addicted score model not loaded. Please run notebook 08 first.")

        try:
            features = {}
            if 'Age' in data:
                features['Age'] = float(data['Age'])
            if 'Mental_Health_Score' in data:
                features['Mental_Health_Score'] = float(data['Mental_Health_Score'])
                # Squared term is derived from the raw (unscaled) score.
                features['mental_health_squared'] = features['Mental_Health_Score'] ** 2
            if 'Conflicts_Over_Social_Media' in data:
                features['Conflicts_Over_Social_Media'] = float(data['Conflicts_Over_Social_Media'])
            if 'Gender' in data:
                features.update(self._gender_flags(data['Gender']))

            # Only the three numeric columns go through the scaler.
            scaled_columns = ('Mental_Health_Score', 'Age', 'Conflicts_Over_Social_Media')
            numeric = [features.get(name, 0) for name in scaled_columns]
            scaled = self.addicted_scaler.transform([numeric])

            # Model input: scaled numeric columns followed by the raw extras.
            raw_columns = ('mental_health_squared', 'Gender_Female', 'Gender_Male')
            final_vector = list(scaled[0]) + [features.get(name, 0) for name in raw_columns]

            prediction = self.addicted_model.predict([final_vector])[0]

            if prediction >= 8:
                addiction_level = 'Very High'
            elif prediction >= 6:
                addiction_level = 'High'
            elif prediction >= 4:
                addiction_level = 'Moderate'
            else:
                addiction_level = 'Low'

            # Fixed placeholder: the value is not derived from the model.
            confidence = 0.8

            return {
                'predicted_score': float(prediction),
                'addiction_level': addiction_level,
                'confidence': float(confidence),
                'timestamp': datetime.now().isoformat(),
                'model_type': 'addicted_score_regression'
            }

        except Exception as e:
            # Service boundary: callers expect an error dict, not an exception.
            return self._error(str(e))

    def predict_cluster(self, data: Dict) -> Dict:
        """
        Predict cluster assignment using Notebook 09 model.

        Args:
            data: Dictionary containing student data. Numeric keys, ``Gender``
                and ``Academic_Level`` are turned into the engineered features
                listed in ``feature_names['clustering']``; features that
                cannot be computed are fed to the model as 0.

        Returns:
            Dictionary containing cluster prediction results, or a dictionary
            with an ``error`` key when the model is not loaded or the
            prediction fails.
        """
        if self.clustering_model is None or self.clustering_scaler is None:
            return self._error("Clustering model not loaded. Please run notebook 09 first.")

        try:
            ordered_names = list(self.feature_names.get('clustering', []))
            if not ordered_names:
                # Without the training-time column order no vector can be built.
                return self._error("Clustering feature names not loaded. Please run notebook 09 first.")

            features = {}

            # Numeric features, converted only when present.
            numeric_keys = (
                'Age',
                'Avg_Daily_Usage_Hours',
                'Sleep_Hours_Per_Night',
                'Mental_Health_Score',
                'Conflicts_Over_Social_Media',
                'Addicted_Score',
            )
            for key in numeric_keys:
                if key in data:
                    features[key] = float(data[key])

            # Categorical features
            if 'Gender' in data:
                features['Is_Female'] = self._gender_flags(data['Gender'])['Gender_Female']
            if 'Academic_Level' in data:
                features.update(self._academic_level_flags(data['Academic_Level']))

            # Behavioral threshold flags
            if 'Avg_Daily_Usage_Hours' in features:
                features['High_Usage'] = 1 if features['Avg_Daily_Usage_Hours'] >= 6 else 0
            if 'Sleep_Hours_Per_Night' in features:
                features['Low_Sleep'] = 1 if features['Sleep_Hours_Per_Night'] <= 6 else 0
            if 'Mental_Health_Score' in features:
                features['Poor_Mental_Health'] = 1 if features['Mental_Health_Score'] <= 5 else 0
            if 'Conflicts_Over_Social_Media' in features:
                features['High_Conflict'] = 1 if features['Conflicts_Over_Social_Media'] >= 3 else 0
            if 'Addicted_Score' in features:
                features['High_Addiction'] = 1 if features['Addicted_Score'] >= 7 else 0

            # Interaction ratios. A zero denominator would raise
            # ZeroDivisionError, so the ratio is skipped and falls back to
            # the same 0 used for any other missing feature.
            usage = features.get('Avg_Daily_Usage_Hours')
            sleep = features.get('Sleep_Hours_Per_Night')
            mental = features.get('Mental_Health_Score')
            if usage is not None and sleep:
                features['Usage_Sleep_Ratio'] = usage / sleep
            if mental is not None and usage:
                features['Mental_Health_Usage_Ratio'] = mental / usage

            # Feature vector in the training-time column order.
            feature_vector = [features.get(name, 0) for name in ordered_names]
            feature_vector_scaled = self.clustering_scaler.transform([feature_vector])

            cluster_id = int(self.clustering_model.predict(feature_vector_scaled)[0])

            default_label = f'Cluster_{cluster_id}'
            if self.cluster_labels:
                cluster_label = self.cluster_labels.get(cluster_id, default_label)
            else:
                cluster_label = default_label

            # Risk level is keyed off substrings of the cluster label.
            if 'High-Usage' in cluster_label and 'High-Addiction' in cluster_label:
                risk_level = 'High Risk'
                recommendation = 'Intensive intervention needed: Digital detox programs, counseling, parental monitoring'
            elif 'High-Usage' in cluster_label or 'Poor-Health' in cluster_label:
                risk_level = 'Moderate Risk'
                recommendation = 'Targeted intervention recommended: Screen time limits, mental health support, sleep hygiene'
            else:
                risk_level = 'Low Risk'
                recommendation = 'Monitor and provide resources: Educational materials, healthy usage guidelines'

            # Confidence shrinks with distance to the assigned cluster center,
            # floored at 0.1. Models without ``cluster_centers_`` get 0.8.
            try:
                center = np.asarray(self.clustering_model.cluster_centers_[cluster_id], dtype=float)
                point = np.asarray(feature_vector_scaled[0], dtype=float)
                distance = float(np.linalg.norm(point - center))
                confidence = max(0.1, 1 - distance / 10)
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                confidence = 0.8  # Default confidence

            return {
                'cluster_id': cluster_id,
                'cluster_label': cluster_label,
                'risk_level': risk_level,
                'recommendation': recommendation,
                'confidence': float(confidence),
                'timestamp': datetime.now().isoformat(),
                'model_type': 'clustering_analysis'
            }

        except Exception as e:
            # Service boundary: callers expect an error dict, not an exception.
            return self._error(str(e))

    def predict_all(self, data: Dict) -> Dict:
        """
        Make predictions using all three models.

        Args:
            data: Dictionary containing student data

        Returns:
            Dictionary with one entry per model, a timestamp and the input
            data echoed back under ``student_data``.
        """
        return {
            'conflicts_prediction': self.predict_conflicts(data),
            'addicted_score_prediction': self.predict_addicted_score(data),
            'clustering_prediction': self.predict_cluster(data),
            'timestamp': datetime.now().isoformat(),
            'student_data': data
        }

    def get_model_status(self) -> Dict:
        """
        Get status of all models.

        Returns:
            Dictionary of booleans telling which artifacts are loaded, plus a
            timestamp.
        """
        return {
            'conflicts_model_loaded': self.conflicts_model is not None,
            'addicted_model_loaded': self.addicted_model is not None,
            'clustering_model_loaded': self.clustering_model is not None,
            'conflicts_scaler_loaded': self.conflicts_scaler is not None,
            'addicted_scaler_loaded': self.addicted_scaler is not None,
            'clustering_scaler_loaded': self.clustering_scaler is not None,
            'cluster_labels_loaded': self.cluster_labels is not None,
            'feature_names_loaded': len(self.feature_names) > 0,
            'timestamp': datetime.now().isoformat()
        }
588
+
589
+
590
def create_unified_prediction_service() -> UnifiedSocialMediaPredictionService:
    """
    Build a ready-to-use unified prediction service.

    Returns:
        A new UnifiedSocialMediaPredictionService; its constructor has
        already attempted to load every model artifact.
    """
    service = UnifiedSocialMediaPredictionService()
    return service
598
+
599
+
600
+ # Example usage and testing functions
601
def test_unified_prediction_service():
    """
    Smoke-test the unified prediction service with one sample record.

    Prints the model status and the combined predictions as JSON.

    Returns:
        The dictionary produced by ``predict_all``, or ``None`` if any step
        raised (the failure is printed rather than propagated).
    """
    # One hand-written student record covering every field the models read.
    sample_data = {
        'Age': 20,
        'Gender': 'Female',
        'Academic_Level': 'Undergraduate',
        'Avg_Daily_Usage_Hours': 6.5,
        'Sleep_Hours_Per_Night': 7.0,
        'Mental_Health_Score': 7,
        'Conflicts_Over_Social_Media': 2,
        'Addicted_Score': 6,
        'Relationship_Status': 'Single',
        'Affects_Academic_Performance': 'Yes',
        'Most_Used_Platform': 'Instagram'
    }

    try:
        service = create_unified_prediction_service()

        # Report which artifacts were found.
        status = service.get_model_status()
        print("📊 Model Status:")
        print(json.dumps(status, indent=2))

        # Run all three models on the sample record.
        results = service.predict_all(sample_data)
        print("\n📊 Unified Prediction Results:")
        print(json.dumps(results, indent=2))
    except Exception as e:
        print(f"❌ Test failed: {e}")
        return None
    else:
        return results
638
+
639
+
640
# When executed as a script (not imported), run the smoke test once.
if __name__ == "__main__":
    test_unified_prediction_service()