BlakeL commited on
Commit
307db4e
Β·
verified Β·
1 Parent(s): 0f45310

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +612 -547
app.py CHANGED
@@ -9,611 +9,676 @@ import pandas as pd
9
  import numpy as np
10
  import matplotlib.pyplot as plt
11
  import seaborn as sns
12
- from sklearn.preprocessing import StandardScaler, LabelEncoder
13
- from sklearn.cluster import KMeans
14
- from sklearn.linear_model import LinearRegression, LogisticRegression
15
- from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
16
- from sklearn.metrics import silhouette_score, mean_squared_error, accuracy_score, classification_report
17
- import plotly.express as px
18
- import plotly.graph_objects as go
19
- from plotly.subplots import make_subplots
20
  import warnings
 
 
21
  warnings.filterwarnings('ignore')
 
 
 
 
 
22
 
23
- # Set style
24
  plt.style.use('seaborn-v0_8')
25
  sns.set_palette("husl")
26
 
27
- class ComprehensiveSocialMediaAnalyzer:
28
  def __init__(self):
29
- """Initialize the analyzer with all models"""
30
- self.df = None
31
- self.scaler = StandardScaler()
32
- self.kmeans_model = None
33
- self.regression_model = None
34
- self.conflicts_model = None
35
- self.feature_names = None
36
  self.load_data()
37
- self.train_all_models()
38
-
 
39
  def load_data(self):
40
- """Load and prepare the dataset"""
41
  try:
42
- # Load the dataset
43
- import os
44
- import glob
45
-
46
- # Get current working directory
47
- cwd = os.getcwd()
48
- print(f"πŸ” Current working directory: {cwd}")
49
-
50
  # Try multiple possible paths
51
  possible_paths = [
52
  "data/Students Social Media Addiction.csv",
53
- "./data/Students Social Media Addiction.csv",
54
  "../data/Students Social Media Addiction.csv",
55
- os.path.join(cwd, "data", "Students Social Media Addiction.csv"),
56
- os.path.join(os.path.dirname(__file__), "data", "Students Social Media Addiction.csv")
57
  ]
58
 
59
- # Also try to find any CSV file in data directory
60
- data_files = glob.glob("data/*.csv")
61
- print(f"πŸ” Found CSV files in data/: {data_files}")
62
-
63
  for path in possible_paths:
64
- print(f"πŸ” Trying path: {path}")
65
- if os.path.exists(path):
66
- try:
67
- self.df = pd.read_csv(path)
68
- print(f"βœ… Data loaded from: {path}")
69
- print(f" Shape: {self.df.shape}")
70
- print(f" Columns: {list(self.df.columns)}")
71
- break
72
- except Exception as e:
73
- print(f"❌ Error reading {path}: {e}")
74
- continue
75
  else:
76
- # If no file found, try to use any CSV in data directory
77
- if data_files:
78
- try:
79
- self.df = pd.read_csv(data_files[0])
80
- print(f"βœ… Data loaded from fallback: {data_files[0]}")
81
- print(f" Shape: {self.df.shape}")
82
- print(f" Columns: {list(self.df.columns)}")
83
- except Exception as e:
84
- print(f"❌ Error reading fallback file: {e}")
85
- raise FileNotFoundError("Could not load any data file")
86
- else:
87
- raise FileNotFoundError("Could not find the data file in any expected location")
88
-
89
- # Create binary features for categorical variables
90
- self.df['Is_Female'] = (self.df['Gender'] == 'Female').astype(int)
91
- self.df['Is_Male'] = (self.df['Gender'] == 'Male').astype(int)
92
-
93
- # Academic level features
94
- self.df['Is_Undergraduate'] = (self.df['Academic_Level'] == 'Undergraduate').astype(int)
95
- self.df['Is_Graduate'] = (self.df['Academic_Level'] == 'Graduate').astype(int)
96
- self.df['Is_High_School'] = (self.df['Academic_Level'] == 'High School').astype(int)
97
-
98
- # Relationship status features
99
- self.df['Is_Single'] = (self.df['Relationship_Status'] == 'Single').astype(int)
100
- self.df['Is_In_Relationship'] = (self.df['Relationship_Status'] == 'In Relationship').astype(int)
101
- self.df['Is_Complicated'] = (self.df['Relationship_Status'] == 'Complicated').astype(int)
102
-
103
- # Academic performance
104
- self.df['Affects_Academic'] = (self.df['Affects_Academic_Performance'] == 'Yes').astype(int)
105
-
106
- # Create platform dummies (top 6 platforms)
107
- top_platforms = self.df['Most_Used_Platform'].value_counts().head(6).index
108
- for platform in top_platforms:
109
- self.df[f'Uses_{platform}'] = (self.df['Most_Used_Platform'] == platform).astype(int)
110
-
111
- # Create behavioral features
112
- self.df['High_Usage'] = (self.df['Avg_Daily_Usage_Hours'] >= 6).astype(int)
113
- self.df['Low_Sleep'] = (self.df['Sleep_Hours_Per_Night'] <= 6).astype(int)
114
- self.df['Poor_Mental_Health'] = (self.df['Mental_Health_Score'] <= 5).astype(int)
115
- self.df['High_Conflict'] = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
116
- self.df['High_Addiction'] = (self.df['Addicted_Score'] >= 7).astype(int)
117
-
118
- # Create interaction features
119
- self.df['Usage_Sleep_Ratio'] = self.df['Avg_Daily_Usage_Hours'] / self.df['Sleep_Hours_Per_Night']
120
- self.df['Mental_Health_Usage_Ratio'] = self.df['Mental_Health_Score'] / self.df['Avg_Daily_Usage_Hours']
121
-
122
- print("βœ… Data loaded successfully!")
123
-
124
  except Exception as e:
125
  print(f"❌ Error loading data: {e}")
126
- # Create sample data if file not found
127
  self.create_sample_data()
128
 
129
  def create_sample_data(self):
130
  """Create sample data for demonstration"""
131
  np.random.seed(42)
132
- n_samples = 1000
133
 
134
- self.df = pd.DataFrame({
135
- 'Age': np.random.randint(16, 30, n_samples),
136
  'Gender': np.random.choice(['Male', 'Female'], n_samples),
137
- 'Academic_Level': np.random.choice(['High School', 'Undergraduate', 'Graduate'], n_samples),
138
  'Relationship_Status': np.random.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
 
139
  'Most_Used_Platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
140
- 'Avg_Daily_Usage_Hours': np.random.normal(4.5, 2, n_samples),
141
- 'Sleep_Hours_Per_Night': np.random.normal(7, 1.5, n_samples),
142
- 'Mental_Health_Score': np.random.normal(6.5, 2, n_samples),
143
- 'Conflicts_Over_Social_Media': np.random.randint(0, 6, n_samples),
144
- 'Addicted_Score': np.random.normal(5.5, 2, n_samples),
145
  'Affects_Academic_Performance': np.random.choice(['Yes', 'No'], n_samples)
146
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Apply the same feature engineering
149
- self.load_data()
150
-
151
- def train_all_models(self):
152
- """Train clustering, regression, and classification models"""
153
- try:
154
- # Select numerical features for all models
155
- numerical_features = [
156
- 'Age', 'Avg_Daily_Usage_Hours', 'Sleep_Hours_Per_Night',
157
- 'Mental_Health_Score', 'Conflicts_Over_Social_Media', 'Addicted_Score',
158
- 'Is_Female', 'Is_Undergraduate', 'Is_Graduate', 'Is_High_School',
159
- 'Is_Single', 'Is_In_Relationship', 'Is_Complicated', 'Affects_Academic',
160
- 'High_Usage', 'Low_Sleep', 'Poor_Mental_Health', 'High_Conflict', 'High_Addiction',
161
- 'Usage_Sleep_Ratio', 'Mental_Health_Usage_Ratio'
162
- ]
163
-
164
- # Add platform features
165
- platform_features = [col for col in self.df.columns if col.startswith('Uses_')]
166
- numerical_features.extend(platform_features)
167
-
168
- # Filter to only include features that exist
169
- self.feature_names = [f for f in numerical_features if f in self.df.columns]
170
-
171
- # Create feature matrix
172
- X = self.df[self.feature_names].copy()
173
-
174
- # Handle missing values
175
- X = X.fillna(X.mean())
176
-
177
- # Scale features
178
- X_scaled = self.scaler.fit_transform(X)
179
-
180
- # 1. Train Clustering Model (K-Means)
181
- self.kmeans_model = KMeans(n_clusters=4, random_state=42, n_init=10)
182
- self.df['Cluster'] = self.kmeans_model.fit_predict(X_scaled)
183
-
184
- # 2. Train Regression Model (Predict Addiction Score)
185
- self.regression_model = RandomForestRegressor(n_estimators=100, random_state=42)
186
- self.regression_model.fit(X_scaled, self.df['Addicted_Score'])
187
-
188
- # 3. Train Classification Model (Predict Conflicts)
189
- # Create binary conflict target (High conflict if >= 3)
190
- conflict_target = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
191
- self.conflicts_model = RandomForestClassifier(n_estimators=100, random_state=42)
192
- self.conflicts_model.fit(X_scaled, conflict_target)
193
-
194
- print("βœ… All models trained successfully!")
195
- print(f" - Clustering: {len(set(self.df['Cluster']))} clusters")
196
- print(f" - Regression: Addiction score prediction")
197
- print(f" - Classification: Conflict prediction")
198
-
199
- except Exception as e:
200
- print(f"❌ Error training models: {e}")
201
-
202
- def analyze_individual(self, age, gender, academic_level, relationship_status,
203
- platform, daily_usage, sleep_hours, mental_health,
204
- conflicts, addiction_score, affects_academic):
205
- """Comprehensive individual analysis"""
206
 
207
- # Create individual data point
208
- individual_data = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  'Age': age,
210
  'Gender': gender,
211
  'Academic_Level': academic_level,
212
  'Relationship_Status': relationship_status,
 
213
  'Most_Used_Platform': platform,
214
  'Avg_Daily_Usage_Hours': daily_usage,
215
  'Sleep_Hours_Per_Night': sleep_hours,
216
  'Mental_Health_Score': mental_health,
217
  'Conflicts_Over_Social_Media': conflicts,
218
- 'Addicted_Score': addiction_score,
219
  'Affects_Academic_Performance': affects_academic
220
  }
 
 
 
221
 
222
- # Create binary features
223
- individual_data['Is_Female'] = 1 if gender == 'Female' else 0
224
- individual_data['Is_Undergraduate'] = 1 if academic_level == 'Undergraduate' else 0
225
- individual_data['Is_Graduate'] = 1 if academic_level == 'Graduate' else 0
226
- individual_data['Is_High_School'] = 1 if academic_level == 'High School' else 0
227
- individual_data['Is_Single'] = 1 if relationship_status == 'Single' else 0
228
- individual_data['Is_In_Relationship'] = 1 if relationship_status == 'In Relationship' else 0
229
- individual_data['Is_Complicated'] = 1 if relationship_status == 'Complicated' else 0
230
- individual_data['Affects_Academic'] = 1 if affects_academic == 'Yes' else 0
231
-
232
- # Platform features
233
- for platform_name in ['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat']:
234
- individual_data[f'Uses_{platform_name}'] = 1 if platform == platform_name else 0
235
-
236
- # Behavioral features
237
- individual_data['High_Usage'] = 1 if daily_usage >= 6 else 0
238
- individual_data['Low_Sleep'] = 1 if sleep_hours <= 6 else 0
239
- individual_data['Poor_Mental_Health'] = 1 if mental_health <= 5 else 0
240
- individual_data['High_Conflict'] = 1 if conflicts >= 3 else 0
241
- individual_data['High_Addiction'] = 1 if addiction_score >= 7 else 0
242
-
243
- # Interaction features
244
- individual_data['Usage_Sleep_Ratio'] = daily_usage / sleep_hours if sleep_hours > 0 else 0
245
- individual_data['Mental_Health_Usage_Ratio'] = mental_health / daily_usage if daily_usage > 0 else 0
246
-
247
- # Create feature vector
248
- features = []
249
- for feature in self.feature_names:
250
- if feature in individual_data:
251
- features.append(individual_data[feature])
252
- else:
253
- features.append(0)
254
-
255
- # Scale features
256
- features_scaled = self.scaler.transform([features])
257
-
258
- # 1. Clustering Analysis
259
- cluster = self.kmeans_model.predict(features_scaled)[0]
260
- cluster_data = self.df[self.df['Cluster'] == cluster]
261
-
262
- # 2. Regression Analysis (Predict Addiction Score)
263
- predicted_addiction = self.regression_model.predict(features_scaled)[0]
264
-
265
- # 3. Classification Analysis (Predict Conflict Risk)
266
- conflict_probability = self.conflicts_model.predict_proba(features_scaled)[0]
267
- high_conflict_prob = conflict_probability[1] # Probability of high conflict
268
-
269
- # Calculate risk factors
270
- risk_factors = []
271
- if daily_usage >= 6:
272
- risk_factors.append("High daily usage (β‰₯6 hours)")
273
- if sleep_hours <= 6:
274
- risk_factors.append("Low sleep (≀6 hours)")
275
- if mental_health <= 5:
276
- risk_factors.append("Poor mental health (≀5/10)")
277
- if conflicts >= 3:
278
- risk_factors.append("High social media conflicts (β‰₯3)")
279
- if addiction_score >= 7:
280
- risk_factors.append("High addiction score (β‰₯7/10)")
281
-
282
- # Generate recommendations
283
- recommendations = []
284
- if daily_usage >= 6:
285
- recommendations.append("Consider setting daily usage limits")
286
- if sleep_hours <= 6:
287
- recommendations.append("Improve sleep hygiene and reduce screen time before bed")
288
- if mental_health <= 5:
289
- recommendations.append("Consider mental health support and digital detox")
290
- if conflicts >= 3:
291
- recommendations.append("Work on communication skills and boundary setting")
292
- if addiction_score >= 7:
293
- recommendations.append("Seek professional help for digital addiction")
294
-
295
- if not recommendations:
296
- recommendations.append("Maintain healthy social media habits")
297
-
298
- # Format comprehensive results
299
- output = f"""
300
- ## πŸ“Š Comprehensive Analysis Results
301
-
302
- ### 🎯 Clustering Analysis
303
- **Cluster {cluster}** - You belong to a group with {len(cluster_data)} similar students
304
-
305
- **Cluster Characteristics (Average):**
306
- - Daily Usage: {cluster_data['Avg_Daily_Usage_Hours'].mean():.1f} hours
307
- - Mental Health Score: {cluster_data['Mental_Health_Score'].mean():.1f}/10
308
- - Sleep Hours: {cluster_data['Sleep_Hours_Per_Night'].mean():.1f} hours/night
309
- - Addiction Score: {cluster_data['Addicted_Score'].mean():.1f}/10
310
-
311
- ### πŸ“ˆ Regression Analysis (Addiction Prediction)
312
- **Your Current Addiction Score:** {addiction_score:.1f}/10
313
- **Predicted Addiction Score:** {predicted_addiction:.1f}/10
314
- **Difference:** {predicted_addiction - addiction_score:+.1f} points
315
-
316
- ### ⚠️ Conflict Risk Analysis
317
- **Current Conflicts:** {conflicts}/5
318
- **High Conflict Risk Probability:** {high_conflict_prob:.1%}
319
- **Risk Level:** {'High' if high_conflict_prob > 0.6 else 'Medium' if high_conflict_prob > 0.3 else 'Low'}
320
-
321
- ### 🚨 Risk Factors Identified
322
- """
323
 
324
- if risk_factors:
325
- for factor in risk_factors:
326
- output += f"- {factor}\n"
327
- else:
328
- output += "- No significant risk factors identified\n"
329
-
330
- output += "\n### πŸ’‘ Personalized Recommendations\n"
331
- for rec in recommendations:
332
- output += f"- {rec}\n"
333
-
334
- # Add model-specific recommendations
335
- if predicted_addiction > addiction_score + 1:
336
- output += "- Consider reducing social media usage to prevent addiction escalation\n"
337
- if high_conflict_prob > 0.6:
338
- output += "- Focus on improving communication and conflict resolution skills\n"
339
-
340
- return output
341
-
342
- def create_comprehensive_dashboard(self):
343
- """Create comprehensive dashboard with all analyses"""
344
-
345
- # 1. Usage Distribution
346
- fig1 = px.histogram(self.df, x='Avg_Daily_Usage_Hours',
347
- title='Daily Social Media Usage Distribution',
348
- nbins=20, color_discrete_sequence=['#1f77b4'])
349
- fig1.update_layout(xaxis_title='Hours per Day', yaxis_title='Number of Students')
350
-
351
- # 2. Mental Health vs Usage by Cluster
352
- fig2 = px.scatter(self.df, x='Avg_Daily_Usage_Hours', y='Mental_Health_Score',
353
- color='Cluster', title='Mental Health vs Daily Usage by Cluster',
354
- color_discrete_sequence=px.colors.qualitative.Set1)
355
- fig2.update_layout(xaxis_title='Daily Usage (Hours)', yaxis_title='Mental Health Score')
356
-
357
- # 3. Cluster Distribution
358
- cluster_counts = self.df['Cluster'].value_counts().sort_index()
359
- fig3 = px.bar(x=cluster_counts.index, y=cluster_counts.values,
360
- title='Student Distribution by Cluster',
361
- color_discrete_sequence=['#2ca02c'])
362
- fig3.update_layout(xaxis_title='Cluster', yaxis_title='Number of Students')
363
-
364
- # 4. Addiction Score Distribution
365
- fig4 = px.histogram(self.df, x='Addicted_Score',
366
- title='Addiction Score Distribution',
367
- nbins=20, color_discrete_sequence=['#d62728'])
368
- fig4.update_layout(xaxis_title='Addiction Score', yaxis_title='Number of Students')
369
-
370
- # 5. Conflicts Analysis
371
- conflict_counts = self.df['Conflicts_Over_Social_Media'].value_counts().sort_index()
372
- fig5 = px.bar(x=conflict_counts.index, y=conflict_counts.values,
373
- title='Social Media Conflicts Distribution',
374
- color_discrete_sequence=['#ff7f0e'])
375
- fig5.update_layout(xaxis_title='Number of Conflicts', yaxis_title='Number of Students')
376
-
377
- # 6. Platform Usage
378
- platform_counts = self.df['Most_Used_Platform'].value_counts()
379
- fig6 = px.pie(values=platform_counts.values, names=platform_counts.index,
380
- title='Most Used Social Media Platforms')
381
-
382
- # 7. Cluster Characteristics Heatmap
383
- cluster_stats = self.df.groupby('Cluster').agg({
384
- 'Avg_Daily_Usage_Hours': 'mean',
385
- 'Mental_Health_Score': 'mean',
386
- 'Sleep_Hours_Per_Night': 'mean',
387
- 'Addicted_Score': 'mean',
388
- 'Conflicts_Over_Social_Media': 'mean'
389
- }).round(2)
390
-
391
- fig7 = px.imshow(cluster_stats.T,
392
- title='Cluster Characteristics Heatmap',
393
- color_continuous_scale='RdYlBu_r',
394
- aspect='auto')
395
- fig7.update_layout(xaxis_title='Cluster', yaxis_title='Metrics')
396
-
397
- # 8. Correlation Matrix
398
- corr_features = ['Avg_Daily_Usage_Hours', 'Mental_Health_Score', 'Sleep_Hours_Per_Night',
399
- 'Addicted_Score', 'Conflicts_Over_Social_Media']
400
- corr_matrix = self.df[corr_features].corr()
401
-
402
- fig8 = px.imshow(corr_matrix,
403
- title='Feature Correlation Matrix',
404
- color_continuous_scale='RdBu',
405
- aspect='auto')
406
- fig8.update_layout(xaxis_title='Features', yaxis_title='Features')
407
-
408
- return fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
409
-
410
- def get_comprehensive_stats(self):
411
- """Get comprehensive summary statistics"""
412
- stats = {
413
- "total_students": len(self.df),
414
- "avg_age": self.df['Age'].mean(),
415
- "avg_daily_usage": self.df['Avg_Daily_Usage_Hours'].mean(),
416
- "avg_mental_health": self.df['Mental_Health_Score'].mean(),
417
- "avg_sleep": self.df['Sleep_Hours_Per_Night'].mean(),
418
- "avg_addiction": self.df['Addicted_Score'].mean(),
419
- "avg_conflicts": self.df['Conflicts_Over_Social_Media'].mean(),
420
- "high_risk_students": len(self.df[self.df['Addicted_Score'] >= 7]),
421
- "high_conflict_students": len(self.df[self.df['Conflicts_Over_Social_Media'] >= 3]),
422
- "most_used_platform": self.df['Most_Used_Platform'].mode()[0],
423
- "n_clusters": len(set(self.df['Cluster']))
424
- }
425
- return stats
426
 
427
- # Initialize the analyzer
428
- analyzer = ComprehensiveSocialMediaAnalyzer()
429
 
430
- def individual_analysis(age, gender, academic_level, relationship_status,
431
- platform, daily_usage, sleep_hours, mental_health,
432
- conflicts, addiction_score, affects_academic):
433
- """Gradio interface for comprehensive individual analysis"""
434
-
435
- try:
436
- results = analyzer.analyze_individual(
437
- age, gender, academic_level, relationship_status,
438
- platform, daily_usage, sleep_hours, mental_health,
439
- conflicts, addiction_score, affects_academic
440
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
- return results
 
443
 
444
- except Exception as e:
445
- return f"❌ Error in analysis: {str(e)}"
446
 
447
- def comprehensive_dashboard():
448
- """Create comprehensive dashboard with all analyses"""
449
- try:
450
- fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8 = analyzer.create_comprehensive_dashboard()
451
- stats = analyzer.get_comprehensive_stats()
452
-
453
- # Create comprehensive summary text
454
- summary = f"""
455
- ## πŸ“Š Comprehensive Dataset Overview
456
 
457
- ### πŸ“ˆ Basic Statistics
458
- - **Total Students**: {stats['total_students']:,}
459
- - **Average Age**: {stats['avg_age']:.1f} years
460
- - **Average Daily Usage**: {stats['avg_daily_usage']:.1f} hours
461
- - **Average Mental Health Score**: {stats['avg_mental_health']:.1f}/10
462
- - **Average Sleep**: {stats['avg_sleep']:.1f} hours/night
463
- - **Average Addiction Score**: {stats['avg_addiction']:.1f}/10
464
- - **Average Conflicts**: {stats['avg_conflicts']:.1f}/5
465
 
466
- ### ⚠️ Risk Assessment
467
- - **High Risk Students (Addiction β‰₯7)**: {stats['high_risk_students']} ({stats['high_risk_students']/stats['total_students']*100:.1f}%)
468
- - **High Conflict Students (β‰₯3)**: {stats['high_conflict_students']} ({stats['high_conflict_students']/stats['total_students']*100:.1f}%)
469
 
470
- ### 🎯 Analysis Results
471
- - **Clusters Identified**: {stats['n_clusters']}
472
- - **Most Used Platform**: {stats['most_used_platform']}
 
 
 
 
 
 
 
 
 
 
473
 
474
- ### πŸ“Š Model Performance
475
- - **Clustering**: K-Means with {stats['n_clusters']} clusters
476
- - **Regression**: Random Forest for addiction score prediction
477
- - **Classification**: Random Forest for conflict risk prediction
478
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
- return summary, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
 
 
 
 
 
481
 
482
- except Exception as e:
483
- return f"❌ Error creating dashboard: {str(e)}", None, None, None, None, None, None, None, None
484
-
485
- # Create Gradio interface
486
- with gr.Blocks(title="Social Media Addiction Analysis - Comprehensive", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
- gr.Markdown("""
489
- # πŸ“± Social Media Addiction Analysis - Comprehensive
 
 
 
490
 
491
- ## Overview
492
- This application provides comprehensive analysis of student social media usage patterns including:
493
- - **Clustering Analysis**: Identify behavioral segments
494
- - **Regression Analysis**: Predict addiction scores
495
- - **Classification Analysis**: Predict conflict risks
496
- - **Risk Assessment**: Identify potential concerns
497
- - **Personalized Recommendations**: Actionable advice
 
498
 
499
- ### Features:
500
- - **Individual Analysis**: Get personalized insights with all three model types
501
- - **Comprehensive Dashboard**: Explore patterns across all analyses
502
- - **Risk Assessment**: Multi-factor evaluation
503
- - **Predictive Analytics**: ML-powered predictions
504
- """)
505
 
506
- with gr.Tabs():
507
-
508
- # Individual Analysis Tab
509
- with gr.Tab("πŸ” Individual Analysis"):
510
- gr.Markdown("### Enter your social media usage information for comprehensive analysis")
511
-
512
- with gr.Row():
513
- with gr.Column():
514
- age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age")
515
- gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender")
516
- academic_level = gr.Radio(choices=["High School", "Undergraduate", "Graduate"],
517
- value="Undergraduate", label="Academic Level")
518
- relationship_status = gr.Radio(choices=["Single", "In Relationship", "Complicated"],
519
- value="Single", label="Relationship Status")
520
-
521
- with gr.Column():
522
- platform = gr.Radio(choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
523
- value="Instagram", label="Most Used Platform")
524
- daily_usage = gr.Slider(minimum=0, maximum=12, value=4, step=0.5,
525
- label="Average Daily Usage (Hours)")
526
- sleep_hours = gr.Slider(minimum=4, maximum=12, value=7, step=0.5,
527
- label="Sleep Hours per Night")
528
- mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1,
529
- label="Mental Health Score (1-10)")
530
-
531
- with gr.Column():
532
- conflicts = gr.Slider(minimum=0, maximum=5, value=2, step=1,
533
- label="Conflicts Over Social Media (0-5)")
534
- addiction_score = gr.Slider(minimum=1, maximum=10, value=5, step=1,
535
- label="Addiction Score (1-10)")
536
- affects_academic = gr.Radio(choices=["Yes", "No"], value="No",
537
- label="Affects Academic Performance")
538
-
539
- analyze_btn = gr.Button("πŸ” Analyze My Usage", variant="primary")
540
- analysis_output = gr.Markdown(label="Comprehensive Analysis Results")
541
-
542
- analyze_btn.click(
543
- fn=individual_analysis,
544
- inputs=[age, gender, academic_level, relationship_status, platform,
545
- daily_usage, sleep_hours, mental_health, conflicts, addiction_score, affects_academic],
546
- outputs=analysis_output
547
- )
548
-
549
- # Comprehensive Dashboard Tab
550
- with gr.Tab("πŸ“Š Comprehensive Dashboard"):
551
- gr.Markdown("### Explore comprehensive patterns and all analyses")
552
-
553
- dashboard_btn = gr.Button("πŸ“Š Generate Comprehensive Dashboard", variant="primary")
554
-
555
- with gr.Row():
556
- summary_output = gr.Markdown(label="Comprehensive Summary Statistics")
557
-
558
- with gr.Row():
559
- plot1 = gr.Plot(label="Usage Distribution")
560
- plot2 = gr.Plot(label="Mental Health vs Usage by Cluster")
561
-
562
- with gr.Row():
563
- plot3 = gr.Plot(label="Cluster Distribution")
564
- plot4 = gr.Plot(label="Addiction Score Distribution")
565
-
566
- with gr.Row():
567
- plot5 = gr.Plot(label="Conflicts Distribution")
568
- plot6 = gr.Plot(label="Platform Usage")
569
-
570
- with gr.Row():
571
- plot7 = gr.Plot(label="Cluster Characteristics Heatmap")
572
- plot8 = gr.Plot(label="Feature Correlation Matrix")
573
-
574
- dashboard_btn.click(
575
- fn=comprehensive_dashboard,
576
- outputs=[summary_output, plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8]
577
- )
578
-
579
- # About Tab
580
- with gr.Tab("ℹ️ About"):
581
- gr.Markdown("""
582
- ## About This Comprehensive Application
583
-
584
- ### Purpose
585
- This application provides comprehensive analysis of student social media usage patterns using multiple machine learning approaches.
586
-
587
- ### Methodology
588
- - **Clustering Analysis**: K-Means clustering to identify distinct behavioral segments
589
- - **Regression Analysis**: Random Forest to predict addiction scores
590
- - **Classification Analysis**: Random Forest to predict conflict risks
591
- - **Risk Assessment**: Multi-factor evaluation of potential concerns
592
- - **Personalized Recommendations**: Actionable advice based on all analyses
593
-
594
- ### Key Metrics
595
- - **Daily Usage**: Hours spent on social media per day
596
- - **Mental Health Score**: Self-reported mental health (1-10 scale)
597
- - **Sleep Hours**: Average sleep duration per night
598
- - **Addiction Score**: Self-reported addiction level (1-10 scale)
599
- - **Conflicts**: Number of conflicts related to social media use
600
-
601
- ### Model Performance
602
- - **Clustering**: Identifies 4 distinct behavioral clusters
603
- - **Regression**: Predicts addiction scores with high accuracy
604
- - **Classification**: Predicts conflict risk probability
605
-
606
- ### Recommendations
607
- - Set daily usage limits
608
- - Improve sleep hygiene
609
- - Seek mental health support when needed
610
- - Develop healthy digital boundaries
611
- - Work on communication skills
612
-
613
- ### Data Source
614
- Analysis based on comprehensive student social media usage survey data.
615
- """)
616
-
617
- # Launch the app
618
- if __name__ == "__main__":
619
- demo.launch(share=True)
 
9
  import numpy as np
10
  import matplotlib.pyplot as plt
11
  import seaborn as sns
12
+ from pathlib import Path
 
 
 
 
 
 
 
13
  import warnings
14
+ import io
15
+ import base64
16
  warnings.filterwarnings('ignore')
17
+ import sys
18
+ sys.path.append('src')
19
+ from social_sphere_llm.unified_prediction_service import UnifiedSocialMediaPredictionService
20
+ from info import SocialSphereInfo
21
+ from graphs import create_conflict_pie_chart, create_addiction_score_chart, create_addiction_gauge_chart, create_clustering_charts
22
 
23
+ # Set style for plots
24
  plt.style.use('seaborn-v0_8')
25
  sns.set_palette("husl")
26
 
27
+ class SocialMediaAnalyzer:
28
  def __init__(self):
29
+ self.data = None
 
 
 
 
 
 
30
  self.load_data()
31
+ self.unified_service = UnifiedSocialMediaPredictionService()
32
+ self.info = SocialSphereInfo()
33
+
34
  def load_data(self):
35
+ """Load the dataset with fallback options"""
36
  try:
 
 
 
 
 
 
 
 
37
  # Try multiple possible paths
38
  possible_paths = [
39
  "data/Students Social Media Addiction.csv",
40
+ "data/cleaned_data.csv",
41
  "../data/Students Social Media Addiction.csv",
42
+ "../data/cleaned_data.csv"
 
43
  ]
44
 
 
 
 
 
45
  for path in possible_paths:
46
+ if Path(path).exists():
47
+ self.data = pd.read_csv(path)
48
+ print(f"βœ… Data loaded from: {path}")
49
+ break
 
 
 
 
 
 
 
50
  else:
51
+ # Create sample data if file not found
52
+ print("⚠️ Data file not found, creating sample data...")
53
+ self.create_sample_data()
54
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
  print(f"❌ Error loading data: {e}")
 
57
  self.create_sample_data()
58
 
59
  def create_sample_data(self):
60
  """Create sample data for demonstration"""
61
  np.random.seed(42)
62
+ n_samples = 100
63
 
64
+ self.data = pd.DataFrame({
65
+ 'Age': np.random.randint(18, 25, n_samples),
66
  'Gender': np.random.choice(['Male', 'Female'], n_samples),
67
+ 'Academic_Level': np.random.choice(['Undergraduate', 'Graduate', 'High School'], n_samples),
68
  'Relationship_Status': np.random.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
69
+ 'Country': np.random.choice(['USA', 'UK', 'Canada', 'Australia'], n_samples),
70
  'Most_Used_Platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
71
+ 'Avg_Daily_Usage_Hours': np.random.uniform(1, 12, n_samples),
72
+ 'Sleep_Hours_Per_Night': np.random.uniform(4, 10, n_samples),
73
+ 'Mental_Health_Score': np.random.uniform(1, 10, n_samples),
74
+ 'Conflicts_Over_Social_Media': np.random.randint(0, 5, n_samples),
75
+ 'Addicted_Score': np.random.uniform(1, 10, n_samples),
76
  'Affects_Academic_Performance': np.random.choice(['Yes', 'No'], n_samples)
77
  })
78
+ print("βœ… Sample data created successfully!")
79
+
80
+ def create_conflict_pie_chart(self, result):
81
+ """Create a pie chart for conflict prediction results"""
82
+ # Create the pie chart
83
+ fig, ax = plt.subplots(figsize=(3, 2))
84
+
85
+ # Define colors and labels
86
+ if result['conflict_level'] == 'High Risk':
87
+ colors = ['#ff6b6b', '#4ecdc4'] # Red for High Risk, Green for Low Risk
88
+ sizes = [result['confidence'], 1 - result['confidence']]
89
+ labels = ['High Risk', 'Low Risk']
90
+ else:
91
+ colors = ['#4ecdc4', '#ff6b6b'] # Green for Low Risk, Red for High Risk
92
+ sizes = [result['confidence'], 1 - result['confidence']]
93
+ labels = ['Low Risk', 'High Risk']
94
 
95
+ # Create pie chart
96
+ wedges, texts, autotexts = ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
97
+ startangle=90, explode=(0.1, 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Customize the chart
100
+ ax.set_title(f'Conflict Risk Prediction\nConfidence: {result["confidence"]:.1%}',
101
+ fontsize=14, fontweight='bold', pad=20)
102
+
103
+ # Make the chart more visually appealing
104
+ for autotext in autotexts:
105
+ autotext.set_color('white')
106
+ autotext.set_fontweight('bold')
107
+
108
+ # Add a legend
109
+ ax.legend(wedges, labels, title="Risk Levels", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
110
+
111
+ plt.tight_layout()
112
+
113
+ # Convert plot to base64 string for embedding in markdown
114
+ img_buffer = io.BytesIO()
115
+ plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
116
+ img_buffer.seek(0)
117
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
118
+ plt.close()
119
+
120
+ return f"data:image/png;base64,{img_base64}"
121
+
122
+ def create_addiction_score_chart(self, result):
123
+ """Create a histogram with prediction line for addiction score results"""
124
+ # Create the figure
125
+ fig, ax = plt.subplots(figsize=(10, 6))
126
+
127
+ # Generate sample distribution for context (if we have data)
128
+ if self.data is not None and 'Addicted_Score' in self.data.columns:
129
+ # Use actual data distribution
130
+ scores = self.data['Addicted_Score'].dropna()
131
+ else:
132
+ # Create a realistic distribution
133
+ np.random.seed(42)
134
+ scores = np.random.normal(5.5, 1.5, 1000)
135
+ scores = np.clip(scores, 1, 10) # Clip to valid range
136
+
137
+ # Create histogram
138
+ n, bins, patches = ax.hist(scores, bins=20, alpha=0.7, color='#4ecdc4',
139
+ edgecolor='black', linewidth=0.5)
140
+
141
+ # Add prediction line
142
+ predicted_score = result['predicted_score']
143
+ ax.axvline(x=predicted_score, color='#ff6b6b', linewidth=3,
144
+ label=f'Your Prediction: {predicted_score:.2f}')
145
+
146
+ # Add confidence interval if available
147
+ if 'confidence' in result:
148
+ confidence = result['confidence']
149
+ # Add a shaded area around the prediction
150
+ ax.axvspan(predicted_score - 0.5, predicted_score + 0.5,
151
+ alpha=0.3, color='#ff6b6b',
152
+ label=f'Confidence: {confidence:.2f}')
153
+
154
+ # Customize the chart
155
+ ax.set_title('Addiction Score Distribution with Your Prediction',
156
+ fontsize=16, fontweight='bold', pad=20)
157
+ ax.set_xlabel('Addiction Score (1-10)', fontsize=12, fontweight='bold')
158
+ ax.set_ylabel('Frequency', fontsize=12, fontweight='bold')
159
+
160
+ # Add addiction level zones
161
+ ax.axvspan(1, 3, alpha=0.2, color='green', label='Low Addiction (1-3)')
162
+ ax.axvspan(3, 7, alpha=0.2, color='orange', label='Moderate Addiction (3-7)')
163
+ ax.axvspan(7, 10, alpha=0.2, color='red', label='High Addiction (7-10)')
164
+
165
+ # Add legend
166
+ ax.legend(loc='upper right', fontsize=10)
167
+
168
+ # Add grid
169
+ ax.grid(True, alpha=0.3)
170
+
171
+ # Set x-axis limits
172
+ ax.set_xlim(0, 10)
173
+
174
+ plt.tight_layout()
175
+
176
+ # Convert plot to base64 string for embedding in markdown
177
+ img_buffer = io.BytesIO()
178
+ plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
179
+ img_buffer.seek(0)
180
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
181
+ plt.close()
182
+
183
+ return f"data:image/png;base64,{img_base64}"
184
+
185
    def create_addiction_gauge_chart(self, result):
        """Create a gauge chart for addiction score results.

        Draws a semicircular gauge on a polar axis: green/orange/red zones for
        low/moderate/high addiction, a needle at the predicted score, and an
        optional confidence annotation. Returns a base64 PNG data URI for
        embedding in markdown.
        """
        # Polar projection is used to draw the semicircular gauge.
        fig, ax = plt.subplots(figsize=(3, 2), subplot_kw={'projection': 'polar'})

        # Get the predicted score
        predicted_score = result['predicted_score']

        # Convert score to angle (0-180 degrees, where 0 is low addiction, 180 is high)
        # Map 1-10 score to 0-180 degrees: 20 degrees per score unit (180/9).
        angle = (predicted_score - 1) * 20

        # Outer arc of the gauge (the full 0-180 degree range at radius 1).
        theta = np.linspace(0, np.pi, 100)
        ax.plot(theta, [1]*100, 'k-', linewidth=3)

        # Color zones. Angles follow the same 20-degrees-per-unit mapping:
        # Low addiction (scores 1-3): Green, 0-40 degrees.
        low_angle = np.linspace(0, 2*20*np.pi/180, 50)
        ax.fill_between(low_angle, 0, 1, alpha=0.3, color='green', label='Low (1-3)')

        # Moderate addiction (scores 3-7): Orange, 40-120 degrees.
        mod_angle = np.linspace(2*20*np.pi/180, 6*20*np.pi/180, 50)
        ax.fill_between(mod_angle, 0, 1, alpha=0.3, color='orange', label='Moderate (3-7)')

        # High addiction (scores 7-10): Red, 120-180 degrees.
        high_angle = np.linspace(6*20*np.pi/180, np.pi, 50)
        ax.fill_between(high_angle, 0, 1, alpha=0.3, color='red', label='High (7-10)')

        # Needle pointing at the predicted score, extended past the arc to r=1.2.
        needle_angle = angle * np.pi / 180
        ax.plot([needle_angle, needle_angle], [0, 1.2], 'k-', linewidth=4, label=f'Your Score: {predicted_score:.1f}')

        # Circle marker at the needle tip.
        ax.plot(needle_angle, 1.2, 'ko', markersize=10, markeredgecolor='white', markeredgewidth=2)

        ax.set_title(f'Addiction Score Gauge\nPredicted: {predicted_score:.1f}/10',
                     fontsize=14, fontweight='bold', pad=20)

        # Remove axis labels and ticks; cap the radius just above the needle tip.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylim(0, 1.3)

        # Zone text labels at radius 1.4.
        # NOTE(review): r=1.4 exceeds ylim (1.3) and the confidence text below
        # uses a negative radius on a polar axis — confirm these annotations
        # actually render where intended.
        ax.text(0, 1.4, 'Low\n(1-3)', ha='center', va='center', fontsize=10, fontweight='bold')
        ax.text(np.pi/2, 1.4, 'Moderate\n(3-7)', ha='center', va='center', fontsize=10, fontweight='bold')
        ax.text(np.pi, 1.4, 'High\n(7-10)', ha='center', va='center', fontsize=10, fontweight='bold')

        # Optional confidence annotation below the gauge.
        if 'confidence' in result:
            confidence = result['confidence']
            ax.text(0, -0.3, f'Confidence: {confidence:.2f}', ha='center', va='center',
                    fontsize=10, fontweight='bold', bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue"))

        plt.tight_layout()

        # Convert plot to base64 string for embedding in markdown
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        img_buffer.seek(0)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
        plt.close()

        return f"data:image/png;base64,{img_base64}"
    def create_clustering_charts(self, result):
        """Create visualization charts for clustering results.

        Produces a two-panel figure (elbow curve and Sleep-vs-Age scatter) and
        returns it as a base64 PNG data URI. NOTE(review): both panels are
        built from hard-coded illustrative values, not from the fitted
        clustering model — confirm this is intentional.
        """
        # Two side-by-side panels: elbow curve (left), cluster scatter (right).
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

        # Chart 1: Elbow Method for Optimal K.
        # These inertia values are example numbers, not computed inertias.
        k_values = range(1, 11)
        inertias = [150, 120, 85, 65, 55, 50, 47, 45, 43, 42]  # Example inertias

        ax1.plot(k_values, inertias, 'bo-', linewidth=2, markersize=8)
        ax1.set_xlabel('Number of Clusters (k)', fontweight='bold')
        ax1.set_ylabel('Inertia', fontweight='bold')
        ax1.set_title('Elbow Method: Optimal K Selection', fontsize=12, fontweight='bold')
        ax1.grid(True, alpha=0.3)

        # Highlight the (fixed) optimal k used by the rest of the chart.
        optimal_k = 3
        ax1.axvline(x=optimal_k, color='red', linestyle='--', alpha=0.7, label=f'Optimal k = {optimal_k}')
        ax1.legend()

        # Chart 2: Cluster Scatter Plot.
        # Synthetic, seeded sample points for visualization — not dataset rows.
        np.random.seed(42)
        n_samples = 200

        # Cluster centers in (Sleep hours, Age) space, one per risk level.
        cluster_centers = np.array([[7, 20], [6, 22], [5, 21]])  # Sleep hours vs Age
        cluster_sizes = [60, 80, 60]

        data = []
        colors = ['#4ecdc4', '#ffd93d', '#ff6b6b']
        labels = ['Low Risk', 'Moderate Risk', 'High Risk']

        for i, (center, size, color, label) in enumerate(zip(cluster_centers, cluster_sizes, colors, labels)):
            cluster_data = np.random.normal(center, 0.8, (size, 2))
            data.append(cluster_data)

            # Plot each cluster's point cloud.
            ax2.scatter(cluster_data[:, 0], cluster_data[:, 1], c=color,
                        alpha=0.7, s=50, label=label)

        # Re-plot the user's cluster larger with a black edge to highlight it.
        # Risk level string is mapped to a cluster index by substring match.
        user_cluster_idx = 0 if 'Low' in result['risk_level'] else (1 if 'Moderate' in result['risk_level'] else 2)
        user_data = data[user_cluster_idx]
        ax2.scatter(user_data[:, 0], user_data[:, 1], c=colors[user_cluster_idx],
                    alpha=1.0, s=100, edgecolors='black', linewidth=2,
                    label=f'Your Cluster: {labels[user_cluster_idx]}')

        ax2.set_xlabel('Sleep Hours per Night', fontweight='bold')
        ax2.set_ylabel('Age', fontweight='bold')
        ax2.set_title(f'Cluster Analysis: Sleep vs Age (k={optimal_k})\nYour Cluster: {result["cluster_label"]}',
                      fontsize=12, fontweight='bold')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()

        # Convert plot to base64 string for embedding in markdown
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        img_buffer.seek(0)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
        plt.close()

        return f"data:image/png;base64,{img_base64}"
+ def get_clustering_assignments(self):
320
+ """Return DataFrame with Sleep, Age, and cluster assignments for all data."""
321
+ if self.data is None or self.unified_service.clustering_model is None or self.unified_service.clustering_scaler is None:
322
+ return None
323
+ # Build feature matrix for all rows
324
+ feature_names = self.unified_service.feature_names.get('clustering', [])
325
+ df = self.data.copy()
326
+ # Build features as in predict_cluster
327
+ def build_features(row):
328
+ features = {}
329
+ features['Age'] = float(row.get('Age', 0))
330
+ features['Avg_Daily_Usage_Hours'] = float(row.get('Avg_Daily_Usage_Hours', 0))
331
+ features['Sleep_Hours_Per_Night'] = float(row.get('Sleep_Hours_Per_Night', 0))
332
+ features['Mental_Health_Score'] = float(row.get('Mental_Health_Score', 0))
333
+ features['Conflicts_Over_Social_Media'] = float(row.get('Conflicts_Over_Social_Media', 0))
334
+ features['Addicted_Score'] = float(row.get('Addicted_Score', 0))
335
+ # Gender
336
+ gender = str(row.get('Gender', '')).lower()
337
+ features['Is_Female'] = 1 if gender in ['female', 'f'] else 0
338
+ # Academic Level
339
+ level = str(row.get('Academic_Level', '')).lower()
340
+ features['Is_Undergraduate'] = 1 if 'undergraduate' in level else 0
341
+ features['Is_Graduate'] = 1 if 'graduate' in level else 0
342
+ features['Is_High_School'] = 1 if 'high school' in level else 0
343
+ # Behavioral
344
+ features['High_Usage'] = 1 if features['Avg_Daily_Usage_Hours'] >= 6 else 0
345
+ features['Low_Sleep'] = 1 if features['Sleep_Hours_Per_Night'] <= 6 else 0
346
+ features['Poor_Mental_Health'] = 1 if features['Mental_Health_Score'] <= 5 else 0
347
+ features['High_Conflict'] = 1 if features['Conflicts_Over_Social_Media'] >= 3 else 0
348
+ features['High_Addiction'] = 1 if features['Addicted_Score'] >= 7 else 0
349
+ # Interactions
350
+ features['Usage_Sleep_Ratio'] = features['Avg_Daily_Usage_Hours'] / features['Sleep_Hours_Per_Night'] if features['Sleep_Hours_Per_Night'] else 0
351
+ features['Mental_Health_Usage_Ratio'] = features['Mental_Health_Score'] / features['Avg_Daily_Usage_Hours'] if features['Avg_Daily_Usage_Hours'] else 0
352
+ return [features.get(f, 0) for f in feature_names]
353
+ X = np.array([build_features(row) for _, row in df.iterrows()])
354
+ X_scaled = self.unified_service.clustering_scaler.transform(X)
355
+ clusters = self.unified_service.clustering_model.predict(X_scaled)
356
+ df = df.copy()
357
+ df['cluster'] = clusters
358
+ return df[['Sleep_Hours_Per_Night', 'Age', 'cluster']]
359
+
360
+ def classification_task(self, age, gender, academic_level, relationship_status,
361
+ country, platform, daily_usage, sleep_hours, mental_health,
362
+ conflicts, addicted_score, affects_academic):
363
+ """Classification task interface (now uses real ML pipeline)"""
364
+ # Prepare input dict for unified pipeline
365
+ input_data = {
366
  'Age': age,
367
  'Gender': gender,
368
  'Academic_Level': academic_level,
369
  'Relationship_Status': relationship_status,
370
+ 'Country': country,
371
  'Most_Used_Platform': platform,
372
  'Avg_Daily_Usage_Hours': daily_usage,
373
  'Sleep_Hours_Per_Night': sleep_hours,
374
  'Mental_Health_Score': mental_health,
375
  'Conflicts_Over_Social_Media': conflicts,
376
+ 'Addicted_Score': addicted_score,
377
  'Affects_Academic_Performance': affects_academic
378
  }
379
+ result = self.unified_service.predict_conflicts(input_data)
380
+ if 'error' in result:
381
+ return f"❌ Error: {result['error']}"
382
 
383
+ # Create the pie chart
384
+ pie_chart_img = create_conflict_pie_chart(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
+ return f"""
387
+ # πŸ” Classification Task: Conflict Risk Prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
+ ## πŸ“Š Prediction Results
 
390
 
391
+ **Predicted Conflict Level:** {result['conflict_level']}
392
+
393
+ **Confidence:** {result['confidence']:.2f}
394
+
395
+ **Recommendation:** {result['recommendation']}
396
+
397
+ ## πŸ“ˆ Visual Risk Assessment
398
+
399
+ ![Conflict Risk Prediction]({pie_chart_img})
400
+
401
+ ## πŸ“‹ What This Means
402
+ - **Low Risk (0)**: Predicted to have ≀3 conflicts over social media
403
+ - **High Risk (1)**: Predicted to have >3 conflicts over social media
404
+ - **Confidence**: How certain the model is about this prediction
405
+ """
406
+
407
+ def regression_task(self, age, gender, academic_level, relationship_status,
408
+ country, platform, daily_usage, sleep_hours, mental_health,
409
+ conflicts, affects_academic):
410
+ """Regression task interface (now uses real ML pipeline)"""
411
+ input_data = {
412
+ 'Age': age,
413
+ 'Gender': gender,
414
+ 'Academic_Level': academic_level,
415
+ 'Relationship_Status': relationship_status,
416
+ 'Country': country,
417
+ 'Most_Used_Platform': platform,
418
+ 'Avg_Daily_Usage_Hours': daily_usage,
419
+ 'Sleep_Hours_Per_Night': sleep_hours,
420
+ 'Mental_Health_Score': mental_health,
421
+ 'Conflicts_Over_Social_Media': conflicts,
422
+ 'Affects_Academic_Performance': affects_academic
423
+ }
424
+ result = self.unified_service.predict_addicted_score(input_data)
425
+ if 'error' in result:
426
+ return f"❌ Error: {result['error']}"
427
 
428
+ # Create only the gauge chart
429
+ gauge_img = create_addiction_gauge_chart(result)
430
 
431
+ return f"""
432
+ # πŸ“Š Regression Task: Addiction Score Prediction
433
 
434
+ ## πŸ“Š Prediction Results
 
 
 
 
 
 
 
 
435
 
436
+ **Predicted Addiction Score:** {result['predicted_score']:.2f}
 
 
 
 
 
 
 
437
 
438
+ **Addiction Level:** {result['addiction_level']}
 
 
439
 
440
+ **Confidence:** {result['confidence']:.2f}
441
+
442
+ ## πŸ“ˆ Visual Addiction Score Analysis
443
+
444
+ ![Addiction Score Gauge]({gauge_img})
445
+
446
+ ## πŸ“‹ What This Means
447
+ - **Low Addiction (1-3)**: Minimal social media dependency
448
+ - **Moderate Addiction (3-7)**: Some dependency with room for improvement
449
+ - **High Addiction (7-10)**: Significant dependency requiring attention
450
+ - **Gauge Chart**: Intuitive visual representation of your addiction level
451
+ - **Confidence**: How certain the model is about this prediction
452
+ """
453
 
454
+ def clustering_task(self, age, gender, academic_level, relationship_status,
455
+ country, platform, daily_usage, sleep_hours, mental_health,
456
+ conflicts, addicted_score, affects_academic):
457
+ """Clustering task interface (now uses real ML pipeline)"""
458
+ input_data = {
459
+ 'Age': age,
460
+ 'Gender': gender,
461
+ 'Academic_Level': academic_level,
462
+ 'Relationship_Status': relationship_status,
463
+ 'Country': country,
464
+ 'Most_Used_Platform': platform,
465
+ 'Avg_Daily_Usage_Hours': daily_usage,
466
+ 'Sleep_Hours_Per_Night': sleep_hours,
467
+ 'Mental_Health_Score': mental_health,
468
+ 'Conflicts_Over_Social_Media': conflicts,
469
+ 'Addicted_Score': addicted_score,
470
+ 'Affects_Academic_Performance': affects_academic
471
+ }
472
+ result = self.unified_service.predict_cluster(input_data)
473
+ if 'error' in result:
474
+ return f"❌ Error: {result['error']}"
475
+
476
+ # Get real clustering assignments for all data
477
+ cluster_df = self.get_clustering_assignments()
478
+ # Get user's point and cluster
479
+ user_sleep = input_data.get('Sleep_Hours_Per_Night', None)
480
+ user_age = input_data.get('Age', None)
481
+ user_cluster = result.get('cluster_id', None)
482
+ cluster_labels_map = self.unified_service.cluster_labels if self.unified_service.cluster_labels else {0: 'Cluster 0', 1: 'Cluster 1', 2: 'Cluster 2'}
483
+ # Create the clustering charts using real data
484
+ charts_img = create_clustering_charts(result, cluster_df, user_sleep, user_age, user_cluster, cluster_labels_map)
485
+
486
+ return f"""
487
+ # 🎯 Clustering Task: Behavioral Pattern Analysis
488
+
489
+ ## πŸ“Š Prediction Results
490
+
491
+ **Cluster Label:** {result['cluster_label']}
492
+
493
+ **Risk Level:** {result['risk_level']}
494
+
495
+ **Recommendation:** {result['recommendation']}
496
+
497
+ **Confidence:** {result['confidence']:.2f}
498
+
499
+ ## πŸ“ˆ Visual Analysis
500
+
501
+ ![Cluster Analysis]({charts_img})
502
+
503
+ ## πŸ“‹ What This Means
504
+ - **Elbow Method**: Shows how the optimal number of clusters (k=3) was determined
505
+ - **Cluster Scatter Plot**: Displays how users are grouped based on behavioral patterns
506
+ - **Your Position**: Highlighted point shows where you fall in the cluster analysis
507
+ - **Risk Assessment**: Identifies your overall risk level based on cluster membership
508
+ - **Confidence**: How certain the model is about this classification
509
+ """
510
+
511
def create_interface():
    """Create the Gradio interface.

    Layout: a task-selection dropdown on the left; on the right, a markdown
    output area plus an initially hidden parameter form. Informational tasks
    render static text immediately; ML tasks reveal the form and run when the
    predict button is clicked. Returns the constructed Blocks app.
    """
    # NOTE(review): SocialMediaAnalyzer and its `.info` helper are defined
    # elsewhere in this module — confirm the class name matches.
    analyzer = SocialMediaAnalyzer()

    with gr.Blocks(title="Social Sphere - Social Media Addiction Analysis", theme=gr.themes.Soft(primary_hue="purple")) as app:
        gr.Markdown("# πŸ“± Social Sphere")
        gr.Markdown("### Interactive machine learning-powered platform for social media impact analysis")

        with gr.Row():
            # Left side - Main Menu
            with gr.Column(scale=1):
                gr.Markdown("## 🎯 Main Menu")
                task_choice = gr.Dropdown(
                    choices=[
                        "About App",
                        "Classification Task (Predict High/Low Conflict Risk)",
                        "Regression Task",
                        "Clustering Task",
                        "Disclaimer",
                        "Dataset Citation"
                    ],
                    label="Select Analysis Task",
                    value="About App"
                )

            # Right side - Content area
            with gr.Column(scale=3):
                # Starts on the "About App" text; handlers replace its value.
                output_area = gr.Markdown(value=analyzer.info.about_app(), label="Analysis Results")

                # Input form for ML tasks (initially hidden)
                input_container = gr.Column(visible=False)
                with input_container:
                    gr.Markdown("## πŸ“‹ Input Parameters")

                    with gr.Row():
                        age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age", scale=1)
                        gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender", scale=1)

                    with gr.Row():
                        academic_level = gr.Dropdown(
                            choices=["High School", "Undergraduate", "Graduate"],
                            value="Undergraduate",
                            label="Academic Level",
                            scale=1
                        )
                        relationship_status = gr.Dropdown(
                            choices=["Single", "In Relationship", "Complicated"],
                            value="Single",
                            label="Relationship Status",
                            scale=1
                        )

                    with gr.Row():
                        country = gr.Dropdown(
                            choices=["USA", "UK", "Canada", "Australia", "Other"],
                            value="USA",
                            label="Country",
                            scale=1
                        )
                        platform = gr.Dropdown(
                            choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat", "YouTube"],
                            value="Instagram",
                            label="Most Used Platform",
                            scale=1
                        )

                    with gr.Row():
                        daily_usage = gr.Slider(minimum=0, maximum=24, value=4, step=0.5, label="Daily Usage (hours)", scale=1)
                        sleep_hours = gr.Slider(minimum=0, maximum=12, value=7, step=0.5, label="Sleep Hours", scale=1)

                    with gr.Row():
                        mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1, label="Mental Health Score (1-10)", scale=1)
                        conflicts = gr.Slider(minimum=0, maximum=5, value=1, step=1, label="Conflicts Over Social Media", visible=True, scale=1)

                    with gr.Row():
                        addicted_score = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Addiction Score (1-10)", scale=1)
                        affects_academic = gr.Radio(choices=["Yes", "No"], value="No", label="Affects Academic Performance", scale=1)

                    # Predict button
                    predict_btn = gr.Button("πŸš€ Run Prediction", variant="primary", size="lg")

        # Function to handle task selection (for non-ML tasks).
        # Returns (markdown text, form-visibility update): info pages hide the
        # form, ML tasks reveal it.
        def handle_task_selection(task):
            if task == "About App":
                return analyzer.info.about_app(), gr.update(visible=False)
            elif task == "Disclaimer":
                return analyzer.info.disclaimer(), gr.update(visible=False)
            elif task == "Dataset Citation":
                return analyzer.info.dataset_citation(), gr.update(visible=False)
            else:
                return "Select a task and click 'Run Prediction' to get results.", gr.update(visible=True)

        # Function to handle predictions — dispatches on the selected task.
        def handle_prediction(task, age, gender, academic_level, relationship_status,
                              country, platform, daily_usage, sleep_hours, mental_health,
                              conflicts, addicted_score, affects_academic):
            if task == "Classification Task (Predict High/Low Conflict Risk)":
                # Conflicts is the quantity being predicted here, so pass 0
                # instead of the (hidden) slider value.
                return analyzer.classification_task(age, gender, academic_level, relationship_status,
                                                    country, platform, daily_usage, sleep_hours, mental_health,
                                                    0, addicted_score, affects_academic)  # Set conflicts to 0 for prediction
            elif task == "Regression Task":
                return analyzer.regression_task(age, gender, academic_level, relationship_status,
                                                country, platform, daily_usage, sleep_hours, mental_health,
                                                conflicts, affects_academic)
            elif task == "Clustering Task":
                return analyzer.clustering_task(age, gender, academic_level, relationship_status,
                                                country, platform, daily_usage, sleep_hours, mental_health,
                                                conflicts, addicted_score, affects_academic)
            else:
                return "Please select a prediction task (Classification, Regression, or Clustering)."

        # Function to control input visibility based on task: the conflicts
        # slider is hidden for classification (it is the predicted target).
        def update_input_visibility(task):
            if task == "Classification Task (Predict High/Low Conflict Risk)":
                return gr.update(visible=False)  # Hide conflicts input for classification
            else:
                return gr.update(visible=True)  # Show conflicts input for other tasks

        # Connect the interface
        task_choice.change(
            fn=handle_task_selection,
            inputs=[task_choice],
            outputs=[output_area, input_container]
        )

        # Control conflicts input visibility
        task_choice.change(
            fn=update_input_visibility,
            inputs=[task_choice],
            outputs=[conflicts]
        )

        # Connect predict button
        predict_btn.click(
            fn=handle_prediction,
            inputs=[task_choice, age, gender, academic_level, relationship_status,
                    country, platform, daily_usage, sleep_hours, mental_health,
                    conflicts, addicted_score, affects_academic],
            outputs=output_area
        )

        gr.Markdown("---")
        gr.Markdown("### πŸ”§ Technical Information")
        gr.Markdown("- **Framework**: Gradio")
        gr.Markdown("- **Backend**: Python with scikit-learn")
        gr.Markdown("- **ML Pipeline**: MLflow integration")
        gr.Markdown("- **Data**: Students Social Media Addiction Dataset")

    return app
if __name__ == "__main__":
    # Create and launch the app
    app = create_interface()

    # Launch with automatic port finding: bind to port 0 so the OS assigns a
    # free ephemeral port, then reuse that number for the Gradio server.
    import socket
    def find_free_port():
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('', 0))
            s.listen(1)
            port = s.getsockname()[1]
            return port

    # NOTE(review): TOCTOU race — the probe socket is closed before Gradio
    # binds, so another process could claim the port in between. Letting
    # Gradio pick its own port would avoid this.
    port = find_free_port()
    print(f"πŸš€ Launching app on port {port}")
    print(f"πŸ“± Access the app at: http://localhost:{port}")

    # Bind on all interfaces, no public share link, surface errors in the UI.
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True,
        quiet=False
    )