BlakeL committed on
Commit
6f4fbae
·
verified ·
1 Parent(s): 8ece22e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +538 -126
app.py CHANGED
@@ -1,186 +1,598 @@
 
 
 
 
 
1
 
2
-
3
  import gradio as gr
4
  import pandas as pd
5
  import numpy as np
6
- import plotly.express as px
 
 
7
  from sklearn.cluster import KMeans
8
- from sklearn.preprocessing import StandardScaler
 
 
 
 
 
9
  import warnings
10
  warnings.filterwarnings('ignore')
11
 
12
- class SimpleSocialMediaAnalyzer:
 
 
 
 
13
  def __init__(self):
14
- self.df = self.create_sample_data()
15
- self.train_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def create_sample_data(self):
18
  """Create sample data for demonstration"""
19
  np.random.seed(42)
20
- n = 1000
21
-
22
- return pd.DataFrame({
23
- 'age': np.random.randint(16, 30, n),
24
- 'daily_usage': np.random.normal(4.5, 2, n),
25
- 'sleep_hours': np.random.normal(7, 1.5, n),
26
- 'mental_health': np.random.normal(6.5, 2, n),
27
- 'conflicts': np.random.randint(0, 6, n),
28
- 'addiction_score': np.random.normal(5.5, 2, n),
29
- 'gender': np.random.choice(['Male', 'Female'], n),
30
- 'platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n)
 
 
 
31
  })
 
 
 
32
 
33
- def train_model(self):
34
- """Train a simple clustering model"""
35
- # Select key features for clustering
36
- features = ['daily_usage', 'sleep_hours', 'mental_health', 'addiction_score', 'conflicts']
37
- X = self.df[features].fillna(self.df[features].mean())
38
-
39
- # Scale and cluster
40
- scaler = StandardScaler()
41
- X_scaled = scaler.fit_transform(X)
42
-
43
- kmeans = KMeans(n_clusters=3, random_state=42)
44
- self.df['cluster'] = kmeans.fit_predict(X_scaled)
45
-
46
- # Store for predictions
47
- self.scaler = scaler
48
- self.kmeans = kmeans
49
- self.features = features
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- def analyze_individual(self, age, daily_usage, sleep_hours, mental_health, conflicts, addiction_score, gender, platform):
52
- """Analyze individual user"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Create feature vector
54
- user_data = [daily_usage, sleep_hours, mental_health, addiction_score, conflicts]
55
- user_scaled = self.scaler.transform([user_data])
56
- cluster = self.kmeans.predict(user_scaled)[0]
 
 
 
 
 
 
57
 
58
- # Get cluster stats
59
- cluster_data = self.df[self.df['cluster'] == cluster]
 
60
 
61
- # Identify risk factors
62
- risks = []
63
- if daily_usage >= 6: risks.append("High daily usage (β‰₯6 hours)")
64
- if sleep_hours <= 6: risks.append("Low sleep (≀6 hours)")
65
- if mental_health <= 5: risks.append("Poor mental health (≀5/10)")
66
- if conflicts >= 3: risks.append("High conflicts (β‰₯3)")
67
- if addiction_score >= 7: risks.append("High addiction score (β‰₯7/10)")
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Generate recommendations
70
  recommendations = []
71
- if daily_usage >= 6: recommendations.append("Set daily usage limits")
72
- if sleep_hours <= 6: recommendations.append("Improve sleep hygiene")
73
- if mental_health <= 5: recommendations.append("Consider mental health support")
74
- if conflicts >= 3: recommendations.append("Work on communication skills")
75
- if addiction_score >= 7: recommendations.append("Seek professional help")
 
 
 
 
 
76
 
77
  if not recommendations:
78
- recommendations.append("Maintain healthy habits")
79
 
80
- # Format results
81
- result = f"""
82
- ## πŸ“Š Your Analysis Results
83
 
84
- **Cluster:** {cluster} (Similar to {len(cluster_data)} other students)
 
85
 
86
- **Cluster Averages:**
87
- - Daily Usage: {cluster_data['daily_usage'].mean():.1f} hours
88
- - Mental Health: {cluster_data['mental_health'].mean():.1f}/10
89
- - Sleep: {cluster_data['sleep_hours'].mean():.1f} hours
90
- - Addiction Score: {cluster_data['addiction_score'].mean():.1f}/10
91
 
92
- **Risk Factors:**
93
- {chr(10).join(f"- {risk}" for risk in risks) if risks else "- No significant risks identified"}
 
 
94
 
95
- **Recommendations:**
96
- {chr(10).join(f"- {rec}" for rec in recommendations)}
97
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- def create_dashboard(self):
102
- """Create dashboard plots"""
103
- # Usage distribution
104
- fig1 = px.histogram(self.df, x='daily_usage', title='Daily Usage Distribution',
105
- nbins=20, labels={'daily_usage': 'Hours/Day'})
106
-
107
- # Mental health vs usage by cluster
108
- fig2 = px.scatter(self.df, x='daily_usage', y='mental_health', color='cluster',
109
- title='Mental Health vs Daily Usage by Cluster',
110
- labels={'daily_usage': 'Hours/Day', 'mental_health': 'Mental Health Score'})
111
-
112
- # Platform usage
113
- platform_counts = self.df['platform'].value_counts()
114
- fig3 = px.pie(values=platform_counts.values, names=platform_counts.index,
115
- title='Most Used Platforms')
116
-
117
- # Cluster characteristics
118
- cluster_stats = self.df.groupby('cluster')[['daily_usage', 'mental_health', 'sleep_hours', 'addiction_score']].mean()
119
- fig4 = px.bar(cluster_stats, title='Average Characteristics by Cluster')
120
-
121
- # Summary stats
122
- stats = f"""
123
- ## πŸ“ˆ Dataset Summary
124
- - **Total Students:** {len(self.df):,}
125
- - **Average Daily Usage:** {self.df['daily_usage'].mean():.1f} hours
126
- - **Average Mental Health:** {self.df['mental_health'].mean():.1f}/10
127
- - **Average Sleep:** {self.df['sleep_hours'].mean():.1f} hours
128
- - **High Risk Students:** {len(self.df[self.df['addiction_score'] >= 7])} ({len(self.df[self.df['addiction_score'] >= 7])/len(self.df)*100:.1f}%)
129
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- return stats, fig1, fig2, fig3, fig4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- # Initialize analyzer
134
- analyzer = SimpleSocialMediaAnalyzer()
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  # Create Gradio interface
137
- with gr.Blocks(title="Social Media Analysis", theme=gr.themes.Soft()) as demo:
 
 
 
138
 
139
- gr.Markdown("# πŸ“± Social Media Usage Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  with gr.Tabs():
 
 
142
  with gr.Tab("πŸ” Individual Analysis"):
 
 
143
  with gr.Row():
144
  with gr.Column():
145
- age = gr.Slider(16, 30, 20, label="Age")
146
- daily_usage = gr.Slider(0, 12, 4, step=0.5, label="Daily Usage (Hours)")
147
- sleep_hours = gr.Slider(4, 12, 7, step=0.5, label="Sleep Hours")
148
- mental_health = gr.Slider(1, 10, 7, label="Mental Health Score (1-10)")
 
 
149
 
150
  with gr.Column():
151
- conflicts = gr.Slider(0, 5, 2, label="Social Media Conflicts (0-5)")
152
- addiction_score = gr.Slider(1, 10, 5, label="Addiction Score (1-10)")
153
- gender = gr.Radio(["Male", "Female"], "Male", label="Gender")
154
- platform = gr.Radio(["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
155
- "Instagram", label="Most Used Platform")
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- analyze_btn = gr.Button("πŸ” Analyze", variant="primary")
158
- result_output = gr.Markdown()
159
 
160
  analyze_btn.click(
161
- analyzer.analyze_individual,
162
- [age, daily_usage, sleep_hours, mental_health, conflicts, addiction_score, gender, platform],
163
- result_output
 
164
  )
165
 
166
- with gr.Tab("πŸ“Š Dashboard"):
167
- dashboard_btn = gr.Button("πŸ“Š Generate Dashboard", variant="primary")
 
 
 
168
 
169
  with gr.Row():
170
- summary_text = gr.Markdown()
171
 
172
  with gr.Row():
173
- plot1 = gr.Plot()
174
- plot2 = gr.Plot()
175
 
176
  with gr.Row():
177
- plot3 = gr.Plot()
178
- plot4 = gr.Plot()
 
 
 
 
 
 
 
 
179
 
180
  dashboard_btn.click(
181
- analyzer.create_dashboard,
182
- outputs=[summary_text, plot1, plot2, plot3, plot4]
183
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
 
185
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  demo.launch(share=True)
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Social Media Addiction Analysis - Comprehensive Gradio App
4
+ Includes clustering, regression, and conflicts analysis
5
+ """
6
 
 
7
  import gradio as gr
8
  import pandas as pd
9
  import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import seaborn as sns
12
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
13
  from sklearn.cluster import KMeans
14
+ from sklearn.linear_model import LinearRegression, LogisticRegression
15
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
16
+ from sklearn.metrics import silhouette_score, mean_squared_error, accuracy_score, classification_report
17
+ import plotly.express as px
18
+ import plotly.graph_objects as go
19
+ from plotly.subplots import make_subplots
20
  import warnings
21
  warnings.filterwarnings('ignore')
22
 
23
# Set style
# The seaborn style sheets were renamed to 'seaborn-v0_8*' in newer
# matplotlib releases; older installs only know the legacy 'seaborn' name,
# and an unknown style name raises OSError at import time.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")
26
+
27
class ComprehensiveSocialMediaAnalyzer:
    """Clustering, regression and classification analysis of student social media usage.

    Constructing an instance loads the dataset (falling back to synthetic
    sample data when the CSV cannot be loaded) and trains three models:
    K-Means clustering, a random-forest regressor for the addiction score and
    a random-forest classifier for high-conflict risk.
    """

    DATA_PATH = "data/Students Social Media Addiction.csv"

    # Columns that are, or are derived from, each supervised target. They are
    # withheld from the corresponding model; otherwise the model just echoes
    # the value the user typed in (target leakage).
    _ADDICTION_TARGET_FEATURES = frozenset({'Addicted_Score', 'High_Addiction'})
    _CONFLICT_TARGET_FEATURES = frozenset({'Conflicts_Over_Social_Media', 'High_Conflict'})

    # (indicator column, source column, category that sets the indicator to 1).
    # Shared by dataset preparation and single-user analysis so both encode
    # categorical answers identically.
    _CATEGORY_FLAGS = (
        ('Is_Female', 'Gender', 'Female'),
        ('Is_Male', 'Gender', 'Male'),
        ('Is_Undergraduate', 'Academic_Level', 'Undergraduate'),
        ('Is_Graduate', 'Academic_Level', 'Graduate'),
        ('Is_High_School', 'Academic_Level', 'High School'),
        ('Is_Single', 'Relationship_Status', 'Single'),
        ('Is_In_Relationship', 'Relationship_Status', 'In Relationship'),
        ('Is_Complicated', 'Relationship_Status', 'Complicated'),
        ('Affects_Academic', 'Affects_Academic_Performance', 'Yes'),
    )

    def __init__(self):
        """Initialize the analyzer, load the data and train all models."""
        self.df = None
        self.scaler = StandardScaler()
        self.kmeans_model = None
        self.regression_model = None
        self.conflicts_model = None
        self.feature_names = None
        # Column positions (into the scaled feature matrix) used by each
        # supervised model; filled in by train_all_models().
        self._addiction_columns = []
        self._conflict_columns = []
        self.load_data()
        self.train_all_models()

    def load_data(self):
        """Load the CSV dataset and derive model features.

        Any failure (missing file, unreadable CSV, missing columns) is
        reported on stdout and the synthetic sample dataset is used instead.
        """
        try:
            self.df = pd.read_csv(self.DATA_PATH)
            self._engineer_features()
        except Exception as e:  # deliberate best-effort: the demo must still start
            print(f"❌ Error loading data: {e}")
            # Create sample data if the file cannot be used
            self.create_sample_data()
        else:
            print("✅ Data loaded successfully!")

    def create_sample_data(self):
        """Create sample data for demonstration and derive its features.

        Replaces ``self.df`` with 1000 synthetic rows. Feature engineering is
        applied directly; this method must not call ``load_data`` again, as
        that would re-read the missing CSV and recurse without bound.
        """
        # A private generator keeps the data reproducible without reseeding
        # NumPy's global random state.
        rng = np.random.RandomState(42)
        n_samples = 1000

        self.df = pd.DataFrame({
            'Age': rng.randint(16, 30, n_samples),
            'Gender': rng.choice(['Male', 'Female'], n_samples),
            'Academic_Level': rng.choice(['High School', 'Undergraduate', 'Graduate'], n_samples),
            'Relationship_Status': rng.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
            'Most_Used_Platform': rng.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
            # Normal draws can fall outside the meaningful range (negative
            # hours, scores above 10), so they are clipped.
            'Avg_Daily_Usage_Hours': np.clip(rng.normal(4.5, 2, n_samples), 0, 24),
            'Sleep_Hours_Per_Night': np.clip(rng.normal(7, 1.5, n_samples), 0, 24),
            'Mental_Health_Score': np.clip(rng.normal(6.5, 2, n_samples), 1, 10),
            'Conflicts_Over_Social_Media': rng.randint(0, 6, n_samples),
            'Addicted_Score': np.clip(rng.normal(5.5, 2, n_samples), 1, 10),
            'Affects_Academic_Performance': rng.choice(['Yes', 'No'], n_samples),
        })

        # Apply the same feature engineering as for the real dataset
        self._engineer_features()

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return numerator / denominator element-wise, 0 where denominator <= 0.

        Mirrors the guard used for a single user in ``analyze_individual`` so
        that training data never contains infinities.
        """
        non_positive = denominator <= 0
        ratio = numerator / denominator.mask(non_positive)
        return ratio.mask(non_positive, 0.0)

    def _engineer_features(self):
        """Add indicator, threshold and ratio columns to ``self.df`` in place."""
        df = self.df

        # Binary indicators for categorical variables
        for flag, source, category in self._CATEGORY_FLAGS:
            df[flag] = (df[source] == category).astype(int)

        # Platform dummies (top 6 platforms by frequency)
        top_platforms = df['Most_Used_Platform'].value_counts().head(6).index
        for platform in top_platforms:
            df[f'Uses_{platform}'] = (df['Most_Used_Platform'] == platform).astype(int)

        # Behavioral threshold flags
        df['High_Usage'] = (df['Avg_Daily_Usage_Hours'] >= 6).astype(int)
        df['Low_Sleep'] = (df['Sleep_Hours_Per_Night'] <= 6).astype(int)
        df['Poor_Mental_Health'] = (df['Mental_Health_Score'] <= 5).astype(int)
        df['High_Conflict'] = (df['Conflicts_Over_Social_Media'] >= 3).astype(int)
        df['High_Addiction'] = (df['Addicted_Score'] >= 7).astype(int)

        # Interaction features
        df['Usage_Sleep_Ratio'] = self._safe_ratio(
            df['Avg_Daily_Usage_Hours'], df['Sleep_Hours_Per_Night'])
        df['Mental_Health_Usage_Ratio'] = self._safe_ratio(
            df['Mental_Health_Score'], df['Avg_Daily_Usage_Hours'])

    def _feature_columns_without(self, excluded):
        """Return positions in ``self.feature_names`` whose name is not in *excluded*."""
        return [i for i, name in enumerate(self.feature_names) if name not in excluded]

    def train_all_models(self):
        """Train clustering, regression, and classification models.

        Failures are reported on stdout and leave the affected models unset;
        ``analyze_individual`` then raises a clear error instead.
        """
        try:
            # Select numerical features for all models
            numerical_features = [
                'Age', 'Avg_Daily_Usage_Hours', 'Sleep_Hours_Per_Night',
                'Mental_Health_Score', 'Conflicts_Over_Social_Media', 'Addicted_Score',
                'Is_Female', 'Is_Undergraduate', 'Is_Graduate', 'Is_High_School',
                'Is_Single', 'Is_In_Relationship', 'Is_Complicated', 'Affects_Academic',
                'High_Usage', 'Low_Sleep', 'Poor_Mental_Health', 'High_Conflict', 'High_Addiction',
                'Usage_Sleep_Ratio', 'Mental_Health_Usage_Ratio'
            ]

            # Add platform features
            numerical_features.extend(
                col for col in self.df.columns if col.startswith('Uses_'))

            # Filter to only include features that exist
            self.feature_names = [f for f in numerical_features if f in self.df.columns]

            # Feature matrix with missing values replaced by column means
            X = self.df[self.feature_names].copy()
            X = X.fillna(X.mean())

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            # 1. Clustering (K-Means) uses every feature
            self.kmeans_model = KMeans(n_clusters=4, random_state=42, n_init=10)
            self.df['Cluster'] = self.kmeans_model.fit_predict(X_scaled)

            # 2. Regression (predict addiction score) without target-derived columns
            self._addiction_columns = self._feature_columns_without(
                self._ADDICTION_TARGET_FEATURES)
            self.regression_model = RandomForestRegressor(n_estimators=100, random_state=42)
            self.regression_model.fit(
                X_scaled[:, self._addiction_columns], self.df['Addicted_Score'])

            # 3. Classification (high conflict if >= 3) without target-derived columns
            self._conflict_columns = self._feature_columns_without(
                self._CONFLICT_TARGET_FEATURES)
            conflict_target = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
            self.conflicts_model = RandomForestClassifier(n_estimators=100, random_state=42)
            self.conflicts_model.fit(X_scaled[:, self._conflict_columns], conflict_target)

            print("✅ All models trained successfully!")
            print(f" - Clustering: {self.df['Cluster'].nunique()} clusters")
            print(" - Regression: Addiction score prediction")
            print(" - Classification: Conflict prediction")

        except Exception as e:
            print(f"❌ Error training models: {e}")

    def analyze_individual(self, age, gender, academic_level, relationship_status,
                           platform, daily_usage, sleep_hours, mental_health,
                           conflicts, addiction_score, affects_academic):
        """Run all three models for one user and return a Markdown report.

        Parameters are the raw answers from the UI (numbers for the sliders,
        category strings for the radio groups).

        Returns:
            A Markdown string with the cluster summary, the predicted
            addiction score, the conflict-risk probability, triggered risk
            factors and recommendations.

        Raises:
            RuntimeError: if model training did not complete.
        """
        models = (self.kmeans_model, self.regression_model, self.conflicts_model)
        if self.feature_names is None or any(model is None for model in models):
            raise RuntimeError("Models are not trained; see the startup log for the training error.")

        # Raw answers
        individual_data = {
            'Age': age,
            'Gender': gender,
            'Academic_Level': academic_level,
            'Relationship_Status': relationship_status,
            'Most_Used_Platform': platform,
            'Avg_Daily_Usage_Hours': daily_usage,
            'Sleep_Hours_Per_Night': sleep_hours,
            'Mental_Health_Score': mental_health,
            'Conflicts_Over_Social_Media': conflicts,
            'Addicted_Score': addiction_score,
            'Affects_Academic_Performance': affects_academic
        }

        # Binary indicators, encoded exactly like the training data
        for flag, source, category in self._CATEGORY_FLAGS:
            individual_data[flag] = int(individual_data[source] == category)

        # Only the chosen platform's dummy is 1; every other Uses_* feature
        # falls back to 0 when the feature vector is assembled below.
        individual_data[f'Uses_{platform}'] = 1

        # (flag column, triggered?, risk factor text, recommendation text)
        checks = [
            ('High_Usage', daily_usage >= 6,
             "High daily usage (≥6 hours)",
             "Consider setting daily usage limits"),
            ('Low_Sleep', sleep_hours <= 6,
             "Low sleep (≤6 hours)",
             "Improve sleep hygiene and reduce screen time before bed"),
            ('Poor_Mental_Health', mental_health <= 5,
             "Poor mental health (≤5/10)",
             "Consider mental health support and digital detox"),
            ('High_Conflict', conflicts >= 3,
             "High social media conflicts (≥3)",
             "Work on communication skills and boundary setting"),
            ('High_Addiction', addiction_score >= 7,
             "High addiction score (≥7/10)",
             "Seek professional help for digital addiction"),
        ]
        for flag, triggered, _, _ in checks:
            individual_data[flag] = int(triggered)

        # Interaction features
        individual_data['Usage_Sleep_Ratio'] = daily_usage / sleep_hours if sleep_hours > 0 else 0
        individual_data['Mental_Health_Usage_Ratio'] = mental_health / daily_usage if daily_usage > 0 else 0

        # One-row frame in training column order; unknown features default to 0
        row = pd.DataFrame(
            [[individual_data.get(name, 0) for name in self.feature_names]],
            columns=self.feature_names,
        )
        features_scaled = self.scaler.transform(row)

        # 1. Clustering Analysis
        cluster = self.kmeans_model.predict(features_scaled)[0]
        cluster_data = self.df[self.df['Cluster'] == cluster]

        # 2. Regression Analysis (Predict Addiction Score)
        predicted_addiction = self.regression_model.predict(
            features_scaled[:, self._addiction_columns])[0]

        # 3. Classification Analysis (Predict Conflict Risk)
        # Look the positive class up by label: if training data contained a
        # single class, predict_proba has only one column.
        class_labels = list(self.conflicts_model.classes_)
        conflict_probability = self.conflicts_model.predict_proba(
            features_scaled[:, self._conflict_columns])[0]
        if 1 in class_labels:
            high_conflict_prob = conflict_probability[class_labels.index(1)]
        else:
            high_conflict_prob = 0.0

        if high_conflict_prob > 0.6:
            risk_level = 'High'
        elif high_conflict_prob > 0.3:
            risk_level = 'Medium'
        else:
            risk_level = 'Low'

        risk_factors = [risk for _, triggered, risk, _ in checks if triggered]
        recommendations = [advice for _, triggered, _, advice in checks if triggered]
        if not recommendations:
            recommendations.append("Maintain healthy social media habits")

        # Model-specific recommendations
        if predicted_addiction > addiction_score + 1:
            recommendations.append(
                "Consider reducing social media usage to prevent addiction escalation")
        if high_conflict_prob > 0.6:
            recommendations.append(
                "Focus on improving communication and conflict resolution skills")

        # Key/value lines are emitted as list items: consecutive plain lines
        # would be merged into a single paragraph by Markdown.
        lines = [
            "",
            "## 📊 Comprehensive Analysis Results",
            "",
            "### 🎯 Clustering Analysis",
            f"**Cluster {cluster}** - You belong to a group with {len(cluster_data)} similar students",
            "",
            "**Cluster Characteristics (Average):**",
            f"- Daily Usage: {cluster_data['Avg_Daily_Usage_Hours'].mean():.1f} hours",
            f"- Mental Health Score: {cluster_data['Mental_Health_Score'].mean():.1f}/10",
            f"- Sleep Hours: {cluster_data['Sleep_Hours_Per_Night'].mean():.1f} hours/night",
            f"- Addiction Score: {cluster_data['Addicted_Score'].mean():.1f}/10",
            "",
            "### 📈 Regression Analysis (Addiction Prediction)",
            f"- **Your Current Addiction Score:** {addiction_score:.1f}/10",
            f"- **Predicted Addiction Score:** {predicted_addiction:.1f}/10",
            f"- **Difference:** {predicted_addiction - addiction_score:+.1f} points",
            "",
            "### ⚠️ Conflict Risk Analysis",
            f"- **Current Conflicts:** {conflicts}/5",
            f"- **High Conflict Risk Probability:** {high_conflict_prob:.1%}",
            f"- **Risk Level:** {risk_level}",
            "",
            "### 🚨 Risk Factors Identified",
        ]
        if risk_factors:
            lines.extend(f"- {factor}" for factor in risk_factors)
        else:
            lines.append("- No significant risk factors identified")

        lines.append("")
        lines.append("### 💡 Personalized Recommendations")
        lines.extend(f"- {rec}" for rec in recommendations)

        return "\n".join(lines) + "\n"

    def create_comprehensive_dashboard(self):
        """Build the eight dashboard figures from ``self.df``.

        Returns:
            A tuple ``(fig1, ..., fig8)`` of Plotly figures: usage histogram,
            mental health vs usage scatter, cluster sizes, addiction score
            histogram, conflict counts, platform pie, cluster heatmap and
            correlation matrix.
        """
        # 1. Usage Distribution
        fig1 = px.histogram(self.df, x='Avg_Daily_Usage_Hours',
                            title='Daily Social Media Usage Distribution',
                            nbins=20, color_discrete_sequence=['#1f77b4'])
        fig1.update_layout(xaxis_title='Hours per Day', yaxis_title='Number of Students')

        # 2. Mental Health vs Usage by Cluster
        # Cluster ids are integers; Plotly Express treats a numeric color
        # column as continuous and ignores color_discrete_sequence, so the
        # ids are converted to strings for plotting only.
        clustered = self.df.assign(Cluster=self.df['Cluster'].astype(str))
        cluster_order = sorted(clustered['Cluster'].unique(), key=int)
        fig2 = px.scatter(clustered, x='Avg_Daily_Usage_Hours', y='Mental_Health_Score',
                          color='Cluster', title='Mental Health vs Daily Usage by Cluster',
                          category_orders={'Cluster': cluster_order},
                          color_discrete_sequence=px.colors.qualitative.Set1)
        fig2.update_layout(xaxis_title='Daily Usage (Hours)', yaxis_title='Mental Health Score')

        # 3. Cluster Distribution
        cluster_counts = self.df['Cluster'].value_counts().sort_index()
        fig3 = px.bar(x=cluster_counts.index, y=cluster_counts.values,
                      title='Student Distribution by Cluster',
                      color_discrete_sequence=['#2ca02c'])
        fig3.update_layout(xaxis_title='Cluster', yaxis_title='Number of Students')

        # 4. Addiction Score Distribution
        fig4 = px.histogram(self.df, x='Addicted_Score',
                            title='Addiction Score Distribution',
                            nbins=20, color_discrete_sequence=['#d62728'])
        fig4.update_layout(xaxis_title='Addiction Score', yaxis_title='Number of Students')

        # 5. Conflicts Analysis
        conflict_counts = self.df['Conflicts_Over_Social_Media'].value_counts().sort_index()
        fig5 = px.bar(x=conflict_counts.index, y=conflict_counts.values,
                      title='Social Media Conflicts Distribution',
                      color_discrete_sequence=['#ff7f0e'])
        fig5.update_layout(xaxis_title='Number of Conflicts', yaxis_title='Number of Students')

        # 6. Platform Usage
        platform_counts = self.df['Most_Used_Platform'].value_counts()
        fig6 = px.pie(values=platform_counts.values, names=platform_counts.index,
                      title='Most Used Social Media Platforms')

        # 7. Cluster Characteristics Heatmap (metrics as rows, clusters as columns)
        metric_columns = ['Avg_Daily_Usage_Hours', 'Mental_Health_Score',
                          'Sleep_Hours_Per_Night', 'Addicted_Score',
                          'Conflicts_Over_Social_Media']
        cluster_stats = self.df.groupby('Cluster')[metric_columns].mean().round(2)
        fig7 = px.imshow(cluster_stats.T,
                         title='Cluster Characteristics Heatmap',
                         color_continuous_scale='RdYlBu_r',
                         aspect='auto')
        fig7.update_layout(xaxis_title='Cluster', yaxis_title='Metrics')

        # 8. Correlation Matrix over the same metrics
        corr_matrix = self.df[metric_columns].corr()
        fig8 = px.imshow(corr_matrix,
                         title='Feature Correlation Matrix',
                         color_continuous_scale='RdBu',
                         aspect='auto')
        fig8.update_layout(xaxis_title='Features', yaxis_title='Features')

        return fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8

    def get_comprehensive_stats(self):
        """Return summary statistics of ``self.df`` as a dict.

        Requires the ``Cluster`` column, i.e. a completed ``train_all_models``.
        """
        df = self.df
        return {
            "total_students": len(df),
            "avg_age": df['Age'].mean(),
            "avg_daily_usage": df['Avg_Daily_Usage_Hours'].mean(),
            "avg_mental_health": df['Mental_Health_Score'].mean(),
            "avg_sleep": df['Sleep_Hours_Per_Night'].mean(),
            "avg_addiction": df['Addicted_Score'].mean(),
            "avg_conflicts": df['Conflicts_Over_Social_Media'].mean(),
            "high_risk_students": int((df['Addicted_Score'] >= 7).sum()),
            "high_conflict_students": int((df['Conflicts_Over_Social_Media'] >= 3).sum()),
            "most_used_platform": df['Most_Used_Platform'].mode()[0],
            "n_clusters": int(df['Cluster'].nunique()),
        }
382
+
383
# Initialize the analyzer
# Shared module-level instance used by the Gradio callbacks below. The
# constructor loads the dataset and trains all models, so this runs once
# when the module is imported.
analyzer = ComprehensiveSocialMediaAnalyzer()
385
+
386
def individual_analysis(age, gender, academic_level, relationship_status,
                        platform, daily_usage, sleep_hours, mental_health,
                        conflicts, addiction_score, affects_academic):
    """Gradio callback for the comprehensive individual analysis.

    Forwards the form values to ``analyzer.analyze_individual`` and returns
    its Markdown report. Any exception is turned into an error message so the
    UI shows the failure instead of a stack trace.
    """
    try:
        return analyzer.analyze_individual(
            age, gender, academic_level, relationship_status,
            platform, daily_usage, sleep_hours, mental_health,
            conflicts, addiction_score, affects_academic
        )
    except Exception as e:  # UI boundary: report the failure to the user
        return f"❌ Error in analysis: {e}"
402
+
403
def comprehensive_dashboard():
    """Gradio callback that builds the dashboard summary and figures.

    Returns:
        A 9-tuple ``(summary_markdown, fig1, ..., fig8)``. On failure the
        first element is an error message and the eight figure slots are
        ``None``.
    """
    try:
        figures = analyzer.create_comprehensive_dashboard()
        stats = analyzer.get_comprehensive_stats()

        total = stats['total_students']

        def share(count):
            """Percentage of all students; 0 for an empty dataset."""
            return count / total * 100 if total else 0.0

        summary = "\n".join([
            "",
            "## 📊 Comprehensive Dataset Overview",
            "",
            "### 📈 Basic Statistics",
            f"- **Total Students**: {total:,}",
            f"- **Average Age**: {stats['avg_age']:.1f} years",
            f"- **Average Daily Usage**: {stats['avg_daily_usage']:.1f} hours",
            f"- **Average Mental Health Score**: {stats['avg_mental_health']:.1f}/10",
            f"- **Average Sleep**: {stats['avg_sleep']:.1f} hours/night",
            f"- **Average Addiction Score**: {stats['avg_addiction']:.1f}/10",
            f"- **Average Conflicts**: {stats['avg_conflicts']:.1f}/5",
            "",
            "### ⚠️ Risk Assessment",
            f"- **High Risk Students (Addiction ≥7)**: {stats['high_risk_students']} "
            f"({share(stats['high_risk_students']):.1f}%)",
            f"- **High Conflict Students (≥3)**: {stats['high_conflict_students']} "
            f"({share(stats['high_conflict_students']):.1f}%)",
            "",
            "### 🎯 Analysis Results",
            f"- **Clusters Identified**: {stats['n_clusters']}",
            f"- **Most Used Platform**: {stats['most_used_platform']}",
            "",
            "### 📊 Model Performance",
            f"- **Clustering**: K-Means with {stats['n_clusters']} clusters",
            "- **Regression**: Random Forest for addiction score prediction",
            "- **Classification**: Random Forest for conflict risk prediction",
            "",
        ])

        return (summary, *figures)

    except Exception as e:  # UI boundary: report the failure to the user
        # One message plus an empty value for each of the eight plot outputs.
        return (f"❌ Error creating dashboard: {e}", *([None] * 8))
440
 
441
# Create Gradio interface
# Long Markdown texts live in module-level constants so their content starts
# at column 0 regardless of how deeply the widgets below are nested.
_OVERVIEW_MD = """
# 📱 Social Media Addiction Analysis - Comprehensive

## Overview
This application provides comprehensive analysis of student social media usage patterns including:
- **Clustering Analysis**: Identify behavioral segments
- **Regression Analysis**: Predict addiction scores
- **Classification Analysis**: Predict conflict risks
- **Risk Assessment**: Identify potential concerns
- **Personalized Recommendations**: Actionable advice

### Features:
- **Individual Analysis**: Get personalized insights with all three model types
- **Comprehensive Dashboard**: Explore patterns across all analyses
- **Risk Assessment**: Multi-factor evaluation
- **Predictive Analytics**: ML-powered predictions
"""

_ABOUT_MD = """
## About This Comprehensive Application

### Purpose
This application provides comprehensive analysis of student social media usage patterns using multiple machine learning approaches.

### Methodology
- **Clustering Analysis**: K-Means clustering to identify distinct behavioral segments
- **Regression Analysis**: Random Forest to predict addiction scores
- **Classification Analysis**: Random Forest to predict conflict risks
- **Risk Assessment**: Multi-factor evaluation of potential concerns
- **Personalized Recommendations**: Actionable advice based on all analyses

### Key Metrics
- **Daily Usage**: Hours spent on social media per day
- **Mental Health Score**: Self-reported mental health (1-10 scale)
- **Sleep Hours**: Average sleep duration per night
- **Addiction Score**: Self-reported addiction level (1-10 scale)
- **Conflicts**: Number of conflicts related to social media use

### Model Performance
- **Clustering**: Identifies 4 distinct behavioral clusters
- **Regression**: Predicts addiction scores with high accuracy
- **Classification**: Predicts conflict risk probability

### Recommendations
- Set daily usage limits
- Improve sleep hygiene
- Seek mental health support when needed
- Develop healthy digital boundaries
- Work on communication skills

### Data Source
Analysis based on comprehensive student social media usage survey data.
"""

with gr.Blocks(title="Social Media Addiction Analysis - Comprehensive", theme=gr.themes.Soft()) as demo:

    gr.Markdown(_OVERVIEW_MD)

    with gr.Tabs():

        # Individual Analysis Tab
        with gr.Tab("🔍 Individual Analysis"):
            gr.Markdown("### Enter your social media usage information for comprehensive analysis")

            with gr.Row():
                with gr.Column():
                    age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age")
                    gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender")
                    academic_level = gr.Radio(choices=["High School", "Undergraduate", "Graduate"],
                                              value="Undergraduate", label="Academic Level")
                    relationship_status = gr.Radio(choices=["Single", "In Relationship", "Complicated"],
                                                   value="Single", label="Relationship Status")

                with gr.Column():
                    platform = gr.Radio(choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
                                        value="Instagram", label="Most Used Platform")
                    daily_usage = gr.Slider(minimum=0, maximum=12, value=4, step=0.5,
                                            label="Average Daily Usage (Hours)")
                    sleep_hours = gr.Slider(minimum=4, maximum=12, value=7, step=0.5,
                                            label="Sleep Hours per Night")
                    mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1,
                                              label="Mental Health Score (1-10)")

                with gr.Column():
                    conflicts = gr.Slider(minimum=0, maximum=5, value=2, step=1,
                                          label="Conflicts Over Social Media (0-5)")
                    addiction_score = gr.Slider(minimum=1, maximum=10, value=5, step=1,
                                                label="Addiction Score (1-10)")
                    affects_academic = gr.Radio(choices=["Yes", "No"], value="No",
                                                label="Affects Academic Performance")

            analyze_btn = gr.Button("🔍 Analyze My Usage", variant="primary")
            analysis_output = gr.Markdown(label="Comprehensive Analysis Results")

            # Input order must match the parameter order of individual_analysis.
            analyze_btn.click(
                fn=individual_analysis,
                inputs=[age, gender, academic_level, relationship_status, platform,
                        daily_usage, sleep_hours, mental_health, conflicts, addiction_score, affects_academic],
                outputs=analysis_output
            )

        # Comprehensive Dashboard Tab
        with gr.Tab("📊 Comprehensive Dashboard"):
            gr.Markdown("### Explore comprehensive patterns and all analyses")

            dashboard_btn = gr.Button("📊 Generate Comprehensive Dashboard", variant="primary")

            with gr.Row():
                summary_output = gr.Markdown(label="Comprehensive Summary Statistics")

            with gr.Row():
                plot1 = gr.Plot(label="Usage Distribution")
                plot2 = gr.Plot(label="Mental Health vs Usage by Cluster")

            with gr.Row():
                plot3 = gr.Plot(label="Cluster Distribution")
                plot4 = gr.Plot(label="Addiction Score Distribution")

            with gr.Row():
                plot5 = gr.Plot(label="Conflicts Distribution")
                plot6 = gr.Plot(label="Platform Usage")

            with gr.Row():
                plot7 = gr.Plot(label="Cluster Characteristics Heatmap")
                plot8 = gr.Plot(label="Feature Correlation Matrix")

            # Output order must match the tuple returned by comprehensive_dashboard.
            dashboard_btn.click(
                fn=comprehensive_dashboard,
                outputs=[summary_output, plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8]
            )

        # About Tab
        with gr.Tab("ℹ️ About"):
            gr.Markdown(_ABOUT_MD)
572
 
573
# Launch the app
if __name__ == "__main__":
    import socket
    import sys

    def find_free_port(start_port=7860, max_attempts=10):
        """Return the first port in the scanned range that can be bound on localhost.

        Tries ``start_port`` up to ``start_port + max_attempts - 1`` and
        returns ``None`` when every one of them is taken.
        """
        for port in range(start_port, start_port + max_attempts):
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.bind(('localhost', port))
            except OSError:
                continue
            return port
        return None

    # Find an available port
    port = find_free_port()
    if port is None:
        print("❌ Could not find an available port. Please close other applications and try again.")
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)

    print(f"🚀 Starting Comprehensive Gradio app on port {port}")
    print(f"📱 Local URL: http://localhost:{port}")
    print("🌐 Public URL will be provided once the app starts")

    # Pass the probed port on so the app listens where the messages above say.
    demo.launch(share=True, server_port=port)