BlakeL committed on
Commit
822f734
·
verified ·
1 Parent(s): 33f3253

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -457
app.py CHANGED
@@ -1,517 +1,185 @@
1
 
2
- #!/usr/bin/env python3
3
- """
4
- Social Media Addiction Analysis - Gradio App
5
- A comprehensive web application for analyzing student social media usage patterns
6
- """
7
-
8
  import gradio as gr
9
  import pandas as pd
10
  import numpy as np
11
- import matplotlib.pyplot as plt
12
- import seaborn as sns
13
- from sklearn.preprocessing import StandardScaler
14
- from sklearn.cluster import KMeans
15
- from sklearn.metrics import silhouette_score
16
  import plotly.express as px
17
- import plotly.graph_objects as go
18
- from plotly.subplots import make_subplots
19
  import warnings
20
  warnings.filterwarnings('ignore')
21
 
22
- # Set style
23
- plt.style.use('seaborn-v0_8')
24
- sns.set_palette("husl")
25
-
26
- class SocialMediaAnalyzer:
27
  def __init__(self):
28
- """Initialize the analyzer with pre-trained models and data"""
29
- self.df = None
30
- self.scaler = StandardScaler()
31
- self.kmeans_model = None
32
- self.feature_names = None
33
- self.load_data()
34
- self.train_models()
35
-
36
- def load_data(self):
37
- """Load and prepare the dataset"""
38
- try:
39
- # Load the dataset
40
- self.df = pd.read_csv("data/Students Social Media Addiction.csv")
41
-
42
- # Create binary features for categorical variables
43
- self.df['Is_Female'] = (self.df['Gender'] == 'Female').astype(int)
44
- self.df['Is_Male'] = (self.df['Gender'] == 'Male').astype(int)
45
-
46
- # Academic level features
47
- self.df['Is_Undergraduate'] = (self.df['Academic_Level'] == 'Undergraduate').astype(int)
48
- self.df['Is_Graduate'] = (self.df['Academic_Level'] == 'Graduate').astype(int)
49
- self.df['Is_High_School'] = (self.df['Academic_Level'] == 'High School').astype(int)
50
-
51
- # Relationship status features
52
- self.df['Is_Single'] = (self.df['Relationship_Status'] == 'Single').astype(int)
53
- self.df['Is_In_Relationship'] = (self.df['Relationship_Status'] == 'In Relationship').astype(int)
54
- self.df['Is_Complicated'] = (self.df['Relationship_Status'] == 'Complicated').astype(int)
55
-
56
- # Academic performance
57
- self.df['Affects_Academic'] = (self.df['Affects_Academic_Performance'] == 'Yes').astype(int)
58
-
59
- # Create platform dummies (top 6 platforms)
60
- top_platforms = self.df['Most_Used_Platform'].value_counts().head(6).index
61
- for platform in top_platforms:
62
- self.df[f'Uses_{platform}'] = (self.df['Most_Used_Platform'] == platform).astype(int)
63
-
64
- # Create behavioral features
65
- self.df['High_Usage'] = (self.df['Avg_Daily_Usage_Hours'] >= 6).astype(int)
66
- self.df['Low_Sleep'] = (self.df['Sleep_Hours_Per_Night'] <= 6).astype(int)
67
- self.df['Poor_Mental_Health'] = (self.df['Mental_Health_Score'] <= 5).astype(int)
68
- self.df['High_Conflict'] = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
69
- self.df['High_Addiction'] = (self.df['Addicted_Score'] >= 7).astype(int)
70
-
71
- # Create interaction features
72
- self.df['Usage_Sleep_Ratio'] = self.df['Avg_Daily_Usage_Hours'] / self.df['Sleep_Hours_Per_Night']
73
- self.df['Mental_Health_Usage_Ratio'] = self.df['Mental_Health_Score'] / self.df['Avg_Daily_Usage_Hours']
74
-
75
- print("βœ… Data loaded successfully!")
76
-
77
- except Exception as e:
78
- print(f"❌ Error loading data: {e}")
79
- # Create sample data if file not found
80
- self.create_sample_data()
81
 
82
  def create_sample_data(self):
83
  """Create sample data for demonstration"""
84
  np.random.seed(42)
85
- n_samples = 1000
86
-
87
- self.df = pd.DataFrame({
88
- 'Age': np.random.randint(16, 30, n_samples),
89
- 'Gender': np.random.choice(['Male', 'Female'], n_samples),
90
- 'Academic_Level': np.random.choice(['High School', 'Undergraduate', 'Graduate'], n_samples),
91
- 'Relationship_Status': np.random.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
92
- 'Most_Used_Platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
93
- 'Avg_Daily_Usage_Hours': np.random.normal(4.5, 2, n_samples),
94
- 'Sleep_Hours_Per_Night': np.random.normal(7, 1.5, n_samples),
95
- 'Mental_Health_Score': np.random.normal(6.5, 2, n_samples),
96
- 'Conflicts_Over_Social_Media': np.random.randint(0, 6, n_samples),
97
- 'Addicted_Score': np.random.normal(5.5, 2, n_samples),
98
- 'Affects_Academic_Performance': np.random.choice(['Yes', 'No'], n_samples)
99
  })
100
-
101
- # Apply the same feature engineering
102
- self.load_data()
103
 
104
- def train_models(self):
105
- """Train clustering models"""
106
- try:
107
- # Select numerical features for clustering
108
- numerical_features = [
109
- 'Age', 'Avg_Daily_Usage_Hours', 'Sleep_Hours_Per_Night',
110
- 'Mental_Health_Score', 'Conflicts_Over_Social_Media', 'Addicted_Score',
111
- 'Is_Female', 'Is_Undergraduate', 'Is_Graduate', 'Is_High_School',
112
- 'Is_Single', 'Is_In_Relationship', 'Is_Complicated', 'Affects_Academic',
113
- 'High_Usage', 'Low_Sleep', 'Poor_Mental_Health', 'High_Conflict', 'High_Addiction',
114
- 'Usage_Sleep_Ratio', 'Mental_Health_Usage_Ratio'
115
- ]
116
-
117
- # Add platform features
118
- platform_features = [col for col in self.df.columns if col.startswith('Uses_')]
119
- numerical_features.extend(platform_features)
120
-
121
- # Filter to only include features that exist
122
- self.feature_names = [f for f in numerical_features if f in self.df.columns]
123
-
124
- # Create feature matrix
125
- X = self.df[self.feature_names].copy()
126
-
127
- # Handle missing values
128
- X = X.fillna(X.mean())
129
-
130
- # Scale features
131
- X_scaled = self.scaler.fit_transform(X)
132
-
133
- # Train K-Means model
134
- self.kmeans_model = KMeans(n_clusters=4, random_state=42, n_init=10)
135
- self.kmeans_model.fit(X_scaled)
136
-
137
- # Add cluster labels to dataframe
138
- self.df['Cluster'] = self.kmeans_model.labels_
139
-
140
- print("βœ… Models trained successfully!")
141
-
142
- except Exception as e:
143
- print(f"❌ Error training models: {e}")
144
 
145
- def analyze_individual(self, age, gender, academic_level, relationship_status,
146
- platform, daily_usage, sleep_hours, mental_health,
147
- conflicts, addiction_score, affects_academic):
148
- """Analyze an individual student's social media usage patterns"""
149
-
150
- # Create individual data point
151
- individual_data = {
152
- 'Age': age,
153
- 'Gender': gender,
154
- 'Academic_Level': academic_level,
155
- 'Relationship_Status': relationship_status,
156
- 'Most_Used_Platform': platform,
157
- 'Avg_Daily_Usage_Hours': daily_usage,
158
- 'Sleep_Hours_Per_Night': sleep_hours,
159
- 'Mental_Health_Score': mental_health,
160
- 'Conflicts_Over_Social_Media': conflicts,
161
- 'Addicted_Score': addiction_score,
162
- 'Affects_Academic_Performance': affects_academic
163
- }
164
-
165
- # Create binary features
166
- individual_data['Is_Female'] = 1 if gender == 'Female' else 0
167
- individual_data['Is_Undergraduate'] = 1 if academic_level == 'Undergraduate' else 0
168
- individual_data['Is_Graduate'] = 1 if academic_level == 'Graduate' else 0
169
- individual_data['Is_High_School'] = 1 if academic_level == 'High School' else 0
170
- individual_data['Is_Single'] = 1 if relationship_status == 'Single' else 0
171
- individual_data['Is_In_Relationship'] = 1 if relationship_status == 'In Relationship' else 0
172
- individual_data['Is_Complicated'] = 1 if relationship_status == 'Complicated' else 0
173
- individual_data['Affects_Academic'] = 1 if affects_academic == 'Yes' else 0
174
-
175
- # Platform features
176
- for platform_name in ['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat']:
177
- individual_data[f'Uses_{platform_name}'] = 1 if platform == platform_name else 0
178
-
179
- # Behavioral features
180
- individual_data['High_Usage'] = 1 if daily_usage >= 6 else 0
181
- individual_data['Low_Sleep'] = 1 if sleep_hours <= 6 else 0
182
- individual_data['Poor_Mental_Health'] = 1 if mental_health <= 5 else 0
183
- individual_data['High_Conflict'] = 1 if conflicts >= 3 else 0
184
- individual_data['High_Addiction'] = 1 if addiction_score >= 7 else 0
185
-
186
- # Interaction features
187
- individual_data['Usage_Sleep_Ratio'] = daily_usage / sleep_hours if sleep_hours > 0 else 0
188
- individual_data['Mental_Health_Usage_Ratio'] = mental_health / daily_usage if daily_usage > 0 else 0
189
-
190
  # Create feature vector
191
- features = []
192
- for feature in self.feature_names:
193
- if feature in individual_data:
194
- features.append(individual_data[feature])
195
- else:
196
- features.append(0)
197
-
198
- # Scale features
199
- features_scaled = self.scaler.transform([features])
200
-
201
- # Predict cluster
202
- cluster = self.kmeans_model.predict(features_scaled)[0]
203
 
204
- # Get cluster characteristics
205
- cluster_data = self.df[self.df['Cluster'] == cluster]
206
 
207
- # Calculate risk factors
208
- risk_factors = []
209
- if daily_usage >= 6:
210
- risk_factors.append("High daily usage (β‰₯6 hours)")
211
- if sleep_hours <= 6:
212
- risk_factors.append("Low sleep (≀6 hours)")
213
- if mental_health <= 5:
214
- risk_factors.append("Poor mental health (≀5/10)")
215
- if conflicts >= 3:
216
- risk_factors.append("High social media conflicts (β‰₯3)")
217
- if addiction_score >= 7:
218
- risk_factors.append("High addiction score (β‰₯7/10)")
219
 
220
  # Generate recommendations
221
  recommendations = []
222
- if daily_usage >= 6:
223
- recommendations.append("Consider setting daily usage limits")
224
- if sleep_hours <= 6:
225
- recommendations.append("Improve sleep hygiene and reduce screen time before bed")
226
- if mental_health <= 5:
227
- recommendations.append("Consider mental health support and digital detox")
228
- if conflicts >= 3:
229
- recommendations.append("Work on communication skills and boundary setting")
230
- if addiction_score >= 7:
231
- recommendations.append("Seek professional help for digital addiction")
232
 
233
  if not recommendations:
234
- recommendations.append("Maintain healthy social media habits")
235
-
236
- # Create analysis results
237
- analysis_results = {
238
- "cluster": cluster,
239
- "cluster_size": len(cluster_data),
240
- "avg_usage_cluster": cluster_data['Avg_Daily_Usage_Hours'].mean(),
241
- "avg_mental_health_cluster": cluster_data['Mental_Health_Score'].mean(),
242
- "avg_sleep_cluster": cluster_data['Sleep_Hours_Per_Night'].mean(),
243
- "avg_addiction_cluster": cluster_data['Addicted_Score'].mean(),
244
- "risk_factors": risk_factors,
245
- "recommendations": recommendations
246
- }
247
-
248
- return analysis_results
249
-
250
- def create_dashboard_plots(self):
251
- """Create comprehensive dashboard plots"""
252
-
253
- # 1. Usage Distribution
254
- fig1 = px.histogram(self.df, x='Avg_Daily_Usage_Hours',
255
- title='Daily Social Media Usage Distribution',
256
- nbins=20, color_discrete_sequence=['#1f77b4'])
257
- fig1.update_layout(xaxis_title='Hours per Day', yaxis_title='Number of Students')
258
 
259
- # 2. Mental Health vs Usage
260
- fig2 = px.scatter(self.df, x='Avg_Daily_Usage_Hours', y='Mental_Health_Score',
261
- color='Cluster', title='Mental Health vs Daily Usage',
262
- color_discrete_sequence=px.colors.qualitative.Set1)
263
- fig2.update_layout(xaxis_title='Daily Usage (Hours)', yaxis_title='Mental Health Score')
264
-
265
- # 3. Cluster Distribution
266
- cluster_counts = self.df['Cluster'].value_counts().sort_index()
267
- fig3 = px.bar(x=cluster_counts.index, y=cluster_counts.values,
268
- title='Student Distribution by Cluster',
269
- color_discrete_sequence=['#2ca02c'])
270
- fig3.update_layout(xaxis_title='Cluster', yaxis_title='Number of Students')
271
-
272
- # 4. Platform Usage
273
- platform_counts = self.df['Most_Used_Platform'].value_counts()
274
- fig4 = px.pie(values=platform_counts.values, names=platform_counts.index,
275
- title='Most Used Social Media Platforms')
276
-
277
- # 5. Cluster Characteristics
278
- cluster_stats = self.df.groupby('Cluster').agg({
279
- 'Avg_Daily_Usage_Hours': 'mean',
280
- 'Mental_Health_Score': 'mean',
281
- 'Sleep_Hours_Per_Night': 'mean',
282
- 'Addicted_Score': 'mean'
283
- }).round(2)
284
-
285
- fig5 = px.imshow(cluster_stats.T,
286
- title='Cluster Characteristics Heatmap',
287
- color_continuous_scale='RdYlBu_r',
288
- aspect='auto')
289
- fig5.update_layout(xaxis_title='Cluster', yaxis_title='Metrics')
290
-
291
- return fig1, fig2, fig3, fig4, fig5
292
-
293
- def get_summary_stats(self):
294
- """Get summary statistics"""
295
- stats = {
296
- "total_students": len(self.df),
297
- "avg_age": self.df['Age'].mean(),
298
- "avg_daily_usage": self.df['Avg_Daily_Usage_Hours'].mean(),
299
- "avg_mental_health": self.df['Mental_Health_Score'].mean(),
300
- "avg_sleep": self.df['Sleep_Hours_Per_Night'].mean(),
301
- "avg_addiction": self.df['Addicted_Score'].mean(),
302
- "high_risk_students": len(self.df[self.df['Addicted_Score'] >= 7]),
303
- "most_used_platform": self.df['Most_Used_Platform'].mode()[0]
304
- }
305
- return stats
306
 
307
- # Initialize the analyzer
308
- analyzer = SocialMediaAnalyzer()
309
 
310
- def individual_analysis(age, gender, academic_level, relationship_status,
311
- platform, daily_usage, sleep_hours, mental_health,
312
- conflicts, addiction_score, affects_academic):
313
- """Gradio interface for individual analysis"""
314
-
315
- try:
316
- results = analyzer.analyze_individual(
317
- age, gender, academic_level, relationship_status,
318
- platform, daily_usage, sleep_hours, mental_health,
319
- conflicts, addiction_score, affects_academic
320
- )
321
-
322
- # Format the results
323
- output = f"""
324
- ## πŸ“Š Individual Analysis Results
325
 
326
- ### 🎯 Cluster Assignment
327
- **Cluster {results['cluster']}** - You belong to a group with {results['cluster_size']} similar students
328
 
329
- ### πŸ“ˆ Cluster Characteristics (Average)
330
- - **Daily Usage**: {results['avg_usage_cluster']:.1f} hours
331
- - **Mental Health Score**: {results['avg_mental_health_cluster']:.1f}/10
332
- - **Sleep Hours**: {results['avg_sleep_cluster']:.1f} hours/night
333
- - **Addiction Score**: {results['avg_addiction_cluster']:.1f}/10
334
-
335
- ### ⚠️ Risk Factors Identified
336
- """
337
-
338
- if results['risk_factors']:
339
- for factor in results['risk_factors']:
340
- output += f"- {factor}\n"
341
- else:
342
- output += "- No significant risk factors identified\n"
343
-
344
- output += "\n### πŸ’‘ Recommendations\n"
345
- for rec in results['recommendations']:
346
- output += f"- {rec}\n"
347
-
348
- return output
349
-
350
- except Exception as e:
351
- return f"❌ Error in analysis: {str(e)}"
352
-
353
- def dashboard():
354
- """Create dashboard with plots"""
355
- try:
356
- fig1, fig2, fig3, fig4, fig5 = analyzer.create_dashboard_plots()
357
- stats = analyzer.get_summary_stats()
358
-
359
- # Create summary text
360
- summary = f"""
361
- ## πŸ“Š Dataset Overview
362
-
363
- - **Total Students**: {stats['total_students']:,}
364
- - **Average Age**: {stats['avg_age']:.1f} years
365
- - **Average Daily Usage**: {stats['avg_daily_usage']:.1f} hours
366
- - **Average Mental Health Score**: {stats['avg_mental_health']:.1f}/10
367
- - **Average Sleep**: {stats['avg_sleep']:.1f} hours/night
368
- - **Average Addiction Score**: {stats['avg_addiction']:.1f}/10
369
- - **High Risk Students**: {stats['high_risk_students']} ({stats['high_risk_students']/stats['total_students']*100:.1f}%)
370
- - **Most Used Platform**: {stats['most_used_platform']}
371
  """
372
 
373
- return summary, fig1, fig2, fig3, fig4, fig5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
- except Exception as e:
376
- return f"❌ Error creating dashboard: {str(e)}", None, None, None, None, None
 
 
377
 
378
  # Create Gradio interface
379
- with gr.Blocks(title="Social Media Addiction Analysis", theme=gr.themes.Soft()) as demo:
380
 
381
- gr.Markdown("""
382
- # πŸ“± Social Media Addiction Analysis
383
-
384
- ## Overview
385
- This application analyzes student social media usage patterns to identify risk factors and provide personalized recommendations for healthy digital habits.
386
-
387
- ### Features:
388
- - **Individual Analysis**: Get personalized insights based on your social media usage
389
- - **Dashboard**: Explore overall patterns and cluster characteristics
390
- - **Risk Assessment**: Identify potential addiction and mental health concerns
391
- - **Recommendations**: Receive actionable advice for healthier social media use
392
- """)
393
 
394
  with gr.Tabs():
395
-
396
- # Individual Analysis Tab
397
  with gr.Tab("πŸ” Individual Analysis"):
398
- gr.Markdown("### Enter your social media usage information for personalized analysis")
399
-
400
  with gr.Row():
401
  with gr.Column():
402
- age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age")
403
- gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender")
404
- academic_level = gr.Radio(choices=["High School", "Undergraduate", "Graduate"],
405
- value="Undergraduate", label="Academic Level")
406
- relationship_status = gr.Radio(choices=["Single", "In Relationship", "Complicated"],
407
- value="Single", label="Relationship Status")
408
-
409
- with gr.Column():
410
- platform = gr.Radio(choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
411
- value="Instagram", label="Most Used Platform")
412
- daily_usage = gr.Slider(minimum=0, maximum=12, value=4, step=0.5,
413
- label="Average Daily Usage (Hours)")
414
- sleep_hours = gr.Slider(minimum=4, maximum=12, value=7, step=0.5,
415
- label="Sleep Hours per Night")
416
- mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1,
417
- label="Mental Health Score (1-10)")
418
 
419
  with gr.Column():
420
- conflicts = gr.Slider(minimum=0, maximum=5, value=2, step=1,
421
- label="Conflicts Over Social Media (0-5)")
422
- addiction_score = gr.Slider(minimum=1, maximum=10, value=5, step=1,
423
- label="Addiction Score (1-10)")
424
- affects_academic = gr.Radio(choices=["Yes", "No"], value="No",
425
- label="Affects Academic Performance")
426
 
427
- analyze_btn = gr.Button("πŸ” Analyze My Usage", variant="primary")
428
- analysis_output = gr.Markdown(label="Analysis Results")
429
 
430
  analyze_btn.click(
431
- fn=individual_analysis,
432
- inputs=[age, gender, academic_level, relationship_status, platform,
433
- daily_usage, sleep_hours, mental_health, conflicts, addiction_score, affects_academic],
434
- outputs=analysis_output
435
  )
436
 
437
- # Dashboard Tab
438
  with gr.Tab("πŸ“Š Dashboard"):
439
- gr.Markdown("### Explore overall patterns and cluster characteristics")
440
-
441
  dashboard_btn = gr.Button("πŸ“Š Generate Dashboard", variant="primary")
442
 
443
  with gr.Row():
444
- summary_output = gr.Markdown(label="Summary Statistics")
445
 
446
  with gr.Row():
447
- plot1 = gr.Plot(label="Usage Distribution")
448
- plot2 = gr.Plot(label="Mental Health vs Usage")
449
 
450
  with gr.Row():
451
- plot3 = gr.Plot(label="Cluster Distribution")
452
- plot4 = gr.Plot(label="Platform Usage")
453
-
454
- with gr.Row():
455
- plot5 = gr.Plot(label="Cluster Characteristics")
456
 
457
  dashboard_btn.click(
458
- fn=dashboard,
459
- outputs=[summary_output, plot1, plot2, plot3, plot4, plot5]
460
  )
461
-
462
- # About Tab
463
- with gr.Tab("ℹ️ About"):
464
- gr.Markdown("""
465
- ## About This Application
466
-
467
- ### Purpose
468
- This application helps students and educators understand social media usage patterns and identify potential addiction risks.
469
-
470
- ### Methodology
471
- - **Clustering Analysis**: Uses K-Means clustering to identify distinct user segments
472
- - **Risk Assessment**: Evaluates multiple factors including usage time, mental health, and conflicts
473
- - **Personalized Recommendations**: Provides actionable advice based on individual patterns
474
-
475
- ### Key Metrics
476
- - **Daily Usage**: Hours spent on social media per day
477
- - **Mental Health Score**: Self-reported mental health (1-10 scale)
478
- - **Sleep Hours**: Average sleep duration per night
479
- - **Addiction Score**: Self-reported addiction level (1-10 scale)
480
- - **Conflicts**: Number of conflicts related to social media use
481
-
482
- ### Recommendations
483
- - Set daily usage limits
484
- - Improve sleep hygiene
485
- - Seek mental health support when needed
486
- - Develop healthy digital boundaries
487
-
488
- ### Data Source
489
- Analysis based on student social media usage survey data.
490
- """)
491
 
492
- # Launch the app
493
  if __name__ == "__main__":
494
- import socket
495
-
496
- def find_free_port(start_port=7860, max_attempts=10):
497
- """Find a free port starting from start_port"""
498
- for port in range(start_port, start_port + max_attempts):
499
- try:
500
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
501
- s.bind(('localhost', port))
502
- return port
503
- except OSError:
504
- continue
505
- return None
506
-
507
- # Find an available port
508
- port = find_free_port()
509
- if port is None:
510
- print("❌ Could not find an available port. Please close other applications and try again.")
511
- exit(1)
512
-
513
- print(f"πŸš€ Starting Gradio app on port {port}")
514
- print(f"πŸ“± Local URL: http://localhost:{port}")
515
- print(f"🌐 Public URL will be provided once the app starts")
516
-
517
- demo.launch(share=True)
 
1
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
 
 
 
 
 
5
  import plotly.express as px
6
+ from sklearn.cluster import KMeans
7
+ from sklearn.preprocessing import StandardScaler
8
  import warnings
9
  warnings.filterwarnings('ignore')
10
 
11
class SimpleSocialMediaAnalyzer:
    """Cluster synthetic student social-media data and report on it.

    On construction the analyzer generates a fixed synthetic dataset,
    standardizes five behavioural columns and fits a 3-cluster K-Means model.
    ``analyze_individual`` places one user in a cluster and returns a Markdown
    report; ``create_dashboard`` returns summary Markdown plus four Plotly
    figures.

    Attributes set by ``__init__`` / ``train_model``:
        df: the synthetic dataset, with a ``cluster`` column after training.
        scaler: the fitted ``StandardScaler``.
        kmeans: the fitted ``KMeans`` model.
        features: list of column names used for clustering, in fit order.
    """

    # Single source of truth for the clustering columns. The same order is
    # used to fit the scaler and to build the row for an individual user.
    FEATURES = ('daily_usage', 'sleep_hours', 'mental_health', 'addiction_score', 'conflicts')

    def __init__(self):
        self.df = self.create_sample_data()
        self.train_model()

    def create_sample_data(self):
        """Create sample data for demonstration.

        Returns:
            A 1000-row DataFrame with columns ``age``, ``daily_usage``,
            ``sleep_hours``, ``mental_health``, ``conflicts``,
            ``addiction_score``, ``gender`` and ``platform``.
        """
        # A private RandomState(42) yields the same stream as seeding the
        # global generator with 42, without touching global state.
        rng = np.random.RandomState(42)
        n = 1000

        # Draw order is kept identical to the column order so the underlying
        # random stream is consumed the same way on every run.
        return pd.DataFrame({
            'age': rng.randint(16, 30, n),
            # Normal draws can fall outside physically possible values;
            # clip hours to a day and scores to the 1-10 scale shown in the UI.
            'daily_usage': np.clip(rng.normal(4.5, 2, n), 0, 24),
            'sleep_hours': np.clip(rng.normal(7, 1.5, n), 0, 24),
            'mental_health': np.clip(rng.normal(6.5, 2, n), 1, 10),
            'conflicts': rng.randint(0, 6, n),
            'addiction_score': np.clip(rng.normal(5.5, 2, n), 1, 10),
            'gender': rng.choice(['Male', 'Female'], n),
            'platform': rng.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n),
        })

    def train_model(self):
        """Train a simple clustering model.

        Fits a ``StandardScaler`` and a 3-cluster ``KMeans`` on the feature
        columns of ``self.df``, writes the labels to ``self.df['cluster']``
        and stores the fitted objects on the instance for later predictions.
        """
        features = list(self.FEATURES)
        X = self.df[features].fillna(self.df[features].mean())

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # n_init is pinned so the result does not depend on which default the
        # installed scikit-learn version happens to use.
        kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
        self.df['cluster'] = kmeans.fit_predict(X_scaled)

        # Store for predictions
        self.scaler = scaler
        self.kmeans = kmeans
        self.features = features

    @staticmethod
    def _assess(daily_usage, sleep_hours, mental_health, conflicts, addiction_score):
        """Return ``(risks, recommendations)`` for one user's inputs.

        Each threshold is listed once together with its risk label and its
        recommendation, so the two lists cannot drift apart. When no
        threshold is hit, ``risks`` is empty and ``recommendations`` holds a
        single "keep it up" entry.
        """
        checks = [
            (daily_usage >= 6, "High daily usage (≥6 hours)", "Set daily usage limits"),
            (sleep_hours <= 6, "Low sleep (≤6 hours)", "Improve sleep hygiene"),
            (mental_health <= 5, "Poor mental health (≤5/10)", "Consider mental health support"),
            (conflicts >= 3, "High conflicts (≥3)", "Work on communication skills"),
            (addiction_score >= 7, "High addiction score (≥7/10)", "Seek professional help"),
        ]
        risks = [risk for hit, risk, _ in checks if hit]
        recommendations = [advice for hit, _, advice in checks if hit]
        if not recommendations:
            recommendations.append("Maintain healthy habits")
        return risks, recommendations

    def analyze_individual(self, age, daily_usage, sleep_hours, mental_health, conflicts, addiction_score, gender, platform):
        """Analyze individual user.

        Args:
            age, gender, platform: accepted for interface compatibility with
                the UI inputs; they are not used by the clustering model.
            daily_usage, sleep_hours, mental_health, conflicts,
            addiction_score: numeric inputs used for the cluster prediction
                and the threshold checks.

        Returns:
            A Markdown string with the predicted cluster, that cluster's
            averages, the risk factors that were hit and recommendations.
        """
        supplied = {
            'daily_usage': daily_usage,
            'sleep_hours': sleep_hours,
            'mental_health': mental_health,
            'addiction_score': addiction_score,
            'conflicts': conflicts,
        }
        # Build the row from self.features so column names and order always
        # match what the scaler was fitted on.
        user_frame = pd.DataFrame(
            [[supplied[name] for name in self.features]],
            columns=self.features,
        )
        user_scaled = self.scaler.transform(user_frame)
        cluster = int(self.kmeans.predict(user_scaled)[0])

        # Get cluster stats
        cluster_data = self.df[self.df['cluster'] == cluster]

        risks, recommendations = self._assess(
            daily_usage, sleep_hours, mental_health, conflicts, addiction_score
        )
        risk_lines = [f"- {risk}" for risk in risks] or ["- No significant risks identified"]
        recommendation_lines = [f"- {rec}" for rec in recommendations]

        # Assemble unindented lines so the Markdown renderer never sees
        # leading whitespace in front of headings or list items.
        report = [
            "## 📊 Your Analysis Results",
            "",
            f"**Cluster:** {cluster} (Similar to {len(cluster_data)} other students)",
            "",
            "**Cluster Averages:**",
            f"- Daily Usage: {cluster_data['daily_usage'].mean():.1f} hours",
            f"- Mental Health: {cluster_data['mental_health'].mean():.1f}/10",
            f"- Sleep: {cluster_data['sleep_hours'].mean():.1f} hours",
            f"- Addiction Score: {cluster_data['addiction_score'].mean():.1f}/10",
            "",
            "**Risk Factors:**",
            *risk_lines,
            "",
            "**Recommendations:**",
            *recommendation_lines,
        ]
        return "\n".join(report)

    def create_dashboard(self):
        """Create dashboard plots.

        Returns:
            A 5-tuple ``(stats_markdown, fig1, fig2, fig3, fig4)``: dataset
            summary text, usage histogram, mental-health-vs-usage scatter,
            platform pie chart and per-cluster average bar chart.
        """
        # Usage distribution
        fig1 = px.histogram(
            self.df, x='daily_usage', title='Daily Usage Distribution',
            nbins=20, labels={'daily_usage': 'Hours/Day'},
        )

        # Cluster ids are labels, not quantities: plot them as strings so
        # Plotly assigns discrete colours instead of a continuous colour bar.
        labelled = self.df.assign(cluster=self.df['cluster'].astype(str))
        cluster_order = sorted(labelled['cluster'].unique())
        fig2 = px.scatter(
            labelled, x='daily_usage', y='mental_health', color='cluster',
            category_orders={'cluster': cluster_order},
            title='Mental Health vs Daily Usage by Cluster',
            labels={'daily_usage': 'Hours/Day', 'mental_health': 'Mental Health Score'},
        )

        # Platform usage
        platform_counts = self.df['platform'].value_counts()
        fig3 = px.pie(
            values=platform_counts.values, names=platform_counts.index,
            title='Most Used Platforms',
        )

        # Cluster characteristics; bars are grouped because summing averages
        # of different metrics into one stacked bar has no meaning.
        cluster_stats = self.df.groupby('cluster')[
            ['daily_usage', 'mental_health', 'sleep_hours', 'addiction_score']
        ].mean()
        fig4 = px.bar(cluster_stats, barmode='group', title='Average Characteristics by Cluster')

        # Summary stats
        total = len(self.df)
        high_risk = int((self.df['addiction_score'] >= 7).sum())
        high_risk_pct = high_risk / total * 100 if total else 0.0
        stats = "\n".join([
            "## 📈 Dataset Summary",
            f"- **Total Students:** {total:,}",
            f"- **Average Daily Usage:** {self.df['daily_usage'].mean():.1f} hours",
            f"- **Average Mental Health:** {self.df['mental_health'].mean():.1f}/10",
            f"- **Average Sleep:** {self.df['sleep_hours'].mean():.1f} hours",
            f"- **High Risk Students:** {high_risk} ({high_risk_pct:.1f}%)",
        ])

        return stats, fig1, fig2, fig3, fig4
131
+
132
# Initialize analyzer (generates the sample data and fits the model once at import time)
analyzer = SimpleSocialMediaAnalyzer()

# Create Gradio interface
with gr.Blocks(title="Social Media Analysis - Simplified", theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 📱 Social Media Usage Analysis (Simplified)")

    with gr.Tabs():
        with gr.Tab("🔍 Individual Analysis"):
            with gr.Row():
                with gr.Column():
                    # step=1 on whole-number inputs: without an explicit step
                    # Gradio derives a fractional one from the slider range.
                    age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age")
                    daily_usage = gr.Slider(minimum=0, maximum=12, value=4, step=0.5, label="Daily Usage (Hours)")
                    sleep_hours = gr.Slider(minimum=4, maximum=12, value=7, step=0.5, label="Sleep Hours")
                    mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1, label="Mental Health Score (1-10)")

                with gr.Column():
                    conflicts = gr.Slider(minimum=0, maximum=5, value=2, step=1, label="Social Media Conflicts (0-5)")
                    addiction_score = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Addiction Score (1-10)")
                    # The default selection is passed as value=...; Radio's
                    # value parameter is keyword-only (TODO confirm against
                    # the Gradio version pinned for this Space).
                    gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender")
                    platform = gr.Radio(
                        choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
                        value="Instagram",
                        label="Most Used Platform",
                    )

            analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            result_output = gr.Markdown()

            # Input order must match the parameter order of analyze_individual.
            analyze_btn.click(
                fn=analyzer.analyze_individual,
                inputs=[age, daily_usage, sleep_hours, mental_health,
                        conflicts, addiction_score, gender, platform],
                outputs=result_output,
            )

        with gr.Tab("📊 Dashboard"):
            dashboard_btn = gr.Button("📊 Generate Dashboard", variant="primary")

            with gr.Row():
                summary_text = gr.Markdown()

            with gr.Row():
                plot1 = gr.Plot()
                plot2 = gr.Plot()

            with gr.Row():
                plot3 = gr.Plot()
                plot4 = gr.Plot()

            # Output order must match the tuple returned by create_dashboard.
            dashboard_btn.click(
                fn=analyzer.create_dashboard,
                outputs=[summary_text, plot1, plot2, plot3, plot4],
            )


if __name__ == "__main__":
    demo.launch(share=True)