BlakeL commited on
Commit
307db4e
Β·
verified Β·
1 Parent(s): 0f45310

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +612 -547
app.py CHANGED
@@ -9,611 +9,676 @@ import pandas as pd
9
  import numpy as np
10
  import matplotlib.pyplot as plt
11
  import seaborn as sns
12
- from sklearn.preprocessing import StandardScaler, LabelEncoder
13
- from sklearn.cluster import KMeans
14
- from sklearn.linear_model import LinearRegression, LogisticRegression
15
- from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
16
- from sklearn.metrics import silhouette_score, mean_squared_error, accuracy_score, classification_report
17
- import plotly.express as px
18
- import plotly.graph_objects as go
19
- from plotly.subplots import make_subplots
20
  import warnings
 
 
21
  warnings.filterwarnings('ignore')
 
 
 
 
 
22
 
23
- # Set style
24
  plt.style.use('seaborn-v0_8')
25
  sns.set_palette("husl")
26
 
27
- class ComprehensiveSocialMediaAnalyzer:
28
  def __init__(self):
29
- """Initialize the analyzer with all models"""
30
- self.df = None
31
- self.scaler = StandardScaler()
32
- self.kmeans_model = None
33
- self.regression_model = None
34
- self.conflicts_model = None
35
- self.feature_names = None
36
  self.load_data()
37
- self.train_all_models()
38
-
 
39
  def load_data(self):
40
- """Load and prepare the dataset"""
41
  try:
42
- # Load the dataset
43
- import os
44
- import glob
45
-
46
- # Get current working directory
47
- cwd = os.getcwd()
48
- print(f"πŸ” Current working directory: {cwd}")
49
-
50
  # Try multiple possible paths
51
  possible_paths = [
52
  "data/Students Social Media Addiction.csv",
53
- "./data/Students Social Media Addiction.csv",
54
  "../data/Students Social Media Addiction.csv",
55
- os.path.join(cwd, "data", "Students Social Media Addiction.csv"),
56
- os.path.join(os.path.dirname(__file__), "data", "Students Social Media Addiction.csv")
57
  ]
58
 
59
- # Also try to find any CSV file in data directory
60
- data_files = glob.glob("data/*.csv")
61
- print(f"πŸ” Found CSV files in data/: {data_files}")
62
-
63
  for path in possible_paths:
64
- print(f"πŸ” Trying path: {path}")
65
- if os.path.exists(path):
66
- try:
67
- self.df = pd.read_csv(path)
68
- print(f"βœ… Data loaded from: {path}")
69
- print(f" Shape: {self.df.shape}")
70
- print(f" Columns: {list(self.df.columns)}")
71
- break
72
- except Exception as e:
73
- print(f"❌ Error reading {path}: {e}")
74
- continue
75
  else:
76
- # If no file found, try to use any CSV in data directory
77
- if data_files:
78
- try:
79
- self.df = pd.read_csv(data_files[0])
80
- print(f"βœ… Data loaded from fallback: {data_files[0]}")
81
- print(f" Shape: {self.df.shape}")
82
- print(f" Columns: {list(self.df.columns)}")
83
- except Exception as e:
84
- print(f"❌ Error reading fallback file: {e}")
85
- raise FileNotFoundError("Could not load any data file")
86
- else:
87
- raise FileNotFoundError("Could not find the data file in any expected location")
88
-
89
- # Create binary features for categorical variables
90
- self.df['Is_Female'] = (self.df['Gender'] == 'Female').astype(int)
91
- self.df['Is_Male'] = (self.df['Gender'] == 'Male').astype(int)
92
-
93
- # Academic level features
94
- self.df['Is_Undergraduate'] = (self.df['Academic_Level'] == 'Undergraduate').astype(int)
95
- self.df['Is_Graduate'] = (self.df['Academic_Level'] == 'Graduate').astype(int)
96
- self.df['Is_High_School'] = (self.df['Academic_Level'] == 'High School').astype(int)
97
-
98
- # Relationship status features
99
- self.df['Is_Single'] = (self.df['Relationship_Status'] == 'Single').astype(int)
100
- self.df['Is_In_Relationship'] = (self.df['Relationship_Status'] == 'In Relationship').astype(int)
101
- self.df['Is_Complicated'] = (self.df['Relationship_Status'] == 'Complicated').astype(int)
102
-
103
- # Academic performance
104
- self.df['Affects_Academic'] = (self.df['Affects_Academic_Performance'] == 'Yes').astype(int)
105
-
106
- # Create platform dummies (top 6 platforms)
107
- top_platforms = self.df['Most_Used_Platform'].value_counts().head(6).index
108
- for platform in top_platforms:
109
- self.df[f'Uses_{platform}'] = (self.df['Most_Used_Platform'] == platform).astype(int)
110
-
111
- # Create behavioral features
112
- self.df['High_Usage'] = (self.df['Avg_Daily_Usage_Hours'] >= 6).astype(int)
113
- self.df['Low_Sleep'] = (self.df['Sleep_Hours_Per_Night'] <= 6).astype(int)
114
- self.df['Poor_Mental_Health'] = (self.df['Mental_Health_Score'] <= 5).astype(int)
115
- self.df['High_Conflict'] = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
116
- self.df['High_Addiction'] = (self.df['Addicted_Score'] >= 7).astype(int)
117
-
118
- # Create interaction features
119
- self.df['Usage_Sleep_Ratio'] = self.df['Avg_Daily_Usage_Hours'] / self.df['Sleep_Hours_Per_Night']
120
- self.df['Mental_Health_Usage_Ratio'] = self.df['Mental_Health_Score'] / self.df['Avg_Daily_Usage_Hours']
121
-
122
- print("βœ… Data loaded successfully!")
123
-
124
  except Exception as e:
125
  print(f"❌ Error loading data: {e}")
126
- # Create sample data if file not found
127
  self.create_sample_data()
128
 
129
  def create_sample_data(self):
130
  """Create sample data for demonstration"""
131
  np.random.seed(42)
132
- n_samples = 1000
133
 
134
- self.df = pd.DataFrame({
135
- 'Age': np.random.randint(16, 30, n_samples),
136
  'Gender': np.random.choice(['Male', 'Female'], n_samples),
137
- 'Academic_Level': np.random.choice(['High School', 'Undergraduate', 'Graduate'], n_samples),
138
  'Relationship_Status': np.random.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
 
139
  'Most_Used_Platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
140
- 'Avg_Daily_Usage_Hours': np.random.normal(4.5, 2, n_samples),
141
- 'Sleep_Hours_Per_Night': np.random.normal(7, 1.5, n_samples),
142
- 'Mental_Health_Score': np.random.normal(6.5, 2, n_samples),
143
- 'Conflicts_Over_Social_Media': np.random.randint(0, 6, n_samples),
144
- 'Addicted_Score': np.random.normal(5.5, 2, n_samples),
145
  'Affects_Academic_Performance': np.random.choice(['Yes', 'No'], n_samples)
146
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Apply the same feature engineering
149
- self.load_data()
150
-
151
- def train_all_models(self):
152
- """Train clustering, regression, and classification models"""
153
- try:
154
- # Select numerical features for all models
155
- numerical_features = [
156
- 'Age', 'Avg_Daily_Usage_Hours', 'Sleep_Hours_Per_Night',
157
- 'Mental_Health_Score', 'Conflicts_Over_Social_Media', 'Addicted_Score',
158
- 'Is_Female', 'Is_Undergraduate', 'Is_Graduate', 'Is_High_School',
159
- 'Is_Single', 'Is_In_Relationship', 'Is_Complicated', 'Affects_Academic',
160
- 'High_Usage', 'Low_Sleep', 'Poor_Mental_Health', 'High_Conflict', 'High_Addiction',
161
- 'Usage_Sleep_Ratio', 'Mental_Health_Usage_Ratio'
162
- ]
163
-
164
- # Add platform features
165
- platform_features = [col for col in self.df.columns if col.startswith('Uses_')]
166
- numerical_features.extend(platform_features)
167
-
168
- # Filter to only include features that exist
169
- self.feature_names = [f for f in numerical_features if f in self.df.columns]
170
-
171
- # Create feature matrix
172
- X = self.df[self.feature_names].copy()
173
-
174
- # Handle missing values
175
- X = X.fillna(X.mean())
176
-
177
- # Scale features
178
- X_scaled = self.scaler.fit_transform(X)
179
-
180
- # 1. Train Clustering Model (K-Means)
181
- self.kmeans_model = KMeans(n_clusters=4, random_state=42, n_init=10)
182
- self.df['Cluster'] = self.kmeans_model.fit_predict(X_scaled)
183
-
184
- # 2. Train Regression Model (Predict Addiction Score)
185
- self.regression_model = RandomForestRegressor(n_estimators=100, random_state=42)
186
- self.regression_model.fit(X_scaled, self.df['Addicted_Score'])
187
-
188
- # 3. Train Classification Model (Predict Conflicts)
189
- # Create binary conflict target (High conflict if >= 3)
190
- conflict_target = (self.df['Conflicts_Over_Social_Media'] >= 3).astype(int)
191
- self.conflicts_model = RandomForestClassifier(n_estimators=100, random_state=42)
192
- self.conflicts_model.fit(X_scaled, conflict_target)
193
-
194
- print("βœ… All models trained successfully!")
195
- print(f" - Clustering: {len(set(self.df['Cluster']))} clusters")
196
- print(f" - Regression: Addiction score prediction")
197
- print(f" - Classification: Conflict prediction")
198
-
199
- except Exception as e:
200
- print(f"❌ Error training models: {e}")
201
-
202
- def analyze_individual(self, age, gender, academic_level, relationship_status,
203
- platform, daily_usage, sleep_hours, mental_health,
204
- conflicts, addiction_score, affects_academic):
205
- """Comprehensive individual analysis"""
206
 
207
- # Create individual data point
208
- individual_data = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  'Age': age,
210
  'Gender': gender,
211
  'Academic_Level': academic_level,
212
  'Relationship_Status': relationship_status,
 
213
  'Most_Used_Platform': platform,
214
  'Avg_Daily_Usage_Hours': daily_usage,
215
  'Sleep_Hours_Per_Night': sleep_hours,
216
  'Mental_Health_Score': mental_health,
217
  'Conflicts_Over_Social_Media': conflicts,
218
- 'Addicted_Score': addiction_score,
219
  'Affects_Academic_Performance': affects_academic
220
  }
 
 
 
221
 
222
- # Create binary features
223
- individual_data['Is_Female'] = 1 if gender == 'Female' else 0
224
- individual_data['Is_Undergraduate'] = 1 if academic_level == 'Undergraduate' else 0
225
- individual_data['Is_Graduate'] = 1 if academic_level == 'Graduate' else 0
226
- individual_data['Is_High_School'] = 1 if academic_level == 'High School' else 0
227
- individual_data['Is_Single'] = 1 if relationship_status == 'Single' else 0
228
- individual_data['Is_In_Relationship'] = 1 if relationship_status == 'In Relationship' else 0
229
- individual_data['Is_Complicated'] = 1 if relationship_status == 'Complicated' else 0
230
- individual_data['Affects_Academic'] = 1 if affects_academic == 'Yes' else 0
231
-
232
- # Platform features
233
- for platform_name in ['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat']:
234
- individual_data[f'Uses_{platform_name}'] = 1 if platform == platform_name else 0
235
-
236
- # Behavioral features
237
- individual_data['High_Usage'] = 1 if daily_usage >= 6 else 0
238
- individual_data['Low_Sleep'] = 1 if sleep_hours <= 6 else 0
239
- individual_data['Poor_Mental_Health'] = 1 if mental_health <= 5 else 0
240
- individual_data['High_Conflict'] = 1 if conflicts >= 3 else 0
241
- individual_data['High_Addiction'] = 1 if addiction_score >= 7 else 0
242
-
243
- # Interaction features
244
- individual_data['Usage_Sleep_Ratio'] = daily_usage / sleep_hours if sleep_hours > 0 else 0
245
- individual_data['Mental_Health_Usage_Ratio'] = mental_health / daily_usage if daily_usage > 0 else 0
246
-
247
- # Create feature vector
248
- features = []
249
- for feature in self.feature_names:
250
- if feature in individual_data:
251
- features.append(individual_data[feature])
252
- else:
253
- features.append(0)
254
-
255
- # Scale features
256
- features_scaled = self.scaler.transform([features])
257
-
258
- # 1. Clustering Analysis
259
- cluster = self.kmeans_model.predict(features_scaled)[0]
260
- cluster_data = self.df[self.df['Cluster'] == cluster]
261
-
262
- # 2. Regression Analysis (Predict Addiction Score)
263
- predicted_addiction = self.regression_model.predict(features_scaled)[0]
264
-
265
- # 3. Classification Analysis (Predict Conflict Risk)
266
- conflict_probability = self.conflicts_model.predict_proba(features_scaled)[0]
267
- high_conflict_prob = conflict_probability[1] # Probability of high conflict
268
-
269
- # Calculate risk factors
270
- risk_factors = []
271
- if daily_usage >= 6:
272
- risk_factors.append("High daily usage (β‰₯6 hours)")
273
- if sleep_hours <= 6:
274
- risk_factors.append("Low sleep (≀6 hours)")
275
- if mental_health <= 5:
276
- risk_factors.append("Poor mental health (≀5/10)")
277
- if conflicts >= 3:
278
- risk_factors.append("High social media conflicts (β‰₯3)")
279
- if addiction_score >= 7:
280
- risk_factors.append("High addiction score (β‰₯7/10)")
281
-
282
- # Generate recommendations
283
- recommendations = []
284
- if daily_usage >= 6:
285
- recommendations.append("Consider setting daily usage limits")
286
- if sleep_hours <= 6:
287
- recommendations.append("Improve sleep hygiene and reduce screen time before bed")
288
- if mental_health <= 5:
289
- recommendations.append("Consider mental health support and digital detox")
290
- if conflicts >= 3:
291
- recommendations.append("Work on communication skills and boundary setting")
292
- if addiction_score >= 7:
293
- recommendations.append("Seek professional help for digital addiction")
294
-
295
- if not recommendations:
296
- recommendations.append("Maintain healthy social media habits")
297
-
298
- # Format comprehensive results
299
- output = f"""
300
- ## πŸ“Š Comprehensive Analysis Results
301
-
302
- ### 🎯 Clustering Analysis
303
- **Cluster {cluster}** - You belong to a group with {len(cluster_data)} similar students
304
-
305
- **Cluster Characteristics (Average):**
306
- - Daily Usage: {cluster_data['Avg_Daily_Usage_Hours'].mean():.1f} hours
307
- - Mental Health Score: {cluster_data['Mental_Health_Score'].mean():.1f}/10
308
- - Sleep Hours: {cluster_data['Sleep_Hours_Per_Night'].mean():.1f} hours/night
309
- - Addiction Score: {cluster_data['Addicted_Score'].mean():.1f}/10
310
-
311
- ### πŸ“ˆ Regression Analysis (Addiction Prediction)
312
- **Your Current Addiction Score:** {addiction_score:.1f}/10
313
- **Predicted Addiction Score:** {predicted_addiction:.1f}/10
314
- **Difference:** {predicted_addiction - addiction_score:+.1f} points
315
-
316
- ### ⚠️ Conflict Risk Analysis
317
- **Current Conflicts:** {conflicts}/5
318
- **High Conflict Risk Probability:** {high_conflict_prob:.1%}
319
- **Risk Level:** {'High' if high_conflict_prob > 0.6 else 'Medium' if high_conflict_prob > 0.3 else 'Low'}
320
-
321
- ### 🚨 Risk Factors Identified
322
- """
323
 
324
- if risk_factors:
325
- for factor in risk_factors:
326
- output += f"- {factor}\n"
327
- else:
328
- output += "- No significant risk factors identified\n"
329
-
330
- output += "\n### πŸ’‘ Personalized Recommendations\n"
331
- for rec in recommendations:
332
- output += f"- {rec}\n"
333
-
334
- # Add model-specific recommendations
335
- if predicted_addiction > addiction_score + 1:
336
- output += "- Consider reducing social media usage to prevent addiction escalation\n"
337
- if high_conflict_prob > 0.6:
338
- output += "- Focus on improving communication and conflict resolution skills\n"
339
-
340
- return output
341
-
342
- def create_comprehensive_dashboard(self):
343
- """Create comprehensive dashboard with all analyses"""
344
-
345
- # 1. Usage Distribution
346
- fig1 = px.histogram(self.df, x='Avg_Daily_Usage_Hours',
347
- title='Daily Social Media Usage Distribution',
348
- nbins=20, color_discrete_sequence=['#1f77b4'])
349
- fig1.update_layout(xaxis_title='Hours per Day', yaxis_title='Number of Students')
350
-
351
- # 2. Mental Health vs Usage by Cluster
352
- fig2 = px.scatter(self.df, x='Avg_Daily_Usage_Hours', y='Mental_Health_Score',
353
- color='Cluster', title='Mental Health vs Daily Usage by Cluster',
354
- color_discrete_sequence=px.colors.qualitative.Set1)
355
- fig2.update_layout(xaxis_title='Daily Usage (Hours)', yaxis_title='Mental Health Score')
356
-
357
- # 3. Cluster Distribution
358
- cluster_counts = self.df['Cluster'].value_counts().sort_index()
359
- fig3 = px.bar(x=cluster_counts.index, y=cluster_counts.values,
360
- title='Student Distribution by Cluster',
361
- color_discrete_sequence=['#2ca02c'])
362
- fig3.update_layout(xaxis_title='Cluster', yaxis_title='Number of Students')
363
-
364
- # 4. Addiction Score Distribution
365
- fig4 = px.histogram(self.df, x='Addicted_Score',
366
- title='Addiction Score Distribution',
367
- nbins=20, color_discrete_sequence=['#d62728'])
368
- fig4.update_layout(xaxis_title='Addiction Score', yaxis_title='Number of Students')
369
-
370
- # 5. Conflicts Analysis
371
- conflict_counts = self.df['Conflicts_Over_Social_Media'].value_counts().sort_index()
372
- fig5 = px.bar(x=conflict_counts.index, y=conflict_counts.values,
373
- title='Social Media Conflicts Distribution',
374
- color_discrete_sequence=['#ff7f0e'])
375
- fig5.update_layout(xaxis_title='Number of Conflicts', yaxis_title='Number of Students')
376
-
377
- # 6. Platform Usage
378
- platform_counts = self.df['Most_Used_Platform'].value_counts()
379
- fig6 = px.pie(values=platform_counts.values, names=platform_counts.index,
380
- title='Most Used Social Media Platforms')
381
-
382
- # 7. Cluster Characteristics Heatmap
383
- cluster_stats = self.df.groupby('Cluster').agg({
384
- 'Avg_Daily_Usage_Hours': 'mean',
385
- 'Mental_Health_Score': 'mean',
386
- 'Sleep_Hours_Per_Night': 'mean',
387
- 'Addicted_Score': 'mean',
388
- 'Conflicts_Over_Social_Media': 'mean'
389
- }).round(2)
390
-
391
- fig7 = px.imshow(cluster_stats.T,
392
- title='Cluster Characteristics Heatmap',
393
- color_continuous_scale='RdYlBu_r',
394
- aspect='auto')
395
- fig7.update_layout(xaxis_title='Cluster', yaxis_title='Metrics')
396
-
397
- # 8. Correlation Matrix
398
- corr_features = ['Avg_Daily_Usage_Hours', 'Mental_Health_Score', 'Sleep_Hours_Per_Night',
399
- 'Addicted_Score', 'Conflicts_Over_Social_Media']
400
- corr_matrix = self.df[corr_features].corr()
401
-
402
- fig8 = px.imshow(corr_matrix,
403
- title='Feature Correlation Matrix',
404
- color_continuous_scale='RdBu',
405
- aspect='auto')
406
- fig8.update_layout(xaxis_title='Features', yaxis_title='Features')
407
-
408
- return fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
409
-
410
- def get_comprehensive_stats(self):
411
- """Get comprehensive summary statistics"""
412
- stats = {
413
- "total_students": len(self.df),
414
- "avg_age": self.df['Age'].mean(),
415
- "avg_daily_usage": self.df['Avg_Daily_Usage_Hours'].mean(),
416
- "avg_mental_health": self.df['Mental_Health_Score'].mean(),
417
- "avg_sleep": self.df['Sleep_Hours_Per_Night'].mean(),
418
- "avg_addiction": self.df['Addicted_Score'].mean(),
419
- "avg_conflicts": self.df['Conflicts_Over_Social_Media'].mean(),
420
- "high_risk_students": len(self.df[self.df['Addicted_Score'] >= 7]),
421
- "high_conflict_students": len(self.df[self.df['Conflicts_Over_Social_Media'] >= 3]),
422
- "most_used_platform": self.df['Most_Used_Platform'].mode()[0],
423
- "n_clusters": len(set(self.df['Cluster']))
424
- }
425
- return stats
426
 
427
- # Initialize the analyzer
428
- analyzer = ComprehensiveSocialMediaAnalyzer()
429
 
430
- def individual_analysis(age, gender, academic_level, relationship_status,
431
- platform, daily_usage, sleep_hours, mental_health,
432
- conflicts, addiction_score, affects_academic):
433
- """Gradio interface for comprehensive individual analysis"""
434
-
435
- try:
436
- results = analyzer.analyze_individual(
437
- age, gender, academic_level, relationship_status,
438
- platform, daily_usage, sleep_hours, mental_health,
439
- conflicts, addiction_score, affects_academic
440
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
- return results
 
443
 
444
- except Exception as e:
445
- return f"❌ Error in analysis: {str(e)}"
446
 
447
- def comprehensive_dashboard():
448
- """Create comprehensive dashboard with all analyses"""
449
- try:
450
- fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8 = analyzer.create_comprehensive_dashboard()
451
- stats = analyzer.get_comprehensive_stats()
452
-
453
- # Create comprehensive summary text
454
- summary = f"""
455
- ## πŸ“Š Comprehensive Dataset Overview
456
 
457
- ### πŸ“ˆ Basic Statistics
458
- - **Total Students**: {stats['total_students']:,}
459
- - **Average Age**: {stats['avg_age']:.1f} years
460
- - **Average Daily Usage**: {stats['avg_daily_usage']:.1f} hours
461
- - **Average Mental Health Score**: {stats['avg_mental_health']:.1f}/10
462
- - **Average Sleep**: {stats['avg_sleep']:.1f} hours/night
463
- - **Average Addiction Score**: {stats['avg_addiction']:.1f}/10
464
- - **Average Conflicts**: {stats['avg_conflicts']:.1f}/5
465
 
466
- ### ⚠️ Risk Assessment
467
- - **High Risk Students (Addiction β‰₯7)**: {stats['high_risk_students']} ({stats['high_risk_students']/stats['total_students']*100:.1f}%)
468
- - **High Conflict Students (β‰₯3)**: {stats['high_conflict_students']} ({stats['high_conflict_students']/stats['total_students']*100:.1f}%)
469
 
470
- ### 🎯 Analysis Results
471
- - **Clusters Identified**: {stats['n_clusters']}
472
- - **Most Used Platform**: {stats['most_used_platform']}
 
 
 
 
 
 
 
 
 
 
473
 
474
- ### πŸ“Š Model Performance
475
- - **Clustering**: K-Means with {stats['n_clusters']} clusters
476
- - **Regression**: Random Forest for addiction score prediction
477
- - **Classification**: Random Forest for conflict risk prediction
478
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
- return summary, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
 
 
 
 
 
481
 
482
- except Exception as e:
483
- return f"❌ Error creating dashboard: {str(e)}", None, None, None, None, None, None, None, None
484
-
485
- # Create Gradio interface
486
- with gr.Blocks(title="Social Media Addiction Analysis - Comprehensive", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
- gr.Markdown("""
489
- # πŸ“± Social Media Addiction Analysis - Comprehensive
 
 
 
490
 
491
- ## Overview
492
- This application provides comprehensive analysis of student social media usage patterns including:
493
- - **Clustering Analysis**: Identify behavioral segments
494
- - **Regression Analysis**: Predict addiction scores
495
- - **Classification Analysis**: Predict conflict risks
496
- - **Risk Assessment**: Identify potential concerns
497
- - **Personalized Recommendations**: Actionable advice
 
498
 
499
- ### Features:
500
- - **Individual Analysis**: Get personalized insights with all three model types
501
- - **Comprehensive Dashboard**: Explore patterns across all analyses
502
- - **Risk Assessment**: Multi-factor evaluation
503
- - **Predictive Analytics**: ML-powered predictions
504
- """)
505
 
506
- with gr.Tabs():
507
-
508
- # Individual Analysis Tab
509
- with gr.Tab("πŸ” Individual Analysis"):
510
- gr.Markdown("### Enter your social media usage information for comprehensive analysis")
511
-
512
- with gr.Row():
513
- with gr.Column():
514
- age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age")
515
- gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender")
516
- academic_level = gr.Radio(choices=["High School", "Undergraduate", "Graduate"],
517
- value="Undergraduate", label="Academic Level")
518
- relationship_status = gr.Radio(choices=["Single", "In Relationship", "Complicated"],
519
- value="Single", label="Relationship Status")
520
-
521
- with gr.Column():
522
- platform = gr.Radio(choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat"],
523
- value="Instagram", label="Most Used Platform")
524
- daily_usage = gr.Slider(minimum=0, maximum=12, value=4, step=0.5,
525
- label="Average Daily Usage (Hours)")
526
- sleep_hours = gr.Slider(minimum=4, maximum=12, value=7, step=0.5,
527
- label="Sleep Hours per Night")
528
- mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1,
529
- label="Mental Health Score (1-10)")
530
-
531
- with gr.Column():
532
- conflicts = gr.Slider(minimum=0, maximum=5, value=2, step=1,
533
- label="Conflicts Over Social Media (0-5)")
534
- addiction_score = gr.Slider(minimum=1, maximum=10, value=5, step=1,
535
- label="Addiction Score (1-10)")
536
- affects_academic = gr.Radio(choices=["Yes", "No"], value="No",
537
- label="Affects Academic Performance")
538
-
539
- analyze_btn = gr.Button("πŸ” Analyze My Usage", variant="primary")
540
- analysis_output = gr.Markdown(label="Comprehensive Analysis Results")
541
-
542
- analyze_btn.click(
543
- fn=individual_analysis,
544
- inputs=[age, gender, academic_level, relationship_status, platform,
545
- daily_usage, sleep_hours, mental_health, conflicts, addiction_score, affects_academic],
546
- outputs=analysis_output
547
- )
548
-
549
- # Comprehensive Dashboard Tab
550
- with gr.Tab("πŸ“Š Comprehensive Dashboard"):
551
- gr.Markdown("### Explore comprehensive patterns and all analyses")
552
-
553
- dashboard_btn = gr.Button("πŸ“Š Generate Comprehensive Dashboard", variant="primary")
554
-
555
- with gr.Row():
556
- summary_output = gr.Markdown(label="Comprehensive Summary Statistics")
557
-
558
- with gr.Row():
559
- plot1 = gr.Plot(label="Usage Distribution")
560
- plot2 = gr.Plot(label="Mental Health vs Usage by Cluster")
561
-
562
- with gr.Row():
563
- plot3 = gr.Plot(label="Cluster Distribution")
564
- plot4 = gr.Plot(label="Addiction Score Distribution")
565
-
566
- with gr.Row():
567
- plot5 = gr.Plot(label="Conflicts Distribution")
568
- plot6 = gr.Plot(label="Platform Usage")
569
-
570
- with gr.Row():
571
- plot7 = gr.Plot(label="Cluster Characteristics Heatmap")
572
- plot8 = gr.Plot(label="Feature Correlation Matrix")
573
-
574
- dashboard_btn.click(
575
- fn=comprehensive_dashboard,
576
- outputs=[summary_output, plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8]
577
- )
578
-
579
- # About Tab
580
- with gr.Tab("ℹ️ About"):
581
- gr.Markdown("""
582
- ## About This Comprehensive Application
583
-
584
- ### Purpose
585
- This application provides comprehensive analysis of student social media usage patterns using multiple machine learning approaches.
586
-
587
- ### Methodology
588
- - **Clustering Analysis**: K-Means clustering to identify distinct behavioral segments
589
- - **Regression Analysis**: Random Forest to predict addiction scores
590
- - **Classification Analysis**: Random Forest to predict conflict risks
591
- - **Risk Assessment**: Multi-factor evaluation of potential concerns
592
- - **Personalized Recommendations**: Actionable advice based on all analyses
593
-
594
- ### Key Metrics
595
- - **Daily Usage**: Hours spent on social media per day
596
- - **Mental Health Score**: Self-reported mental health (1-10 scale)
597
- - **Sleep Hours**: Average sleep duration per night
598
- - **Addiction Score**: Self-reported addiction level (1-10 scale)
599
- - **Conflicts**: Number of conflicts related to social media use
600
-
601
- ### Model Performance
602
- - **Clustering**: Identifies 4 distinct behavioral clusters
603
- - **Regression**: Predicts addiction scores with high accuracy
604
- - **Classification**: Predicts conflict risk probability
605
-
606
- ### Recommendations
607
- - Set daily usage limits
608
- - Improve sleep hygiene
609
- - Seek mental health support when needed
610
- - Develop healthy digital boundaries
611
- - Work on communication skills
612
-
613
- ### Data Source
614
- Analysis based on comprehensive student social media usage survey data.
615
- """)
616
-
617
- # Launch the app
618
- if __name__ == "__main__":
619
- demo.launch(share=True)
 
9
  import numpy as np
10
  import matplotlib.pyplot as plt
11
  import seaborn as sns
12
+ from pathlib import Path
 
 
 
 
 
 
 
13
  import warnings
14
+ import io
15
+ import base64
16
  warnings.filterwarnings('ignore')
17
+ import sys
18
+ sys.path.append('src')
19
+ from social_sphere_llm.unified_prediction_service import UnifiedSocialMediaPredictionService
20
+ from info import SocialSphereInfo
21
+ from graphs import create_conflict_pie_chart, create_addiction_score_chart, create_addiction_gauge_chart, create_clustering_charts
22
 
23
+ # Set style for plots
24
  plt.style.use('seaborn-v0_8')
25
  sns.set_palette("husl")
26
 
27
+ class SocialMediaAnalyzer:
28
  def __init__(self):
29
+ self.data = None
 
 
 
 
 
 
30
  self.load_data()
31
+ self.unified_service = UnifiedSocialMediaPredictionService()
32
+ self.info = SocialSphereInfo()
33
+
34
  def load_data(self):
35
+ """Load the dataset with fallback options"""
36
  try:
 
 
 
 
 
 
 
 
37
  # Try multiple possible paths
38
  possible_paths = [
39
  "data/Students Social Media Addiction.csv",
40
+ "data/cleaned_data.csv",
41
  "../data/Students Social Media Addiction.csv",
42
+ "../data/cleaned_data.csv"
 
43
  ]
44
 
 
 
 
 
45
  for path in possible_paths:
46
+ if Path(path).exists():
47
+ self.data = pd.read_csv(path)
48
+ print(f"βœ… Data loaded from: {path}")
49
+ break
 
 
 
 
 
 
 
50
  else:
51
+ # Create sample data if file not found
52
+ print("⚠️ Data file not found, creating sample data...")
53
+ self.create_sample_data()
54
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
  print(f"❌ Error loading data: {e}")
 
57
  self.create_sample_data()
58
 
59
  def create_sample_data(self):
60
  """Create sample data for demonstration"""
61
  np.random.seed(42)
62
+ n_samples = 100
63
 
64
+ self.data = pd.DataFrame({
65
+ 'Age': np.random.randint(18, 25, n_samples),
66
  'Gender': np.random.choice(['Male', 'Female'], n_samples),
67
+ 'Academic_Level': np.random.choice(['Undergraduate', 'Graduate', 'High School'], n_samples),
68
  'Relationship_Status': np.random.choice(['Single', 'In Relationship', 'Complicated'], n_samples),
69
+ 'Country': np.random.choice(['USA', 'UK', 'Canada', 'Australia'], n_samples),
70
  'Most_Used_Platform': np.random.choice(['Instagram', 'TikTok', 'Facebook', 'Twitter', 'Snapchat'], n_samples),
71
+ 'Avg_Daily_Usage_Hours': np.random.uniform(1, 12, n_samples),
72
+ 'Sleep_Hours_Per_Night': np.random.uniform(4, 10, n_samples),
73
+ 'Mental_Health_Score': np.random.uniform(1, 10, n_samples),
74
+ 'Conflicts_Over_Social_Media': np.random.randint(0, 5, n_samples),
75
+ 'Addicted_Score': np.random.uniform(1, 10, n_samples),
76
  'Affects_Academic_Performance': np.random.choice(['Yes', 'No'], n_samples)
77
  })
78
+ print("βœ… Sample data created successfully!")
79
+
80
+ def create_conflict_pie_chart(self, result):
81
+ """Create a pie chart for conflict prediction results"""
82
+ # Create the pie chart
83
+ fig, ax = plt.subplots(figsize=(3, 2))
84
+
85
+ # Define colors and labels
86
+ if result['conflict_level'] == 'High Risk':
87
+ colors = ['#ff6b6b', '#4ecdc4'] # Red for High Risk, Green for Low Risk
88
+ sizes = [result['confidence'], 1 - result['confidence']]
89
+ labels = ['High Risk', 'Low Risk']
90
+ else:
91
+ colors = ['#4ecdc4', '#ff6b6b'] # Green for Low Risk, Red for High Risk
92
+ sizes = [result['confidence'], 1 - result['confidence']]
93
+ labels = ['Low Risk', 'High Risk']
94
 
95
+ # Create pie chart
96
+ wedges, texts, autotexts = ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
97
+ startangle=90, explode=(0.1, 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Customize the chart
100
+ ax.set_title(f'Conflict Risk Prediction\nConfidence: {result["confidence"]:.1%}',
101
+ fontsize=14, fontweight='bold', pad=20)
102
+
103
+ # Make the chart more visually appealing
104
+ for autotext in autotexts:
105
+ autotext.set_color('white')
106
+ autotext.set_fontweight('bold')
107
+
108
+ # Add a legend
109
+ ax.legend(wedges, labels, title="Risk Levels", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
110
+
111
+ plt.tight_layout()
112
+
113
+ # Convert plot to base64 string for embedding in markdown
114
+ img_buffer = io.BytesIO()
115
+ plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
116
+ img_buffer.seek(0)
117
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
118
+ plt.close()
119
+
120
+ return f"data:image/png;base64,{img_base64}"
121
+
122
+ def create_addiction_score_chart(self, result):
123
+ """Create a histogram with prediction line for addiction score results"""
124
+ # Create the figure
125
+ fig, ax = plt.subplots(figsize=(10, 6))
126
+
127
+ # Generate sample distribution for context (if we have data)
128
+ if self.data is not None and 'Addicted_Score' in self.data.columns:
129
+ # Use actual data distribution
130
+ scores = self.data['Addicted_Score'].dropna()
131
+ else:
132
+ # Create a realistic distribution
133
+ np.random.seed(42)
134
+ scores = np.random.normal(5.5, 1.5, 1000)
135
+ scores = np.clip(scores, 1, 10) # Clip to valid range
136
+
137
+ # Create histogram
138
+ n, bins, patches = ax.hist(scores, bins=20, alpha=0.7, color='#4ecdc4',
139
+ edgecolor='black', linewidth=0.5)
140
+
141
+ # Add prediction line
142
+ predicted_score = result['predicted_score']
143
+ ax.axvline(x=predicted_score, color='#ff6b6b', linewidth=3,
144
+ label=f'Your Prediction: {predicted_score:.2f}')
145
+
146
+ # Add confidence interval if available
147
+ if 'confidence' in result:
148
+ confidence = result['confidence']
149
+ # Add a shaded area around the prediction
150
+ ax.axvspan(predicted_score - 0.5, predicted_score + 0.5,
151
+ alpha=0.3, color='#ff6b6b',
152
+ label=f'Confidence: {confidence:.2f}')
153
+
154
+ # Customize the chart
155
+ ax.set_title('Addiction Score Distribution with Your Prediction',
156
+ fontsize=16, fontweight='bold', pad=20)
157
+ ax.set_xlabel('Addiction Score (1-10)', fontsize=12, fontweight='bold')
158
+ ax.set_ylabel('Frequency', fontsize=12, fontweight='bold')
159
+
160
+ # Add addiction level zones
161
+ ax.axvspan(1, 3, alpha=0.2, color='green', label='Low Addiction (1-3)')
162
+ ax.axvspan(3, 7, alpha=0.2, color='orange', label='Moderate Addiction (3-7)')
163
+ ax.axvspan(7, 10, alpha=0.2, color='red', label='High Addiction (7-10)')
164
+
165
+ # Add legend
166
+ ax.legend(loc='upper right', fontsize=10)
167
+
168
+ # Add grid
169
+ ax.grid(True, alpha=0.3)
170
+
171
+ # Set x-axis limits
172
+ ax.set_xlim(0, 10)
173
+
174
+ plt.tight_layout()
175
+
176
+ # Convert plot to base64 string for embedding in markdown
177
+ img_buffer = io.BytesIO()
178
+ plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
179
+ img_buffer.seek(0)
180
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
181
+ plt.close()
182
+
183
+ return f"data:image/png;base64,{img_base64}"
184
+
185
    def create_addiction_gauge_chart(self, result):
        """Create a gauge chart for addiction score results.

        Draws a semicircular gauge on a polar axis: green/orange/red zones for
        low/moderate/high addiction, a needle at the predicted score, and an
        optional confidence annotation. Returns a base64 PNG data URI for
        embedding in markdown.
        """
        # Polar projection is used to draw the semicircular gauge.
        fig, ax = plt.subplots(figsize=(3, 2), subplot_kw={'projection': 'polar'})

        # Get the predicted score
        predicted_score = result['predicted_score']

        # Convert score to angle (0-180 degrees, where 0 is low addiction, 180 is high)
        # Map 1-10 score to 0-180 degrees: 20 degrees per score unit (180/9).
        angle = (predicted_score - 1) * 20

        # Outer arc of the gauge (the full 0-180 degree range at radius 1).
        theta = np.linspace(0, np.pi, 100)
        ax.plot(theta, [1]*100, 'k-', linewidth=3)

        # Color zones. Angles follow the same 20-degrees-per-unit mapping:
        # Low addiction (scores 1-3): Green, 0-40 degrees.
        low_angle = np.linspace(0, 2*20*np.pi/180, 50)
        ax.fill_between(low_angle, 0, 1, alpha=0.3, color='green', label='Low (1-3)')

        # Moderate addiction (scores 3-7): Orange, 40-120 degrees.
        mod_angle = np.linspace(2*20*np.pi/180, 6*20*np.pi/180, 50)
        ax.fill_between(mod_angle, 0, 1, alpha=0.3, color='orange', label='Moderate (3-7)')

        # High addiction (scores 7-10): Red, 120-180 degrees.
        high_angle = np.linspace(6*20*np.pi/180, np.pi, 50)
        ax.fill_between(high_angle, 0, 1, alpha=0.3, color='red', label='High (7-10)')

        # Needle pointing at the predicted score, extended past the arc to r=1.2.
        needle_angle = angle * np.pi / 180
        ax.plot([needle_angle, needle_angle], [0, 1.2], 'k-', linewidth=4, label=f'Your Score: {predicted_score:.1f}')

        # Circle marker at the needle tip.
        ax.plot(needle_angle, 1.2, 'ko', markersize=10, markeredgecolor='white', markeredgewidth=2)

        ax.set_title(f'Addiction Score Gauge\nPredicted: {predicted_score:.1f}/10',
                     fontsize=14, fontweight='bold', pad=20)

        # Remove axis labels and ticks; cap the radius just above the needle tip.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylim(0, 1.3)

        # Zone text labels at radius 1.4.
        # NOTE(review): r=1.4 exceeds ylim (1.3) and the confidence text below
        # uses a negative radius on a polar axis — confirm these annotations
        # actually render where intended.
        ax.text(0, 1.4, 'Low\n(1-3)', ha='center', va='center', fontsize=10, fontweight='bold')
        ax.text(np.pi/2, 1.4, 'Moderate\n(3-7)', ha='center', va='center', fontsize=10, fontweight='bold')
        ax.text(np.pi, 1.4, 'High\n(7-10)', ha='center', va='center', fontsize=10, fontweight='bold')

        # Optional confidence annotation below the gauge.
        if 'confidence' in result:
            confidence = result['confidence']
            ax.text(0, -0.3, f'Confidence: {confidence:.2f}', ha='center', va='center',
                    fontsize=10, fontweight='bold', bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue"))

        plt.tight_layout()

        # Convert plot to base64 string for embedding in markdown
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        img_buffer.seek(0)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
        plt.close()

        return f"data:image/png;base64,{img_base64}"
    def create_clustering_charts(self, result):
        """Create visualization charts for clustering results.

        Produces a two-panel figure (elbow curve and Sleep-vs-Age scatter) and
        returns it as a base64 PNG data URI. NOTE(review): both panels are
        built from hard-coded illustrative values, not from the fitted
        clustering model — confirm this is intentional.
        """
        # Two side-by-side panels: elbow curve (left), cluster scatter (right).
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

        # Chart 1: Elbow Method for Optimal K.
        # These inertia values are example numbers, not computed inertias.
        k_values = range(1, 11)
        inertias = [150, 120, 85, 65, 55, 50, 47, 45, 43, 42]  # Example inertias

        ax1.plot(k_values, inertias, 'bo-', linewidth=2, markersize=8)
        ax1.set_xlabel('Number of Clusters (k)', fontweight='bold')
        ax1.set_ylabel('Inertia', fontweight='bold')
        ax1.set_title('Elbow Method: Optimal K Selection', fontsize=12, fontweight='bold')
        ax1.grid(True, alpha=0.3)

        # Highlight the (fixed) optimal k used by the rest of the chart.
        optimal_k = 3
        ax1.axvline(x=optimal_k, color='red', linestyle='--', alpha=0.7, label=f'Optimal k = {optimal_k}')
        ax1.legend()

        # Chart 2: Cluster Scatter Plot.
        # Synthetic, seeded sample points for visualization — not dataset rows.
        np.random.seed(42)
        n_samples = 200

        # Cluster centers in (Sleep hours, Age) space, one per risk level.
        cluster_centers = np.array([[7, 20], [6, 22], [5, 21]])  # Sleep hours vs Age
        cluster_sizes = [60, 80, 60]

        data = []
        colors = ['#4ecdc4', '#ffd93d', '#ff6b6b']
        labels = ['Low Risk', 'Moderate Risk', 'High Risk']

        for i, (center, size, color, label) in enumerate(zip(cluster_centers, cluster_sizes, colors, labels)):
            cluster_data = np.random.normal(center, 0.8, (size, 2))
            data.append(cluster_data)

            # Plot each cluster's point cloud.
            ax2.scatter(cluster_data[:, 0], cluster_data[:, 1], c=color,
                        alpha=0.7, s=50, label=label)

        # Re-plot the user's cluster larger with a black edge to highlight it.
        # Risk level string is mapped to a cluster index by substring match.
        user_cluster_idx = 0 if 'Low' in result['risk_level'] else (1 if 'Moderate' in result['risk_level'] else 2)
        user_data = data[user_cluster_idx]
        ax2.scatter(user_data[:, 0], user_data[:, 1], c=colors[user_cluster_idx],
                    alpha=1.0, s=100, edgecolors='black', linewidth=2,
                    label=f'Your Cluster: {labels[user_cluster_idx]}')

        ax2.set_xlabel('Sleep Hours per Night', fontweight='bold')
        ax2.set_ylabel('Age', fontweight='bold')
        ax2.set_title(f'Cluster Analysis: Sleep vs Age (k={optimal_k})\nYour Cluster: {result["cluster_label"]}',
                      fontsize=12, fontweight='bold')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()

        # Convert plot to base64 string for embedding in markdown
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        img_buffer.seek(0)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
        plt.close()

        return f"data:image/png;base64,{img_base64}"
+ def get_clustering_assignments(self):
320
+ """Return DataFrame with Sleep, Age, and cluster assignments for all data."""
321
+ if self.data is None or self.unified_service.clustering_model is None or self.unified_service.clustering_scaler is None:
322
+ return None
323
+ # Build feature matrix for all rows
324
+ feature_names = self.unified_service.feature_names.get('clustering', [])
325
+ df = self.data.copy()
326
+ # Build features as in predict_cluster
327
+ def build_features(row):
328
+ features = {}
329
+ features['Age'] = float(row.get('Age', 0))
330
+ features['Avg_Daily_Usage_Hours'] = float(row.get('Avg_Daily_Usage_Hours', 0))
331
+ features['Sleep_Hours_Per_Night'] = float(row.get('Sleep_Hours_Per_Night', 0))
332
+ features['Mental_Health_Score'] = float(row.get('Mental_Health_Score', 0))
333
+ features['Conflicts_Over_Social_Media'] = float(row.get('Conflicts_Over_Social_Media', 0))
334
+ features['Addicted_Score'] = float(row.get('Addicted_Score', 0))
335
+ # Gender
336
+ gender = str(row.get('Gender', '')).lower()
337
+ features['Is_Female'] = 1 if gender in ['female', 'f'] else 0
338
+ # Academic Level
339
+ level = str(row.get('Academic_Level', '')).lower()
340
+ features['Is_Undergraduate'] = 1 if 'undergraduate' in level else 0
341
+ features['Is_Graduate'] = 1 if 'graduate' in level else 0
342
+ features['Is_High_School'] = 1 if 'high school' in level else 0
343
+ # Behavioral
344
+ features['High_Usage'] = 1 if features['Avg_Daily_Usage_Hours'] >= 6 else 0
345
+ features['Low_Sleep'] = 1 if features['Sleep_Hours_Per_Night'] <= 6 else 0
346
+ features['Poor_Mental_Health'] = 1 if features['Mental_Health_Score'] <= 5 else 0
347
+ features['High_Conflict'] = 1 if features['Conflicts_Over_Social_Media'] >= 3 else 0
348
+ features['High_Addiction'] = 1 if features['Addicted_Score'] >= 7 else 0
349
+ # Interactions
350
+ features['Usage_Sleep_Ratio'] = features['Avg_Daily_Usage_Hours'] / features['Sleep_Hours_Per_Night'] if features['Sleep_Hours_Per_Night'] else 0
351
+ features['Mental_Health_Usage_Ratio'] = features['Mental_Health_Score'] / features['Avg_Daily_Usage_Hours'] if features['Avg_Daily_Usage_Hours'] else 0
352
+ return [features.get(f, 0) for f in feature_names]
353
+ X = np.array([build_features(row) for _, row in df.iterrows()])
354
+ X_scaled = self.unified_service.clustering_scaler.transform(X)
355
+ clusters = self.unified_service.clustering_model.predict(X_scaled)
356
+ df = df.copy()
357
+ df['cluster'] = clusters
358
+ return df[['Sleep_Hours_Per_Night', 'Age', 'cluster']]
359
+
360
+ def classification_task(self, age, gender, academic_level, relationship_status,
361
+ country, platform, daily_usage, sleep_hours, mental_health,
362
+ conflicts, addicted_score, affects_academic):
363
+ """Classification task interface (now uses real ML pipeline)"""
364
+ # Prepare input dict for unified pipeline
365
+ input_data = {
366
  'Age': age,
367
  'Gender': gender,
368
  'Academic_Level': academic_level,
369
  'Relationship_Status': relationship_status,
370
+ 'Country': country,
371
  'Most_Used_Platform': platform,
372
  'Avg_Daily_Usage_Hours': daily_usage,
373
  'Sleep_Hours_Per_Night': sleep_hours,
374
  'Mental_Health_Score': mental_health,
375
  'Conflicts_Over_Social_Media': conflicts,
376
+ 'Addicted_Score': addicted_score,
377
  'Affects_Academic_Performance': affects_academic
378
  }
379
+ result = self.unified_service.predict_conflicts(input_data)
380
+ if 'error' in result:
381
+ return f"❌ Error: {result['error']}"
382
 
383
+ # Create the pie chart
384
+ pie_chart_img = create_conflict_pie_chart(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
+ return f"""
387
+ # πŸ” Classification Task: Conflict Risk Prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
+ ## πŸ“Š Prediction Results
 
390
 
391
+ **Predicted Conflict Level:** {result['conflict_level']}
392
+
393
+ **Confidence:** {result['confidence']:.2f}
394
+
395
+ **Recommendation:** {result['recommendation']}
396
+
397
+ ## πŸ“ˆ Visual Risk Assessment
398
+
399
+ ![Conflict Risk Prediction]({pie_chart_img})
400
+
401
+ ## πŸ“‹ What This Means
402
+ - **Low Risk (0)**: Predicted to have ≀3 conflicts over social media
403
+ - **High Risk (1)**: Predicted to have >3 conflicts over social media
404
+ - **Confidence**: How certain the model is about this prediction
405
+ """
406
+
407
+ def regression_task(self, age, gender, academic_level, relationship_status,
408
+ country, platform, daily_usage, sleep_hours, mental_health,
409
+ conflicts, affects_academic):
410
+ """Regression task interface (now uses real ML pipeline)"""
411
+ input_data = {
412
+ 'Age': age,
413
+ 'Gender': gender,
414
+ 'Academic_Level': academic_level,
415
+ 'Relationship_Status': relationship_status,
416
+ 'Country': country,
417
+ 'Most_Used_Platform': platform,
418
+ 'Avg_Daily_Usage_Hours': daily_usage,
419
+ 'Sleep_Hours_Per_Night': sleep_hours,
420
+ 'Mental_Health_Score': mental_health,
421
+ 'Conflicts_Over_Social_Media': conflicts,
422
+ 'Affects_Academic_Performance': affects_academic
423
+ }
424
+ result = self.unified_service.predict_addicted_score(input_data)
425
+ if 'error' in result:
426
+ return f"❌ Error: {result['error']}"
427
 
428
+ # Create only the gauge chart
429
+ gauge_img = create_addiction_gauge_chart(result)
430
 
431
+ return f"""
432
+ # πŸ“Š Regression Task: Addiction Score Prediction
433
 
434
+ ## πŸ“Š Prediction Results
 
 
 
 
 
 
 
 
435
 
436
+ **Predicted Addiction Score:** {result['predicted_score']:.2f}
 
 
 
 
 
 
 
437
 
438
+ **Addiction Level:** {result['addiction_level']}
 
 
439
 
440
+ **Confidence:** {result['confidence']:.2f}
441
+
442
+ ## πŸ“ˆ Visual Addiction Score Analysis
443
+
444
+ ![Addiction Score Gauge]({gauge_img})
445
+
446
+ ## πŸ“‹ What This Means
447
+ - **Low Addiction (1-3)**: Minimal social media dependency
448
+ - **Moderate Addiction (3-7)**: Some dependency with room for improvement
449
+ - **High Addiction (7-10)**: Significant dependency requiring attention
450
+ - **Gauge Chart**: Intuitive visual representation of your addiction level
451
+ - **Confidence**: How certain the model is about this prediction
452
+ """
453
 
454
+ def clustering_task(self, age, gender, academic_level, relationship_status,
455
+ country, platform, daily_usage, sleep_hours, mental_health,
456
+ conflicts, addicted_score, affects_academic):
457
+ """Clustering task interface (now uses real ML pipeline)"""
458
+ input_data = {
459
+ 'Age': age,
460
+ 'Gender': gender,
461
+ 'Academic_Level': academic_level,
462
+ 'Relationship_Status': relationship_status,
463
+ 'Country': country,
464
+ 'Most_Used_Platform': platform,
465
+ 'Avg_Daily_Usage_Hours': daily_usage,
466
+ 'Sleep_Hours_Per_Night': sleep_hours,
467
+ 'Mental_Health_Score': mental_health,
468
+ 'Conflicts_Over_Social_Media': conflicts,
469
+ 'Addicted_Score': addicted_score,
470
+ 'Affects_Academic_Performance': affects_academic
471
+ }
472
+ result = self.unified_service.predict_cluster(input_data)
473
+ if 'error' in result:
474
+ return f"❌ Error: {result['error']}"
475
+
476
+ # Get real clustering assignments for all data
477
+ cluster_df = self.get_clustering_assignments()
478
+ # Get user's point and cluster
479
+ user_sleep = input_data.get('Sleep_Hours_Per_Night', None)
480
+ user_age = input_data.get('Age', None)
481
+ user_cluster = result.get('cluster_id', None)
482
+ cluster_labels_map = self.unified_service.cluster_labels if self.unified_service.cluster_labels else {0: 'Cluster 0', 1: 'Cluster 1', 2: 'Cluster 2'}
483
+ # Create the clustering charts using real data
484
+ charts_img = create_clustering_charts(result, cluster_df, user_sleep, user_age, user_cluster, cluster_labels_map)
485
+
486
+ return f"""
487
+ # 🎯 Clustering Task: Behavioral Pattern Analysis
488
+
489
+ ## πŸ“Š Prediction Results
490
+
491
+ **Cluster Label:** {result['cluster_label']}
492
+
493
+ **Risk Level:** {result['risk_level']}
494
+
495
+ **Recommendation:** {result['recommendation']}
496
+
497
+ **Confidence:** {result['confidence']:.2f}
498
+
499
+ ## πŸ“ˆ Visual Analysis
500
+
501
+ ![Cluster Analysis]({charts_img})
502
+
503
+ ## πŸ“‹ What This Means
504
+ - **Elbow Method**: Shows how the optimal number of clusters (k=3) was determined
505
+ - **Cluster Scatter Plot**: Displays how users are grouped based on behavioral patterns
506
+ - **Your Position**: Highlighted point shows where you fall in the cluster analysis
507
+ - **Risk Assessment**: Identifies your overall risk level based on cluster membership
508
+ - **Confidence**: How certain the model is about this classification
509
+ """
510
+
511
def create_interface():
    """Create the Gradio interface.

    Layout: a task-selection dropdown on the left; on the right, a markdown
    output area plus an initially hidden parameter form. Informational tasks
    render static text immediately; ML tasks reveal the form and run when the
    predict button is clicked. Returns the constructed Blocks app.
    """
    # NOTE(review): SocialMediaAnalyzer and its `.info` helper are defined
    # elsewhere in this module — confirm the class name matches.
    analyzer = SocialMediaAnalyzer()

    with gr.Blocks(title="Social Sphere - Social Media Addiction Analysis", theme=gr.themes.Soft(primary_hue="purple")) as app:
        gr.Markdown("# πŸ“± Social Sphere")
        gr.Markdown("### Interactive machine learning-powered platform for social media impact analysis")

        with gr.Row():
            # Left side - Main Menu
            with gr.Column(scale=1):
                gr.Markdown("## 🎯 Main Menu")
                task_choice = gr.Dropdown(
                    choices=[
                        "About App",
                        "Classification Task (Predict High/Low Conflict Risk)",
                        "Regression Task",
                        "Clustering Task",
                        "Disclaimer",
                        "Dataset Citation"
                    ],
                    label="Select Analysis Task",
                    value="About App"
                )

            # Right side - Content area
            with gr.Column(scale=3):
                # Starts on the "About App" text; handlers replace its value.
                output_area = gr.Markdown(value=analyzer.info.about_app(), label="Analysis Results")

                # Input form for ML tasks (initially hidden)
                input_container = gr.Column(visible=False)
                with input_container:
                    gr.Markdown("## πŸ“‹ Input Parameters")

                    with gr.Row():
                        age = gr.Slider(minimum=16, maximum=30, value=20, step=1, label="Age", scale=1)
                        gender = gr.Radio(choices=["Male", "Female"], value="Male", label="Gender", scale=1)

                    with gr.Row():
                        academic_level = gr.Dropdown(
                            choices=["High School", "Undergraduate", "Graduate"],
                            value="Undergraduate",
                            label="Academic Level",
                            scale=1
                        )
                        relationship_status = gr.Dropdown(
                            choices=["Single", "In Relationship", "Complicated"],
                            value="Single",
                            label="Relationship Status",
                            scale=1
                        )

                    with gr.Row():
                        country = gr.Dropdown(
                            choices=["USA", "UK", "Canada", "Australia", "Other"],
                            value="USA",
                            label="Country",
                            scale=1
                        )
                        platform = gr.Dropdown(
                            choices=["Instagram", "TikTok", "Facebook", "Twitter", "Snapchat", "YouTube"],
                            value="Instagram",
                            label="Most Used Platform",
                            scale=1
                        )

                    with gr.Row():
                        daily_usage = gr.Slider(minimum=0, maximum=24, value=4, step=0.5, label="Daily Usage (hours)", scale=1)
                        sleep_hours = gr.Slider(minimum=0, maximum=12, value=7, step=0.5, label="Sleep Hours", scale=1)

                    with gr.Row():
                        mental_health = gr.Slider(minimum=1, maximum=10, value=7, step=1, label="Mental Health Score (1-10)", scale=1)
                        conflicts = gr.Slider(minimum=0, maximum=5, value=1, step=1, label="Conflicts Over Social Media", visible=True, scale=1)

                    with gr.Row():
                        addicted_score = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Addiction Score (1-10)", scale=1)
                        affects_academic = gr.Radio(choices=["Yes", "No"], value="No", label="Affects Academic Performance", scale=1)

                    # Predict button
                    predict_btn = gr.Button("πŸš€ Run Prediction", variant="primary", size="lg")

        # Function to handle task selection (for non-ML tasks).
        # Returns (markdown text, form-visibility update): info pages hide the
        # form, ML tasks reveal it.
        def handle_task_selection(task):
            if task == "About App":
                return analyzer.info.about_app(), gr.update(visible=False)
            elif task == "Disclaimer":
                return analyzer.info.disclaimer(), gr.update(visible=False)
            elif task == "Dataset Citation":
                return analyzer.info.dataset_citation(), gr.update(visible=False)
            else:
                return "Select a task and click 'Run Prediction' to get results.", gr.update(visible=True)

        # Function to handle predictions — dispatches on the selected task.
        def handle_prediction(task, age, gender, academic_level, relationship_status,
                              country, platform, daily_usage, sleep_hours, mental_health,
                              conflicts, addicted_score, affects_academic):
            if task == "Classification Task (Predict High/Low Conflict Risk)":
                # Conflicts is the quantity being predicted here, so pass 0
                # instead of the (hidden) slider value.
                return analyzer.classification_task(age, gender, academic_level, relationship_status,
                                                    country, platform, daily_usage, sleep_hours, mental_health,
                                                    0, addicted_score, affects_academic)  # Set conflicts to 0 for prediction
            elif task == "Regression Task":
                return analyzer.regression_task(age, gender, academic_level, relationship_status,
                                                country, platform, daily_usage, sleep_hours, mental_health,
                                                conflicts, affects_academic)
            elif task == "Clustering Task":
                return analyzer.clustering_task(age, gender, academic_level, relationship_status,
                                                country, platform, daily_usage, sleep_hours, mental_health,
                                                conflicts, addicted_score, affects_academic)
            else:
                return "Please select a prediction task (Classification, Regression, or Clustering)."

        # Function to control input visibility based on task: the conflicts
        # slider is hidden for classification (it is the predicted target).
        def update_input_visibility(task):
            if task == "Classification Task (Predict High/Low Conflict Risk)":
                return gr.update(visible=False)  # Hide conflicts input for classification
            else:
                return gr.update(visible=True)  # Show conflicts input for other tasks

        # Connect the interface
        task_choice.change(
            fn=handle_task_selection,
            inputs=[task_choice],
            outputs=[output_area, input_container]
        )

        # Control conflicts input visibility
        task_choice.change(
            fn=update_input_visibility,
            inputs=[task_choice],
            outputs=[conflicts]
        )

        # Connect predict button
        predict_btn.click(
            fn=handle_prediction,
            inputs=[task_choice, age, gender, academic_level, relationship_status,
                    country, platform, daily_usage, sleep_hours, mental_health,
                    conflicts, addicted_score, affects_academic],
            outputs=output_area
        )

        gr.Markdown("---")
        gr.Markdown("### πŸ”§ Technical Information")
        gr.Markdown("- **Framework**: Gradio")
        gr.Markdown("- **Backend**: Python with scikit-learn")
        gr.Markdown("- **ML Pipeline**: MLflow integration")
        gr.Markdown("- **Data**: Students Social Media Addiction Dataset")

    return app
if __name__ == "__main__":
    # Create and launch the app
    app = create_interface()

    # Launch with automatic port finding: bind to port 0 so the OS assigns a
    # free ephemeral port, then reuse that number for the Gradio server.
    import socket
    def find_free_port():
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('', 0))
            s.listen(1)
            port = s.getsockname()[1]
            return port

    # NOTE(review): TOCTOU race — the probe socket is closed before Gradio
    # binds, so another process could claim the port in between. Letting
    # Gradio pick its own port would avoid this.
    port = find_free_port()
    print(f"πŸš€ Launching app on port {port}")
    print(f"πŸ“± Access the app at: http://localhost:{port}")

    # Bind on all interfaces, no public share link, surface errors in the UI.
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True,
        quiet=False
    )