Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """ml_code.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1tETflt1JmWJudI-SDbtBiVIqjgCnaSU8 | |
| Import Packages : | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.semi_supervised import SelfTrainingClassifier | |
| from sklearn.preprocessing import StandardScaler | |
| import joblib | |
| """Dataset Load :""" | |
# Read the raw responses from data.csv (path is relative to the working directory).
df = pd.read_csv("data.csv")

# Strip surrounding whitespace from the headers so they match the exact
# question strings used as keys in the rename below.
df.columns = df.columns.str.strip()

# Replace the long question-style headers with short feature names.
df = df.rename(
    mapper={
        '2. How interested are you in the event topic?': 'interest',
        '3. How close are you to the event location?': 'proximity',
        '4. How many similar events have you attended in the past year?': 'past_attendance',
        '5. How much free time do you have during the event timing?': 'free_time',
        '6. Are you willing to attend this event?': 'willingness',
    },
    axis="columns",
)

# Notebook-style preview of the first rows; the result is discarded when
# this file runs as a plain script.
df.head()
| """Dataset Preprocess :""" | |
# Rescale the three rating columns with (x - 1) / (2 - 1), i.e. shift a
# 1–2 scale onto 0–1. past_attendance is left as a raw count.
df[['interest', 'proximity', 'free_time']] = (
    df[['interest', 'proximity', 'free_time']] - 1
) / (2 - 1)

# Encode the answer as 1 ('Yes') / 0 ('No'); any other value, including a
# missing answer, becomes NaN because it has no entry in the mapping.
df['willingness'] = df['willingness'].map({
    'Yes': 1,
    'No': 0,
})

# Feature matrix (column order matters: it is the order the scaler and the
# model are fitted on) and the label vector.
X = df[['interest', 'proximity', 'past_attendance', 'free_time']].to_numpy()
y = df['willingness'].values
| """Dataset into unlabeled :""" | |
# Mark unlabeled samples with -1: every NaN label is swapped for -1,
# all other labels are kept unchanged.
y_semi = np.where(np.isnan(y), -1, y)

# Standardize the features; the fitted scaler is kept so the same
# transformation can be applied to new inputs later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
| """Train the model :""" | |
# Supervised estimator that the self-training wrapper refits each round.
base_model = LogisticRegression()

# Wrap the base estimator for self-training and fit it on the partially
# labeled data (labels of -1 are the unlabeled rows). With the 'k_best'
# criterion and k_best=3 the wrapper adds the 3 most confident
# pseudo-labels per iteration, for at most max_iter=10 iterations.
self_training_model = SelfTrainingClassifier(
    base_model,
    criterion='k_best',
    k_best=3,
    max_iter=10,
).fit(X_scaled, y_semi)

print("✅ Self-training complete!")
| """Prediction :""" | |
# Score every row (labeled and unlabeled alike) with the fitted model and
# store the result next to the original columns.
predicted = self_training_model.predict(X_scaled)
df['predicted_attendance'] = predicted

# Persist the predictions together with the two fitted artifacts; the
# scaler is saved as well because new inputs must be scaled with it before
# they are passed to the model.
df.to_csv("predictions.csv", index=False)
joblib.dump(scaler, "scaler.pkl")
joblib.dump(self_training_model, "model.pkl")

print("✅ Model and predictions saved as model.pkl and predictions.csv")
| """Test the model:""" | |
# Reload the artifacts written above to check that they round-trip.
scaler = joblib.load("scaler.pkl")
model = joblib.load("model.pkl")

# 👇 Sample to classify; edit these values to try other inputs.
# Layout: [interest (0-1), proximity (0-1), past_attendance (integer), free_time (0-1)]
test_input = np.array([[0.0, 0.0, 0, 0.0]])

# Apply the training-time scaling, then take the single predicted label.
test_scaled = scaler.transform(test_input)
prediction = model.predict(test_scaled)[0]

# Report the outcome in plain language.
print(
    "✅ The person is likely to ATTEND the event."
    if prediction == 1
    else "❌ The person is NOT likely to attend the event."
)
| """Accuracy, Precision, Recall , F1(for classification):""" | |
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)

# Keep only the rows that carry a real label (anything other than -1).
# NOTE: these are the same labeled rows the model was fitted on, so the
# scores below describe fit to the training labels, not held-out data.
labeled_mask = y_semi != -1
X_labeled = X_scaled[labeled_mask]
y_true = y_semi[labeled_mask]
y_pred = self_training_model.predict(X_labeled)

# Classification metrics on the labeled subset.
acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred)
rec = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Print a header followed by one line per metric, four decimals each.
print("📊 Model Evaluation on Labeled Data:")
print("\n".join(
    f"✅ {label}: {score:.4f}"
    for label, score in (
        ("Accuracy", acc),
        ("Precision", prec),
        ("Recall", rec),
        ("F1 Score", f1),
    )
))
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
# Load the fitted scaler and classifier that predict_attendance relies on.
scaler = joblib.load("scaler.pkl")
model = joblib.load("model.pkl")
def predict_attendance(interest_pct, proximity_pct, past_attendance, free_time_pct):
    """Classify one person as attending or not attending.

    The three percentage inputs are divided by 100 to give 0-1 fractions;
    ``past_attendance`` is passed through unchanged. The resulting row is
    scaled with the module-level ``scaler`` and classified with the
    module-level ``model``.

    Args:
        interest_pct: Interest in the topic, as a percentage.
        proximity_pct: Proximity to the event, as a percentage.
        past_attendance: Number of similar events attended.
        free_time_pct: Free-time availability, as a percentage.

    Returns:
        "✅ Will Attend" when the predicted label equals 1, otherwise
        "❌ Will Not Attend".
    """
    # One sample, columns in the order:
    # interest, proximity, past_attendance, free_time.
    features = np.array([[
        interest_pct / 100,
        proximity_pct / 100,
        past_attendance,
        free_time_pct / 100,
    ]])

    label = model.predict(scaler.transform(features))[0]
    if label == 1:
        return "✅ Will Attend"
    return "❌ Will Not Attend"
# Gradio front end: four sliders, supplied to predict_attendance in the
# same order as its parameters, and a plain-text result.
iface = gr.Interface(
    fn=predict_attendance,
    inputs=[
        gr.Slider(minimum=0, maximum=100, step=10, label="Interest in Topic (%)"),
        gr.Slider(minimum=0, maximum=100, step=10, label="Proximity to Event (%)"),
        gr.Slider(minimum=0, maximum=10, step=1, label="Similar Events Attended"),
        gr.Slider(minimum=0, maximum=100, step=10, label="Free Time Availability (%)"),
    ],
    outputs="text",
    title="🎯 Event Attendance Predictor",
    description="Enter your info to find out if you're likely to attend this event. Sliders use percent to indicate strength or availability.",
)

# Start the web app (runs as soon as this module is executed).
iface.launch()