# -*- coding: utf-8 -*-
# NOTE(review): stripped non-Python scrape residue that preceded this file
# (Hugging Face Spaces "Runtime error" banner, file-size line, commit hash,
# and a pasted line-number gutter) — it would break parsing if left in place.
"""ml_code.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1tETflt1JmWJudI-SDbtBiVIqjgCnaSU8
Import Packages :
"""
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.preprocessing import StandardScaler
import joblib
"""Dataset Load :"""
# Load dataset
df = pd.read_csv("data.csv")
# Clean column names
df.columns = df.columns.str.strip()
# Rename relevant columns
df = df.rename(columns={
'2. How interested are you in the event topic?': 'interest',
'3. How close are you to the event location?': 'proximity',
'4. How many similar events have you attended in the past year?': 'past_attendance',
'5. How much free time do you have during the event timing?': 'free_time',
'6. Are you willing to attend this event?': 'willingness'
})
df.head()
"""Dataset Preprocess :"""
# Normalize values from linear scale 1–2 → 0–1
df[['interest', 'proximity', 'free_time']] = df[['interest', 'proximity', 'free_time']].apply(lambda x: (x - 1) / (2 - 1))
# Convert willingness to numerical values
df['willingness'] = df['willingness'].map({'Yes': 1, 'No': 0})
# Features and labels
X = df[['interest', 'proximity', 'past_attendance', 'free_time']].values
y = df['willingness'].values
"""Dataset into unlabeled :"""
# Unlabeled samples = -1
y_semi = np.array([label if not np.isnan(label) else -1 for label in y])
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
"""Train the model :"""
# Base model
base_model = LogisticRegression()
# Self-Training Wrapper
self_training_model = SelfTrainingClassifier(base_model, criterion='k_best', k_best=3, max_iter=10)
# Train on partially labeled data
self_training_model.fit(X_scaled, y_semi)
print("✅ Self-training complete!")
"""Prediction :"""
# Predict on all samples
predicted = self_training_model.predict(X_scaled)
# Add predictions to DataFrame
df['predicted_attendance'] = predicted
# Save model and predictions
joblib.dump(self_training_model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")
df.to_csv("predictions.csv", index=False)
print("✅ Model and predictions saved as model.pkl and predictions.csv")
"""Test the model:"""
# Load saved model and scaler
model = joblib.load("model.pkl")
scaler = joblib.load("scaler.pkl")
# 👇 Define your test input
# Format: [interest (0-1), proximity (0-1), past_attendance (integer), free_time (0-1)]
test_input = np.array([[0.0, 0.0, 0, 0.0]])
# You can change these values
# Scale input the same way training data was scaled
test_scaled = scaler.transform(test_input)
# Make prediction
prediction = model.predict(test_scaled)[0]
# Show result
if prediction == 1:
print("✅ The person is likely to ATTEND the event.")
else:
print("❌ The person is NOT likely to attend the event.")
"""Accuracy, Precision, Recall , F1(for classification):"""
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Filter only rows with actual labels (i.e., labeled data)
labeled_mask = y_semi != -1
X_labeled = X_scaled[labeled_mask]
y_true = y_semi[labeled_mask]
y_pred = self_training_model.predict(X_labeled)
# Classification Metrics
acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred)
rec = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
# Display results
print(f"📊 Model Evaluation on Labeled Data:")
print(f"✅ Accuracy: {acc:.4f}")
print(f"✅ Precision: {prec:.4f}")
print(f"✅ Recall: {rec:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
import gradio as gr
import joblib
import numpy as np

# Reload the persisted artifacts so this section can run standalone.
model = joblib.load("model.pkl")
scaler = joblib.load("scaler.pkl")


def predict_attendance(interest_pct, proximity_pct, past_attendance, free_time_pct):
    """Map percentage sliders onto the model's 0–1 feature scale and predict."""
    # Sliders report 0–100; training features were normalized to 0–1, so a
    # plain division by 100 puts the inputs on the same scale.
    row = np.array([[
        interest_pct / 100,
        proximity_pct / 100,
        past_attendance,
        free_time_pct / 100,
    ]])
    # Standardize exactly as the training data was, then classify.
    row_scaled = scaler.transform(row)
    verdict = model.predict(row_scaled)[0]
    return "✅ Will Attend" if verdict == 1 else "❌ Will Not Attend"


# Gradio UI wiring: four sliders in, one text verdict out.
iface = gr.Interface(
    fn=predict_attendance,
    inputs=[
        gr.Slider(0, 100, step=10, label="Interest in Topic (%)"),
        gr.Slider(0, 100, step=10, label="Proximity to Event (%)"),
        gr.Slider(0, 10, step=1, label="Similar Events Attended"),
        gr.Slider(0, 100, step=10, label="Free Time Availability (%)"),
    ],
    outputs="text",
    title="🎯 Event Attendance Predictor",
    description="Enter your info to find out if you're likely to attend this event. Sliders use percent to indicate strength or availability."
)
iface.launch()