File size: 4,996 Bytes
5049c1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# -*- coding: utf-8 -*-
"""ml_code.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1tETflt1JmWJudI-SDbtBiVIqjgCnaSU8

Import Packages :
"""

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.preprocessing import StandardScaler
import joblib

"""Dataset Load :"""

# Read the raw responses; "data.csv" is resolved relative to the working directory.
df = pd.read_csv("data.csv")

# Trim stray whitespace from the headers first, because the rename below
# only matches header text exactly.
df.columns = df.columns.str.strip()

# Long question-style headers -> short feature names used by the rest of the script.
# Headers that are not listed here keep their original names.
short_column_names = {
    '2. How interested are you in the event topic?': 'interest',
    '3. How close are you to the event location?': 'proximity',
    '4. How many similar events have you attended in the past year?': 'past_attendance',
    '5. How much free time do you have during the event timing?': 'free_time',
    '6. Are you willing to attend this event?': 'willingness',
}
df = df.rename(columns=short_column_names)

# Notebook leftover: as a bare expression this displays nothing when run as a script.
df.head()

"""Dataset Preprocess :"""

# Rescale the three rating columns with (x - 1) / (2 - 1), i.e. 1 -> 0 and 2 -> 1.
# NOTE(review): this only lands in 0–1 if the raw answers really span 1–2 — confirm
# against the questionnaire scale.
rating_columns = ['interest', 'proximity', 'free_time']
df[rating_columns] = (df[rating_columns] - 1) / (2 - 1)

# Encode the target: 'Yes' -> 1, 'No' -> 0. Any other value (including a
# missing answer) has no entry in the mapping and therefore becomes NaN.
willingness_codes = {'Yes': 1, 'No': 0}
df['willingness'] = df['willingness'].map(willingness_codes)

# Feature matrix (column order matters for later inference) and target vector.
feature_columns = ['interest', 'proximity', 'past_attendance', 'free_time']
X = df[feature_columns].values
y = df['willingness'].values

"""Mark unlabeled samples and scale features :"""

# Rows whose target is NaN get the label -1, the marker used below for
# "unlabeled" samples; every other label is kept as is.
y_semi = np.where(np.isnan(y), -1, y)

# Standardize the features. The fitted scaler is kept because the same
# transformation must be applied to inputs at prediction time.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

"""Train the model :"""

# Supervised estimator that the self-training wrapper refits on each round.
base_model = LogisticRegression()

# Self-training: with criterion='k_best', each iteration pseudo-labels the
# k_best (here 3) most confident unlabeled samples; max_iter caps the rounds at 10.
# fit() returns the estimator itself, so construction and training are chained.
self_training_model = SelfTrainingClassifier(
    base_model,
    criterion='k_best',
    k_best=3,
    max_iter=10,
).fit(X_scaled, y_semi)

print("✅ Self-training complete!")

"""Prediction :"""

# Score every row (labeled and unlabeled alike) and keep the result next to the inputs.
predicted = self_training_model.predict(X_scaled)
df['predicted_attendance'] = predicted

# Persist the trained model together with its scaler: both are needed to
# reproduce predictions later.
for artifact, artifact_path in (
    (self_training_model, "model.pkl"),
    (scaler, "scaler.pkl"),
):
    joblib.dump(artifact, artifact_path)

# Write the augmented table without the pandas index column.
df.to_csv("predictions.csv", index=False)

print("✅ Model and predictions saved as model.pkl and predictions.csv")

"""Test the model:"""

# Round-trip check: read back the artifacts that were just written to disk.
model = joblib.load("model.pkl")
scaler = joblib.load("scaler.pkl")

# 👇 Define your test input (edit these values to try other cases)
# Format: [interest (0-1), proximity (0-1), past_attendance (integer), free_time (0-1)]
test_input = np.array([[0.0, 0.0, 0, 0.0]])

# Apply the training-time scaling, then take the single predicted label.
test_scaled = scaler.transform(test_input)
prediction = model.predict(test_scaled)[0]

# Report the outcome in plain language.
print(
    "✅ The person is likely to ATTEND the event."
    if prediction == 1
    else "❌ The person is NOT likely to attend the event."
)

"""Accuracy, Precision, Recall, F1 (for classification):"""

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)

# Keep only the rows that carried a real label (everything except the -1 marker).
# NOTE: these same rows were part of the training data, so the scores below
# are in-sample and not an estimate of performance on unseen data.
labeled_mask = np.not_equal(y_semi, -1)
X_labeled, y_true = X_scaled[labeled_mask], y_semi[labeled_mask]
y_pred = self_training_model.predict(X_labeled)

# Compute the four classification metrics on the same (truth, prediction) pair.
acc, prec, rec, f1 = (
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report, rounded to four decimals.
print("📊 Model Evaluation on Labeled Data:")
print(f"✅ Accuracy:  {acc:.4f}")
print(f"✅ Precision: {prec:.4f}")
print(f"✅ Recall:    {rec:.4f}")
print(f"✅ F1 Score:  {f1:.4f}")

import gradio as gr
import joblib
import numpy as np

# Load the persisted classifier and scaler used by the web UI below.
# NOTE: joblib.load unpickles the files, so only load artifacts this project wrote itself.
model, scaler = map(joblib.load, ("model.pkl", "scaler.pkl"))

# Prediction function
def predict_attendance(interest_pct, proximity_pct, past_attendance, free_time_pct):
    """Return a human-readable attendance verdict for one set of slider values.

    Args:
        interest_pct: Interest in the topic as a percentage; divided by 100.
        proximity_pct: Proximity to the event as a percentage; divided by 100.
        past_attendance: Number of similar events attended; passed through unchanged.
        free_time_pct: Free-time availability as a percentage; divided by 100.

    Returns:
        "✅ Will Attend" when the model predicts class 1, otherwise
        "❌ Will Not Attend".

    Relies on the module-level ``model`` and ``scaler`` loaded from disk.
    """
    # Single-row feature matrix, in the column order the scaler was fitted with:
    # interest, proximity, past_attendance, free_time.
    features = np.array([[
        interest_pct / 100,
        proximity_pct / 100,
        past_attendance,
        free_time_pct / 100,
    ]])

    # Scale exactly like the training data, then read the one predicted label.
    label = model.predict(scaler.transform(features))[0]

    if label == 1:
        return "✅ Will Attend"
    return "❌ Will Not Attend"

# Gradio UI: one slider per argument of predict_attendance, in the same order.
attendance_inputs = [
    gr.Slider(minimum=0, maximum=100, step=10, label="Interest in Topic (%)"),
    gr.Slider(minimum=0, maximum=100, step=10, label="Proximity to Event (%)"),
    gr.Slider(minimum=0, maximum=10, step=1, label="Similar Events Attended"),
    gr.Slider(minimum=0, maximum=100, step=10, label="Free Time Availability (%)"),
]

iface = gr.Interface(
    fn=predict_attendance,
    inputs=attendance_inputs,
    outputs="text",
    title="🎯 Event Attendance Predictor",
    description="Enter your info to find out if you're likely to attend this event. Sliders use percent to indicate strength or availability.",
)

# Start the web app; this runs as soon as the script is executed.
iface.launch()