# -*- coding: utf-8 -*-
"""ml_code.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1tETflt1JmWJudI-SDbtBiVIqjgCnaSU8

Import Packages :
"""

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.preprocessing import StandardScaler
import joblib

"""Dataset Load :"""

# Read the dataset from the working directory.
df = pd.read_csv("data.csv")

# Remove leading/trailing whitespace from every header so the question text
# below can be matched exactly.
df.columns = df.columns.str.strip()

# Long question headers -> short column names used by the rest of the script.
question_to_column = {
    '2. How interested are you in the event topic?': 'interest',
    '3. How close are you to the event location?': 'proximity',
    '4. How many similar events have you attended in the past year?': 'past_attendance',
    '5. How much free time do you have during the event timing?': 'free_time',
    '6. Are you willing to attend this event?': 'willingness',
}
df = df.rename(columns=question_to_column)

# Notebook-style preview of the first rows; the result is discarded when the
# file runs as a plain script.
df.head()

"""Dataset Preprocess :"""

# Rescale the ratings from the 1–2 scale to 0–1: (value - low) / (high - low).
rating_columns = ['interest', 'proximity', 'free_time']
rating_low, rating_high = 1, 2
df[rating_columns] = (df[rating_columns] - rating_low) / (rating_high - rating_low)

# Encode the answer as 1 ('Yes') / 0 ('No'); any value that is not a key of
# the mapping (including a missing answer) becomes NaN.
answer_codes = {'Yes': 1, 'No': 0}
df['willingness'] = df['willingness'].map(answer_codes)

# Label vector and feature matrix as NumPy arrays.
y = df['willingness'].values
feature_columns = ['interest', 'proximity', 'past_attendance', 'free_time']
X = df[feature_columns].values

"""Dataset into unlabeled :"""

# Rows whose label is NaN are marked with -1 (the "unlabeled" marker used for
# self-training); every other label is kept as is.
y_semi = np.where(np.isnan(y), -1, y)

# Standardize the features: learn the per-column statistics from X, then
# apply them to the same matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

"""Train the model :"""

# Logistic regression with default settings is the estimator being wrapped.
base_model = LogisticRegression()

# Self-training configuration: on each of at most 10 iterations, the 3
# most confidently predicted unlabeled samples receive pseudo-labels.
self_training_options = {
    'criterion': 'k_best',
    'k_best': 3,
    'max_iter': 10,
}
self_training_model = SelfTrainingClassifier(base_model, **self_training_options)

# Fit on the partially labeled data (rows with label -1 count as unlabeled).
self_training_model.fit(X_scaled, y_semi)

print("✅ Self-training complete!")

"""Prediction :"""

# Run the fitted model over every row and keep the result next to the
# source data in a new column.
predicted = self_training_model.predict(X_scaled)
df['predicted_attendance'] = predicted

# Persist the fitted model and the fitted scaler, then the annotated table
# (without the index column).
joblib.dump(self_training_model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")
df.to_csv(path_or_buf="predictions.csv", index=False)

print("✅ Model and predictions saved as model.pkl and predictions.csv")

"""Test the model:"""

# Read the artifacts back from disk so this check runs against the saved files.
model = joblib.load("model.pkl")
scaler = joblib.load("scaler.pkl")

# One sample row to try out; edit the values as needed.
# Format: [interest (0-1), proximity (0-1), past_attendance (integer), free_time (0-1)]
test_input = np.array([[0.0, 0.0, 0, 0.0]])

# Apply the saved scaler to the sample, then take the single predicted label.
test_scaled = scaler.transform(test_input)
prediction = model.predict(test_scaled)[0]

# Report the outcome; anything other than class 1 counts as "not attending".
if prediction != 1:
    print("❌ The person is NOT likely to attend the event.")
else:
    print("✅ The person is likely to ATTEND the event.")

"""Accuracy, Precision, Recall , F1(for classification):"""

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Keep only the rows that carry a real label (-1 marks unlabeled rows).
# NOTE: these rows are a subset of the data the model was fitted on, so the
# scores below describe the fit on training samples, not held-out performance.
labeled_mask = y_semi != -1
X_labeled = X_scaled[labeled_mask]
y_true = y_semi[labeled_mask]
y_pred = self_training_model.predict(X_labeled)

# Classification metrics comparing the true labels with the predictions.
acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred)
rec = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Display results (the header has no placeholders, so it is a plain string).
print("📊 Model Evaluation on Labeled Data:")
print(f"✅ Accuracy:  {acc:.4f}")
print(f"✅ Precision: {prec:.4f}")
print(f"✅ Recall:    {rec:.4f}")
print(f"✅ F1 Score:  {f1:.4f}")

import gradio as gr
import joblib
import numpy as np

# Read the persisted model and scaler back from disk; predict_attendance
# below uses these two module-level objects.
model = joblib.load(filename="model.pkl")
scaler = joblib.load(filename="scaler.pkl")

# Prediction function
def predict_attendance(interest_pct, proximity_pct, past_attendance, free_time_pct):
    """Predict attendance for one set of UI inputs and return a display string.

    The three percentage arguments (0 to 100) are divided by 100 to obtain
    0–1 values; ``past_attendance`` is passed through unchanged. The four
    values form a single row that is transformed with the module-level
    ``scaler`` and classified with the module-level ``model``.

    Returns:
        "✅ Will Attend" when the predicted class equals 1, otherwise
        "❌ Will Not Attend".
    """
    # Percent (0–100) -> fraction (0–1) for the three rated inputs.
    percentages = (interest_pct, proximity_pct, free_time_pct)
    interest, proximity, free_time = (pct / 100 for pct in percentages)

    # Single-row feature matrix in the column order:
    # interest, proximity, past_attendance, free_time.
    features = np.array([[interest, proximity, past_attendance, free_time]])

    predicted_label = model.predict(scaler.transform(features))[0]
    if predicted_label == 1:
        return "✅ Will Attend"
    return "❌ Will Not Attend"

# Gradio UI: four sliders, in the same order as predict_attendance's
# parameters, with the prediction shown as text.
input_sliders = [
    gr.Slider(minimum=0, maximum=100, step=10, label="Interest in Topic (%)"),
    gr.Slider(minimum=0, maximum=100, step=10, label="Proximity to Event (%)"),
    gr.Slider(minimum=0, maximum=10, step=1, label="Similar Events Attended"),
    gr.Slider(minimum=0, maximum=100, step=10, label="Free Time Availability (%)"),
]

iface = gr.Interface(
    fn=predict_attendance,
    inputs=input_sliders,
    outputs="text",
    title="🎯 Event Attendance Predictor",
    description=(
        "Enter your info to find out if you're likely to attend this event. "
        "Sliders use percent to indicate strength or availability."
    ),
)

# Start the web interface.
iface.launch()