Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- models/kmeans_model_main.pkl +3 -0
- models/label_encoders.pkl +3 -0
- models/neural_network_model_final.pth +3 -0
- models/scaler.pkl +3 -0
- new_app.py +105 -0
- requirements.txt +9 -0
- utilfuncs.py +101 -0
models/kmeans_model_main.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9d806543c21df456ecb027d5a6612e3dd4df6b83dad1c0162107cea9af2f138
|
| 3 |
+
size 41959
|
models/label_encoders.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5afe5a524720f0f772ccd6559cd273375aea6bfa60bf04d1e09d2de4aead799
|
| 3 |
+
size 1173
|
models/neural_network_model_final.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26f05147f6dccecba4f3371b0e27780dca640ff22881b6fe0e6ff504c6f0a61f
|
| 3 |
+
size 39757
|
models/scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:578d7df41f7dab4716e9a62ea48ed5bad6386fc90188a714b2a3697f790be7cc
|
| 3 |
+
size 1271
|
new_app.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from utilfuncs import (
|
| 4 |
+
load_kmeans_model,
|
| 5 |
+
load_churn_model,
|
| 6 |
+
encode_and_scale,
|
| 7 |
+
get_cluster,
|
| 8 |
+
get_churn_label,
|
| 9 |
+
load_encoder,
|
| 10 |
+
load_scaler,
|
| 11 |
+
create_tsne_plot
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
def app():
    """Build and launch the Gradio UI for churn prediction and CSV analysis.

    Loads the k-means model, the churn network, the label encoders and the
    scaler from the ``models/`` directory, wires two tabs (single-customer
    prediction and bulk CSV analysis) and starts the Gradio server.
    """
    kmeans_model = load_kmeans_model('models/kmeans_model_main.pkl')
    churn_model = load_churn_model('models/neural_network_model_final.pth')
    encoders = load_encoder('models/label_encoders.pkl')
    scaler = load_scaler('models/scaler.pkl')

    def predict(age, gender, location, subscription_length, monthly_bill, avg_internet_usage, num_tickets, avg_talktime, social_class, subscription_type, base_charge):
        """Return (cluster id as text, 'Yes'/'No' churn label) for one customer."""
        data = {
            'Age': [age],
            'Gender': [gender],
            'Location': [location],
            'Subscription_Length_Months': [subscription_length],
            'Monthly_Bill': [monthly_bill],
            'Average_Internet_Usage': [avg_internet_usage],
            'No_of_Tickets': [num_tickets],
            'Average_Talktime_Usage': [avg_talktime],
            'Social_Class': [social_class],
            'Subscription_Type': [subscription_type],
            'Base_Charge': [base_charge],
        }
        df = pd.DataFrame(data)
        scaled_df = encode_and_scale(df, encoders, scaler)
        group = get_cluster(scaled_df, kmeans_model)
        # The output component is a text box, so hand it a plain string
        # rather than whatever scalar type the clustering model returns.
        predicted_group_text = str(group[0])
        churn_label = get_churn_label(scaled_df, churn_model)
        predicted_churn_text = 'Yes' if churn_label[0][0] == 1 else 'No'
        return predicted_group_text, predicted_churn_text

    def process_csv(file):
        """Return (t-SNE plot image, churn percentage text) for an uploaded CSV.

        On invalid input the plot is ``None`` and the text carries the error,
        so the handler always returns one value per output component.
        """
        if file is None:
            return None, "Error: Please upload a CSV file first."

        df = pd.read_csv(file)

        required_columns = ['CustomerID', 'Name', 'Age', 'Gender', 'Location',
                            'Subscription_Length_Months', 'Monthly_Bill', 'Average_Internet_Usage',
                            'No_of_Tickets', 'Average_Talktime_Usage', 'Social_Class',
                            'Subscription_Type', 'Base_Charge']

        if not all(col in df.columns for col in required_columns):
            return None, "Error: The CSV file must have the following columns: " + ", ".join(required_columns)

        # Keep only the model features, in the same order predict() uses, so
        # extra or re-ordered CSV columns cannot change what the models see.
        feature_columns = [col for col in required_columns if col not in ('CustomerID', 'Name')]
        df = df[feature_columns]

        scaled_df = encode_and_scale(df, encoders, scaler)
        groups = get_cluster(scaled_df, kmeans_model)
        churn_labels = get_churn_label(scaled_df, churn_model)

        tsne_plot = create_tsne_plot(scaled_df, groups)

        churn_percentage = (churn_labels[:, 0] == 1).float().mean().item() * 100

        return tsne_plot, f"Predicted Churn Percentage: {churn_percentage:.2f}%"

    # Named `demo` so the Blocks object does not shadow this function's name.
    with gr.Blocks() as demo:
        with gr.Tab("Churn Prediction"):
            with gr.Group():
                gr.Markdown("## Churn Prediction App")
                age = gr.Slider(label="Age", minimum=18, maximum=100, step=1, value=30)
                gender = gr.Dropdown(label="Gender", choices=["Male", "Female"], value="Male")
                location = gr.Dropdown(label="Location", choices=["Urban", "Rural", "Suburban"], value="Urban")
                subscription_length = gr.Slider(label="Subscription Length (Months)", minimum=1, maximum=60, step=1, value=12)
                monthly_bill = gr.Slider(label="Monthly Bill", minimum=10, maximum=1000, step=1, value=50)
                avg_internet_usage = gr.Slider(label="Average Internet Usage", minimum=1, maximum=200, step=1, value=50)
                num_tickets = gr.Slider(label="Number of Tickets", minimum=0, maximum=20, step=1, value=2)
                avg_talktime = gr.Slider(label="Average Talktime Usage", minimum=1, maximum=200, step=1, value=50)
                social_class = gr.Slider(label="Social Class", minimum=1, maximum=3, step=1, value=1)
                subscription_type = gr.Dropdown(label="Subscription Type", choices=["A", "B", "C", "D", "E"], value="B")
                base_charge = gr.Slider(label="Base Charge", minimum=10, maximum=500, step=1, value=30)

            with gr.Group():
                gr.Markdown("## Results")
                predicted_group = gr.Text(label="Predicted Group")
                predicted_churn = gr.Text(label="Predicted Churn")

            predict_button = gr.Button("Predict")
            predict_button.click(predict, inputs=[age, gender, location, subscription_length, monthly_bill, avg_internet_usage, num_tickets, avg_talktime, social_class, subscription_type, base_charge], outputs=[predicted_group, predicted_churn])

        with gr.Tab("Upload CSV"):
            with gr.Group():
                gr.Markdown("## Upload CSV for Analysis")
                csv_file = gr.File(label="Upload CSV")
                process_button = gr.Button("Process CSV")

            with gr.Group():
                gr.Markdown("## Results")
                tsne_plot = gr.Image(label="t-SNE Plot")
                churn_percentage = gr.Text(label="Predicted Churn Percentage")

            process_button.click(process_csv, inputs=[csv_file], outputs=[tsne_plot, churn_percentage])

    demo.launch()


if __name__ == "__main__":
    app()
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
joblib
|
| 3 |
+
pandas
|
| 4 |
+
numpy
|
| 5 |
+
scikit-learn
|
| 6 |
+
matplotlib
|
| 7 |
+
seaborn
|
| 8 |
+
Pillow
|
| 9 |
+
gradio
|
utilfuncs.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import joblib
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from sklearn.manifold import TSNE
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import seaborn as sns
|
| 8 |
+
import io
|
| 9 |
+
import numpy as np
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
class FixedNeuronsNetwork(nn.Module):
    """Feed-forward network whose hidden layers all share one width.

    The stack is ``Linear -> activation`` repeated ``num_hidden_layers`` times
    (at least once), followed by a ``Linear`` projection to ``num_classes``.
    ``final_activation`` is applied to that projection in ``forward``.

    Args:
        num_features: Size of each input sample.
        num_classes: Size of each output sample.
        num_hidden_layers: Number of hidden ``Linear`` layers.
        hidden_neurons: Width of every hidden layer.
        layer_activation: Module class instantiated after each hidden layer.
        final_activation: Module class instantiated for the output.
    """

    def __init__(self, num_features, num_classes, num_hidden_layers, hidden_neurons, layer_activation=nn.ReLU, final_activation=nn.Sigmoid):
        super().__init__()

        # Input width of each hidden block: the first consumes the raw
        # features, every following one consumes the previous hidden output.
        block_inputs = [num_features] + [hidden_neurons] * (num_hidden_layers - 1)

        stack = []
        for in_width in block_inputs:
            stack.append(nn.Linear(in_width, hidden_neurons))
            stack.append(layer_activation())
        stack.append(nn.Linear(hidden_neurons, num_classes))

        self.final_activation = final_activation()
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and the final activation."""
        return self.final_activation(self.network(x))
|
| 35 |
+
|
| 36 |
+
def load_kmeans_model(model_path):
    """Load the fitted k-means model stored at ``model_path``.

    Args:
        model_path: Path to the joblib file holding the clustering model.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    # Honor the caller's path instead of a hard-coded file name.
    # NOTE: joblib files are pickles; only load files you trust.
    return joblib.load(model_path)
|
| 39 |
+
|
| 40 |
+
def load_churn_model(model_path):
    """Build the churn network on CPU and load its weights from ``model_path``.

    Args:
        model_path: Path to a state dict saved for ``FixedNeuronsNetwork``
            with 11 input features, 1 output, 3 hidden layers of 64 neurons.

    Returns:
        The ``FixedNeuronsNetwork`` instance with the loaded weights.
    """
    model = FixedNeuronsNetwork(num_features=11, num_classes=1, num_hidden_layers=3, hidden_neurons=64).to('cpu')
    # map_location keeps loading working on CPU-only hosts even when the
    # checkpoint was saved from a GPU.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    return model
|
| 45 |
+
|
| 46 |
+
def load_encoder(model_path):
    """Return the label encoders deserialized from the joblib file at ``model_path``."""
    return joblib.load(model_path)
|
| 49 |
+
|
| 50 |
+
def load_scaler(model_path):
    """Return the scaler deserialized from the joblib file at ``model_path``."""
    return joblib.load(model_path)
|
| 53 |
+
|
| 54 |
+
def encode_and_scale(new_sample_df, encoders, scaler):
    """Return a scaled and label-encoded copy of ``new_sample_df``.

    Args:
        new_sample_df: DataFrame containing the numerical columns listed
            below plus every column named in ``encoders``.
        encoders: Mapping of column name to an object with ``transform``.
        scaler: Object whose ``transform`` accepts the numerical columns.

    Returns:
        A new DataFrame with the same columns; the input is left untouched.
    """
    numerical_cols = ['Age', 'Subscription_Length_Months', 'Monthly_Bill', 'Average_Internet_Usage', 'No_of_Tickets', 'Average_Talktime_Usage', 'Social_Class', 'Base_Charge']

    # Work on a copy so the caller's frame keeps its raw values.
    prepared = new_sample_df.copy()
    prepared[numerical_cols] = scaler.transform(prepared[numerical_cols])

    for col, le in encoders.items():
        prepared[col] = le.transform(prepared[col])

    return prepared
|
| 63 |
+
|
| 64 |
+
def get_cluster(sample, kmeans_loaded):
    """Return ``kmeans_loaded.predict(sample)``, the cluster assignment(s) for ``sample``."""
    return kmeans_loaded.predict(sample)
|
| 68 |
+
|
| 69 |
+
def get_churn_label(sample, model):
    """Return hard 0/1 churn labels for ``sample``.

    ``sample.values`` is converted to a float32 tensor, passed through
    ``model`` in eval mode without gradient tracking, and thresholded
    at 0.5 (strictly greater maps to 1.0).
    """
    features = torch.tensor(sample.values, dtype=torch.float32)
    model.eval()
    with torch.no_grad():
        scores = model(features)
    return (scores > 0.5).float()
|
| 77 |
+
|
| 78 |
+
def create_tsne_plot(df, clusters):
    """Render a 2-D t-SNE scatter plot of ``df`` colored by ``clusters``.

    Args:
        df: Feature matrix (one row per sample) to embed with t-SNE.
        clusters: One cluster label per row of ``df``.

    Returns:
        The rendered PNG as a NumPy image array.

    Raises:
        ValueError: Propagated from scikit-learn when ``df`` has too few
            rows to embed.
    """
    # scikit-learn requires perplexity < n_samples. Keep its default of 30
    # for large inputs and shrink it for small uploads instead of failing.
    n_samples = len(df)
    perplexity = min(30.0, max(float(n_samples - 1), 1.0))

    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(df)

    df_tsne = pd.DataFrame(tsne_results, columns=['TSNE1', 'TSNE2'])
    df_tsne['Cluster'] = clusters

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.scatterplot(
            x='TSNE1', y='TSNE2',
            hue='Cluster',
            palette=sns.color_palette('hsv', len(set(clusters))),
            data=df_tsne,
            legend='full',
            ax=ax,
        )
        ax.set_title('t-SNE Visualization of K Means Clustering')

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak one figure.
        plt.close(fig)

    buf.seek(0)
    plot_image = np.asarray(Image.open(buf))

    return plot_image
|