PK03 committed on
Commit
106d593
·
verified ·
1 Parent(s): 3b43030

Upload 7 files

Browse files
models/kmeans_model_main.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9d806543c21df456ecb027d5a6612e3dd4df6b83dad1c0162107cea9af2f138
3
+ size 41959
models/label_encoders.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5afe5a524720f0f772ccd6559cd273375aea6bfa60bf04d1e09d2de4aead799
3
+ size 1173
models/neural_network_model_final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f05147f6dccecba4f3371b0e27780dca640ff22881b6fe0e6ff504c6f0a61f
3
+ size 39757
models/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578d7df41f7dab4716e9a62ea48ed5bad6386fc90188a714b2a3697f790be7cc
3
+ size 1271
new_app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from utilfuncs import (
4
+ load_kmeans_model,
5
+ load_churn_model,
6
+ encode_and_scale,
7
+ get_cluster,
8
+ get_churn_label,
9
+ load_encoder,
10
+ load_scaler,
11
+ create_tsne_plot
12
+ )
13
+
14
def app():
    """Build and launch the Gradio churn-prediction UI.

    Loads the k-means model, the churn network, the label encoders and the
    scaler from the ``models/`` directory, wires up two tabs (single-customer
    prediction and CSV batch analysis), and starts the Gradio server.
    """
    kmeans_model = load_kmeans_model('models/kmeans_model_main.pkl')
    churn_model = load_churn_model('models/neural_network_model_final.pth')
    encoders = load_encoder('models/label_encoders.pkl')
    scaler = load_scaler('models/scaler.pkl')

    # Feature columns in the same order that predict() builds them; the CSV
    # path selects exactly these so both paths feed the models identically.
    feature_columns = [
        'Age', 'Gender', 'Location',
        'Subscription_Length_Months', 'Monthly_Bill', 'Average_Internet_Usage',
        'No_of_Tickets', 'Average_Talktime_Usage', 'Social_Class',
        'Subscription_Type', 'Base_Charge',
    ]
    required_columns = ['CustomerID', 'Name'] + feature_columns

    def predict(age, gender, location, subscription_length, monthly_bill, avg_internet_usage, num_tickets, avg_talktime, social_class, subscription_type, base_charge):
        """Return (cluster id, 'Yes'/'No' churn text) for one customer."""
        data = {
            'Age': [age],
            'Gender': [gender],
            'Location': [location],
            'Subscription_Length_Months': [subscription_length],
            'Monthly_Bill': [monthly_bill],
            'Average_Internet_Usage': [avg_internet_usage],
            'No_of_Tickets': [num_tickets],
            'Average_Talktime_Usage': [avg_talktime],
            'Social_Class': [social_class],
            'Subscription_Type': [subscription_type],
            'Base_Charge': [base_charge],
        }
        df = pd.DataFrame(data)
        scaled_df = encode_and_scale(df, encoders, scaler)
        group = get_cluster(scaled_df, kmeans_model)
        predicted_group_text = group[0]
        churn_label = get_churn_label(scaled_df, churn_model)
        predicted_churn_text = 'Yes' if churn_label[0][0] == 1 else 'No'
        return predicted_group_text, predicted_churn_text

    def process_csv(file):
        """Return (t-SNE plot image, churn percentage text) for an uploaded CSV.

        Always returns two values because the click handler declares two
        output components; on error the image slot is ``None``.
        """
        if file is None:
            return None, "Error: Please upload a CSV file."

        df = pd.read_csv(file)

        if not all(col in df.columns for col in required_columns):
            return None, "Error: The CSV file must have the following columns: " + ", ".join(required_columns)

        # Drop identifiers and any extra columns, and fix the column order so
        # the scaler/encoders/models see the same layout as in predict().
        df = df[feature_columns].copy()

        scaled_df = encode_and_scale(df, encoders, scaler)
        groups = get_cluster(scaled_df, kmeans_model)
        churn_labels = get_churn_label(scaled_df, churn_model)

        tsne_plot = create_tsne_plot(scaled_df, groups)

        churn_percentage = (churn_labels[:, 0] == 1).float().mean() * 100

        return tsne_plot, f"Predicted Churn Percentage: {churn_percentage:.2f}%"

    # Named ``demo`` rather than ``app`` to avoid shadowing this function.
    with gr.Blocks() as demo:
        with gr.Tab("Churn Prediction"):
            with gr.Group():
                gr.Markdown("## Churn Prediction App")
                age = gr.Slider(label="Age", minimum=18, maximum=100, step=1, value=30)
                gender = gr.Dropdown(label="Gender", choices=["Male", "Female"], value="Male")
                location = gr.Dropdown(label="Location", choices=["Urban", "Rural", "Suburban"], value="Urban")
                subscription_length = gr.Slider(label="Subscription Length (Months)", minimum=1, maximum=60, step=1, value=12)
                monthly_bill = gr.Slider(label="Monthly Bill", minimum=10, maximum=1000, step=1, value=50)
                avg_internet_usage = gr.Slider(label="Average Internet Usage", minimum=1, maximum=200, step=1, value=50)
                num_tickets = gr.Slider(label="Number of Tickets", minimum=0, maximum=20, step=1, value=2)
                avg_talktime = gr.Slider(label="Average Talktime Usage", minimum=1, maximum=200, step=1, value=50)
                social_class = gr.Slider(label="Social Class", minimum=1, maximum=3, step=1, value=1)
                subscription_type = gr.Dropdown(label="Subscription Type", choices=["A", "B", "C", "D", "E"], value="B")
                base_charge = gr.Slider(label="Base Charge", minimum=10, maximum=500, step=1, value=30)

            with gr.Group():
                gr.Markdown("## Results")
                predicted_group = gr.Text(label="Predicted Group")
                predicted_churn = gr.Text(label="Predicted Churn")

            predict_button = gr.Button("Predict")
            predict_button.click(
                predict,
                inputs=[age, gender, location, subscription_length, monthly_bill, avg_internet_usage, num_tickets, avg_talktime, social_class, subscription_type, base_charge],
                outputs=[predicted_group, predicted_churn],
            )

        with gr.Tab("Upload CSV"):
            with gr.Group():
                gr.Markdown("## Upload CSV for Analysis")
                csv_file = gr.File(label="Upload CSV")
                process_button = gr.Button("Process CSV")

            with gr.Group():
                gr.Markdown("## Results")
                tsne_plot = gr.Image(label="t-SNE Plot")
                churn_percentage = gr.Text(label="Predicted Churn Percentage")

            process_button.click(process_csv, inputs=[csv_file], outputs=[tsne_plot, churn_percentage])

    demo.launch()


if __name__ == "__main__":
    app()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ joblib
3
+ pandas
4
+ numpy
5
+ scikit-learn
6
+ matplotlib
7
+ seaborn
8
+ Pillow
9
+ gradio
utilfuncs.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import joblib
4
+ import pandas as pd
5
+ from sklearn.manifold import TSNE
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import io
9
+ import numpy as np
10
+ from PIL import Image
11
+
12
class FixedNeuronsNetwork(nn.Module):
    """Fully connected network whose hidden layers all share one width.

    The stack is ``Linear -> activation`` for the input layer, repeated
    ``num_hidden_layers - 1`` more times at ``hidden_neurons`` width, then a
    final ``Linear`` to ``num_classes``. ``final_activation`` is applied to
    the stack's output in ``forward``.
    """

    def __init__(self, num_features, num_classes, num_hidden_layers, hidden_neurons, layer_activation=nn.ReLU, final_activation=nn.Sigmoid):
        super().__init__()

        # Input width of every Linear that is followed by an activation:
        # the first takes the raw features, the rest take hidden_neurons.
        input_widths = [num_features] + [hidden_neurons] * (num_hidden_layers - 1)

        stack = []
        for width in input_widths:
            stack.extend((nn.Linear(width, hidden_neurons), layer_activation()))

        # Output projection; its activation lives outside the Sequential.
        stack.append(nn.Linear(hidden_neurons, num_classes))

        self.final_activation = final_activation()
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and the final activation."""
        return self.final_activation(self.network(x))
35
+
36
def load_kmeans_model(model_path):
    """Load the serialized k-means model stored at ``model_path``.

    Args:
        model_path: Path to the joblib-serialized model file.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    # Honor the caller-supplied path; a hard-coded filename here would ignore
    # the argument and break whenever the model lives in a subdirectory.
    # NOTE: joblib uses pickle underneath; only load trusted files.
    kmeans_loaded = joblib.load(model_path)
    return kmeans_loaded
39
+
40
def load_churn_model(model_path):
    """Build the churn network on CPU and load its weights from ``model_path``.

    Args:
        model_path: Path to a file holding the network's ``state_dict``.

    Returns:
        A ``FixedNeuronsNetwork`` (11 features, 1 output, 3 hidden layers of
        64 neurons) with the stored weights loaded.
    """
    model = FixedNeuronsNetwork(
        num_features=11,
        num_classes=1,
        num_hidden_layers=3,
        hidden_neurons=64,
    ).to('cpu')
    # map_location keeps loading working on CPU-only hosts even when the
    # checkpoint was saved from a GPU.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    return model
45
+
46
def load_encoder(model_path):
    """Deserialize and return the label encoders saved at ``model_path``."""
    return joblib.load(model_path)
49
+
50
def load_scaler(model_path):
    """Deserialize and return the feature scaler saved at ``model_path``."""
    return joblib.load(model_path)
53
+
54
def encode_and_scale(new_sample_df, encoders, scaler):
    """Return a scaled and label-encoded copy of ``new_sample_df``.

    Args:
        new_sample_df: DataFrame containing the numerical columns listed
            below plus every column named in ``encoders``.
        encoders: Mapping of column name to an object with ``transform``.
        scaler: Object whose ``transform`` is applied to the numerical
            columns together.

    Returns:
        A new DataFrame with the same columns; the input is left untouched.
    """
    numerical_cols = [
        'Age', 'Subscription_Length_Months', 'Monthly_Bill',
        'Average_Internet_Usage', 'No_of_Tickets', 'Average_Talktime_Usage',
        'Social_Class', 'Base_Charge',
    ]

    # Work on a copy so the caller's frame is not silently overwritten.
    transformed = new_sample_df.copy()
    transformed[numerical_cols] = scaler.transform(transformed[numerical_cols])

    for col, le in encoders.items():
        transformed[col] = le.transform(transformed[col])

    return transformed
63
+
64
def get_cluster(sample, kmeans_loaded):
    """Return ``kmeans_loaded.predict(sample)``, the cluster assignment(s)."""
    return kmeans_loaded.predict(sample)
68
+
69
def get_churn_label(sample, model):
    """Return hard 0/1 churn predictions for every row of ``sample``.

    The model is switched to eval mode, run without gradient tracking on the
    frame's values as float32, and its outputs are thresholded at 0.5.
    """
    model.eval()
    features = torch.tensor(sample.to_numpy(), dtype=torch.float32)

    with torch.no_grad():
        scores = model(features)

    # Strictly greater than 0.5 counts as churn (1.0); everything else is 0.0.
    return (scores > 0.5).float()
77
+
78
def create_tsne_plot(df, clusters):
    """Render a 2-D t-SNE scatter plot of ``df`` colored by ``clusters``.

    Args:
        df: Feature matrix (one row per sample) passed to ``TSNE``.
        clusters: Cluster label for each row of ``df``.

    Returns:
        The rendered PNG plot as a NumPy array.
    """
    # scikit-learn requires perplexity < n_samples; the default of 30 would
    # reject small uploads, so shrink it for inputs of 30 rows or fewer.
    n_samples = len(df)
    perplexity = max(1.0, min(30.0, n_samples - 1))

    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(df)

    df_tsne = pd.DataFrame(tsne_results, columns=['TSNE1', 'TSNE2'])
    df_tsne['Cluster'] = clusters

    buf = io.BytesIO()
    fig = plt.figure(figsize=(10, 8))
    try:
        sns.scatterplot(
            x='TSNE1', y='TSNE2',
            hue='Cluster',
            palette=sns.color_palette('hsv', len(set(clusters))),
            data=df_tsne,
            legend='full',
        )
        plt.title('t-SNE Visualization of K Means Clustering')
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until closed; without this a
        # long-running server leaks one figure per request.
        plt.close(fig)

    buf.seek(0)
    plot_image = np.asarray(Image.open(buf))

    return plot_image