Xenax33 committed on
Commit
989f5b5
·
verified ·
1 Parent(s): bf478b3

Upload 11 files

Browse files
RestApi/api.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException
import joblib
import numpy as np

app = FastAPI()

# Load the trained model once at import time so every request reuses it.
# NOTE(review): path is relative to the process working directory — the model
# is saved under model_building/, so confirm where this server is launched.
loaded_model = joblib.load('random_forest_model.joblib')


@app.get("/")
def read_root():
    """Health-check / welcome endpoint."""
    return {"message": "Welcome to the Bank Marketing Model API"}


@app.post("/predict/")
def predict(data: dict):
    """Run the loaded model on one feature vector.

    Expects a JSON body of the form ``{"features": [f1, ..., f16]}``.
    Returns ``{"prediction": [...]}`` on success, or raises an HTTP 500
    carrying the underlying error message.

    Fix: ``HTTPException`` was raised below but never imported (only
    ``FastAPI`` was), so any request that hit the except branch crashed
    with a NameError instead of returning the intended 500 response.
    """
    try:
        # Convert the input data to a single (1, 16) row, matching the
        # 16-feature layout the model was trained on.
        input_data = np.array(data['features']).reshape(1, 16)

        # Make predictions using the loaded model.
        prediction = loaded_model.predict(input_data)

        # Return the prediction as a JSON response.
        return {"prediction": prediction.tolist()}
    except Exception as e:
        # Return a custom error message to the client.
        raise HTTPException(status_code=500, detail=str(e))
__pycache__/api.cpython-312.pyc ADDED
Binary file (1.35 kB). View file
 
__pycache__/model_io.cpython-312.pyc ADDED
Binary file (550 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import pandas as pd
4
+
5
def clean_data(data):
    """Encode the 16 raw form values into the numeric feature dict the model expects.

    ``data`` is positional: [age, job, marital, education, default, balance,
    housing, loan, contact, day, month, duration, campaign, pdays, previous,
    poutcome]. Each categorical string is mapped to its integer code; an
    unrecognized string falls back to the code of the "unknown" category.
    balance, duration and pdays are rescaled (/1000, /100, /100).
    """
    # Category -> code tables; enumerate() assigns 0..n-1 in listed order.
    job_map = {name: code for code, name in enumerate([
        "admin.", "blue-collar", "entrepreneur", "housemaid", "management",
        "retired", "self-employed", "services", "student", "technician",
        "unemployed", "unknown",
    ])}
    marital_map = {name: code for code, name in enumerate(
        ["divorced", "married", "single", "unknown"])}
    education_map = {name: code for code, name in enumerate(
        ["primary", "secondary", "tertiary", "unknown"])}
    # default, housing and loan all share the same yes/no/unknown coding.
    yes_no_map = {name: code for code, name in enumerate(
        ["no", "yes", "unknown"])}
    contact_map = {name: code for code, name in enumerate(
        ["cellular", "telephone", "unknown"])}
    month_map = {name: code for code, name in enumerate([
        "apr", "aug", "dec", "feb", "jan", "jul",
        "jun", "mar", "may", "nov", "oct", "sep",
    ])}
    poutcome_map = {name: code for code, name in enumerate(
        ["failure", "nonexistent", "success", "unknown"])}

    # Assemble the cleaned record; key order matters because the caller
    # sends list(cleaned_data.values()) to the API.
    cleaned_data = {
        "age": data[0],
        "job": job_map.get(data[1], 11),
        "marital": marital_map.get(data[2], 3),
        "education": education_map.get(data[3], 3),
        "default": yes_no_map.get(data[4], 2),
        "balance": data[5] / 1000,
        "housing": yes_no_map.get(data[6], 2),
        "loan": yes_no_map.get(data[7], 2),
        "contact": contact_map.get(data[8], 2),
        "day": data[9],
        "month": month_map.get(data[10], 11),
        "duration": data[11] / 100,
        "campaign": data[12],
        "pdays": data[13] / 100,
        "previous": data[14],
        "poutcome": poutcome_map.get(data[15], 3),
    }

    print("Cleaned Data:")
    print(cleaned_data)

    return cleaned_data
107
+
108
def predict(age, job, marital, education, default, balance, housing, loan, contact, day, month, duration, campaign, pdays, previous, poutcome):
    """Clean the form inputs, POST them to the local model API, return the prediction.

    Fixes: ``requests.post`` had no timeout (requests waits forever by
    default, freezing the UI if the API hangs), and a non-2xx response
    previously surfaced as a confusing ``KeyError: 'prediction'`` —
    ``raise_for_status()`` now reports the HTTP failure directly.
    """
    cleaned_data = clean_data([age, job, marital, education, default, balance,
                               housing, loan, contact, day, month, duration,
                               campaign, pdays, previous, poutcome])
    url = "http://localhost:8000/predict/"
    api_data = {"features": list(cleaned_data.values())}
    print("API Request:")
    print(api_data)
    response = requests.post(url, json=api_data, timeout=10)
    # Fail loudly on an HTTP error instead of a KeyError on the missing key.
    response.raise_for_status()
    prediction = response.json()["prediction"][0]
    return prediction
117
+
118
# (widget class, label) for each input, in the exact positional order
# that predict() expects its arguments.
_INPUT_SPECS = [
    (gr.Number, "Age"),
    (gr.Text, "Job"),
    (gr.Text, "Marital"),
    (gr.Text, "Education"),
    (gr.Text, "Default"),
    (gr.Number, "Balance"),
    (gr.Text, "Housing"),
    (gr.Text, "Loan"),
    (gr.Text, "Contact"),
    (gr.Number, "Day"),
    (gr.Text, "Month"),
    (gr.Number, "Duration"),
    (gr.Number, "Campaign"),
    (gr.Number, "Pdays"),
    (gr.Number, "Previous"),
    (gr.Text, "Poutcome"),
]

demo = gr.Interface(
    fn=predict,
    inputs=[widget(label=label) for widget, label in _INPUT_SPECS],
    outputs=gr.Text(label="Prediction"),
    title="Bank Marketing Prediction",
    description="This is a demo for bank marketing prediction. Please enter the required information to get the prediction."
)

if __name__ == "__main__":
    demo.launch()
data_cleaning/bank.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cleaning/cleaned_bank_marketing.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cleaning/data_cleaning.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Preprocess bank.csv into the numeric training table used by model_building.
# Fix: imports were scattered mid-script; hoisted to the top per PEP 8.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the raw data; treat missing values as their own 'unknown' category.
bank_data = pd.read_csv('bank.csv')
bank_data.fillna('unknown', inplace=True)

# Integer-encode every categorical column. LabelEncoder is re-fit per
# column, so codes follow the sorted order of each column's own labels.
le = LabelEncoder()
categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']
for col in categorical_cols:
    bank_data[col] = le.fit_transform(bank_data[col])

# Standardize the numeric columns to zero mean / unit variance.
numeric_cols = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']
scaler = StandardScaler()
bank_data[numeric_cols] = scaler.fit_transform(bank_data[numeric_cols])

bank_data.to_csv('cleaned_bank_marketing.csv', index=False)
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Age,Job,Marital,Education,Default,Balance,Housing,Loan,Contact,Day,Month,Duration,Campaign,Pdays,Previous,Poutcome,Prediction,flag,username,timestamp
2
+ 54,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,'-1,0,unknown,yes,,,2024-10-06 20:18:43.922400
model_building/model_building.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Train, evaluate, persist and reload a random-forest deposit classifier.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import model_building.model_io as model_io

# Load the preprocessed dataset and separate features from the target.
bank_data = pd.read_csv('cleaned_bank_marketing.csv')
X = bank_data.drop('deposit', axis=1)
y = bank_data['deposit']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Persist the trained model, then reload it to verify the round trip.
model_io.save_model(model, 'random_forest_model.joblib')
loaded_model = model_io.load_model('random_forest_model.joblib')

# The reloaded model must score identically to the in-memory one.
loaded_y_pred = loaded_model.predict(X_test)
print("Loaded Model Accuracy:", accuracy_score(y_test, loaded_y_pred))
print("Loaded Model Classification Report:")
print(classification_report(y_test, loaded_y_pred))
model_building/model_io.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import joblib

def save_model(model, filename):
    """Serialize *model* to *filename* via joblib."""
    joblib.dump(model, filename)

def load_model(filename):
    """Load and return an object previously saved with save_model."""
    return joblib.load(filename)
model_building/random_forest_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d3241175627742dfd089254908b546d5682554b47bd73616b9d1143a3368db7
3
+ size 20606745