Upload 11 files

Browse files

Files changed (11) hide show

RestApi/api.py +27 -0
__pycache__/api.cpython-312.pyc +0 -0
__pycache__/model_io.cpython-312.pyc +0 -0
app.py +144 -0
data_cleaning/bank.csv +0 -0
data_cleaning/cleaned_bank_marketing.csv +0 -0
data_cleaning/data_cleaning.py +15 -0
flagged/log.csv +2 -0
model_building/model_building.py +37 -0
model_building/model_io.py +7 -0
model_building/random_forest_model.joblib +3 -0

RestApi/api.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from fastapi import FastAPI, HTTPException
+import joblib
+import numpy as np
+app = FastAPI()
+# Load the trained model once at import time so every request reuses it.
+# NOTE(review): the relative path is resolved against the process working
+# directory, while this commit adds the model under model_building/ - confirm
+# the server is started from a directory that actually contains the file.
+# joblib.load unpickles its input, so the file must come from a trusted source.
+loaded_model = joblib.load('random_forest_model.joblib')
+@app.get("/")
+def read_root():
+    return {"message": "Welcome to the Bank Marketing Model API"}
+@app.post("/predict/")
+def predict(data: dict):
+    try:
+        # Convert the input data to a numpy array
+        input_data = np.array(data['features']).reshape(1, 16)
+        # Make predictions using the loaded model
+        prediction = loaded_model.predict(input_data)
+        # Return the prediction as a JSON response
+        return {"prediction": prediction.tolist()}
+    except Exception as e:
+        # Return a custom error message to the client
+        raise HTTPException(status_code=500, detail=str(e))

__pycache__/api.cpython-312.pyc ADDED Viewed

Binary file (1.35 kB). View file

__pycache__/model_io.cpython-312.pyc ADDED Viewed

Binary file (550 Bytes). View file

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import gradio as gr
+import requests
+import pandas as pd
+def clean_data(data):
+    # Define the mapping for categorical variables
+    job_map = {
+        "admin.": 0,
+        "blue-collar": 1,
+        "entrepreneur": 2,
+        "housemaid": 3,
+        "management": 4,
+        "retired": 5,
+        "self-employed": 6,
+        "services": 7,
+        "student": 8,
+        "technician": 9,
+        "unemployed": 10,
+        "unknown": 11
+    }
+    marital_map = {
+        "divorced": 0,
+        "married": 1,
+        "single": 2,
+        "unknown": 3
+    }
+    education_map = {
+        "primary": 0,
+        "secondary": 1,
+        "tertiary": 2,
+        "unknown": 3
+    }
+    default_map = {
+        "no": 0,
+        "yes": 1,
+        "unknown": 2
+    }
+    housing_map = {
+        "no": 0,
+        "yes": 1,
+        "unknown": 2
+    }
+    loan_map = {
+        "no": 0,
+        "yes": 1,
+        "unknown": 2
+    }
+    contact_map = {
+        "cellular": 0,
+        "telephone": 1,
+        "unknown": 2
+    }
+    month_map = {
+        "apr": 0,
+        "aug": 1,
+        "dec": 2,
+        "feb": 3,
+        "jan": 4,
+        "jul": 5,
+        "jun": 6,
+        "mar": 7,
+        "may": 8,
+        "nov": 9,
+        "oct": 10,
+        "sep": 11
+    }
+    poutcome_map = {
+        "failure": 0,
+        "nonexistent": 1,
+        "success": 2,
+        "unknown": 3
+    }
+    # Create a dictionary to store the cleaned data
+    cleaned_data = {}
+    # Clean the data
+    cleaned_data["age"] = data[0]
+    cleaned_data["job"] = job_map.get(data[1], 11)
+    cleaned_data["marital"] = marital_map.get(data[2], 3)
+    cleaned_data["education"] = education_map.get(data[3], 3)
+    cleaned_data["default"] = default_map.get(data[4], 2)
+    cleaned_data["balance"] = data[5] / 1000
+    cleaned_data["housing"] = housing_map.get(data[6], 2)
+    cleaned_data["loan"] = loan_map.get(data[7], 2)
+    cleaned_data["contact"] = contact_map.get(data[8], 2)
+    cleaned_data["day"] = data[9]
+    cleaned_data["month"] = month_map.get(data[10], 11)
+    cleaned_data["duration"] = data[11] / 100
+    cleaned_data["campaign"] = data[12]
+    cleaned_data["pdays"] = data[13] / 100
+    cleaned_data["previous"] = data[14]
+    cleaned_data["poutcome"] = poutcome_map.get(data[15], 3)
+    print("Cleaned Data:")
+    print(cleaned_data)
+    return cleaned_data
+def predict(age, job, marital, education, default, balance, housing, loan, contact, day, month, duration, campaign, pdays, previous, poutcome):
+    cleaned_data = clean_data([age, job, marital, education, default, balance, housing, loan, contact, day, month, duration, campaign, pdays, previous, poutcome])
+    url = "http://localhost:8000/predict/"
+    api_data = {"features": list(cleaned_data.values())}
+    print("API Request:")
+    print(api_data)
+    response = requests.post(url, json=api_data)
+    prediction = response.json()["prediction"][0]
+    return prediction
+demo = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Number(label="Age"),
+        gr.Text(label="Job"),
+        gr.Text(label="Marital"),
+        gr.Text(label="Education"),
+        gr.Text(label="Default"),
+        gr.Number(label="Balance"),
+        gr.Text(label="Housing"),
+        gr.Text(label="Loan"),
+        gr.Text(label="Contact"),
+        gr.Number(label="Day"),
+        gr.Text(label="Month"),
+        gr.Number(label="Duration"),
+        gr.Number(label="Campaign"),
+        gr.Number(label="Pdays"),
+        gr.Number(label="Previous"),
+        gr.Text(label="Poutcome"),
+    ],
+    outputs=gr.Text(label="Prediction"),
+    title="Bank Marketing Prediction",
+    description="This is a demo for bank marketing prediction. Please enter the required information to get the prediction."
+)
+if __name__ == "__main__":
+    demo.launch()

data_cleaning/bank.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cleaning/cleaned_bank_marketing.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cleaning/data_cleaning.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import pandas as pd
+bank_data = pd.read_csv('bank.csv')
+bank_data.fillna('unknown', inplace=True)
+from sklearn.preprocessing import LabelEncoder
+le = LabelEncoder()
+categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']
+for col in categorical_cols:
+    bank_data[col] = le.fit_transform(bank_data[col])
+from sklearn.preprocessing import StandardScaler
+scaler = StandardScaler()
+bank_data[['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']] = scaler.fit_transform(bank_data[['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']])
+bank_data.to_csv('cleaned_bank_marketing.csv', index=False)

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Age,Job,Marital,Education,Default,Balance,Housing,Loan,Contact,Day,Month,Duration,Campaign,Pdays,Previous,Poutcome,Prediction,flag,username,timestamp
2	+ 54,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,'-1,0,unknown,yes,,,2024-10-06 20:18:43.922400

model_building/model_building.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.metrics import accuracy_score, classification_report
+from sklearn.ensemble import RandomForestClassifier
+import model_building.model_io as model_io
+model = RandomForestClassifier(n_estimators=100, random_state=42)
+bank_data = pd.read_csv('cleaned_bank_marketing.csv')
+X = bank_data.drop('deposit', axis=1)
+y = bank_data['deposit']
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Train the model on the training data
+model.fit(X_train, y_train)
+# Make predictions on the testing data
+y_pred = model.predict(X_test)
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("Classification Report:")
+print(classification_report(y_test, y_pred))
+# Save the trained model to a file
+model_io.save_model(model, 'random_forest_model.joblib')
+# Load the saved model from the file
+loaded_model = model_io.load_model('random_forest_model.joblib')
+# Make predictions using the loaded model
+loaded_y_pred = loaded_model.predict(X_test)
+print("Loaded Model Accuracy:", accuracy_score(y_test, loaded_y_pred))
+print("Loaded Model Classification Report:")
+print(classification_report(y_test, loaded_y_pred))

model_building/model_io.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import joblib
+def save_model(model, filename):
+    joblib.dump(model, filename)
+def load_model(filename):
+    return joblib.load(filename)

model_building/random_forest_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d3241175627742dfd089254908b546d5682554b47bd73616b9d1143a3368db7
+size 20606745