Upload 13 files
Browse files- .gitattributes +3 -35
- .gitlab-ci.yml +13 -0
- CREDIT SCORE CARD PRESENTATION.pptx +3 -0
- Credit_Score (2).ipynb +0 -0
- Credit_Score_Classifier.py +130 -0
- Flask-keypair.pem +27 -0
- README.md +89 -0
- app.py +83 -0
- clean_train.csv +0 -0
- credit_classifier.joblib +3 -0
- requirements.txt +5 -0
- score_app.py +24 -0
- train.csv.zip +3 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
credit_classifier.joblib filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
CREDIT[[:space:]]SCORE[[:space:]]CARD[[:space:]]PRESENTATION.pptx filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
train.csv.zip filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitlab-ci.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# GitLab CI pipeline: a single deploy stage that triggers a Streamlit
# deployment of the main branch via the Streamlit API.
stages:
  - deploy

deploy_to_streamlit:
  stage: deploy
  script:
    - echo "Deploying to Streamlit..."
    # $STREAMLIT_API_TOKEN must be configured as a masked CI/CD variable
    # in the GitLab project settings — never committed to the repo.
    - curl -X POST "https://api.streamlit.io/deploy" \
      -H "Authorization: Bearer $STREAMLIT_API_TOKEN" \
      -H "Content-Type: application/json" \
      -d '{"repository": "https://gitlab.com/rosey-team/credit-score/-/tree/main", "branch": "main"}'
  # Deploy only on pushes to the main branch.
  only:
    - main
|
CREDIT SCORE CARD PRESENTATION.pptx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:861cd7621885df16ea52a7809786001b34fd4e96bb5a68ec16bccf5dddfe9d6f
|
| 3 |
+
size 4090426
|
Credit_Score (2).ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Credit_Score_Classifier.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit app that trains (or loads) a Random Forest credit-score model.

Reads ``clean_train.csv``, trains a ``RandomForestClassifier`` when no saved
model exists (persisting it to ``credit_classifier.joblib``), and exposes a
sidebar form that predicts one of three credit-score categories:
Good, Poor or Standard.
"""

import os

import numpy as np
import pandas as pd
import streamlit as st
from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


@st.cache_resource
def load_and_train_model():
    """Train a RandomForestClassifier on clean_train.csv and persist it.

    Returns:
        The fitted model.

    Stops the Streamlit script with an error message if the training data
    file is missing.
    """
    try:
        data = pd.read_csv("clean_train.csv")
    except FileNotFoundError:
        st.error("Error: The file 'clean_train.csv' was not found. Please upload the file.")
        st.stop()

    # Features / target split; `Credit_Score` is the label column.
    X = data.drop('Credit_Score', axis=1)
    y = data['Credit_Score']

    # Hold out 20% for evaluation; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train, y_train)

    # Persist the fitted model so later runs can skip retraining.
    dump(rf, 'credit_classifier.joblib')
    return rf


@st.cache_resource
def load_model():
    """Return the saved model, retraining if the file is missing or corrupt."""
    if os.path.exists('credit_classifier.joblib'):
        try:
            return load('credit_classifier.joblib')
        except Exception as e:
            # Corrupt or incompatible artifact — fall back to retraining.
            st.warning(f"Error loading model: {e}. Retraining the model...")
            return load_and_train_model()
    else:
        st.warning("Model file not found. Training a new model...")
        return load_and_train_model()


# Load the trained model once per session (cached by Streamlit).
loaded_model = load_model()

# ----------------------------------------------------------------- UI ---
st.title("Credit Score Prediction App")
st.markdown("Predict the credit score category based on user inputs.")

# Input fields — the seven features the model was trained on.
st.sidebar.header("Enter Feature Values:")
feature1 = st.sidebar.number_input("Outstanding Debt", min_value=0.0, step=100.0)
feature2 = st.sidebar.selectbox("Credit Mix (0=Bad, 1=Good, 2=Excellent)", [0, 1, 2])
feature3 = st.sidebar.number_input("Credit History Age (in months)", min_value=0.0, step=1.0)
feature4 = st.sidebar.number_input("Monthly Balance", min_value=0.0, step=100.0)
feature5 = st.sidebar.number_input("Payment Behaviour", min_value=0.0, step=1.0)
feature6 = st.sidebar.number_input("Annual Income", min_value=0.0, step=1000.0)
feature7 = st.sidebar.number_input("Number of Delayed Payments", min_value=0, step=1)

# Mapping from model class index to human-readable label.
target_names = {0: "Good", 1: "Poor", 2: "Standard"}

if st.button("Predict"):
    try:
        # The model expects a 2-D array: one row of the seven features above.
        inputs = np.array([[feature1, feature2, feature3, feature4, feature5, feature6, feature7]])

        prediction = loaded_model.predict(inputs)

        st.success(f"Predicted Credit Score: {target_names[prediction[0]]}")
    except Exception as e:
        st.error(f"An error occurred during prediction: {e}")
Flask-keypair.pem  (SECURITY: this commit adds a private RSA key to the repository — revoke the key immediately, remove the file, and purge it from git history)
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN RSA PRIVATE KEY-----
|
| 2 |
+
MIIEpAIBAAKCAQEA1oOm76qQhW3n++IWurUdp1r46/JtIsyyZms6wVHLDPWbAhBe
|
| 3 |
+
FqMIjwEvs2ia3AROIf2RZVFKQUuS5OvC4/J5DfmHWlXmhYNBsLuMgSpRK0F0Utwc
|
| 4 |
+
3Skl7vDNkN+2n111RFzOY3fMvuqh2DR5v29qETa1M/46FvPZ1xc0p36VXQy0MIEA
|
| 5 |
+
QekaD0cU3qWTXRvzdojtXjFLckdSRmcYzgIUG9oRLioJPg3gkz/AdjXG+yh9l5b1
|
| 6 |
+
jdb8XFmlpiFwfBQCog4mIukNegYni7Fnhjh9AmXcHBt/s4NgZrhZKoCoZ6LBZimb
|
| 7 |
+
bx6LGA0y+KcoebpeRqM9GfrjKOy7d4k2lRwu7QIDAQABAoIBAQCnp8uQI1nCvwyB
|
| 8 |
+
w8mJ8UYU57gHCLkdaqi//Wy1bQzquwaETXCL9q4Yvo+eg9IMxVyfmkPJ0QfdEojj
|
| 9 |
+
XBQ8/eR49J5CD0MU9GoiC+MbjIxV51lX7Wqtu5xHLHMtmHtuAKhHXtV7zkT6rf0C
|
| 10 |
+
Mrk40oYun/htg3O5PqryPdPMWNW6G2ZCJZNM9GiEe+6IrrGQqUYaWzYucmHsrzgl
|
| 11 |
+
Nfx9H9hAcCuC0W3buypH9AoaPK62YAGBmW+Yz07sgSSn11KifGBvfL3XLTrSg2lj
|
| 12 |
+
z46oncCqw55MC8dVoFGjlWhm9LZJYmqU7+RCzpEjyv0ZC3g1jBRZWmZGZJcOcdPv
|
| 13 |
+
ACc+QfyNAoGBAPD0f8odH1bJsgshltbn/hmycq0W2IrDI9vHT+R0mLdoXYadxpKA
|
| 14 |
+
619UzVJEgOuEO9It422ZECu2hdRNXGo9TueB0fbotIm1N3/jb45361zyXUJN8J/o
|
| 15 |
+
Eyw2cPp/bGrcRIE3R4OanyAcR1G7nrmJzfxQZaTmvrEumXJ5cjBSgp/zAoGBAOPo
|
| 16 |
+
g8prlJ9Jf5JekK/GokwpMFpKLDbOK4pYnaLHRlwOEn1xyAjAzFSERZGNo8yuKcpE
|
| 17 |
+
aCk4Am/ZMOYWnKN6BoysznpTtE6r/cLVwkIDDw8tTh86MBM91o443rpQWcfaKY64
|
| 18 |
+
GsJERIrVaHa0+dVD4KNT8YSBFsDao49wgzgHFo2fAoGBAKdIZ6uvGkHLl6Mot6sN
|
| 19 |
+
txiqDfljQgTcQ3ni3vFxjVDP0RaTVPgDLuWpXz59Nq/Lxyg9GYbOXC6s8i6ZYs6T
|
| 20 |
+
L8OEGnv7xNNSD2znPly/npSR4vMUXzj1CtKIHVmbu25Y6+p1sV2JrG3DlDQHOMQB
|
| 21 |
+
CxPf0SFx3PPvsTKLZB5uOifHAoGAAgt4Azzr3HIAXwPSMIGLuzszX9wCgYtgAKHC
|
| 22 |
+
6E75ZqIJsG5w6QMx0iBNr3yh15hIb7QlaxlBjFdahPX2+dCCdoimYZqWp44LfNAG
|
| 23 |
+
Kil03SH+7XDMNZ/8aNJBzVIjPBPNA6bLNqr+InC/uzDsfOla/pwmMpYl6h0MEqFj
|
| 24 |
+
zADkRukCgYA9hqWdcfa0x+PKK5Q5t4wdLvwqdHUA/fpk7I5Fk/cBquXmXfeQEzTM
|
| 25 |
+
vYctKyOPAZucG6TDiSTE/g0HSzg/tLFnlRTXduSTq/l07XoPygk0PAPDHqgFqkrc
|
| 26 |
+
7KdrLgXpc9YIzPZ+fEXdZc0rmETasPaFLfepmzGsv/bAPVpCsYjUXQ==
|
| 27 |
+
-----END RSA PRIVATE KEY-----
|
README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# **Credit Scorecard Project**
|
| 2 |
+
|
| 3 |
+
### **Table of Contents**
|
| 4 |
+
1. Project Overview
|
| 5 |
+
2. Features
|
| 6 |
+
3. Technologies Used
|
| 7 |
+
4. Installation
|
| 8 |
+
5. Usage
|
| 9 |
+
6. Data
|
| 10 |
+
7. Model
|
| 11 |
+
8. Evaluation
|
| 12 |
+
9. Future Enhancements
|
| 13 |
+
10. Contributing
|
| 14 |
+
11. Contact
|
| 15 |
+
|
| 16 |
+
### **Project Overview**
|
| 17 |
+
**BUSINESS PROBLEM:**
|
| 18 |
+
|
| 19 |
+
A credit company currently faces a high risk of loan defaults, which directly impacts profitability and increases operational costs associated with debt recovery. Many customers who are approved for credit loans may not have the financial stability or creditworthiness to manage debt effectively, leading to missed payments and default. The company needs a robust way to evaluate applicants’ eligibility for credit loans to reduce the likelihood of default and ensure that only financially responsible customers are approved.
|
| 20 |
+
|
| 21 |
+
**BUSINESS OBJECTIVES:**
|
| 22 |
+
1. Minimize Loan Default Rates: Implement a data-driven model to predict and identify high-risk customers likely to default on loans, thereby reducing the overall loan default rate.
|
| 23 |
+
2. Improve Loan Eligibility Assessment: Develop a systematic method for assessing creditworthiness by using key financial and behavioral indicators (such as outstanding debt, credit history, income level, and payment behavior) to make more accurate loan approval decisions.
|
| 24 |
+
|
| 25 |
+
3. Enhance Profitability through Risk Management: By approving loans primarily for customers with strong credit profiles, the company can decrease losses due to defaults and increase profitability.
|
| 26 |
+
|
| 27 |
+
4. Increase Customer Retention of Low-Risk Borrowers: Retain and attract low-risk customers by offering favorable loan terms to those who demonstrate financial responsibility, thereby improving customer loyalty and satisfaction among reliable borrowers.
|
| 28 |
+
|
| 29 |
+
5. Support Financial Health of Borderline Customers: Provide resources or alternative loan products for borderline customers who are not immediately eligible for standard loans. This could include smaller loans or financial education programs to help these customers improve their creditworthiness, creating a pipeline of future eligible customers.
|
| 30 |
+
|
| 31 |
+
**DATA UNDERSTANDING:**
|
| 32 |
+
1. Identification Columns: ID, Customer_ID: Unique identifiers for records and customers, likely strings.
|
| 33 |
+
|
| 34 |
+
2. Demographic Information: Name, Age, SSN, Occupation: Descriptive attributes about customers. Financial and Credit Attributes:
|
| 35 |
+
|
| 36 |
+
3. Income Information: Annual_Income, Monthly_Inhand_Salary, Num_Bank_Accounts, Num_Credit_Card: Count of banking products, possibly integers. Interest_Rate, Outstanding_Debt, Credit_Utilization_Ratio: Financial metrics, potentially floats. Num_of_Loan, Type_of_Loan: Loan-related info, with Type_of_Loan listing loan types.
|
| 37 |
+
|
| 38 |
+
4. Behavioral and Payment Information: Delay_from_due_date, Num_of_Delayed_Payment, Changed_Credit_Limit, Num_Credit_Inquiries: Indicators of payment history and credit management. Payment_Behaviour, Payment_of_Min_Amount: Details on payment patterns, possibly strings or categories. Monthly_Balance, Total_EMI_per_month, Amount_invested_monthly: Monthly financial metrics, possibly floats.
|
| 39 |
+
|
| 40 |
+
5. The source of the data is Kaggle through their website https://www.kaggle.com/datasets
|
| 41 |
+
|
| 42 |
+
### **Features**
|
| 43 |
+
1. **Predictive Model**: Uses machine learning to predict credit scores.
|
| 44 |
+
2. **Data Preprocessing**: Includes handling missing data, outliers, and scaling features.
|
| 45 |
+
3. **Model Explainability**: Provides insights into the model's decisions using feature importance.
|
| 46 |
+
4. **Performance Metrics**: Accuracy, Precision, Recall, F1-Score, and AUC-ROC.
|
| 47 |
+
|
| 48 |
+
### **Technologies Used**
|
| 49 |
+
1. Programming Language: Python 3.13
|
| 50 |
+
2. Libraries:
|
| 51 |
+
3. Pandas
|
| 52 |
+
4. NumPy
|
| 53 |
+
5. Scikit-learn
|
| 54 |
+
6. XGBoost
|
| 55 |
+
7. Matplotlib & Seaborn for visualization
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
### **Data**
|
| 59 |
+
The dataset includes anonymized financial information with features like income, credit history, debt, and other personal details.
|
| 60 |
+

|
| 61 |
+
Most of the customers have a standard credit mix. However ~20000 individuals have credit mixes whose information may either be ambiguous,
|
| 62 |
+
inconsistent, or there is not enough data to determine where they fall. We keep this in mind.
|
| 63 |
+
|
| 64 |
+

|
| 65 |
+
The customers have an average outstanding debt of about 1,300, regardless of occupation.
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
### **Model**
|
| 69 |
+
The model uses [e.g., Logistic Regression, Decision Trees, XGBoost] to predict credit scores. Hyperparameter tuning and cross-validation ensure optimal performance.
|
| 70 |
+
|
| 71 |
+
### **Evaluation**
|
| 72 |
+
The model is evaluated using various performance metrics to ensure its reliability and accuracy. Confusion matrix, ROC curve, and classification reports are generated.
|
| 73 |
+

|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
### **Results:**
|
| 77 |
+

|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
### **Future Enhancements**
|
| 81 |
+
Incorporate more diverse datasets for robustness.
|
| 82 |
+
Implement deep learning models.
|
| 83 |
+
Develop a user-friendly web interface for real-time credit scoring.
|
| 84 |
+
### **Contributing**
|
| 85 |
+
Contributions are welcome! Please fork the repository and submit a pull request.
|
| 86 |
+
|
| 87 |
+
### **Contact**
|
| 88 |
+
For any inquiries, please contact:
|
| 89 |
+
wahomeshiko@gmail.com
|
app.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Flask app serving credit-score predictions from a Random Forest model.

Loads ``credit_classifier.joblib`` if present (training and saving one from
``clean_train.csv`` otherwise) and exposes two routes: ``/`` renders the
input form and ``/predict`` returns the predicted category.
"""

from flask import Flask, request, render_template
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from joblib import dump, load
import os

app = Flask(__name__)


def load_and_train_model():
    """Train a RandomForestClassifier on clean_train.csv and persist it."""
    data = pd.read_csv("clean_train.csv")
    X = data.drop('Credit_Score', axis=1)
    y = data['Credit_Score']
    # Hold out 20% for evaluation; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train, y_train)

    # Compress the dump so the saved artifact stays small.
    dump(rf, 'credit_classifier.joblib', compress=('gzip', 9))
    return rf


def load_model():
    """Return the saved model, training a new one if no file exists."""
    if os.path.exists('credit_classifier.joblib'):
        return load('credit_classifier.joblib')
    return load_and_train_model()


# Load (or train) the model once at startup.
loaded_model = load_model()

# Mapping from model class index to human-readable label.
target_names = {0: "Good", 1: "Poor", 2: "Standard"}


@app.route("/")
def home():
    """Render the input form."""
    return render_template("index.html")


@app.route('/predict', methods=['POST'])
def predict():
    """Read the seven form fields, run the model, and render the result."""
    try:
        # Extract and coerce the input values from the form.
        outstanding_debt = float(request.form['outstanding_debt'])
        credit_mix = int(request.form['credit_mix'])
        credit_history_age = float(request.form['credit_history_age'])
        monthly_balance = float(request.form['monthly_balance'])
        payment_behaviour = float(request.form['payment_behaviour'])
        annual_income = float(request.form['annual_income'])
        delayed_payments = int(request.form['delayed_payments'])

        # The model expects a 2-D array: one row of the seven features,
        # in the same order used at training time.
        input_data = [[
            outstanding_debt, credit_mix, credit_history_age,
            monthly_balance, payment_behaviour, annual_income,
            delayed_payments
        ]]

        prediction = loaded_model.predict(input_data)[0]

        # Map the class index to its category name (module-level mapping).
        prediction_text = target_names.get(prediction, "Unknown")

        return render_template('result.html', prediction=prediction_text)
    except Exception as e:
        return f"An error occurred: {str(e)}"


if __name__ == "__main__":
    # Bind to all interfaces so the host/VM is reachable externally.
    app.run(host="0.0.0.0", port=8080)
clean_train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
credit_classifier.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9c5d5fc7bc20dd744e240340ce7d6cf51caf56c4c8db27494d3bce0f83a545d
|
| 3 |
+
size 33489157
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scikit-learn
|
| 2 |
+
streamlit
|
| 3 |
+
pandas
|
| 4 |
+
joblib
|
| 5 |
+
numpy
|
score_app.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Minimal Streamlit scorer that loads a pickled Random Forest model.

Fixes a crash on "Predict": the original wrote ``data.target_names`` but no
``data`` object exists in this file, raising ``NameError``. The class-label
mapping is now defined explicitly, matching the sibling apps.
"""

import pickle

import streamlit as st

# Load the model.
# NOTE(security): pickle.load executes arbitrary code from the file — only
# load model files from a trusted source.
with open('credit_model.pkl', 'rb') as f:
    rf = pickle.load(f)

# Use 'rf' for predictions in the Streamlit app below.
st.title("Credit Score Prediction App")

# Input fields — the seven features the model was trained on.
feature1 = st.number_input("Outstanding_Debt")
feature2 = st.number_input("Credit_Mix")
feature3 = st.number_input("Credit_History_Age_in_months")
feature4 = st.number_input("Monthly_Balance")
feature5 = st.number_input("Payment_Behaviour")
feature6 = st.number_input("Annual_Income")
feature7 = st.number_input("Number_of_Delayed_Payment")

# Mapping from model class index to human-readable label (matches app.py).
target_names = {0: "Good", 1: "Poor", 2: "Standard"}

if st.button("Predict"):
    prediction = rf.predict([[feature1, feature2, feature3, feature4, feature5, feature6, feature7]])
    st.write(f"Predicted Class: {target_names[prediction[0]]}")
|
train.csv.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1061b6701bbd529b261fd89fa68fd6cab618b4ba100778c65610ff3b0171fbbe
|
| 3 |
+
size 6382379
|