tharu22 commited on
Commit
de6c861
·
1 Parent(s): efc0627
Files changed (4) hide show
  1. Dockerfile +16 -0
  2. credit_risk_dataset.csv +0 -0
  3. main.py +94 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

# Image tags are case-sensitive: "python:3.9-SLIM" does not exist on
# Docker Hub; the official slim variant is lowercase "slim".
FROM python:3.9-slim

# Run as a non-root user (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
credit_risk_dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
main.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
import os

# Third-party
import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Initialize FastAPI app
app = FastAPI()

# Load dataset
DATASET_PATH = "credit_risk_dataset.csv"  # Update with actual dataset path
df = pd.read_csv(DATASET_PATH)

# Prepare data for ML training.
FEATURES = ["loan_amnt", "loan_int_rate", "person_age", "person_income", "cb_person_cred_hist_length"]
# sklearn estimators raise on NaN; drop incomplete rows from the training
# frame only, leaving the full df intact for the analytics endpoints.
# NOTE(review): assumes the CSV may contain missing values — harmless if not.
train_df = df.dropna(subset=FEATURES + ["loan_status"])
X = train_df[FEATURES]
y = train_df["loan_status"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# joblib.dump raises FileNotFoundError if the target directory is missing;
# nothing else (including the Dockerfile) creates it, so create it here.
MODELS_DIR = "models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Train models at import time and persist the fitted artifacts.
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
joblib.dump(classifier, os.path.join(MODELS_DIR, "risk_classifier.pkl"))

# NOTE(review): despite the name "regressor", this is a LogisticRegression
# classifier on the same binary target; predict_proba is used downstream.
regressor = LogisticRegression()
regressor.fit(X_train, y_train)
joblib.dump(regressor, os.path.join(MODELS_DIR, "past_due_regressor.pkl"))

# Load ML models (round-trips through disk, verifying the saved artifacts).
classifier = joblib.load(os.path.join(MODELS_DIR, "risk_classifier.pkl"))
regressor = joblib.load(os.path.join(MODELS_DIR, "past_due_regressor.pkl"))
# Pydantic models for validation
class LoanRequest(BaseModel):
    """Request body for POST /predict_loan_risk.

    Field names mirror the dataset columns. Note that
    `person_home_ownership` is validated here but not consumed by the
    trained models, which use only the five numeric FEATURES columns.
    """
    loan_amnt: float
    loan_int_rate: float
    person_age: int
    person_income: float
    person_home_ownership: str
    cb_person_cred_hist_length: int
# API Endpoints
@app.get("/loan_status_distribution")
def loan_status_distribution():
    """Return the percentage of defaulted (loan_status == 1) vs non-defaulted loans."""
    if "loan_status" not in df.columns:
        raise HTTPException(status_code=400, detail="Missing 'loan_status' column")

    status_counts = df["loan_status"].value_counts(normalize=True) * 100
    # Cast to builtin float: value_counts yields numpy scalars, which
    # FastAPI's JSON encoder cannot serialize (and .get may return the
    # plain-int default 0 when a class is absent).
    return {
        "default_percentage": float(status_counts.get(1, 0)),
        "non_default_percentage": float(status_counts.get(0, 0)),
    }
52
+
@app.get("/payment_timeline_analysis")
def payment_timeline_analysis():
    """Return the mean loan amount for each loan_status value."""
    grouped = df.groupby("loan_status")["loan_amnt"].mean()
    # Convert numpy index/values to builtin int/float so the response is
    # JSON-serializable by FastAPI.
    return {"average_loan_amount_by_status": {int(k): float(v) for k, v in grouped.items()}}
57
+
@app.get("/principal_amount_patterns")
def principal_amount_patterns():
    """Return the default rate grouped by (age, income, home ownership)."""
    grouped = df.groupby(["person_age", "person_income", "person_home_ownership"])["loan_status"].mean()
    # A multi-column groupby produces tuple keys, which cannot be JSON
    # object keys (the endpoint would 500); flatten them to "a|b|c" strings
    # and cast numpy floats to builtin float.
    demographic_defaults = {
        f"{age}|{income}|{ownership}": float(rate)
        for (age, income, ownership), rate in grouped.items()
    }
    return {"demographic_default_rates": demographic_defaults}
62
+
@app.get("/credit_history_impact")
def credit_history_impact():
    """Return the default rate for each credit-history length."""
    history_impact = df.groupby("cb_person_cred_hist_length")["loan_status"].mean()
    # numpy int keys / float values are not JSON-serializable; cast both.
    return {"credit_history_default_rates": {int(k): float(v) for k, v in history_impact.items()}}
67
+
@app.get("/customer_profile_analysis")
def customer_profile_analysis():
    """Return the default rate per (age, income, home ownership) profile.

    NOTE(review): this duplicates /principal_amount_patterns exactly —
    consider consolidating or differentiating the two endpoints.
    """
    grouped = df.groupby(["person_age", "person_income", "person_home_ownership"])["loan_status"].mean()
    # Tuple keys from the multi-column groupby are not valid JSON object
    # keys; flatten to strings and cast numpy floats to builtin float.
    profile_analysis = {
        f"{age}|{income}|{ownership}": float(rate)
        for (age, income, ownership), rate in grouped.items()
    }
    return {"customer_profile_default_rates": profile_analysis}
72
+
@app.get("/loan_intent_analysis")
def loan_intent_analysis():
    """Return the default rate for each loan_intent category."""
    intent_means = df.groupby("loan_intent")["loan_status"].mean()
    # Cast values to builtin float (numpy scalars are not JSON-serializable);
    # str() on keys is a no-op for string categories but keeps this safe.
    return {"loan_intent_default_rates": {str(k): float(v) for k, v in intent_means.items()}}
77
+
@app.get("/collection_effectiveness")
def collection_effectiveness():
    """Return the mean loan_status grouped by prior default-on-file flag."""
    # NOTE(review): keys are presumably 'Y'/'N' flags — verify against the CSV.
    rates = df.groupby("cb_person_default_on_file")["loan_status"].mean()
    # Cast numpy floats to builtin float for JSON serialization.
    return {"collection_success_rate": {str(k): float(v) for k, v in rates.items()}}
82
+
@app.get("/risk_score_development")
def risk_score_development():
    """Return the mean default rate for each unique FEATURES combination.

    NOTE(review): grouping on five near-continuous columns yields roughly
    one group per row — consider binning before grouping.
    """
    grouped = df.groupby(["loan_amnt", "loan_int_rate", "person_age", "person_income", "cb_person_cred_hist_length"])["loan_status"].mean()
    # The 5-column groupby produces tuple keys, which are not valid JSON
    # object keys; join them into "|"-separated strings.
    risk_factors = {
        "|".join(str(part) for part in key): float(rate)
        for key, rate in grouped.items()
    }
    return {"risk_scores": risk_factors}
87
+
@app.post("/predict_loan_risk")
def predict_loan_risk(request: LoanRequest):
    """Predict the risk class (RandomForest) and default probability (LogisticRegression).

    Returns:
        predicted_risk_category: int class label from the classifier.
        default_probability: probability of loan_status == 1 from the
            logistic model.
    """
    # Build a one-row DataFrame with the training column names so the
    # feature order is guaranteed to match FEATURES and sklearn's
    # "X does not have valid feature names" warning is avoided
    # (both models were fitted on a named-column DataFrame).
    input_data = pd.DataFrame(
        [[
            request.loan_amnt,
            request.loan_int_rate,
            request.person_age,
            request.person_income,
            request.cb_person_cred_hist_length,
        ]],
        columns=FEATURES,
    )
    risk_class = classifier.predict(input_data)[0]
    # Column 1 of predict_proba is P(loan_status == 1).
    risk_prob = regressor.predict_proba(input_data)[0][1]
    return {"predicted_risk_category": int(risk_class), "default_probability": float(risk_prob)}
94
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ pydantic
5
+ joblib
6
+ scikit-learn