pricing-model / app.py
MarcinB1990's picture
Create app.py
8dcda84 verified
from huggingface_hub import hf_hub_download
import os
import json
import time
import joblib
import lightgbm
import numpy as np
import pandas as pd
import gradio as gr
from datetime import datetime, timedelta
from huggingface_hub import upload_file
from sklearn.ensemble import RandomForestRegressor
# Hugging Face access token, read from the environment. It is passed as
# `token=` to every Hub upload/download call in this file.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    # Abort at import time when the variable is absent.
    raise ValueError("HF_TOKEN not set. Add it to Space Secrets.")

# Hub repositories for the generated dataset and the trained model.
DATASET_REPO = "GFT-Poland/synthetic-pricing-dataset"
MODEL_REPO = "GFT-Poland/basic-pricing-model"

# File names; each is used both as the local path and as the path in its repo.
DATASET_FILE = "synthetic_pricing_dataset.csv"
MODEL_FILE = "model.joblib"

# Model input columns, in the order used to build the training and
# inference frames.
FEATURES = [
    "age",
    "monthly_income",
    "living_costs",
    "existing_liabilities",
    "credit_limits",
    "ltv",
    "interest_rate",
    "loan_term_years",
    "installment_type"
]

# Regression target column.
TARGET = "max_loan_amount"
# =============================
# GENERATE DATASET
# =============================
def _installment_to_loan(m, rate, years, itype):
    """Convert a monthly installment into the loan principal it can service.

    Args:
        m: Monthly installment amount.
        rate: Annual interest rate as a fraction (e.g. 0.065).
        years: Loan term in years.
        itype: Installment type code (0, 1 or 2).

    Returns:
        The principal; 0.0 when ``m`` is not positive.

    Raises:
        ValueError: If ``itype`` is not one of 0, 1, 2.
    """
    if m <= 0:
        return 0.0
    months = years * 12
    if itype == 1:
        # Type 1 uses a flat 0.82 factor on the sum of all installments.
        return m * months * 0.82
    if itype == 0:
        monthly_rate = rate / 12
    elif itype == 2:
        # Type 2 is priced with 0.05 added to the annual rate.
        monthly_rate = (rate + 0.05) / 12
    else:
        # Previously fell through and returned None, which only failed later
        # inside min(); fail here with a clear message instead.
        raise ValueError(f"Unknown installment type: {itype!r}")
    if monthly_rate == 0:
        # The annuity formula is 0/0 at a zero rate; its limit is m * months.
        return m * months
    return m * ((1 - (1 + monthly_rate) ** -months) / monthly_rate)


def _dsr_limit(monthly_income, credit_limits, ltv, interest_rate, installment_type):
    """Return the debt-service ratio cap for one applicant, clipped to [0.25, 0.55]."""
    base = 0.28 + 0.18 * np.tanh(monthly_income / 12000)
    adjustment = 0
    if ltv > 0.85:
        adjustment -= 0.10
    if interest_rate > 0.07:
        adjustment -= 0.07
    if installment_type == 2:
        adjustment -= 0.05
    if credit_limits > monthly_income * 6:
        adjustment -= 0.05
    return np.clip(base + adjustment, 0.25, 0.55)


def _simulate_applicant(rng):
    """Draw one synthetic applicant from ``rng`` and return it as a row dict.

    The order of the random draws is part of the contract: it determines the
    dataset produced for a given seed, so do not reorder them.
    """
    age = rng.randint(21, 65)
    monthly_income = rng.uniform(2000, 50000)
    living_costs = (
        900
        + 0.35 * (monthly_income ** 0.85)
        + rng.uniform(-300, 300)
    )
    existing_liabilities = monthly_income * rng.uniform(0.05, 0.30)
    credit_limits = monthly_income * rng.uniform(3, 9)
    ltv = rng.uniform(0.4, 0.95)
    interest_rate = rng.uniform(0.045, 0.095)
    # randint's upper bound is exclusive, hence the +1; the term is capped at
    # 35 years and at (75 - age).
    loan_term_years = rng.randint(10, min(35, 75 - age) + 1)
    installment_type = rng.choice([0, 1, 2], p=[0.5, 0.3, 0.2])

    disposable_income = monthly_income - living_costs - existing_liabilities
    if disposable_income <= 0:
        max_loan = 0
    else:
        max_installment = disposable_income * _dsr_limit(
            monthly_income, credit_limits, ltv, interest_rate, installment_type
        )
        loan_from_installment = _installment_to_loan(
            max_installment, interest_rate, loan_term_years, installment_type
        )
        # The affordable loan is additionally capped by income and by the
        # applicant's credit limits.
        max_loan = min(
            loan_from_installment,
            monthly_income * 240,
            credit_limits * 12,
        )

    return {
        "age": age,
        "monthly_income": round(monthly_income, 2),
        "living_costs": round(living_costs, 2),
        "existing_liabilities": round(existing_liabilities, 2),
        "credit_limits": round(credit_limits, 2),
        "ltv": round(ltv, 2),
        "interest_rate": round(interest_rate, 4),
        "loan_term_years": loan_term_years,
        "installment_type": installment_type,
        "max_loan_amount": round(max_loan, 2),
    }


def generate_dataset(n: int = 1_000_000, seed: int = 42) -> str:
    """Generate the synthetic pricing dataset, save it as CSV and upload it.

    Args:
        n: Number of rows to generate.
        seed: Seed for the random generator; the same seed yields the same rows.

    Returns:
        A status message containing the number of generated rows.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # A local RandomState produces the same stream as seeding NumPy's global
    # generator, without mutating process-wide random state.
    rng = np.random.RandomState(seed)
    rows = [_simulate_applicant(rng) for _ in range(n)]

    df = pd.DataFrame(rows)
    # A single generation timestamp is stamped on every row.
    df["update_ts"] = time.time()
    df.to_csv(DATASET_FILE, index=False)
    upload_file(
        path_or_fileobj=DATASET_FILE,
        path_in_repo=DATASET_FILE,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    return f"Creditworthiness dataset generated ({len(df)} rows)"
# =============================
# HELPER: DOWNLOAD IF NEWER
# =============================
def download_if_newer(repo_id, filename, local_path, repo_type="dataset"):
    """Refresh ``local_path`` from a Hub repo file when the fetched copy is newer.

    Args:
        repo_id: Hub repository identifier.
        filename: Path of the file inside the repository.
        local_path: Local destination path.
        repo_type: Hub repository type passed to ``hf_hub_download``.

    Returns:
        A human-readable status message describing what was done. Any failure
        while fetching the file yields the "No remote file found" message
        rather than an exception.
    """
    import shutil

    try:
        fetched_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        # NOTE(review): this is the mtime of the path returned by
        # hf_hub_download, presumably the local cache entry rather than a
        # commit time on the Hub - confirm.
        fetched_mtime = os.path.getmtime(fetched_path)
    except Exception:
        return f"No remote file found, using local {local_path}"

    # No local copy yet: always take the fetched file.
    if not os.path.exists(local_path):
        shutil.copy2(fetched_path, local_path)
        return f"Downloaded {local_path} from repo."

    # Keep the local copy unless the fetched one is strictly newer.
    if fetched_mtime <= os.path.getmtime(local_path):
        return f"Local {local_path} is up-to-date."

    shutil.copy2(fetched_path, local_path)
    return f"Updated local {local_path} from repo."
# =============================
# TRAIN MODEL
# =============================
def train_model():
    """Train the LightGBM regressor on the dataset CSV and upload the model.

    Refreshes the local dataset file from the Hub if a newer copy is
    available, fits the model on ``FEATURES`` -> ``TARGET``, saves it with
    joblib to ``MODEL_FILE`` and uploads that file to ``MODEL_REPO``.

    Returns:
        A status message.

    Raises:
        ValueError: If a feature in ``FEATURES`` has no monotone constraint.
    """
    from lightgbm import LGBMRegressor

    download_if_newer(
        DATASET_REPO,
        DATASET_FILE,
        DATASET_FILE,
        repo_type="dataset",
    )
    df = pd.read_csv(DATASET_FILE)
    X = df[FEATURES]
    y = df[TARGET]

    # Constraints are keyed by feature name so that reordering or extending
    # FEATURES cannot silently attach a constraint to the wrong column.
    monotone_by_feature = {
        "age": 0,
        "monthly_income": +1,
        "living_costs": -1,
        "existing_liabilities": -1,
        "credit_limits": -1,
        "ltv": -1,
        "interest_rate": -1,
        "loan_term_years": +1,
        # NOTE(review): installment_type is a category code (0/1/2) and the
        # synthetic generator does not look monotone in it (type 1 uses a
        # different formula than types 0 and 2) - confirm -1 is intended.
        "installment_type": -1,
    }
    unconstrained = [name for name in FEATURES if name not in monotone_by_feature]
    if unconstrained:
        raise ValueError(
            f"No monotone constraint defined for: {', '.join(unconstrained)}"
        )

    model = LGBMRegressor(
        n_estimators=600,
        learning_rate=0.05,
        max_depth=7,
        num_leaves=40,
        subsample=0.85,
        # LightGBM ignores `subsample` unless the bagging frequency is > 0.
        subsample_freq=1,
        colsample_bytree=0.85,
        monotone_constraints=[monotone_by_feature[name] for name in FEATURES],
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X, y)

    joblib.dump(model, MODEL_FILE)
    upload_file(
        path_or_fileobj=MODEL_FILE,
        path_in_repo=MODEL_FILE,
        repo_id=MODEL_REPO,
        repo_type="model",
        token=HF_TOKEN,
    )
    return "Creditworthiness model trained and uploaded"
# =============================
# INVOKE MODEL
# =============================
def invoke_model(json_input):
    """Predict the maximum loan amount for one applicant given as JSON text.

    Args:
        json_input: JSON object (as a string) with one entry per name in
            ``FEATURES``. Extra keys are ignored.

    Returns:
        ``{"max_loan_amount": <float>, "info": <download status>}`` on
        success, or ``{"error": <message>}`` on any failure. This function is
        the UI boundary, so it never raises.
    """
    try:
        # Parse and validate first so bad input never triggers a download.
        data = json.loads(json_input)
        if not isinstance(data, dict):
            return {
                "error": "Input JSON must be an object mapping feature names to values"
            }
        # Without this check a missing key becomes a NaN column and the model
        # still returns a (meaningless) prediction.
        missing = [name for name in FEATURES if name not in data]
        if missing:
            return {"error": f"Missing features: {', '.join(missing)}"}

        msg = download_if_newer(MODEL_REPO, MODEL_FILE, MODEL_FILE, repo_type="model")
        # SECURITY NOTE: joblib.load unpickles the file and can execute
        # arbitrary code; only safe while MODEL_REPO is fully trusted.
        model = joblib.load(MODEL_FILE)
        X = pd.DataFrame([data], columns=FEATURES)
        prediction = model.predict(X)[0]
        return {"max_loan_amount": round(float(prediction), 2), "info": msg}
    except Exception as e:
        return {"error": str(e)}
# =============================
# UI
# =============================
# Gradio front end: two maintenance buttons that report into a shared status
# box, and a JSON textbox for invoking the trained model.
with gr.Blocks() as app:
    gr.Markdown("# HF Pricing Engine – MVP")

    with gr.Row():
        gen_btn = gr.Button("Generate dataset")
        train_btn = gr.Button("Train model")

    # Both maintenance actions write their returned message here.
    status = gr.Textbox(label="Status")
    gen_btn.click(generate_dataset, outputs=status)
    train_btn.click(train_model, outputs=status)

    # Typo in the user-facing legend fixed ("intallment typ").
    gr.Markdown(
        "## Invoke model, for installment type: 0 = fixed, 1 = decreasing, 2 = variable"
    )
    # Pre-filled example request containing every model feature.
    json_input = gr.Textbox(
        label="Input JSON",
        lines=8,
        value=json.dumps({
            "age": 34,
            "monthly_income": 10000,
            "living_costs": 1000,
            "existing_liabilities": 1000,
            "credit_limits": 50000,
            "ltv": 0.8,
            "interest_rate": 0.065,
            "loan_term_years": 30,
            "installment_type": 0
        }, indent=2),
    )
    invoke_btn = gr.Button("Invoke")
    result = gr.JSON(label="Result")
    invoke_btn.click(invoke_model, inputs=json_input, outputs=result)

# Start the web server when the file is executed.
app.launch()