Spaces:

GFT-Poland
/

pricing-model

Sleeping

App Files Files Community

pricing-model / app.py

MarcinB1990

Create app.py

8dcda84 verified 2 months ago

raw

history blame contribute delete

7.67 kB

	from huggingface_hub import hf_hub_download
	import os
	import json
	import time
	import joblib
	import lightgbm
	import numpy as np
	import pandas as pd
	import gradio as gr
	from datetime import datetime, timedelta
	from huggingface_hub import upload_file
	from sklearn.ensemble import RandomForestRegressor

	# Hugging Face access token, expected to be supplied as a Space secret.
	# Every Hub download and upload in this file passes it along, so the app
	# refuses to start without it.
	HF_TOKEN = os.getenv("HF_TOKEN")
	if HF_TOKEN is None:
	    raise ValueError("HF_TOKEN not set. Add it to Space Secrets.")

	# Hub repositories: one holds the synthetic training data, the other the
	# fitted model.
	DATASET_REPO = "GFT-Poland/synthetic-pricing-dataset"
	MODEL_REPO = "GFT-Poland/basic-pricing-model"

	# File names, used both for the local copy and as the path inside the repo.
	DATASET_FILE = "synthetic_pricing_dataset.csv"
	MODEL_FILE = "model.joblib"

	# Model input columns, in the order they are fed to the model.
	FEATURES = [
	    # applicant profile
	    "age",
	    "monthly_income",
	    "living_costs",
	    "existing_liabilities",
	    "credit_limits",
	    # loan parameters
	    "ltv",
	    "interest_rate",
	    "loan_term_years",
	    "installment_type",
	]

	# Column the model is trained to predict.
	TARGET = "max_loan_amount"

	# =============================
	# GENERATE DATASET
	# =============================
	def _installment_to_loan(installment, annual_rate, years, installment_type):
	    """Convert an affordable monthly installment into a loan principal.

	    Args:
	        installment: Monthly installment the applicant can carry.
	        annual_rate: Yearly interest rate as a fraction (it is divided by 12
	            to obtain the monthly rate).
	        years: Loan term in years.
	        installment_type: 0, 1 or 2; selects the formula used below.

	    Returns:
	        The principal the installment can service, or 0.0 when the
	        installment is not positive.

	    Raises:
	        ValueError: If ``installment_type`` is not 0, 1 or 2. Previously this
	            case fell through and returned ``None``, which only surfaced
	            later as a ``TypeError`` inside ``min()``.
	    """
	    if installment <= 0:
	        return 0.0

	    months = years * 12

	    if installment_type == 1:
	        # Flat approximation: 82% of the sum of all payments.
	        return installment * months * 0.82

	    if installment_type == 0:
	        monthly_rate = annual_rate / 12
	    elif installment_type == 2:
	        # Same annuity formula, priced with a 5 percentage point rate buffer.
	        monthly_rate = (annual_rate + 0.05) / 12
	    else:
	        raise ValueError(f"Unknown installment_type: {installment_type!r}")

	    # Present value of a level payment stream (annuity).
	    return installment * ((1 - (1 + monthly_rate) ** -months) / monthly_rate)


	def _simulate_applicant():
	    """Draw one synthetic applicant from the global NumPy RNG and price it.

	    The random draws happen in a fixed order (age, income, living-cost noise,
	    liabilities, credit limits, LTV, rate, term, installment type); changing
	    that order changes the dataset produced for a given seed.

	    Returns:
	        A dict with one value per dataset column (all FEATURES plus
	        ``max_loan_amount``), monetary values rounded to 2 decimals.
	    """
	    age = np.random.randint(21, 65)  # upper bound exclusive: 21..64
	    monthly_income = np.random.uniform(2000, 50000)

	    living_costs = (
	        900
	        + 0.35 * (monthly_income ** 0.85)
	        + np.random.uniform(-300, 300)
	    )

	    existing_liabilities = monthly_income * np.random.uniform(0.05, 0.30)
	    credit_limits = monthly_income * np.random.uniform(3, 9)

	    ltv = np.random.uniform(0.4, 0.95)
	    interest_rate = np.random.uniform(0.045, 0.095)

	    # Term is 10..35 years but capped so that age + term does not exceed 75.
	    loan_term_years = np.random.randint(10, min(35, 75 - age) + 1)

	    installment_type = np.random.choice([0, 1, 2], p=[0.5, 0.3, 0.2])

	    disposable_income = monthly_income - living_costs - existing_liabilities

	    if disposable_income <= 0:
	        max_loan = 0
	    else:
	        # Base debt-service ratio grows with income and saturates via tanh.
	        dsr_base = 0.28 + 0.18 * np.tanh(monthly_income / 12000)

	        # Penalties for riskier profiles; each one lowers the allowed ratio.
	        dsr_adj = 0
	        if ltv > 0.85:
	            dsr_adj -= 0.10
	        if interest_rate > 0.07:
	            dsr_adj -= 0.07
	        if installment_type == 2:
	            dsr_adj -= 0.05
	        if credit_limits > monthly_income * 6:
	            dsr_adj -= 0.05

	        dsr_limit = np.clip(dsr_base + dsr_adj, 0.25, 0.55)
	        max_installment = disposable_income * dsr_limit

	        loan_from_installment = _installment_to_loan(
	            max_installment,
	            interest_rate,
	            loan_term_years,
	            installment_type,
	        )

	        # The affordable loan is additionally capped by income and by limits.
	        max_loan = min(
	            loan_from_installment,
	            monthly_income * 240,
	            credit_limits * 12,
	        )

	    return {
	        "age": age,
	        "monthly_income": round(monthly_income, 2),
	        "living_costs": round(living_costs, 2),
	        "existing_liabilities": round(existing_liabilities, 2),
	        "credit_limits": round(credit_limits, 2),
	        "ltv": round(ltv, 2),
	        "interest_rate": round(interest_rate, 4),
	        "loan_term_years": loan_term_years,
	        "installment_type": installment_type,
	        "max_loan_amount": round(max_loan, 2),
	    }


	def generate_dataset(n: int = 1_000_000, seed: int = 42) -> str:
	    """Generate the synthetic pricing dataset, save it and upload it to the Hub.

	    Side effects: reseeds NumPy's global RNG, writes ``DATASET_FILE`` in the
	    working directory and uploads it to ``DATASET_REPO``.

	    Args:
	        n: Number of applicant rows to generate. Defaults to the previously
	            hard-coded 1,000,000.
	        seed: Seed for NumPy's global RNG. Defaults to the previously
	            hard-coded 42, so a call without arguments draws the same
	            feature/target values as before.

	    Returns:
	        A status message that includes the number of generated rows.

	    Raises:
	        ValueError: If ``n`` is smaller than 1.
	    """
	    if n < 1:
	        raise ValueError("n must be a positive integer")

	    np.random.seed(seed)
	    rows = [_simulate_applicant() for _ in range(n)]

	    df = pd.DataFrame(rows)
	    # Same generation timestamp (seconds since the epoch) on every row.
	    df["update_ts"] = time.time()

	    df.to_csv(DATASET_FILE, index=False)

	    upload_file(
	        path_or_fileobj=DATASET_FILE,
	        path_in_repo=DATASET_FILE,
	        repo_id=DATASET_REPO,
	        repo_type="dataset",
	        token=HF_TOKEN,
	    )

	    return f"Creditworthiness dataset generated ({len(df)} rows)"


	# =============================
	# HELPER: DOWNLOAD IF NEWER
	# =============================
	def download_if_newer(repo_id, filename, local_path, repo_type="dataset"):
	    """Refresh ``local_path`` from the Hub when the Hub copy looks newer.

	    The file is fetched with ``hf_hub_download`` and the modification time of
	    the path it returns is compared with that of ``local_path``.

	    Args:
	        repo_id: Hub repository to fetch from.
	        filename: Path of the file inside the repository.
	        local_path: Where the working copy lives on disk.
	        repo_type: Repository type passed to ``hf_hub_download``.

	    Returns:
	        A human-readable message saying whether the local file was
	        downloaded, updated, already up-to-date, or whether the fetch failed.
	        Any exception raised while fetching is reported as "no remote file".
	    """
	    import shutil

	    try:
	        fetched_path = hf_hub_download(
	            repo_id=repo_id,
	            filename=filename,
	            repo_type=repo_type,
	            token=HF_TOKEN,
	        )
	        remote_mtime = os.path.getmtime(fetched_path)
	    except Exception:
	        return f"No remote file found, using local {local_path}"

	    # No working copy yet: take whatever the Hub has.
	    if not os.path.exists(local_path):
	        shutil.copy2(fetched_path, local_path)
	        return f"Downloaded {local_path} from repo."

	    # Working copy is at least as recent as the fetched file: keep it.
	    if remote_mtime <= os.path.getmtime(local_path):
	        return f"Local {local_path} is up-to-date."

	    shutil.copy2(fetched_path, local_path)
	    return f"Updated local {local_path} from repo."


	# =============================
	# TRAIN MODEL
	# =============================
	def train_model():
	    """Fit the LightGBM pricing model on the dataset and upload it to the Hub.

	    Steps: refresh the local dataset copy from ``DATASET_REPO``, read it,
	    fit an ``LGBMRegressor`` on ``FEATURES`` -> ``TARGET``, dump the model to
	    ``MODEL_FILE`` with joblib and upload that file to ``MODEL_REPO``.

	    Returns:
	        A fixed status message once the upload call has returned.

	    Raises:
	        KeyError: If a name in ``FEATURES`` has no monotone constraint
	            declared below (fail fast instead of silently misaligning).
	        Exception: Errors from reading the CSV, fitting or uploading
	            propagate to the caller unchanged.
	    """
	    from lightgbm import LGBMRegressor

	    download_if_newer(
	        DATASET_REPO,
	        DATASET_FILE,
	        DATASET_FILE,
	        repo_type="dataset",
	    )

	    df = pd.read_csv(DATASET_FILE)

	    X = df[FEATURES]
	    y = df[TARGET]

	    # Direction in which each feature may move the prediction
	    # (+1 non-decreasing, -1 non-increasing, 0 unconstrained). Keyed by name
	    # so that reordering FEATURES cannot silently shift a constraint onto
	    # the wrong column.
	    # NOTE(review): installment_type is a 0/1/2 code treated as ordinal
	    # here; confirm that a decreasing constraint matches the training data.
	    monotone_by_feature = {
	        "age": 0,
	        "monthly_income": 1,
	        "living_costs": -1,
	        "existing_liabilities": -1,
	        "credit_limits": -1,
	        "ltv": -1,
	        "interest_rate": -1,
	        "loan_term_years": 1,
	        "installment_type": -1,
	    }
	    monotone_constraints = [monotone_by_feature[name] for name in FEATURES]

	    model = LGBMRegressor(
	        n_estimators=600,
	        learning_rate=0.05,
	        max_depth=7,
	        num_leaves=40,
	        subsample=0.85,
	        # LightGBM ignores `subsample` unless bagging is switched on with a
	        # positive frequency; without this line the 0.85 above is a no-op.
	        subsample_freq=1,
	        colsample_bytree=0.85,
	        monotone_constraints=monotone_constraints,
	        random_state=42,
	        n_jobs=-1,
	    )

	    model.fit(X, y)

	    joblib.dump(model, MODEL_FILE)

	    upload_file(
	        path_or_fileobj=MODEL_FILE,
	        path_in_repo=MODEL_FILE,
	        repo_id=MODEL_REPO,
	        repo_type="model",
	        token=HF_TOKEN,
	    )

	    return "Creditworthiness model trained and uploaded"



	# =============================
	# INVOKE MODEL
	# =============================
	def invoke_model(json_input):
	    """Predict the maximum loan amount for one applicant described as JSON.

	    Args:
	        json_input: JSON text. Normally an object with one entry per name in
	            ``FEATURES``; extra keys are ignored.

	    Returns:
	        ``{"max_loan_amount": <float>, "info": <download status>}`` on
	        success, or ``{"error": <message>}`` when the input is invalid or
	        any step fails. Errors are returned rather than raised so the UI can
	        display them.
	    """
	    try:
	        # Validate the request first so that bad input is rejected before
	        # any download or model loading happens.
	        data = json.loads(json_input)

	        if isinstance(data, dict):
	            # A missing key would otherwise become a NaN column and yield a
	            # prediction without any warning.
	            missing = [name for name in FEATURES if name not in data]
	            if missing:
	                return {"error": f"Missing required fields: {', '.join(missing)}"}

	        msg = download_if_newer(MODEL_REPO, MODEL_FILE, MODEL_FILE, repo_type="model")

	        # NOTE: joblib.load unpickles the file, which can execute arbitrary
	        # code; MODEL_REPO must stay a trusted repository.
	        model = joblib.load(MODEL_FILE)

	        X = pd.DataFrame([data], columns=FEATURES)
	        prediction = model.predict(X)[0]

	        return {"max_loan_amount": round(float(prediction), 2), "info": msg}

	    except Exception as e:
	        return {"error": str(e)}


	# =============================
	# UI
	# =============================
	# Layout: a row of maintenance buttons sharing one status box, followed by a
	# JSON request editor and the prediction result.
	with gr.Blocks() as app:
	    gr.Markdown("# HF Pricing Engine – MVP")

	    with gr.Row():
	        gen_btn = gr.Button("Generate dataset")
	        train_btn = gr.Button("Train model")

	    # Both maintenance actions return a status string shown here.
	    status = gr.Textbox(label="Status")
	    gen_btn.click(generate_dataset, outputs=status)
	    train_btn.click(train_model, outputs=status)

	    gr.Markdown("## Invoke model, for installment type: 0 = fixed, 1 = decreasing, 2 = variable")
	    json_input = gr.Textbox(
	        label="Input JSON",
	        lines=8,
	        # Pre-filled example request containing every model feature.
	        value=json.dumps({
	            "age": 34,
	            "monthly_income": 10000,
	            "living_costs": 1000,
	            "existing_liabilities": 1000,
	            "credit_limits": 50000,
	            "ltv": 0.8,
	            "interest_rate": 0.065,
	            "loan_term_years": 30,
	            "installment_type": 0
	        }, indent=2),
	    )

	    invoke_btn = gr.Button("Invoke")
	    result = gr.JSON(label="Result")
	    invoke_btn.click(invoke_model, inputs=json_input, outputs=result)

	app.launch()