Spaces:
Sleeping
Sleeping
| from huggingface_hub import hf_hub_download | |
| import os | |
| import json | |
| import time | |
| import joblib | |
| import lightgbm | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| from datetime import datetime, timedelta | |
| from huggingface_hub import upload_file | |
| from sklearn.ensemble import RandomForestRegressor | |
# Hub access token, read from the process environment (e.g. a Space secret).
# It is passed to every upload/download call in this file.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Reject an empty value as well as a missing one: an empty string would
    # pass an ``is None`` check and only fail later, at the first Hub call.
    raise ValueError("HF_TOKEN not set. Add it to Space Secrets.")
# Hub repositories the dataset and the trained model are uploaded to.
DATASET_REPO = "GFT-Poland/synthetic-pricing-dataset"
MODEL_REPO = "GFT-Poland/basic-pricing-model"

# File names, used both for the local copy and as the path inside each repo.
DATASET_FILE = "synthetic_pricing_dataset.csv"
MODEL_FILE = "model.joblib"

# Column the model is trained to predict.
TARGET = "max_loan_amount"

# Model input columns. The order matters: train_model() lists its monotone
# constraints in this same order.
FEATURES = [
    "age", "monthly_income", "living_costs",
    "existing_liabilities", "credit_limits", "ltv",
    "interest_rate", "loan_term_years", "installment_type",
]
# =============================
# GENERATE DATASET
# =============================
def _installment_to_loan(installment, rate, years, itype):
    """Return the loan principal that a monthly installment can service.

    Args:
        installment: Maximum affordable monthly installment.
        rate: Annual interest rate as a fraction (0.065 for 6.5%).
        years: Loan term in years.
        itype: Installment type code (0 = fixed, 1 = decreasing, 2 = variable).

    Returns:
        The principal as a float; 0.0 when the installment is not positive.

    Raises:
        ValueError: If ``itype`` is not one of 0, 1, 2.
    """
    if installment <= 0:
        return 0.0
    months = years * 12
    if itype == 1:
        # Decreasing installments: flat 0.82 factor on the sum of payments.
        return installment * months * 0.82
    if itype == 0:
        monthly_rate = rate / 12
    elif itype == 2:
        # Variable rate: the annuity is priced with a +0.05 buffer on the rate.
        monthly_rate = (rate + 0.05) / 12
    else:
        raise ValueError(f"Unknown installment type: {itype!r}")
    # Present value of an annuity paying `installment` for `months` periods.
    return installment * ((1 - (1 + monthly_rate) ** -months) / monthly_rate)


def _simulate_applicant(rng):
    """Draw one synthetic applicant from ``rng`` and return it as a row dict.

    The order of the random draws is significant: it determines the generated
    values for a given seed, so it must not be rearranged.
    """
    age = rng.randint(21, 65)  # upper bound is exclusive: ages 21..64
    monthly_income = rng.uniform(2000, 50000)
    living_costs = (
        900
        + 0.35 * (monthly_income ** 0.85)
        + rng.uniform(-300, 300)
    )
    existing_liabilities = monthly_income * rng.uniform(0.05, 0.30)
    credit_limits = monthly_income * rng.uniform(3, 9)
    ltv = rng.uniform(0.4, 0.95)
    interest_rate = rng.uniform(0.045, 0.095)
    # Term is 10..35 years, shortened so that age + term never exceeds 75.
    loan_term_years = rng.randint(10, min(35, 75 - age) + 1)
    installment_type = rng.choice([0, 1, 2], p=[0.5, 0.3, 0.2])

    disposable_income = monthly_income - living_costs - existing_liabilities

    if disposable_income <= 0:
        max_loan = 0
    else:
        # Share of disposable income usable for the installment (presumably a
        # debt-service ratio): grows with income, then reduced by risk flags.
        dsr_base = 0.28 + 0.18 * np.tanh(monthly_income / 12000)
        dsr_adj = 0
        if ltv > 0.85:
            dsr_adj -= 0.10
        if interest_rate > 0.07:
            dsr_adj -= 0.07
        if installment_type == 2:
            dsr_adj -= 0.05
        if credit_limits > monthly_income * 6:
            dsr_adj -= 0.05
        dsr_limit = np.clip(dsr_base + dsr_adj, 0.25, 0.55)

        max_installment = disposable_income * dsr_limit
        loan_from_installment = _installment_to_loan(
            max_installment,
            interest_rate,
            loan_term_years,
            installment_type,
        )
        # The affordable loan is additionally capped by income and by limits.
        max_loan = min(
            loan_from_installment,
            monthly_income * 240,
            credit_limits * 12,
        )

    return {
        "age": age,
        "monthly_income": round(monthly_income, 2),
        "living_costs": round(living_costs, 2),
        "existing_liabilities": round(existing_liabilities, 2),
        "credit_limits": round(credit_limits, 2),
        "ltv": round(ltv, 2),
        "interest_rate": round(interest_rate, 4),
        "loan_term_years": loan_term_years,
        "installment_type": installment_type,
        "max_loan_amount": round(max_loan, 2),
    }


def generate_dataset(n=1_000_000, seed=42):
    """Generate the synthetic pricing dataset, save it and upload it.

    Writes the rows to ``DATASET_FILE`` as CSV (with an extra ``update_ts``
    column holding the generation timestamp) and uploads that file to
    ``DATASET_REPO`` on the Hub.

    Args:
        n: Number of synthetic applicants to generate.
        seed: Seed of the random generator; the same seed yields the same rows.

    Returns:
        A status message containing the number of generated rows.
    """
    # A local legacy RandomState yields the same stream as seeding the global
    # numpy generator with np.random.seed(seed), without mutating global state.
    rng = np.random.RandomState(seed)
    rows = [_simulate_applicant(rng) for _ in range(n)]

    df = pd.DataFrame(rows)
    df["update_ts"] = time.time()
    df.to_csv(DATASET_FILE, index=False)
    upload_file(
        path_or_fileobj=DATASET_FILE,
        path_in_repo=DATASET_FILE,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    return f"Creditworthiness dataset generated ({len(df)} rows)"
# =============================
# HELPER: DOWNLOAD IF NEWER
# =============================
def download_if_newer(repo_id, filename, local_path, repo_type="dataset"):
    """Refresh ``local_path`` from the Hub when the fetched copy is newer.

    Args:
        repo_id: Hub repository to fetch from.
        filename: Path of the file inside the repository.
        local_path: Local file to create or overwrite.
        repo_type: Hub repository type (``"dataset"`` or ``"model"``).

    Returns:
        A human-readable status message describing what was done. Failures to
        fetch the remote file are not raised; they are logged and reported as
        "No remote file found".
    """
    import logging
    import shutil

    try:
        hf_file = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        # NOTE(review): this is the mtime of the path returned by
        # hf_hub_download, which presumably reflects when the file was cached
        # locally rather than when it was committed - confirm this is the
        # intended freshness check.
        repo_mtime = os.path.getmtime(hf_file)
    except Exception as exc:
        # Deliberately best-effort: callers fall back to the local file. Log
        # the cause so network/auth problems are not mistaken for "missing".
        logging.getLogger(__name__).warning(
            "Could not fetch %s from %s: %s", filename, repo_id, exc
        )
        return f"No remote file found, using local {local_path}"

    if not os.path.exists(local_path):
        shutil.copy2(hf_file, local_path)
        return f"Downloaded {local_path} from repo."

    if repo_mtime > os.path.getmtime(local_path):
        # copy2 also copies the mtime, so the next call sees an up-to-date file.
        shutil.copy2(hf_file, local_path)
        return f"Updated local {local_path} from repo."

    return f"Local {local_path} is up-to-date."
# =============================
# TRAIN MODEL
# =============================
def train_model():
    """Train the LightGBM pricing model on the dataset and upload it.

    Refreshes ``DATASET_FILE`` from ``DATASET_REPO`` if possible, fits an
    ``LGBMRegressor`` on ``FEATURES`` -> ``TARGET``, saves it to ``MODEL_FILE``
    with joblib and uploads that file to ``MODEL_REPO``.

    Returns:
        A status message once the model has been uploaded.
    """
    from lightgbm import LGBMRegressor

    download_if_newer(
        DATASET_REPO,
        DATASET_FILE,
        DATASET_FILE,
        repo_type="dataset",
    )
    df = pd.read_csv(DATASET_FILE)
    X = df[FEATURES]
    y = df[TARGET]

    # Direction in which the prediction may move as each feature grows
    # (+1 non-decreasing, -1 non-increasing, 0 unconstrained). Keyed by name
    # so the list passed to LightGBM always follows the order of FEATURES.
    # NOTE(review): generate_dataset() caps the loan at credit_limits * 12 and
    # uses a different formula for installment type 1, so the -1 constraints on
    # credit_limits and installment_type may not match the data - confirm.
    constraint_by_feature = {
        "age": 0,
        "monthly_income": +1,
        "living_costs": -1,
        "existing_liabilities": -1,
        "credit_limits": -1,
        "ltv": -1,
        "interest_rate": -1,
        "loan_term_years": +1,
        "installment_type": -1,  # installment type risk
    }

    model = LGBMRegressor(
        n_estimators=600,
        learning_rate=0.05,
        max_depth=7,
        num_leaves=40,
        subsample=0.85,
        # Row subsampling is only active when subsample_freq > 0; with the
        # default of 0 the subsample=0.85 setting above is silently ignored.
        subsample_freq=1,
        colsample_bytree=0.85,
        monotone_constraints=[constraint_by_feature[name] for name in FEATURES],
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X, y)

    joblib.dump(model, MODEL_FILE)
    upload_file(
        path_or_fileobj=MODEL_FILE,
        path_in_repo=MODEL_FILE,
        repo_id=MODEL_REPO,
        repo_type="model",
        token=HF_TOKEN,
    )
    return "Creditworthiness model trained and uploaded"
# =============================
# INVOKE MODEL
# =============================
def invoke_model(json_input):
    """Predict the maximum loan amount for one applicant given as JSON.

    Args:
        json_input: JSON text of an object with one value per name in
            ``FEATURES``. Extra keys are ignored.

    Returns:
        ``{"max_loan_amount": <float>, "info": <model refresh status>}`` on
        success, or ``{"error": <message>}`` on any failure. Never raises, so
        the UI always receives a JSON-serialisable result.
    """
    # Validate the input first, so bad requests fail fast without touching
    # the network or loading the model.
    try:
        data = json.loads(json_input)
    except (TypeError, ValueError) as exc:
        return {"error": f"Invalid JSON input: {exc}"}
    if not isinstance(data, dict):
        return {"error": "Input JSON must be an object mapping feature names to values"}
    # A missing key would otherwise become NaN in the frame below and still
    # produce a prediction, silently.
    missing = [name for name in FEATURES if name not in data]
    if missing:
        return {"error": f"Missing feature(s): {', '.join(missing)}"}

    try:
        msg = download_if_newer(MODEL_REPO, MODEL_FILE, MODEL_FILE, repo_type="model")
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files from a repository you control.
        model = joblib.load(MODEL_FILE)
        X = pd.DataFrame([data], columns=FEATURES)
        prediction = model.predict(X)[0]
        return {"max_loan_amount": round(float(prediction), 2), "info": msg}
    except Exception as e:
        # UI boundary: report the failure instead of crashing the request.
        return {"error": str(e)}
# =============================
# UI
# =============================
# Gradio front end: two buttons that run dataset generation and training
# (their status message goes to one textbox), plus a JSON box for invoking
# the trained model.
# NOTE(review): the nesting below was reconstructed from the code's logic
# because the original indentation was lost - confirm that the Row is meant
# to hold only the two buttons.
with gr.Blocks() as app:
    gr.Markdown("# HF Pricing Engine – MVP")

    with gr.Row():
        gen_btn = gr.Button("Generate dataset")
        train_btn = gr.Button("Train model")

    status = gr.Textbox(label="Status")
    gen_btn.click(generate_dataset, outputs=status)
    train_btn.click(train_model, outputs=status)

    gr.Markdown(
        "## Invoke model, for installment type: "
        "0 = fixed, 1 = decreasing, 2 = variable"
    )
    # Pre-filled example request containing every feature the model expects.
    json_input = gr.Textbox(
        label="Input JSON",
        lines=8,
        value=json.dumps({
            "age": 34,
            "monthly_income": 10000,
            "living_costs": 1000,
            "existing_liabilities": 1000,
            "credit_limits": 50000,
            "ltv": 0.8,
            "interest_rate": 0.065,
            "loan_term_years": 30,
            "installment_type": 0
        }, indent=2),
    )
    invoke_btn = gr.Button("Invoke")
    result = gr.JSON(label="Result")
    invoke_btn.click(invoke_model, inputs=json_input, outputs=result)

app.launch()