# anemia-api / make_scaler.py
# sumoy47's picture
# Upload 6 files
# 4ec9b8b verified
import joblib
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Feature map for the frontend, in the exact order the frontend sends values.
# Each entry is [min, max], taken from the corresponding slider limits.
ranges = [
    # 1-2: Demographics
    [1, 100],  # Age
    [0, 1],  # Gender (0=Female, 1=Male)
    # 3-11: RBC & Iron
    [5, 20],  # HGB
    [2, 8],  # RBC Count
    [15, 60],  # HCT
    [50, 120],  # MCV
    [15, 40],  # MCH
    [28, 40],  # MCHC
    [10, 30],  # RDW-CV
    [30, 60],  # RDW-SD
    [10, 500],  # Ferritin
    # 12-18: WBC
    [2000, 20000],  # WBC Count
    [0, 100],  # Neutrophils %
    [0, 100],  # Lymphocytes %
    [0, 100],  # Monocytes %
    [0, 100],  # Eosinophils %
    [0, 10],  # Basophils %
    [0, 10],  # NLR
    # 19-22: Platelets
    [50000, 600000],  # Platelets
    [5, 15],  # MPV
    [0, 1],  # PCT
    [0, 30],  # PDW
    # 23-25: History
    [0, 1],  # History_Anemia (0=No, 1=Yes)
    [0, 1],  # Substance_Use
    [0, 1],  # Chronic_Illness
]


def bounds_from_ranges(feature_ranges):
    """Return the per-feature limits as a (2, n_features) float array.

    Row 0 holds every feature's minimum and row 1 every feature's maximum,
    in the same order as *feature_ranges*.

    Args:
        feature_ranges: sequence of ``[min, max]`` pairs, one per feature.

    Returns:
        numpy.ndarray of shape ``(2, len(feature_ranges))`` and dtype float.

    Raises:
        ValueError: if the input is not a non-empty sequence of two-element
            pairs, or if any minimum is greater than its maximum.
    """
    bounds = np.asarray(feature_ranges, dtype=float)
    if bounds.ndim != 2 or bounds.shape[1] != 2:
        raise ValueError("each feature range must be a [min, max] pair")
    if np.any(bounds[:, 0] > bounds[:, 1]):
        raise ValueError("feature range minimum exceeds its maximum")
    return bounds.T


def main():
    """Fit a MinMaxScaler to the configured ranges and save it as scaler.pkl."""
    print(f"✅ Configured for {len(ranges)} features.")

    print("⚙️ Fitting Scaler to your specific Feature Map...")
    scaler = MinMaxScaler()
    # Fit on the two boundary rows (all minimums, all maximums) instead of
    # random samples: random draws almost never land on the endpoints, so the
    # learned min/max would only approximate the slider limits and the extreme
    # slider positions would scale to slightly outside [0, 1].
    scaler.fit(bounds_from_ranges(ranges))

    # Save
    joblib.dump(scaler, "scaler.pkl")
    print("✅ 'scaler.pkl' created! It now understands values like 'WBC = 7000'.")


if __name__ == "__main__":
    main()