Spaces:
Sleeping
Sleeping
Harsh Yadav committed on
Commit ·
ba6d7cd
1
Parent(s): 2a34453
fix: remove XGBoost use_label_encoder (removed in v2+), reduce CNN to 3k imgs/5 epochs for HF timeout, pre-download ResNet18
Browse files- Dockerfile +11 -0
- app/models/train_all.py +3 -3
Dockerfile
CHANGED
|
@@ -60,11 +60,22 @@ tamp = load_tampered_images(n_max=150); \
|
|
| 60 |
print(f'Cached {len(auth)} authentic + {len(tamp)} tampered images'); \
|
| 61 |
"
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 64 |
# BUILD STEP 4: Train all models (uses cached data — no network calls)
|
| 65 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 66 |
RUN python -m app.models.train_all
|
| 67 |
|
|
|
|
| 68 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 69 |
# BUILD STEP 5: Verify all required model files exist — fail build if missing
|
| 70 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 60 |
print(f'Cached {len(auth)} authentic + {len(tamp)} tampered images'); \
|
| 61 |
"
|
| 62 |
|
| 63 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 64 |
+
# BUILD STEP 3.5: Pre-download ResNet18 weights
|
| 65 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 66 |
+
RUN python -c "\
|
| 67 |
+
import torchvision.models as tv_models; \
|
| 68 |
+
print('Downloading ResNet18 weights...'); \
|
| 69 |
+
tv_models.resnet18(weights=tv_models.ResNet18_Weights.DEFAULT); \
|
| 70 |
+
print('ResNet18 weights downloaded.') \
|
| 71 |
+
"
|
| 72 |
+
|
| 73 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 74 |
# BUILD STEP 4: Train all models (uses cached data — no network calls)
|
| 75 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 76 |
RUN python -m app.models.train_all
|
| 77 |
|
| 78 |
+
|
| 79 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 80 |
# BUILD STEP 5: Verify all required model files exist — fail build if missing
|
| 81 |
# ─────────────────────────────────────────────────────────────────────────────
|
app/models/train_all.py
CHANGED
|
@@ -94,7 +94,7 @@ def train_fraud_model(df: pd.DataFrame) -> None:
|
|
| 94 |
print(" Training XGBClassifier...")
|
| 95 |
xgb_model = xgb.XGBClassifier(
|
| 96 |
n_estimators=200, max_depth=6, learning_rate=0.1,
|
| 97 |
-
|
| 98 |
random_state=42, verbosity=0,
|
| 99 |
)
|
| 100 |
xgb_model.fit(X_train, y_train)
|
|
@@ -208,7 +208,7 @@ def train_image_model() -> None:
|
|
| 208 |
print(f" Created {len(tampered_from_real)} tampered versions of real certs")
|
| 209 |
|
| 210 |
# ── Step 2: Generate synthetic PIL images to fill volume ──────────────────
|
| 211 |
-
N_SYNTHETIC_PER_CLASS =
|
| 212 |
print(f"\n [Phase 2] Generating {N_SYNTHETIC_PER_CLASS * 2} synthetic images...")
|
| 213 |
|
| 214 |
all_images = [] # PIL Images
|
|
@@ -343,7 +343,7 @@ def train_image_model() -> None:
|
|
| 343 |
)
|
| 344 |
|
| 345 |
best_val_acc = 0.0
|
| 346 |
-
N_EPOCHS =
|
| 347 |
|
| 348 |
print("\n Training ResNet-18...")
|
| 349 |
for epoch in range(N_EPOCHS):
|
|
|
|
| 94 |
print(" Training XGBClassifier...")
|
| 95 |
xgb_model = xgb.XGBClassifier(
|
| 96 |
n_estimators=200, max_depth=6, learning_rate=0.1,
|
| 97 |
+
eval_metric="mlogloss",
|
| 98 |
random_state=42, verbosity=0,
|
| 99 |
)
|
| 100 |
xgb_model.fit(X_train, y_train)
|
|
|
|
| 208 |
print(f" Created {len(tampered_from_real)} tampered versions of real certs")
|
| 209 |
|
| 210 |
# ── Step 2: Generate synthetic PIL images to fill volume ──────────────────
|
| 211 |
+
N_SYNTHETIC_PER_CLASS = 1_500 # 3,000 synthetic images — fits in HF build timeout
|
| 212 |
print(f"\n [Phase 2] Generating {N_SYNTHETIC_PER_CLASS * 2} synthetic images...")
|
| 213 |
|
| 214 |
all_images = [] # PIL Images
|
|
|
|
| 343 |
)
|
| 344 |
|
| 345 |
best_val_acc = 0.0
|
| 346 |
+
N_EPOCHS = 5 # 5 epochs fits within HF Spaces 30-min build timeout
|
| 347 |
|
| 348 |
print("\n Training ResNet-18...")
|
| 349 |
for epoch in range(N_EPOCHS):
|