Vu Anh committed on
Commit
81717a0
·
1 Parent(s): ae0d039

Add model validation script with Hugging Face Hub support

Browse files

- Add validate.py to download and test model from HF Hub
- Create script to save model in joblib format
- Add huggingface-hub and joblib dependencies
- Include sklearn_model.joblib for HF Hub compatibility
- Update .gitignore to allow sklearn_model.joblib

Files changed (5) hide show
  1. .gitignore +1 -2
  2. pyproject.toml +2 -0
  3. save_joblib_model.py +21 -0
  4. sklearn_model.joblib +3 -0
  5. validate.py +123 -0
.gitignore CHANGED
@@ -41,10 +41,9 @@ uv.lock
41
  .ipynb_checkpoints
42
  *.ipynb
43
 
44
- # Model files
45
  *.pkl
46
  *.pickle
47
- *.joblib
48
  *.h5
49
  *.hdf5
50
  *.pth
 
41
  .ipynb_checkpoints
42
  *.ipynb
43
 
44
+ # Model files (keep sklearn_model.joblib for HF Hub)
45
  *.pkl
46
  *.pickle
 
47
  *.h5
48
  *.hdf5
49
  *.pth
pyproject.toml CHANGED
@@ -7,6 +7,8 @@ requires-python = ">=3.10"
7
  dependencies = [
8
  "scikit-learn>=1.3.0",
9
  "numpy>=1.24.0",
 
 
10
  ]
11
 
12
  [dependency-groups]
 
7
  dependencies = [
8
  "scikit-learn>=1.3.0",
9
  "numpy>=1.24.0",
10
+ "huggingface-hub>=0.35.0",
11
+ "joblib>=1.5.2",
12
  ]
13
 
14
  [dependency-groups]
save_joblib_model.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to save the trained model in joblib format for Hugging Face Hub.
4
+ """
5
+
6
+ import joblib
7
+ from model import SonarModel
8
+
9
+
10
def main():
    """Re-serialize the pickled Sonar model as a joblib file.

    Loads ``sonar_model.pkl`` through ``SonarModel.load`` and dumps the
    resulting object to ``sklearn_model.joblib``; both paths are relative
    to the working directory. Progress is reported on stdout.
    """
    pickle_path = "sonar_model.pkl"
    joblib_path = "sklearn_model.joblib"

    print("Loading trained model from pickle...")
    trained_model = SonarModel.load(pickle_path)

    print("Saving model in joblib format...")
    joblib.dump(trained_model, joblib_path)

    # Closing messages, printed in order once the dump has completed.
    closing_messages = (
        "Model saved as sklearn_model.joblib",
        "\nThis file should be uploaded to Hugging Face Hub for the validate.py script to work.",
    )
    for message in closing_messages:
        print(message)


if __name__ == "__main__":
    main()
sklearn_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ae232c454dc00511c23d8201c1287d4680c8949dee41a2f2ded4b8a6f7c8d2
3
+ size 1322748
validate.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Validation script for the Sonar model.
4
+ Downloads the model from Hugging Face Hub and validates it on test data.
5
+ """
6
+
7
+ import numpy as np
8
+ from huggingface_hub import hf_hub_download
9
+ import joblib
10
+ from sklearn.datasets import make_classification
11
+
12
+
13
def download_model(
    repo_id="undertheseanlp/sonar_core_1",
    filename="sklearn_model.joblib",
    fallback_path="sonar_model.pkl",
):
    """Download the trained model from Hugging Face Hub.

    If fetching or deserializing the Hub file fails for any reason, the
    error is printed and the model is loaded from a local pickle instead.

    Args:
        repo_id: Hub repository to download from.
        filename: Name of the joblib file inside that repository.
        fallback_path: Local file handed to ``SonarModel.load`` when the
            Hub path fails.

    Returns:
        The deserialized model object.

    Raises:
        Any exception raised while importing ``model`` or by
        ``SonarModel.load`` if the local fallback fails as well.
    """
    print("Downloading model from Hugging Face Hub...")
    try:
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
        # SECURITY: joblib.load unpickles the file and can therefore execute
        # arbitrary code. Only point repo_id at a repository you trust.
        model = joblib.load(model_path)
    except Exception as e:
        # Deliberately broad: this is a best-effort path, and any failure
        # (network, missing file, incompatible pickle) uses the local model.
        print(f"Error downloading model: {e}")
        print("Model file might not exist yet on Hugging Face. Using local model instead.")
        # Imported lazily so the project-local module is only required when
        # the fallback is actually taken.
        from model import SonarModel

        return SonarModel.load(fallback_path)

    print("Model downloaded successfully!")
    return model
31
+
32
+
33
def generate_validation_data(n_samples=200, n_features=60, random_state=123):
    """Generate synthetic two-class validation data with ``make_classification``.

    The defaults produce 200 samples with 60 features, balanced between
    the two classes, with 1% of labels flipped at random.

    Args:
        n_samples: Number of rows to generate.
        n_features: Total number of feature columns. Must be at least 55,
            the sum of the fixed informative (40), redundant (10) and
            repeated (5) feature counts used below.
        random_state: Seed passed to ``make_classification``.

    Returns:
        Tuple ``(X, y)`` as returned by ``make_classification``.

    Raises:
        ValueError: If ``n_features`` is smaller than the 55 structured
            features requested.
    """
    n_informative = 40
    n_redundant = 10
    n_repeated = 5

    # Fail early with an actionable message; otherwise sklearn rejects the
    # combination with a less specific error.
    required = n_informative + n_redundant + n_repeated
    if n_features < required:
        raise ValueError(
            f"n_features must be at least {required} "
            f"({n_informative} informative + {n_redundant} redundant + "
            f"{n_repeated} repeated), got {n_features}"
        )

    # The default seed is intentionally different from the training seed.
    # NOTE(review): make_classification also derives the class structure
    # from random_state, so a different seed probably yields a differently
    # distributed problem rather than new samples of the training problem.
    # Confirm against how the training data was generated.
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_repeated=n_repeated,
        n_classes=2,
        n_clusters_per_class=2,
        weights=[0.5, 0.5],
        flip_y=0.01,
        random_state=random_state,
    )
    return X, y
48
+
49
+
50
def validate_model(model, X, y):
    """Validate the model on test data and print a summary report.

    Prints accuracy, a confusion matrix, a classification report, the
    first five predictions with their class probabilities and, when the
    model offers ``get_feature_importance()``, the five largest
    importances.

    Args:
        model: Object providing ``predict(X)`` and ``predict_proba(X)``;
            ``get_feature_importance()`` is optional.
        X: Feature array; its ``.shape`` is printed.
        y: True labels, expected to be 0 (rock) or 1 (mine); its
            ``.shape`` is printed.

    Returns:
        The fraction of predictions equal to ``y``.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    print("\nValidating model...")
    print(f"Validation data shape: X={X.shape}, y={y.shape}")

    # Make predictions
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)

    # Calculate accuracy
    accuracy = (y_pred == y).mean()
    print(f"\nValidation Accuracy: {accuracy:.4f}")

    # Pin the label set so the matrix is always 2x2 and the report always
    # has both rows, even when one class is missing from y and y_pred.
    # Without this, cm[0, 1] raises IndexError on single-class data.
    class_labels = [0, 1]

    # Confusion matrix
    cm = confusion_matrix(y, y_pred, labels=class_labels)
    print("\nConfusion Matrix:")
    print(f"True Negatives: {cm[0, 0]:3d} | False Positives: {cm[0, 1]:3d}")
    print(f"False Negatives: {cm[1, 0]:3d} | True Positives: {cm[1, 1]:3d}")

    # Classification report
    print("\nClassification Report:")
    report = classification_report(
        y,
        y_pred,
        labels=class_labels,
        target_names=["Rock (0)", "Mine (1)"],
        digits=3,
    )
    print(report)

    # Sample predictions
    print("\nSample Predictions (first 5):")
    samples = zip(y[:5], y_pred[:5], y_proba[:5])
    for number, (true_label, predicted_label, proba) in enumerate(samples, 1):
        print(
            f" Sample {number}: True={true_label}, Predicted={predicted_label}, "
            f"Probabilities=[{proba[0]:.3f}, {proba[1]:.3f}]"
        )

    # Feature importance (if available). Only the call itself is guarded,
    # so an AttributeError in the reporting code below is not hidden.
    try:
        importances = model.get_feature_importance()
    except AttributeError:
        importances = None

    if importances is not None:
        # Indices of the five largest importances, largest first.
        top_features = np.argsort(importances)[-5:][::-1]
        print("\nTop 5 Most Important Features:")
        for rank, feat_idx in enumerate(top_features, 1):
            print(f" {rank}. Feature {feat_idx}: {importances[feat_idx]:.4f}")

    return accuracy
97
+
98
+
99
def main():
    """Run the validation flow end to end and print a pass/fail banner.

    Obtains the model, generates 200 validation samples, evaluates the
    model on them, and reports success when accuracy exceeds 0.8.
    """
    rule = "=" * 60
    expected_accuracy = 0.8

    print(rule)
    print("Sonar Model Validation Script")
    print(rule)

    # Fetch the model (Hub first; download_model handles the fallback).
    loaded_model = download_model()

    print("\nGenerating validation data...")
    features, labels = generate_validation_data(n_samples=200)

    score = validate_model(loaded_model, features, labels)

    print("\n" + rule)
    verdict = (
        "✅ Model validation successful!"
        if score > expected_accuracy
        else "⚠️ Model accuracy below expected threshold"
    )
    print(verdict)
    print(rule)


if __name__ == "__main__":
    main()