ml-intern
hello9972 committed on
Commit
bfe95db
·
verified ·
1 Parent(s): 09ba888

Make NB4 fit 90 minute CPU submission limit

Browse files
Files changed (1) hide show
  1. nb04_inference.py +106 -102
nb04_inference.py CHANGED
@@ -1,17 +1,23 @@
1
  """
2
- BirdCLEF+ 2026 — Notebook 4 FAST INFERENCE
3
-
4
- Fix for timeout:
5
- • Batch all 12 chunks of each soundscape per model instead of segment-by-segment.
6
- Use best B0 folds by default instead of all 5.
7
- No TTA by default.
8
- Uses CUDA if available, else CPU.
9
- Raw sigmoid outputs only; no threshold/calibration.
10
-
11
- Before run: set MODEL_DIR to Kaggle dataset containing b0_fold*.pt.
 
 
 
 
 
 
12
  """
13
 
14
- import os, gc, time
15
  import numpy as np
16
  import pandas as pd
17
  import torch
@@ -28,25 +34,26 @@ COMP_DIR = "/kaggle/input/competitions/birdclef-2026"
28
  TEST_DIR = f"{COMP_DIR}/test_soundscapes"
29
  SAMPLE_SUB = f"{COMP_DIR}/sample_submission.csv"
30
 
31
- # CHANGE THIS to your Kaggle dataset path.
32
  MODEL_DIR = "/kaggle/input/birdclef-b0-5fold"
33
- # If your .pt files are inside a models folder:
34
  # MODEL_DIR = "/kaggle/input/birdclef-b0-5fold/models"
35
 
36
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
37
  SR = 32000
38
  DURATION = 5
39
  N_SAMPLES = SR * DURATION
40
 
41
- # FAST DEFAULT: use top validation folds first.
42
- # Fold scores: f2=0.9244, f4=0.8896, f0=0.8556, f1=0.8551, f3=0.8035
43
- B0_FOLDS = [2, 4, 0] # fastest good ensemble. If still timeout, use [2]. If safe, use [2,4,0,1]
44
- USE_B3 = False
45
- B3_FOLDS = []
46
- USE_TTA = False # keep False for Kaggle runtime
47
 
48
- # Lower thread count can reduce CPU thrash in Kaggle containers.
49
- torch.set_num_threads(2)
 
 
 
 
50
 
51
  # =========================
52
  # LOAD SAMPLE
@@ -59,9 +66,9 @@ NUM_CLASSES = len(SPECIES)
59
  # MODEL
60
  # =========================
61
  class Model(nn.Module):
62
- def __init__(self, backbone):
63
  super().__init__()
64
- self.backbone = timm.create_model(backbone, pretrained=False, in_chans=3, features_only=True)
65
  fi = self.backbone.feature_info
66
  ch = fi[-2]["num_chs"] + fi[-1]["num_chs"]
67
  self.pool = nn.AdaptiveAvgPool2d(1)
@@ -79,67 +86,57 @@ class Model(nn.Module):
79
  # =========================
80
  # LOAD MODELS
81
  # =========================
82
- def load_one(backbone, path):
83
- m = Model(backbone)
84
- state = torch.load(path, map_location="cpu")
85
- m.load_state_dict(state, strict=False)
86
- m.to(DEVICE)
87
- m.eval()
88
- return m
89
-
90
  MODELS = []
91
  for fold in B0_FOLDS:
92
  path = f"{MODEL_DIR}/b0_fold{fold}.pt"
93
  if os.path.exists(path):
94
- MODELS.append(("b0", load_one("tf_efficientnet_b0_ns", path)))
 
 
 
 
 
95
  print("loaded:", path)
96
  else:
97
  print("missing:", path)
98
 
99
- if USE_B3:
100
- for fold in B3_FOLDS:
101
- path = f"{MODEL_DIR}/b3_fold{fold}.pt"
102
- if os.path.exists(path):
103
- MODELS.append(("b3", load_one("tf_efficientnet_b3_ns", path)))
104
- print("loaded:", path)
105
- else:
106
- print("missing:", path)
107
-
108
  if len(MODELS) == 0:
109
- raise ValueError(f"No models loaded. Check MODEL_DIR={MODEL_DIR}")
110
 
111
- print("DEVICE:", DEVICE)
112
- print("ensemble size:", len(MODELS))
113
- print("B0_FOLDS:", B0_FOLDS, "USE_TTA:", USE_TTA)
114
 
115
  # =========================
116
- # FEATURES
117
  # =========================
118
- def make_spec_np(chunk, n_fft, hop):
 
119
  mel = librosa.feature.melspectrogram(
120
- y=chunk, sr=SR, n_fft=n_fft, hop_length=hop,
121
  n_mels=128, fmin=20, fmax=16000
122
  )
123
  mel = librosa.power_to_db(mel)
124
  mel = (mel - mel.min()) / (mel.max() - mel.min() + 1e-6)
125
- return np.stack([mel] * 3).astype(np.float32)
126
-
127
- def get_chunks_12(wav):
128
- chunks = []
129
- for sec in range(0, 60, 5):
130
- start = sec * SR
131
- chunk = wav[start:start + N_SAMPLES]
132
- if len(chunk) < N_SAMPLES:
133
- chunk = np.pad(chunk, (0, N_SAMPLES - len(chunk)))
134
- chunks.append(chunk.astype(np.float32))
135
- return chunks
136
-
137
- def make_batch(chunks, model_name):
138
- if model_name == "b0":
139
- specs = [make_spec_np(c, 1024, 64) for c in chunks]
140
- else:
141
- specs = [make_spec_np(c, 2048, 512) for c in chunks]
142
- return torch.tensor(np.stack(specs), dtype=torch.float32, device=DEVICE)
 
143
 
144
  # =========================
145
  # INFERENCE
@@ -147,7 +144,7 @@ def make_batch(chunks, model_name):
147
  files = sorted([f for f in os.listdir(TEST_DIR) if f.endswith((".ogg", ".wav", ".flac", ".mp3"))])
148
  print("test files:", len(files))
149
 
150
- all_rows = []
151
  all_preds = []
152
  t0 = time.time()
153
 
@@ -165,35 +162,41 @@ for file_idx, fname in enumerate(files):
165
  wav = wav.mean(axis=1)
166
  if sr != SR:
167
  wav = librosa.resample(wav, orig_sr=sr, target_sr=SR)
168
-
169
- chunks = get_chunks_12(wav)
170
- row_ids = [f"{stem}_{sec}" for sec in range(5, 65, 5)]
171
-
172
- logits_sum = None
173
- n_pred = 0
174
-
175
- with torch.no_grad():
176
- # Build b0 batch once if needed; b3 batch once if needed.
177
- batch_cache = {}
178
- for model_name, model in MODELS:
179
- if model_name not in batch_cache:
180
- batch_cache[model_name] = make_batch(chunks, model_name)
181
- x = batch_cache[model_name]
182
- logits = model(x).detach().float().cpu().numpy() # shape (12, classes)
183
- logits_sum = logits if logits_sum is None else logits_sum + logits
184
- n_pred += 1
185
- if DEVICE == "cuda":
186
- torch.cuda.empty_cache()
187
-
188
- avg_logits = logits_sum / max(n_pred, 1)
189
- probs = 1.0 / (1.0 + np.exp(-avg_logits))
190
-
191
- all_rows.extend(row_ids)
192
- all_preds.append(probs.astype(np.float32))
193
-
194
- if file_idx == 0 or (file_idx + 1) % 25 == 0:
195
- elapsed = time.time() - t0
196
- print(f"progress {file_idx+1}/{len(files)} elapsed={elapsed/60:.1f} min")
 
 
 
 
 
 
197
 
198
  gc.collect()
199
 
@@ -201,14 +204,14 @@ for file_idx, fname in enumerate(files):
201
  # SUBMISSION
202
  # =========================
203
  if len(all_preds) == 0:
204
- pred_arr = np.zeros((len(all_rows), NUM_CLASSES), dtype=np.float32)
205
  else:
206
  pred_arr = np.vstack(all_preds)
207
 
208
  sub = pd.DataFrame(pred_arr, columns=SPECIES)
209
- sub.insert(0, "row_id", all_rows)
210
 
211
- # Align exactly with sample submission
212
  sub = sample[["row_id"]].merge(sub, on="row_id", how="left").fillna(0)
213
  sub = sub[sample.columns]
214
 
@@ -219,8 +222,9 @@ sub.to_csv("submission.csv", index=False)
219
 
220
  print("SUBMISSION READY")
221
  print("shape:", sub.shape)
222
- print("models:", len(MODELS), "folds:", B0_FOLDS, "DEVICE:", DEVICE)
 
223
  print("mean prob:", float(sub[SPECIES].values.mean()))
224
  print("max prob:", float(sub[SPECIES].values.max()))
225
  print("nonzero ratio:", float((sub[SPECIES].values > 0).mean()))
226
- print("total elapsed min:", (time.time() - t0) / 60)
 
1
  """
2
+ BirdCLEF+ 2026 — Notebook 4 ULTRA-FAST CPU INFERENCE
3
+
4
+ Designed for Kaggle submission CPU limit (~90 min).
5
+
6
+ Speed choices:
7
+ • Uses ONLY the best B0 fold by default: fold 2.
8
+ • Computes predictions every 10 seconds by default (6 chunks/file), then duplicates
9
+ each prediction to fill adjacent 5-second rows.
10
+ • No TTA.
11
+ • Batched per soundscape.
12
+ • Raw sigmoid probabilities, no thresholds/calibration.
13
+
14
+ If this finishes with time left, improve score by setting:
15
+ B0_FOLDS = [2, 4]
16
+ PREDICT_STRIDE_SEC = 5
17
+ But for first valid CPU submission, keep defaults.
18
  """
19
 
20
+ import os, time, gc
21
  import numpy as np
22
  import pandas as pd
23
  import torch
 
34
  TEST_DIR = f"{COMP_DIR}/test_soundscapes"
35
  SAMPLE_SUB = f"{COMP_DIR}/sample_submission.csv"
36
 
37
+ # CHANGE to your Kaggle model dataset path.
38
  MODEL_DIR = "/kaggle/input/birdclef-b0-5fold"
 
39
  # MODEL_DIR = "/kaggle/input/birdclef-b0-5fold/models"
40
 
41
+ DEVICE = "cpu" # CPU submission limit. Do not depend on GPU.
42
  SR = 32000
43
  DURATION = 5
44
  N_SAMPLES = SR * DURATION
45
 
46
+ # CPU-safe defaults
47
+ B0_FOLDS = [2] # best validation fold: 0.9244. Fastest valid submission.
48
+ USE_TTA = False
49
+ PREDICT_STRIDE_SEC = 10 # 10 = compute 6 chunks/file and duplicate to 12 rows. 5 = full 12 chunks.
 
 
50
 
51
# CPU tuning (best effort): set intra-op threads to 4 and inter-op threads to 1.
# torch.set_num_interop_threads raises RuntimeError when it is called after
# inter-op parallel work has started or when it is called a second time
# (e.g. on a notebook re-run), so report that case instead of silently
# swallowing every possible exception.
try:
    torch.set_num_threads(4)
    torch.set_num_interop_threads(1)
except RuntimeError as err:
    print("thread config not applied:", err)
57
 
58
  # =========================
59
  # LOAD SAMPLE
 
66
  # MODEL
67
  # =========================
68
  class Model(nn.Module):
69
+ def __init__(self):
70
  super().__init__()
71
+ self.backbone = timm.create_model("tf_efficientnet_b0_ns", pretrained=False, in_chans=3, features_only=True)
72
  fi = self.backbone.feature_info
73
  ch = fi[-2]["num_chs"] + fi[-1]["num_chs"]
74
  self.pool = nn.AdaptiveAvgPool2d(1)
 
86
  # =========================
87
  # LOAD MODELS
88
  # =========================
 
 
 
 
 
 
 
 
89
  MODELS = []
90
  for fold in B0_FOLDS:
91
  path = f"{MODEL_DIR}/b0_fold{fold}.pt"
92
  if os.path.exists(path):
93
# Build the network, load this fold's checkpoint on CPU, and register it.
m = Model()
state = torch.load(path, map_location="cpu")
# strict=False tolerates key mismatches, so surface them here: parameters
# whose keys are missing from the checkpoint keep their untrained
# initialisation (the backbone is created with pretrained=False), which
# would otherwise degrade predictions without any visible error.
load_result = m.load_state_dict(state, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "warning: state_dict mismatch for", path,
        "missing:", len(load_result.missing_keys),
        "unexpected:", len(load_result.unexpected_keys),
    )
m.eval()
m.to(DEVICE)
MODELS.append(m)
99
  print("loaded:", path)
100
  else:
101
  print("missing:", path)
102
 
 
 
 
 
 
 
 
 
 
103
  if len(MODELS) == 0:
104
+ raise ValueError(f"No models loaded from MODEL_DIR={MODEL_DIR}. Check dataset path.")
105
 
106
+ print("CPU ultra-fast config")
107
+ print("models:", len(MODELS), "folds:", B0_FOLDS)
108
+ print("PREDICT_STRIDE_SEC:", PREDICT_STRIDE_SEC)
109
 
110
  # =========================
111
+ # FEATURE HELPERS
112
  # =========================
113
def make_spec_np(chunk):
    """Convert one audio chunk into a 3-channel, min-max scaled log-mel array.

    The mel parameters (n_fft=1024, hop_length=64, n_mels=128) must match the
    "spec_a" settings used for B0 training.

    Returns a float32 array made of three identical copies of the scaled
    mel spectrogram stacked along a new leading axis.
    """
    power = librosa.feature.melspectrogram(
        y=chunk,
        sr=SR,
        n_fft=1024,
        hop_length=64,
        n_mels=128,
        fmin=20,
        fmax=16000,
    )
    db = librosa.power_to_db(power)
    lowest = db.min()
    highest = db.max()
    # The small epsilon keeps the division finite for a constant spectrogram.
    scaled = (db - lowest) / (highest - lowest + 1e-6)
    return np.stack((scaled, scaled, scaled)).astype(np.float32)
122
+
123
def chunk_at(wav, sec):
    """Return the N_SAMPLES-long float32 window of ``wav`` starting at ``sec``.

    ``sec`` is a start offset in seconds (converted to samples with SR). When
    the waveform ends before the window does, the tail is zero-padded so the
    result always has exactly N_SAMPLES samples.
    """
    begin = sec * SR
    window = wav[begin:begin + N_SAMPLES]
    shortfall = N_SAMPLES - len(window)
    if shortfall > 0:
        window = np.pad(window, (0, shortfall))
    return window.astype(np.float32)
129
+
130
def predict_chunks(chunks):
    """Return ensemble probabilities for a list of audio chunks.

    Each chunk is converted with make_spec_np, the spectrograms are stacked
    into a single batch, every model in MODELS is run on that batch, the
    logits are averaged across models, and a sigmoid is applied.

    Returns:
        float32 NumPy array with one row per chunk.

    Raises:
        ValueError: if MODELS is empty (there is nothing to average).
    """
    specs = [make_spec_np(c) for c in chunks]
    x = torch.from_numpy(np.stack(specs)).to(DEVICE)
    with torch.inference_mode():
        logits_sum = None
        for m in MODELS:
            logits = m(x)
            logits_sum = logits if logits_sum is None else logits_sum + logits
        if logits_sum is None:
            raise ValueError("predict_chunks needs at least one loaded model in MODELS")
        # torch.sigmoid is numerically stable; the hand-written
        # 1 / (1 + exp(-x)) overflows in exp for strongly negative logits.
        probs = torch.sigmoid(logits_sum / len(MODELS))
        return probs.float().cpu().numpy()
140
 
141
  # =========================
142
  # INFERENCE
 
144
  files = sorted([f for f in os.listdir(TEST_DIR) if f.endswith((".ogg", ".wav", ".flac", ".mp3"))])
145
  print("test files:", len(files))
146
 
147
+ all_row_ids = []
148
  all_preds = []
149
  t0 = time.time()
150
 
 
162
  wav = wav.mean(axis=1)
163
  if sr != SR:
164
  wav = librosa.resample(wav, orig_sr=sr, target_sr=SR)
165
+ wav = wav.astype(np.float32)
166
+
167
# Submission rows end at 5, 10, ..., 60 s; the row ending at e covers audio
# [e - 5, e), i.e. the chunk that starts at e - 5.
row_secs = list(range(5, 65, 5))
if PREDICT_STRIDE_SEC <= 5:
    # Full resolution: one prediction per 5 s row.
    start_secs = list(range(0, 60, 5))
    chunks = [chunk_at(wav, s) for s in start_secs]
    probs = predict_chunks(chunks)  # (12, C)
    row_probs = probs
else:
    # Coarse mode: predict one 5 s chunk every PREDICT_STRIDE_SEC seconds and
    # reuse each prediction for every row whose start falls inside that
    # stride window. With stride 10 this is the same "fill rows s+5 and s+10"
    # duplication as before; other strides (15, 20, ...) now also fill all 12
    # rows instead of leaving gaps that the later merge would zero-fill.
    start_secs = list(range(0, 60, PREDICT_STRIDE_SEC))
    chunks = [chunk_at(wav, s) for s in start_secs]
    probs = predict_chunks(chunks)  # (len(start_secs), C)
    source_idx = [(end - 5) // PREDICT_STRIDE_SEC for end in row_secs]
    row_probs = probs[source_idx].astype(np.float32)
193
+
194
+ all_row_ids.extend([f"{stem}_{sec}" for sec in row_secs])
195
+ all_preds.append(row_probs)
196
+
197
+ if file_idx == 0 or (file_idx + 1) % 20 == 0:
198
+ elapsed = (time.time() - t0) / 60
199
+ print(f"progress {file_idx+1}/{len(files)} elapsed={elapsed:.1f} min")
200
 
201
  gc.collect()
202
 
 
204
  # SUBMISSION
205
  # =========================
206
  if len(all_preds) == 0:
207
+ pred_arr = np.zeros((len(all_row_ids), NUM_CLASSES), dtype=np.float32)
208
  else:
209
  pred_arr = np.vstack(all_preds)
210
 
211
  sub = pd.DataFrame(pred_arr, columns=SPECIES)
212
+ sub.insert(0, "row_id", all_row_ids)
213
 
214
+ # Align exactly with sample_submission. Missing rows are filled with 0, but no rows should be missing.
215
  sub = sample[["row_id"]].merge(sub, on="row_id", how="left").fillna(0)
216
  sub = sub[sample.columns]
217
 
 
222
 
223
  print("SUBMISSION READY")
224
  print("shape:", sub.shape)
225
+ print("models:", len(MODELS), "folds:", B0_FOLDS)
226
+ print("stride:", PREDICT_STRIDE_SEC)
227
  print("mean prob:", float(sub[SPECIES].values.mean()))
228
  print("max prob:", float(sub[SPECIES].values.max()))
229
  print("nonzero ratio:", float((sub[SPECIES].values > 0).mean()))
230
+ print("elapsed min:", (time.time() - t0) / 60)