ml-intern
hello9972 committed on
Commit
f402d0e
·
verified ·
1 Parent(s): 1fc4718

Fix NB3 NB1 filenames and time parsing

Browse files
Files changed (1) hide show
  1. nb03_pseudo_labeling.py +125 -129
nb03_pseudo_labeling.py CHANGED
@@ -1,35 +1,23 @@
1
  """
2
- ╔══════════════════════════════════════════════════════════════════════════════╗
3
- ║ BirdCLEF+ 2026 Notebook 3 (IMPROVED) ║
4
- ║ PSEUDO-LABELING (Noisy Student) ║
5
- ║ ║
6
- Strategy:
7
- ║ • Load ALL trained fold models (5 folds × 2 backbones = 10 models) ║
8
- ║ • Run inference on train_soundscapes (not test — we don't have test!) ║
9
- ║ • Actually: generate pseudo-labels from test_soundscapes via submission ║
10
- ║ • Use high-confidence predictions (>0.5) as pseudo-labels ║
11
- ║ • Retrain on pseudo-labeled data + original training data ║
12
- ╚══════════════════════════════════════════════════════════════════════════════╝
13
-
14
- IMPORTANT: In Kaggle, you don't have test labels. The standard approach:
15
- 1. Train on train_audio + train_soundscapes
16
- 2. Generate predictions on train_soundscapes using models
17
- 3. Use confident predictions as additional training signal
18
- 4. OR: use test predictions from a previous submission as pseudo-labels
19
-
20
- Since we can't see test labels, this notebook implements "noisy student"
21
- by re-training on train_soundscapes with pseudo-labels generated from
22
- our own ensemble predictions on those same soundscapes.
23
  """
24
 
25
- import os, gc, math
26
  import numpy as np
27
  import pandas as pd
28
  import torch
29
  import torch.nn as nn
30
  import torch.nn.functional as F
31
  from torch.utils.data import Dataset, DataLoader
32
- from torch.amp import GradScaler, autocast
33
  import timm, librosa, torchaudio
34
 
35
  # =========================
@@ -42,10 +30,14 @@ class CFG:
42
  n_samples = int(sr * duration)
43
  num_classes = 234
44
  batch_size = 16
45
- epochs = 3
46
  num_workers = 2
47
- device = "cuda"
48
- spec = dict(n_fft=1024, hop_length=64, n_mels=128, fmin=20, fmax=16000)
 
 
 
 
 
49
 
50
  # =========================
51
  # PATHS
@@ -53,30 +45,58 @@ class CFG:
53
  COMP_DIR = "/kaggle/input/competitions/birdclef-2026"
54
  TRAIN_SC = f"{COMP_DIR}/train_soundscapes"
55
 
56
- DATA_DIR = "/kaggle/input/datasets/vivekgaur9972/nb01-dataset/nb01"
 
 
 
57
  MODEL_DIR = "/kaggle/input/datasets/vivekgaur9972/birdclef-nb02-models/nb02-model/models"
58
 
59
  OUTPUT_DIR = "/kaggle/working"
60
- os.makedirs(f"{OUTPUT_DIR}/models", exist_ok=True)
61
 
62
  # =========================
63
- # LOAD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # =========================
65
  species_df = pd.read_csv(f"{DATA_DIR}/species_list.csv")
66
  SPECIES = species_df["species"].tolist()
67
- MAP = {s:i for i,s in enumerate(SPECIES)}
68
 
69
- # Load all fold models
70
- FOLD_MODELS = []
71
- for name in ["b0", "b3"]:
72
- for fold in range(5):
73
- path = f"{MODEL_DIR}/{name}_fold{fold}.pt"
74
- if os.path.exists(path):
75
- FOLD_MODELS.append((name, fold, path))
76
- else:
77
- print(f" [WARN] Missing: {path}")
78
 
79
- print(f"Loaded {len(FOLD_MODELS)} fold models")
 
80
 
81
  # =========================
82
  # MODEL
@@ -86,26 +106,25 @@ class Model(nn.Module):
86
  super().__init__()
87
  self.backbone = timm.create_model(backbone, pretrained=False, in_chans=3, features_only=True)
88
  fi = self.backbone.feature_info
89
- ch = fi[-2]['num_chs'] + fi[-1]['num_chs']
90
  self.pool = nn.AdaptiveAvgPool2d(1)
91
  self.fc = nn.Linear(ch, CFG.num_classes)
92
 
93
  def forward(self, x):
94
- f = self.backbone(x)
95
- f3, f4 = f[-2], f[-1]
96
  if f3.shape[2:] != f4.shape[2:]:
97
- f4 = F.interpolate(f4, size=f3.shape[2:])
98
  x = torch.cat([f3, f4], 1)
99
- x = self.pool(x).squeeze(-1).squeeze(-1)
100
  return self.fc(x)
101
 
102
  # =========================
103
- # DATASET for inference on soundscapes
104
  # =========================
105
  class SoundscapeDS(Dataset):
106
- def __init__(self, df, spec_cfg):
107
  self.df = df.reset_index(drop=True)
108
- self.spec_cfg = spec_cfg
109
  self.cache = {}
110
 
111
  def __len__(self):
@@ -118,105 +137,82 @@ class SoundscapeDS(Dataset):
118
  wav = wav.mean(0).numpy()
119
  if sr != CFG.sr:
120
  wav = librosa.resample(wav, orig_sr=sr, target_sr=CFG.sr)
121
- self.cache[fname] = wav
122
  except Exception:
123
  self.cache[fname] = np.zeros(CFG.sr * 60, dtype=np.float32)
124
  return self.cache[fname]
125
 
126
- def __getitem__(self, i):
127
- r = self.df.iloc[i]
128
  wav = self.load_audio(r["filename"])
129
- start = int(r["start"] * CFG.sr)
130
  chunk = wav[start:start + CFG.n_samples]
131
  if len(chunk) < CFG.n_samples:
132
  chunk = np.pad(chunk, (0, CFG.n_samples - len(chunk)))
133
- mel = librosa.feature.melspectrogram(y=chunk, sr=CFG.sr, **self.spec_cfg)
134
- mel = librosa.power_to_db(mel)
135
- mel = (mel - mel.min()) / (mel.max() - mel.min() + 1e-6)
136
- x = torch.tensor(mel).unsqueeze(0).repeat(3, 1, 1)
137
- return x.float()
138
-
139
 
140
  # =========================
141
- # GENERATE PSEUDO-LABELS
142
  # =========================
143
- # Use train_soundscapes as target for pseudo-labeling
144
- sc_df = pd.read_csv(f"{DATA_DIR}/soundscape_labels_with_folds_fixed.csv")
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Create loader
147
- pseudo_ds = SoundscapeDS(sc_df, CFG.spec)
148
- pseudo_loader = DataLoader(pseudo_ds, batch_size=CFG.batch_size, shuffle=False,
149
- num_workers=CFG.num_workers, pin_memory=True)
150
 
151
- # Ensemble inference
152
- all_preds = []
153
- all_labels = []
154
 
 
 
 
 
 
 
 
 
155
  with torch.no_grad():
156
- for batch_idx, x in enumerate(pseudo_loader):
157
- x = x.to(CFG.device)
158
- logits_sum = None
159
-
160
- for name, fold, path in FOLD_MODELS:
161
- backbone = "tf_efficientnet_b0_ns" if name == "b0" else "tf_efficientnet_b3_ns"
162
- model = Model(backbone).to(CFG.device)
163
- state = torch.load(path, map_location=CFG.device)
164
- model.load_state_dict(state, strict=False)
165
- model.eval()
166
-
167
- # TTA: original + time-reversed
168
- out = model(x)
169
- # time-reversed (flip mel time dimension)
170
- x_rev = torch.flip(x, dims=[3])
171
- out_rev = model(x_rev)
172
-
173
- logits_sum = out + out_rev if logits_sum is None else logits_sum + out + out_rev
174
-
175
- # Average across all models and TTA variants
176
- avg_logits = logits_sum / (len(FOLD_MODELS) * 2)
177
- probs = torch.sigmoid(avg_logits).cpu().numpy()
178
  all_preds.append(probs)
 
 
179
 
180
- if (batch_idx + 1) % 50 == 0:
181
- print(f" Batch {batch_idx+1}/{len(pseudo_loader)}")
182
-
183
- del model
184
- gc.collect()
185
- torch.cuda.empty_cache()
186
 
187
- all_preds = np.concatenate(all_preds)
188
-
189
- # Create pseudo-label dataframe
190
- pseudo_df = sc_df.copy()
191
  for i, sp in enumerate(SPECIES):
192
- pseudo_df[sp] = all_preds[:, i]
193
-
194
- # Save pseudo-labels (soft labels)
195
- pseudo_df.to_csv(f"{OUTPUT_DIR}/pseudo_labels_soft.csv", index=False)
196
- print(f"Saved soft pseudo-labels: {OUTPUT_DIR}/pseudo_labels_soft.csv")
197
 
198
- # Also create hard pseudo-labels (threshold > 0.5)
199
- hard_pseudo = sc_df.copy()
200
  for i, sp in enumerate(SPECIES):
201
- hard_pseudo[sp] = (all_preds[:, i] > 0.5).astype(int)
202
-
203
- # Only keep rows with at least one confident prediction
204
- confident_mask = (all_preds > 0.5).any(axis=1)
205
- hard_pseudo_confident = hard_pseudo[confident_mask].copy()
206
-
207
- print(f" Total soundscape segments: {len(sc_df)}")
208
- print(f" Confident pseudo-labels (>0.5): {confident_mask.sum()}")
209
-
210
- hard_pseudo_confident.to_csv(f"{OUTPUT_DIR}/pseudo_labels_hard_confident.csv", index=False)
211
- print(f"Saved hard confident pseudo-labels")
212
-
213
- # =========================
214
- # NOISY STUDENT RETRAINING (Optional — train one more round)
215
- # =========================
216
- # Use soft pseudo-labels as training targets
217
- # This is a simplified version — you can integrate into NB2 for full retraining
218
-
219
- print("\n" + "="*60)
220
- print("PSEUDO-LABELING COMPLETE")
221
- print("="*60)
222
- print("Next: Use pseudo_labels_soft.csv as additional training data in NB2")
 
1
  """
2
+ BirdCLEF+ 2026 — Notebook 3 (FIXED)
3
+ Pseudo-label generation using NB2 fold models.
4
+
5
+ Fixes:
6
+ 1. Uses NB1 output filenames:
7
+ soundscape_labels_with_folds.csv
8
+ species_list.csv
9
+ 2. Parses soundscape start/end time strings to numeric seconds.
10
+ 3. Loads whatever fold models exist, so you can run after partial NB2 runs.
 
 
 
 
 
 
 
 
 
 
 
 
11
  """
12
 
13
+ import os, gc, random
14
  import numpy as np
15
  import pandas as pd
16
  import torch
17
  import torch.nn as nn
18
  import torch.nn.functional as F
19
  from torch.utils.data import Dataset, DataLoader
20
+ from torch.amp import autocast
21
  import timm, librosa, torchaudio
22
 
23
  # =========================
 
30
  n_samples = int(sr * duration)
31
  num_classes = 234
32
  batch_size = 16
 
33
  num_workers = 2
34
+ device = "cuda" if torch.cuda.is_available() else "cpu"
35
+ spec_b0 = dict(n_fft=1024, hop_length=64, n_mels=128, fmin=20, fmax=16000)
36
+ spec_b3 = dict(n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=16000)
37
+
38
+ random.seed(CFG.seed)
39
+ np.random.seed(CFG.seed)
40
+ torch.manual_seed(CFG.seed)
41
 
42
  # =========================
43
  # PATHS
 
45
  COMP_DIR = "/kaggle/input/competitions/birdclef-2026"
46
  TRAIN_SC = f"{COMP_DIR}/train_soundscapes"
47
 
48
+ # NB1 output dataset
49
+ DATA_DIR = "/kaggle/input/datasets/adpassward709/birdcleff-nb1-output"
50
+
51
+ # NB2 model dataset. Update this after saving NB2 outputs as a Kaggle dataset.
52
  MODEL_DIR = "/kaggle/input/datasets/vivekgaur9972/birdclef-nb02-models/nb02-model/models"
53
 
54
  OUTPUT_DIR = "/kaggle/working"
55
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
56
 
57
  # =========================
58
+ # HELPERS
59
+ # =========================
60
def parse_time_col(val):
    """Convert a soundscape time value to seconds.

    Accepts plain numbers, numeric strings, and clock-style strings written
    as ``HH:MM:SS``, ``MM:SS`` or ``SS`` (fields may be fractional).

    Args:
        val: Raw cell value, e.g. ``5``, ``"5.0"``, ``"00:01:05"`` or NaN.

    Returns:
        The time in seconds as a float. Missing, unparseable, or non-finite
        values (including the strings ``"nan"`` and ``"inf"``) yield ``0.0``
        so the result can always be turned into a sample offset with
        ``int(...)``.
    """
    if pd.isna(val):
        return 0.0
    try:
        seconds = float(val)
    except (TypeError, ValueError, OverflowError):
        # Not directly numeric: fall back to clock-style "H:M:S" parsing.
        parts = str(val).strip().split(":")
        try:
            if len(parts) == 3:
                seconds = float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
            elif len(parts) == 2:
                seconds = float(parts[0]) * 60 + float(parts[1])
            else:
                seconds = float(parts[0])
        except ValueError:
            return 0.0
    # float("nan") / float("inf") parse without error but would break
    # int(seconds * sr) downstream, so treat them like unparseable input.
    return seconds if np.isfinite(seconds) else 0.0
76
+
77
def make_spec(chunk, spec):
    """Build a 3-channel, min-max scaled log-mel tensor from a waveform chunk.

    Args:
        chunk: 1-D audio samples, interpreted at ``CFG.sr``.
        spec: Keyword arguments forwarded to
            ``librosa.feature.melspectrogram`` (n_fft, hop_length, ...).

    Returns:
        A float32 tensor holding the scaled dB spectrogram repeated three
        times along a new leading axis.
    """
    power = librosa.feature.melspectrogram(y=chunk, sr=CFG.sr, **spec)
    db = librosa.power_to_db(power)
    lo, hi = db.min(), db.max()
    # The epsilon keeps a constant spectrogram (hi == lo) from dividing by zero.
    scaled = (db - lo) / (hi - lo + 1e-6)
    plane = torch.tensor(scaled, dtype=torch.float32)
    return plane.unsqueeze(0).repeat(3, 1, 1)
82
+
83
+ # =========================
84
+ # LOAD DATA
85
  # =========================
86
  species_df = pd.read_csv(f"{DATA_DIR}/species_list.csv")
87
  SPECIES = species_df["species"].tolist()
88
+ CFG.num_classes = len(SPECIES)
89
 
90
# Soundscape segment table produced by NB1; time columns may be clock strings.
sc_df = pd.read_csv(f"{DATA_DIR}/soundscape_labels_with_folds.csv")

# "start" is required downstream, so default it to 0.0 when the column is absent.
has_start = "start" in sc_df.columns
sc_df["start"] = sc_df["start"].apply(parse_time_col) if has_start else 0.0

# "end" is optional: normalise it only when present.
if "end" in sc_df.columns:
    sc_df["end"] = sc_df["end"].apply(parse_time_col)

print("sc_df:", sc_df.shape)
print("species:", len(SPECIES))
100
 
101
  # =========================
102
  # MODEL
 
106
  super().__init__()
107
  self.backbone = timm.create_model(backbone, pretrained=False, in_chans=3, features_only=True)
108
  fi = self.backbone.feature_info
109
+ ch = fi[-2]["num_chs"] + fi[-1]["num_chs"]
110
  self.pool = nn.AdaptiveAvgPool2d(1)
111
  self.fc = nn.Linear(ch, CFG.num_classes)
112
 
113
  def forward(self, x):
114
+ feats = self.backbone(x)
115
+ f3, f4 = feats[-2], feats[-1]
116
  if f3.shape[2:] != f4.shape[2:]:
117
+ f4 = F.interpolate(f4, size=f3.shape[2:], mode="bilinear", align_corners=False)
118
  x = torch.cat([f3, f4], 1)
119
+ x = self.pool(x).flatten(1)
120
  return self.fc(x)
121
 
122
  # =========================
123
+ # DATASET
124
  # =========================
125
  class SoundscapeDS(Dataset):
126
+ def __init__(self, df):
127
  self.df = df.reset_index(drop=True)
 
128
  self.cache = {}
129
 
130
  def __len__(self):
 
137
  wav = wav.mean(0).numpy()
138
  if sr != CFG.sr:
139
  wav = librosa.resample(wav, orig_sr=sr, target_sr=CFG.sr)
140
+ self.cache[fname] = wav.astype(np.float32)
141
  except Exception:
142
  self.cache[fname] = np.zeros(CFG.sr * 60, dtype=np.float32)
143
  return self.cache[fname]
144
 
145
+ def __getitem__(self, idx):
146
+ r = self.df.iloc[idx]
147
  wav = self.load_audio(r["filename"])
148
+ start = int(float(r["start"]) * CFG.sr)
149
  chunk = wav[start:start + CFG.n_samples]
150
  if len(chunk) < CFG.n_samples:
151
  chunk = np.pad(chunk, (0, CFG.n_samples - len(chunk)))
152
+ x_b0 = make_spec(chunk, CFG.spec_b0)
153
+ x_b3 = make_spec(chunk, CFG.spec_b3)
154
+ return x_b0, x_b3
 
 
 
155
 
156
  # =========================
157
+ # LOAD MODELS
158
  # =========================
159
# Build the ensemble from whichever NB2 fold checkpoints are present, as a
# list of (family name, model in eval mode) pairs.
models = []
for name, backbone in (("b0", "tf_efficientnet_b0_ns"), ("b3", "tf_efficientnet_b3_ns")):
    for fold in range(5):
        path = f"{MODEL_DIR}/{name}_fold{fold}.pt"
        if not os.path.exists(path):
            print("missing:", path)
            continue
        model = Model(backbone).to(CFG.device)
        state = torch.load(path, map_location=CFG.device)
        # strict=False tolerates key differences, but a silent mismatch would
        # leave layers at their random initialisation and corrupt the
        # pseudo-labels, so surface whatever did not line up.
        load_result = model.load_state_dict(state, strict=False)
        if load_result.missing_keys or load_result.unexpected_keys:
            print(
                f"warning: {path}: {len(load_result.missing_keys)} missing, "
                f"{len(load_result.unexpected_keys)} unexpected state_dict keys"
            )
        model.eval()
        models.append((name, model))
        print("loaded:", path)

if not models:
    raise ValueError("No NB2 fold models found. Check MODEL_DIR.")

print("ensemble size:", len(models))
 
 
178
 
179
+ # =========================
180
+ # PSEUDO-LABEL INFERENCE
181
+ # =========================
182
# Run every loaded model over every soundscape segment, average the logits
# across the ensemble, and export soft and hard pseudo-labels.
ds = SoundscapeDS(sc_df)
dl = DataLoader(ds, batch_size=CFG.batch_size, shuffle=False,
                num_workers=CFG.num_workers, pin_memory=True)

use_amp = CFG.device == "cuda"
all_preds = []
with torch.no_grad():
    for bi, (x_b0, x_b3) in enumerate(dl):
        x_b0 = x_b0.to(CFG.device, non_blocking=True)
        x_b3 = x_b3.to(CFG.device, non_blocking=True)

        # Accumulate logits on the device so each batch needs only one
        # device-to-host copy instead of one per model.
        logit_sum = None
        for name, model in models:
            # Each backbone family gets the spectrogram built with its own settings.
            x = x_b0 if name == "b0" else x_b3
            with autocast("cuda", dtype=torch.float16, enabled=use_amp):
                out = model(x)
            out = out.float()
            logit_sum = out if logit_sum is None else logit_sum + out

        # torch.sigmoid is numerically stable; the hand-written
        # 1 / (1 + exp(-x)) overflows float32 for strongly negative logits.
        probs = torch.sigmoid(logit_sum / len(models)).cpu().numpy()
        all_preds.append(probs)
        if (bi + 1) % 50 == 0:
            print(f"batch {bi+1}/{len(dl)}")

if not all_preds:
    raise ValueError("sc_df has no rows; nothing to pseudo-label.")
preds = np.concatenate(all_preds, axis=0)

# Soft labels: raw ensemble probabilities, one column per species.
pseudo_soft = sc_df.copy()
for i, sp in enumerate(SPECIES):
    pseudo_soft[sp] = preds[:, i]
pseudo_soft.to_csv(f"{OUTPUT_DIR}/pseudo_labels_soft.csv", index=False)

# Hard labels: probabilities thresholded at 0.5, keeping only the rows with
# at least one species above the threshold.
above = preds > 0.5
pseudo_hard = sc_df.copy()
for i, sp in enumerate(SPECIES):
    pseudo_hard[sp] = above[:, i].astype(np.int8)
conf_mask = above.any(axis=1)
pseudo_hard_conf = pseudo_hard[conf_mask].copy()
pseudo_hard_conf.to_csv(f"{OUTPUT_DIR}/pseudo_labels_hard_confident.csv", index=False)

print("saved:", f"{OUTPUT_DIR}/pseudo_labels_soft.csv")
print("saved:", f"{OUTPUT_DIR}/pseudo_labels_hard_confident.csv")
print("confident rows:", int(conf_mask.sum()), "/", len(sc_df))