eloise54 committed
Commit
8234a97
·
1 Parent(s): b4c4828

new requirements file

Files changed (2)
  1. PCAM-pipeline.py +0 -973
  2. requirements.txt +250 -5
PCAM-pipeline.py DELETED
@@ -1,973 +0,0 @@
- #!/usr/bin/env python
- # coding: utf-8
-
- # # 🧬 PCam Dataset: Tumor Detection via Binary Image Classification
- #
- # For full dataset details, visit the official repository:
- # 🔗 [github.com/basveeling/pcam](https://github.com/basveeling/pcam)
- #
- #
- # ## 📊 Dataset Overview
- #
- # The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
- #
- # - 📦 **Total images**: 327,680 color patches
- # - 🖼️ **Image size**: 96 × 96 pixels
- # - 🧪 **Source**: Histopathologic scans of lymph node sections
- # - 🏷️ **Labels**: Binary — a positive (1) label indicates that the center 32 × 32 px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
- #
- #
- # ## 🧠 Solution to Implement
- #
- # In this notebook, we implement a solution inspired by the following research paper:
- #
- # > 📄 [**Cancer Image Classification Based on DenseNet Model**](https://arxiv.org/abs/2011.11186)
- # > _by Zhong, Ziliang; Zheng, Muhang; Mai, Huafeng; Zhao, Jianan; Liu, Xinyi_
- #
- # This study explores the application of DenseNet architectures to the PCam dataset for accurate cancer classification.
- #
- # ---
- #
- # ## Results
- #
- # The best model trained in this notebook achieves the following scores on the Kaggle leaderboard:
- #
- # ```Public score: 0.9648```
- # ```Private score: 0.9702```
- #
-
- # # 1. Load the dataset
- # Load the training, test and validation datasets for PCam.
- #
- # We are going to use the Kaggle version, which is a cleaned version of the official PCam dataset.
- #
- # In the Kaggle version, duplicates are removed and there is no leakage between the training and test datasets.
-
- # In[1]:
-
-
- import typing as tp
- import numpy as np
- import torch
- import torchvision
- from torch import nn
- from torch.utils.data import Dataset, DataLoader, ConcatDataset
- from torchvision.transforms import ToTensor
- from torchvision import datasets
- from torch.utils.tensorboard import SummaryWriter
-
-
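- # Illustration (added; not from the original notebook) of the label rule above:
- # only the center 32x32 region of a 96x96 patch determines the label.
- # `patch` is a random stand-in for a real PCam tile.
- patch = torch.rand(3, 96, 96)
- center = patch[:, 32:64, 32:64]  # the only region that can make the label positive
- assert center.shape == (3, 32, 32)
-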
- # Use the GPU if available.
-
- # In[2]:
-
-
- from torch.optim import Optimizer, lr_scheduler
- from torch.optim.lr_scheduler import LRScheduler
-
- if torch.cuda.is_available():
-     device = torch.device("cuda")
- else:
-     device = torch.device("cpu")
- print("Using device", device)
-
-
- # Let's download the Kaggle dataset.
- # For this you need your Kaggle credentials.
- # If you have not already set up your ```~/.kaggle/kaggle.json``` key:
- # - Go to your Kaggle account settings and create a new API token if needed.
- # - Then fill in this part with your information: ```creds = '{"username":"xxxxx","key":"xxxxx"}'```
-
- # In[3]:
-
-
- get_ipython().system('pip install kaggle')
- creds = '{"username":"xxxxx","key":"xxxxx"}'
- from pathlib import Path
-
- cred_path = Path('~/.kaggle/kaggle.json').expanduser()
- if not cred_path.exists():
-     cred_path.parent.mkdir(exist_ok=True)
-     cred_path.write_text(creds)
-     cred_path.chmod(0o600)
-
-
- # In[4]:
-
-
- import os
- import zipfile
-
- root = "data/"
- dataset_dir = "data/histopathologic-cancer-detection"
- zip_file = "histopathologic-cancer-detection.zip"
- train_path = os.path.join(dataset_dir, "train")
-
- if not os.path.exists(root):
-     os.mkdir(root)
-
- if not os.path.exists('results'):
-     os.mkdir('results')
-
- if not os.path.exists(train_path):
-     print("Downloading Histopathologic Cancer Detection dataset...")
-     get_ipython().system('kaggle competitions download -c histopathologic-cancer-detection -p {root} --force')
- else:
-     print("Dataset zip already downloaded.")
-
- if not os.path.exists(train_path):
-     print("Unzipping dataset...")
-     with zipfile.ZipFile(os.path.join(root, zip_file), 'r') as zip_ref:
-         zip_ref.extractall(dataset_dir)
- else:
-     print("Dataset already unzipped.")
-
-
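- # Optional sanity check (added for illustration): count the extracted tiles.
- # Assumes the standard Kaggle layout, i.e. train/*.tif plus train_labels.csv.
- n_train_tiles = len([f for f in os.listdir(train_path) if f.endswith(".tif")])
- print(f"Found {n_train_tiles} training tiles")
-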
- # Now let's create our PyTorch dataset class.
- # I used train_test_split from sklearn to get a stratified split (the Kaggle PCam dataset is unbalanced).
-
- # In[5]:
-
-
- from sklearn.model_selection import train_test_split
- from PIL import Image
- import pandas as pd
-
- class PcamDatasetKaggle(torchvision.datasets.VisionDataset):
-     def __init__(self, root, split, transform, target_transform=None):
-         super().__init__(root, transform=transform, target_transform=target_transform)
-         self.root = root
-         self.split = split
-         self.transform = transform
-         self.img_path = os.path.join(self.root, "train")
-
-         self.full_labels = pd.read_csv(self.root + '/train_labels.csv')
-         X_train, X_test, y_train, y_test = train_test_split(self.full_labels['id'],
-                                                             self.full_labels['label'],
-                                                             test_size=0.2,
-                                                             train_size=0.8,
-                                                             random_state=30,
-                                                             shuffle=True,
-                                                             stratify=self.full_labels['label'])
-
-         if split == "train":
-             self.imgs = X_train + ".tif"
-             self.labels = y_train
-         elif split == "val":
-             self.imgs = X_test + ".tif"
-             self.labels = y_test
-         else:
-             # Test split: images are unlabeled, so use -10 as a sentinel label
-             self.img_path = os.path.join(self.root, self.split)
-             self.imgs = pd.Series(list(sorted(os.listdir(self.img_path))))
-             self.labels = pd.Series(torch.full((len(self.imgs),), -10))
-         assert len(self.labels) == len(self.imgs)
-         print("Split", split, "negative/positive samples %", 100.0 * (self.labels.value_counts() / self.labels.shape[0]))
-
-     def __getitem__(self, idx):
-         assert idx < len(self.imgs)
-         img_pil = Image.open(os.path.join(self.img_path, self.imgs.iloc[idx]))
-         img = self.transform(img_pil)
-         label = self.labels.iloc[idx]
-         return img, label
-
-     def __len__(self):
-         return len(self.imgs)
-
- def check_dataset_leakage(dataset1, dataset2):
-     duplicates = set(dataset1.imgs) & set(dataset2.imgs)
-     assert len(duplicates) == 0
-
- def check_same_imgs(dataset1, dataset2):
-     duplicates = set(dataset1.imgs) & set(dataset2.imgs)
-     assert len(duplicates) == len(dataset1.imgs)
-     assert len(duplicates) == len(dataset2.imgs)
-
-
- # Let's define some transforms for data loading and data augmentation.
- #
- # An improvement could be to use [albumentations](https://albumentations.ai/) to define a more refined ```transform_data_augment``` (a rough sketch follows after the next cell).
-
- # In[6]:
-
-
- import torchvision.transforms as transforms
-
- torch.manual_seed(30)
- torch.cuda.manual_seed_all(30)
-
- # Preprocess images with transforms
- transform = transforms.Compose([
-     transforms.Resize((224, 224)),  # match the 224x224 ImageNet input size of the pretrained backbone
-     transforms.ToTensor()
- ])
-
- # For augmenting data
- transform_data_augment = transforms.Compose([
-     transforms.Resize((300, 300)),
-     transforms.RandomHorizontalFlip(),
-     transforms.RandomVerticalFlip(),
-     transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 0.5)),
-     transforms.RandomRotation(degrees=25),
-     transforms.ColorJitter(
-         brightness=0.1,
-         contrast=0.1,
-         saturation=0.01,
-         hue=0.005
-     ),
-     transforms.CenterCrop((224, 224)),
-     transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0)),
-     transforms.ToTensor()
- ])
-
-
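- # A rough sketch (added; not in the original notebook) of an albumentations-based
- # alternative to transform_data_augment. It assumes albumentations 2.x is installed;
- # transform names and signatures may differ across versions. albumentations operates
- # on numpy HWC arrays, and ToTensorV2 returns a CHW tensor like ToTensor.
- import albumentations as A
- from albumentations.pytorch import ToTensorV2
-
- alb_transform_sketch = A.Compose([
-     A.Resize(300, 300),
-     A.HorizontalFlip(p=0.5),
-     A.VerticalFlip(p=0.5),
-     A.Rotate(limit=25, p=0.7),
-     A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.01, hue=0.005, p=0.5),
-     A.CenterCrop(224, 224),
-     ToTensorV2(),
- ])
- # Usage: alb_transform_sketch(image=np.array(img_pil))["image"]
-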
- # In[7]:
-
-
- from copy import deepcopy
-
- """ The torchvision PCAM version; that dataset, however, is not cleaned:
- training_set_original = datasets.PCAM(root="data", split="train", download=True, transform=transform)
- training_set_augment = datasets.PCAM(root="data", split="train", download=True, transform=transform_data_augment)
- val_set = datasets.PCAM(root="data", split="val", download=True, transform=transform)
- test_set = datasets.PCAM(root="data", split="test", download=True, transform=transform)
- """
-
- training_set_original = PcamDatasetKaggle(root=dataset_dir, split="train", transform=deepcopy(transform))
- training_set_augment = PcamDatasetKaggle(root=dataset_dir, split="train", transform=deepcopy(transform_data_augment))
-
- val_set = PcamDatasetKaggle(root=dataset_dir, split="val", transform=deepcopy(transform))
- val_set_augment = PcamDatasetKaggle(root=dataset_dir, split="val", transform=deepcopy(transform_data_augment))
-
- test_set = PcamDatasetKaggle(root=dataset_dir, split="test", transform=deepcopy(transform))
- test_set_augment = PcamDatasetKaggle(root=dataset_dir, split="test", transform=deepcopy(transform_data_augment))  # for TTA
-
- check_dataset_leakage(training_set_original, val_set)
- check_dataset_leakage(training_set_original, test_set)
- check_dataset_leakage(val_set, test_set)
- check_same_imgs(training_set_original, training_set_augment)
- check_same_imgs(val_set, val_set_augment)
- check_same_imgs(test_set, test_set_augment)
-
-
- # # 2. Plot and visualize the original and augmented data
- # Each raw (3, 96, 96) image is associated with a binary label indicating the presence of a tumor.
- #
- # Let's define a function to plot some images with their labels.
- #
- # Let's save the plots in an experiment directory for logging purposes.
-
- # In[8]:
-
-
- import matplotlib.pyplot as plt
-
- def plot_training_set_sample(training_set,
-                              file_name="results/pcam/data.png",
-                              rows=5,
-                              cols=5,
-                              mean_stdev=torch.Tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])):
-     mean = mean_stdev[0].numpy()
-     std = mean_stdev[1].numpy()
-     fig = plt.figure(figsize=(2 * cols, 2 * rows))
-     for i in range(1, rows * cols + 1):
-         random_idx = torch.randint(len(training_set), (1,)).item()
-         img, label = training_set[random_idx]  # load each sample only once
-         fig.add_subplot(rows, cols, i)
-         img = img.permute(1, 2, 0).numpy()
-         img_unnormalized = img * std + mean  # undo normalization for display
-         img_unnormalized = np.clip(img_unnormalized, 0, 1)
-         plt.imshow(img_unnormalized)
-         plt.axis("off")
-         plt.title(label)
-     plt.savefig(file_name)
-     plt.show()
-
-
- # In[9]:
-
-
- import os
- from datetime import datetime
- exp_dir = "results/pcam/" + datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
- os.makedirs(exp_dir)  # makedirs: the intermediate "results/pcam" may not exist yet
-
-
- # In[10]:
-
-
- print("Original Training Set")
- plot_training_set_sample(training_set_original, exp_dir + "/training_set_original.png", rows=2, cols=5)
-
-
- # In[11]:
-
-
- print("Augmented Training Set")
- plot_training_set_sample(training_set_augment, exp_dir + "/training_set_augment.png", rows=2, cols=5)
-
-
- # # 3. Normalize and create the augmented dataset
-
- # Let's create a function that computes the mean, standard deviation and class balance of a PyTorch DataLoader.
- #
- # Then normalize the datasets accordingly.
-
- # In[12]:
-
-
- def compute_dataset_mean_stdev_class_balance(dataloader: DataLoader, device: torch.device):
-     mean = 0.0
-     stdev = 0.0
-     y_full = torch.Tensor([]).to(device)
-     for batch, (X, y) in enumerate(dataloader):
-         X = X.to(device)
-         y = y.to(device)
-         batch_samples = X.size(0)
-         # Per-channel statistics, weighted by batch size.
-         # Note: averaging per-batch stds only approximates the true dataset std.
-         mean += torch.mean(X, dim=(0, 2, 3)) * batch_samples
-         stdev += torch.std(X, dim=(0, 2, 3)) * batch_samples
-         y_full = torch.cat([y_full, y])
-     positive_labels = (y_full == torch.Tensor([1]).to(device)).sum()
-     negative_labels = (y_full == torch.Tensor([0]).to(device)).sum()
-     return [mean.detach().cpu() / len(dataloader.dataset), stdev.detach().cpu() / len(dataloader.dataset)], positive_labels.detach().cpu(), negative_labels.detach().cpu()
-
-
- # In[13]:
-
-
- # Create DataLoaders
- batch_size = 128
- training_set_original_dataloader = DataLoader(training_set_original, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=6, persistent_workers=True)
- training_set_augment_dataloader = DataLoader(training_set_augment, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=6, persistent_workers=True)
-
- # Compute the mean and std to normalize images, if not already done
- COMPUTE_NORMALIZATION_AGAIN = False
-
- mean_stdev_original = [torch.Tensor([0.7022, 0.5459, 0.6962]), torch.Tensor([0.2218, 0.2668, 0.1982])]
- mean_stdev_augment = [torch.Tensor([0.6939, 0.5397, 0.6904]), torch.Tensor([0.2225, 0.2661, 0.1988])]
-
- pos = 71294
- neg = 104726
- apos = 71294
- aneg = 104726
-
- if COMPUTE_NORMALIZATION_AGAIN:
-     mean_stdev_original, pos, neg = compute_dataset_mean_stdev_class_balance(training_set_original_dataloader, device)
-     mean_stdev_augment, apos, aneg = compute_dataset_mean_stdev_class_balance(training_set_augment_dataloader, device)
-
-
- def combine_std(mean1_stdev1: torch.Tensor, mean2_stdev2: torch.Tensor):
-     mean1, stdev1 = mean1_stdev1[0], mean1_stdev1[1]
-     mean2, stdev2 = mean2_stdev2[0], mean2_stdev2[1]
-
-     mean3 = (mean1 + mean2) * 0.5
-
-     var1 = stdev1 ** 2
-     var2 = stdev2 ** 2
-     # Pooled variance of two equal-size groups: mean of the variances plus spread of the means
-     var3 = 0.5 * (var1 + (mean1 - mean3) ** 2 + var2 + (mean2 - mean3) ** 2)
-
-     stdev3 = torch.sqrt(var3)
-     return [mean3, stdev3]
-
- new_mean_stdev = combine_std(mean_stdev_original, mean_stdev_augment)
- new_mean_stdev = torch.stack(new_mean_stdev).cpu().detach()
-
- print("Normalization done with")
- print("training_set [mean, stdev]: ", new_mean_stdev)
-
- training_set_original_transform = transforms.Compose([*training_set_original.transforms.transform.transforms,
-                                                       transforms.Normalize(new_mean_stdev[0], new_mean_stdev[1])])
-
- training_set_augment_transform = transforms.Compose([*training_set_augment.transforms.transform.transforms,
-                                                      transforms.Normalize(new_mean_stdev[0], new_mean_stdev[1])])
-
-
- training_set_original = PcamDatasetKaggle(root=dataset_dir, split="train", transform=deepcopy(training_set_original_transform))
- training_set_augment = PcamDatasetKaggle(root=dataset_dir, split="train", transform=deepcopy(training_set_augment_transform))
- val_set = PcamDatasetKaggle(root=dataset_dir, split="val", transform=deepcopy(training_set_original_transform))
- val_set_augment = PcamDatasetKaggle(root=dataset_dir, split="val", transform=deepcopy(training_set_augment_transform))
- test_set = PcamDatasetKaggle(root=dataset_dir, split="test", transform=deepcopy(training_set_original_transform))
-
-
- # Create the augmented training dataset
- training_set = ConcatDataset([training_set_original, training_set_augment])
-
- # Create the final DataLoaders
- training_dataloader = DataLoader(training_set, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=6, persistent_workers=True)
- val_dataloader = DataLoader(val_set, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers=True)
- val_dataloader_augment = DataLoader(val_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers=True)
- test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers=True)
-
-
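- # Quick numeric check (added for illustration) of the pooled-variance identity
- # behind combine_std: for two equal-size groups, the combined variance equals
- # the mean of the variances plus the variance of the means. The comparison is
- # close rather than exact because torch.std applies Bessel's correction.
- _x1, _x2 = torch.rand(10000), torch.rand(10000) + 0.5
- _pooled = combine_std([_x1.mean(), _x1.std()], [_x2.mean(), _x2.std()])
- print(_pooled[1], torch.cat([_x1, _x2]).std())
-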
- # In[14]:
-
-
- print("Full Training Set Normalized")
- plot_training_set_sample(training_set, exp_dir + "/training_set_final.png", rows=2, cols=5, mean_stdev=new_mean_stdev)
-
-
- # # 4. Defining a training loop over one epoch and a metric
- # The dataset is not balanced, so roc_auc_score is a better metric than accuracy.
-
- # In[15]:
-
-
- def compute_metrics(full_y: torch.Tensor,
-                     full_logits: torch.Tensor,
-                     full_pred: torch.Tensor,
-                     sk_learn_metrics_logits: tp.List[tp.Callable],
-                     sk_learn_metrics_pred: tp.List[tp.Callable]) -> tp.Dict:
-     full_y = full_y.detach().cpu().numpy()
-     # Score-based metrics (e.g. roc_auc_score) receive probabilities, not raw logits
-     full_logits = torch.sigmoid(full_logits).detach().cpu().numpy()
-     full_pred = full_pred.detach().cpu().numpy()
-
-     results = {}
-     for metric in sk_learn_metrics_logits:
-         results[metric.__name__] = metric(full_y, full_logits)
-     for metric in sk_learn_metrics_pred:
-         results[metric.__name__] = metric(full_y, full_pred)
-     return results
-
-
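- # Illustrative usage (added): y, logits and hard predictions are flat tensors of
- # equal length; the result maps each metric name to its value.
- from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
- _y = torch.tensor([0., 1., 1., 0.])
- _logits = torch.tensor([-2.0, 1.5, 0.3, -0.7])
- _preds = (torch.sigmoid(_logits) > 0.5).float()
- print(compute_metrics(_y, _logits, _preds, [roc_auc_score], [f1_score, accuracy_score]))
-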
- # In[16]:
-
-
- def run_one_epoch(model: nn.Module,
-                   training_dataloader: DataLoader,
-                   optimizer: Optimizer,
-                   loss_function: nn.Module,
-                   scheduler: LRScheduler,
-                   device: torch.device,
-                   writer: SummaryWriter,
-                   epoch: int,
-                   sk_learn_metrics_logits: tp.List[tp.Callable],
-                   sk_learn_metrics_pred: tp.List[tp.Callable],
-                   threshold: float = 0.5):
-     running_loss = 0.0
-     num_batch = len(training_dataloader)
-     full_y = torch.Tensor([]).to(device)
-     full_logits = torch.Tensor([]).to(device)
-     full_pred = torch.Tensor([]).to(device)
-
-     model.train()
-     scaler = torch.amp.GradScaler("cuda")  # loss scaling for mixed precision
-     for batch, (X, y) in enumerate(training_dataloader):
-         optimizer.zero_grad()
-         X = X.to(device, non_blocking=True)
-         y = y.to(device, non_blocking=True)
-         with torch.amp.autocast("cuda"):  # forward pass in mixed precision
-             logits = model(X).squeeze()
-             loss = loss_function(logits, y.float())
-         scaler.scale(loss).backward()
-         scaler.step(optimizer)
-         scaler.update()
-
-         with torch.no_grad():
-             preds = (torch.sigmoid(logits) > threshold).float()
-             full_y = torch.cat([full_y, y])
-             full_logits = torch.cat([full_logits, logits])
-             full_pred = torch.cat([full_pred, preds])
-
-         running_loss += loss.item()
-         avg_loss = running_loss / (batch + 1.)
-         if batch % 250 == 0:
-             writer.add_scalar('Training Loss (avg)', avg_loss, batch + epoch * num_batch)
-             writer.add_scalar('Training Loss (raw)', loss.item(), batch + epoch * num_batch)
-     scheduler.step()
-     writer.flush()
-     return compute_metrics(full_y, full_logits, full_pred, sk_learn_metrics_logits, sk_learn_metrics_pred)
-
-
- # In[17]:
-
-
- def eval_model(model: nn.Module,
-                dataloader: DataLoader,
-                sk_learn_metrics_logits: tp.List[tp.Callable],
-                sk_learn_metrics_pred: tp.List[tp.Callable],
-                device: torch.device,
-                threshold: float = 0.5) -> tp.Dict:
-
-     model.eval()
-     full_y = torch.Tensor([]).to(device)
-     full_logits = torch.Tensor([]).to(device)
-     full_pred = torch.Tensor([]).to(device)
-
-     with torch.no_grad():
-         for X, y in dataloader:
-             X = X.to(device)
-             y = y.to(device)
-             logits = model(X).squeeze()
-             preds = (torch.sigmoid(logits) > threshold).float()
-
-             full_y = torch.cat([full_y, y])
-             full_logits = torch.cat([full_logits, logits])
-             full_pred = torch.cat([full_pred, preds])
-     return compute_metrics(full_y, full_logits, full_pred, sk_learn_metrics_logits, sk_learn_metrics_pred)
-
-
- # # 5. Set up TensorBoard for monitoring
-
- # In[18]:
-
-
- import threading
- from tensorboard import program
-
- def start_tensorboard(logdir):
-     tb = program.TensorBoard()
-     tb.configure(argv=[None, '--logdir', logdir])
-     url = tb.launch()
-     print(f"TensorBoard is running at {url}")
-
- # Log to the experiment directory
- logdir = exp_dir
- tb_thread = threading.Thread(target=start_tensorboard, args=(logdir,), daemon=True)
- tb_thread.start()
-
-
- # In[19]:
-
-
- from PIL import Image
-
- def load_image(path):
-     img = Image.open(path).convert("RGB")  # drop any alpha channel from the saved PNGs
-     # Convert to a numpy array in (C, H, W) layout for add_image
-     img_array = np.array(img)
-     if len(img_array.shape) == 2:  # grayscale image
-         img_array = np.expand_dims(img_array, axis=0)  # (1, H, W)
-     else:  # color image
-         img_array = img_array.transpose(2, 0, 1)  # (C, H, W)
-     return img_array
-
- writer = SummaryWriter(exp_dir + '/tensorboard')
- writer.add_image('training_set_original', load_image(exp_dir + "/training_set_original.png"), 0)
- writer.flush()
- writer.add_image('training_set_augment', load_image(exp_dir + "/training_set_augment.png"), 0)
- writer.flush()
- writer.add_image('training_set_final', load_image(exp_dir + "/training_set_final.png"), 0)
- writer.flush()
-
-
- # # 6. Find the best learning rate
- #
- # > 📄 [**Cancer Image Classification Based on DenseNet Model**](https://arxiv.org/abs/2011.11186)
- # > _by Zhong, Ziliang; Zheng, Muhang; Mai, Huafeng; Zhao, Jianan; Liu, Xinyi_
- #
- # The paper suggests using a learning rate lr = 1e-4 for densenet201.
- #
- # You can also plot the loss with respect to the lr, evaluated on a few batches.
- #
- # It gives insight into which lr to pick: here, between 1e-4 and 1e-3.
-
- # In[20]:
-
-
- from torchvision.models import densenet201, DenseNet201_Weights
- model = densenet201(weights=DenseNet201_Weights.DEFAULT)
-
- for params in model.parameters():
-     params.requires_grad = False
-
- model.classifier = nn.Sequential(nn.Linear(model.classifier.in_features, 1, bias=True))
-
- for param in model.classifier.parameters():
-     param.requires_grad = True
-
- model = model.to(device)
-
- def custom_lr_find(model: nn.Module,
-                    dataloader: DataLoader,
-                    loss_function: nn.Module,
-                    device: str,
-                    start_lr=1e-7,
-                    end_lr=1.0,
-                    num_iteration=200):
-     rates = []
-     losses = []
-     model = model.to(device)
-     optimizer = torch.optim.Adam(model.parameters(), lr=start_lr)
-
-     def lr_lambda(iteration):
-         # Exponential ramp from start_lr to end_lr over num_iteration steps
-         return (end_lr / start_lr) ** (iteration / num_iteration)
-
-     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)
-     initial_weights = deepcopy(model.state_dict())  # deepcopy: state_dict() only holds references
-     model.train()
-
-     X_full = torch.Tensor([]).to(device)
-     y_full = torch.Tensor([]).to(device)
-
-     # Gather a few batches to evaluate the loss on
-     for _ in range(5):
-         X, y = next(iter(dataloader))
-         X = X.to(device)
-         y = y.to(device)
-         X_full = torch.cat([X_full, X])
-         y_full = torch.cat([y_full, y])
-
-     for i in range(num_iteration):
-         optimizer.zero_grad()
-
-         pred = model(X_full).squeeze()
-         loss = loss_function(pred, y_full.float())
-         losses.append(loss.item())
-         rates.append(scheduler.get_last_lr()[0])
-         loss.backward()
-         optimizer.step()
-         scheduler.step()
-         if scheduler.get_last_lr()[0] > end_lr:
-             break
-     model.load_state_dict(initial_weights)  # restore the weights after the sweep
-     return rates, losses
-
- def plot_lr_find(rates, losses, file_name):
-     plt.figure()
-     plt.plot(rates, losses)
-     plt.xscale('log')
-     plt.xlabel('learning_rate')
-     plt.ylabel('loss')
-     plt.ylim(0.0, 1.0)
-     plt.title('lr_find_results')
-     plt.savefig(file_name)
-     plt.show()
-
- pos_weight = torch.Tensor([float(neg) / float(pos)]).to(device)  # accounts for the class imbalance
- #rates, losses = custom_lr_find(model, training_dataloader, torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight), device)
- rates, losses = custom_lr_find(model, training_dataloader, torch.nn.BCEWithLogitsLoss(), device)
- plot_lr_find(rates, losses, exp_dir + '/lr_find.jpg')
- writer.add_image('lr_find', load_image(exp_dir + "/lr_find.jpg"), 0)
- writer.flush()
-
-
- # # 7. Using already trained networks: train the head only
- #
- # First train the head while keeping all other layers frozen.
-
- # In[21]:
-
-
- from torchvision.models import densenet201, DenseNet201_Weights
- model = densenet201(weights=DenseNet201_Weights.DEFAULT)
-
- for params in model.parameters():
-     params.requires_grad = False
-
- # Replace the last layer (to output a 1-d prediction)
- model.classifier = nn.Sequential(nn.Linear(model.classifier.in_features, 1, bias=True))
-
- for param in model.classifier.parameters():
-     param.requires_grad = True
-
- model = model.to(device)
-
-
- # In[22]:
-
-
- # Optionally load from a checkpoint
- """
- model = torch.load('results/pcam/14_06_2025_10_25_48/model_' + str(19) + '.pt', weights_only=False)
- for params in model.parameters():
-     params.requires_grad = False
- for param in model.classifier.parameters():
-     param.requires_grad = True
- model = model.to(device)
- """
-
-
- # In[23]:
-
-
- lr = 1e-4
-
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
- #loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
- loss_func = torch.nn.BCEWithLogitsLoss()
- # scheduler.step() is called once per epoch, so step_size=1000 keeps the lr effectively constant
- scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.01)
-
-
- # In[24]:
-
-
- from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
- import time
- epoch_num = 2
- sk_learn_metrics_logits = [roc_auc_score]
- sk_learn_metrics_pred = [f1_score, accuracy_score]
- for i in range(0, epoch_num):
-     start_time = time.time()
-     train_res = run_one_epoch(model,
-                               training_dataloader,
-                               optimizer,
-                               loss_func,
-                               scheduler,
-                               device,
-                               writer,
-                               i,
-                               sk_learn_metrics_logits,
-                               sk_learn_metrics_pred)
-     end_time = time.time()
-     print("epoch n°:", i, " training time:", end_time - start_time, "sec")
-     start_time = time.time()
-     val_res = eval_model(model, val_dataloader, sk_learn_metrics_logits, sk_learn_metrics_pred, device)
-     for key in train_res.keys():
-         writer.add_scalars(key, {"Train " + key: train_res[key], "Val " + key: val_res[key]}, i * len(training_dataloader))
-     end_time = time.time()
-     print("epoch n°:", i, " evaluation time:", end_time - start_time, "sec")
-     torch.save(model, exp_dir + "/model_" + str(i) + ".pt")
-
-
- # # 8. Using already trained networks: fine-tune a few layers
- # I did not use this in the end; it is optional.
-
- # In[25]:
-
-
- '''
- for name, param in model.features.denseblock4.denselayer32.conv1.named_parameters():
-     param.requires_grad = True
-
- for name, param in model.features.denseblock4.denselayer32.conv2.named_parameters():
-     param.requires_grad = True
- '''
-
-
- # In[26]:
-
-
- # Unfreeze only the convs of the last dense layer, plus the classifier
- '''
- lr = 1e-4
- #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
- #loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
- loss_func = torch.nn.BCEWithLogitsLoss()
- # Use a lower LR for fine-tuning
- optimizer = torch.optim.Adam([
-     {"params": model.classifier.parameters(), "lr": 1e-4},
-     {"params": model.features.denseblock4.denselayer32.conv1.parameters(), "lr": 1e-5},
-     {"params": model.features.denseblock4.denselayer32.conv2.parameters(), "lr": 1e-5},
- ])
- '''
-
-
- # In[27]:
-
-
- '''
- from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
- import time
- sk_learn_metrics_logits = [roc_auc_score]
- sk_learn_metrics_pred = [f1_score, accuracy_score]
- epoch_num = 2
- finetune_epoch_num = 6
- for i in range(epoch_num, epoch_num + finetune_epoch_num):
-     start_time = time.time()
-     train_res = run_one_epoch(model,
-                               training_dataloader,
-                               optimizer,
-                               loss_func,
-                               scheduler,
-                               device,
-                               writer,
-                               i,
-                               sk_learn_metrics_logits,
-                               sk_learn_metrics_pred)
-     end_time = time.time()
-     print("epoch n°:", i, " training time:", end_time - start_time, "sec")
-     start_time = time.time()
-     val_res = eval_model(model, val_dataloader, sk_learn_metrics_logits, sk_learn_metrics_pred, device)
-     for key in train_res.keys():
-         writer.add_scalars(key, {"Train " + key: train_res[key], "Val " + key: val_res[key]}, i * len(training_dataloader))
-     end_time = time.time()
-     print("epoch n°:", i, " evaluation time:", end_time - start_time, "sec")
-     torch.save(model, exp_dir + "/model_" + str(i) + ".pt")
- '''
-
-
- # # 9. Fine-tune the entire model
-
- # In[28]:
-
-
- for params in model.parameters():
-     params.requires_grad = True
-
- lr = 1e-5
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
- #loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
- loss_func = torch.nn.BCEWithLogitsLoss()
- scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.01)
-
-
- # In[ ]:
-
-
- from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
- import time
- sk_learn_metrics_logits = [roc_auc_score]
- sk_learn_metrics_pred = [f1_score, accuracy_score]
- epoch_num = 2
- finetune_epoch_num = 4
- for i in range(epoch_num, epoch_num + finetune_epoch_num):
-     start_time = time.time()
-     train_res = run_one_epoch(model,
-                               training_dataloader,
-                               optimizer,
-                               loss_func,
-                               scheduler,
-                               device,
-                               writer,
-                               i,
-                               sk_learn_metrics_logits,
-                               sk_learn_metrics_pred)
-     end_time = time.time()
-     print("epoch n°:", i, " training time:", end_time - start_time, "sec")
-     start_time = time.time()
-     val_res = eval_model(model, val_dataloader, sk_learn_metrics_logits, sk_learn_metrics_pred, device)
-     for key in train_res.keys():
-         writer.add_scalars(key, {"Train " + key: train_res[key], "Val " + key: val_res[key]}, i * len(training_dataloader))
-     end_time = time.time()
-     print("epoch n°:", i, " evaluation time:", end_time - start_time, "sec")
-     torch.save(model, exp_dir + "/model_" + str(i) + ".pt")
-
-
- # # 10. Compute test set predictions and submit to Kaggle
- #
- # We will use TTA (Test-Time Augmentation).
- # We can also optionally use several models for prediction and average their results.
-
- # In[30]:
-
-
- def run_inference(model: nn.Module,
-                   dataloader: DataLoader,
-                   device: torch.device):
-
-     model.eval()
-     full_y = torch.Tensor([]).to(device)
-     full_logits = torch.Tensor([]).to(device)
-
-     with torch.no_grad():
-         for X, y in dataloader:
-             X = X.to(device)
-             y = y.to(device)
-             logits = model(X).squeeze()
-
-             full_y = torch.cat([full_y, y])
-             full_logits = torch.cat([full_logits, logits])
-
-     return full_y, full_logits
-
-
- # In[54]:
-
-
- models_paths = ['results/pcam/17_06_2025_12_19_40/model_5.pt']
-
- tta_num = 5
- logits = []
- for model_path in models_paths:
-     pcam_model = torch.load(model_path, weights_only=False)  # load each listed model, not just the first
-     pcam_model = pcam_model.to(device)
-     # One pass per model on the plain test set
-     test_y, test_logits = run_inference(pcam_model, test_dataloader, device)
-     logits.append(test_logits)
-     # Then tta_num passes with random augmentations (note: transform_data_augment
-     # does not include the Normalize step added to the other transforms above)
-     for i in range(0, tta_num):
-         test_set_augment = PcamDatasetKaggle(root=dataset_dir, split="test", transform=deepcopy(transform_data_augment))  # for TTA
-         test_dataloader_augment = DataLoader(test_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers=True)
-         test_y_augm, test_logits_augm = run_inference(pcam_model, test_dataloader_augment, device)
-         logits.append(test_logits_augm)
-
-
- # In[55]:
-
-
- # Average the logits over all models and TTA passes
- logits_stacked = torch.stack(logits)
- mean_logits = torch.mean(logits_stacked, dim=0, keepdim=True)
-
-
- # In[56]:
-
-
- # Create the submission file with the final predictions
- image_ids = [img.replace('.tif', '') for img in test_set.imgs.tolist()]
- test_preds = torch.sigmoid(mean_logits)
-
- submission_df = pd.DataFrame({
-     'id': image_ids,
-     'label': test_preds.squeeze().detach().cpu().numpy()
- })
-
- submission_df.to_csv(exp_dir + '/submission.csv', index=False)
-
-
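- # Quick check (added): Kaggle expects exactly two columns, `id` and `label`,
- # with one row per test tile; preview a few rows before submitting.
- print(submission_df.head())
- assert list(submission_df.columns) == ['id', 'label']
-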
- # In[57]:
-
-
- sub_path = exp_dir + '/submission.csv'
- get_ipython().system('kaggle competitions submit -c histopathologic-cancer-detection -f {sub_path} -m "DenseNet201 + correct normalization + no ensemble, no 42*42 crop pytorch "')
-
-
- # # 11. Find the best prediction threshold on the validation set
-
- # In[40]:
-
-
- models_paths = ['results/pcam/17_06_2025_12_19_40/model_4.pt']
- pcam_model = torch.load(models_paths[0], weights_only=False)
- pcam_model = pcam_model.to(device)
- val_y, val_logits = run_inference(pcam_model, val_dataloader, device)
- val_y_augment, val_logits_augment = run_inference(pcam_model, val_dataloader_augment, device)
- full_y = torch.cat([val_y, val_y_augment])
- full_logits = torch.cat([val_logits, val_logits_augment])
-
-
- # In[41]:
-
-
- from sklearn.metrics import roc_curve, auc
- fpr, tpr, thresholds = roc_curve(full_y.detach().cpu().numpy(), torch.sigmoid(full_logits).detach().cpu().numpy())
- roc_auc = auc(fpr, tpr)
-
-
- # In[42]:
-
-
- plt.figure(figsize=(8, 6))
- plt.plot(fpr, tpr, color='orange', lw=2, label=f'ROC curve (AUC = {roc_auc:.4f})')
- plt.xlim([0.0, 1.0])
- plt.ylim([0.0, 1.0])
- plt.xlabel('False Positive Rate')
- plt.ylabel('True Positive Rate')
- plt.title('Receiver Operating Characteristic')
- plt.legend(loc="lower right")  # show the AUC label
- plt.grid(alpha=0.3)
- plt.show()
-
-
- # In[43]:
-
-
- # Find the best threshold index: maximize TPR - FPR (Youden's J statistic)
- j_scores = tpr - fpr
- best_idx = np.argmax(j_scores)
- best_threshold = thresholds[best_idx]
-
-
- # In[44]:
-
-
- best_threshold
-
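- # Example (added): apply the selected threshold to turn the validation
- # probabilities into hard labels.
- hard_preds = (torch.sigmoid(full_logits) > float(best_threshold)).float()
- print("positive rate at the best threshold:", hard_preds.mean().item())
-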
requirements.txt CHANGED
@@ -1,8 +1,253 @@
- gradio==5.34.1
+ absl-py==2.3.0
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.12.2
+ aiosignal==1.3.2
+ albucore==0.0.24
+ albumentations==2.0.8
+ annotated-types==0.7.0
+ anyio==4.9.0
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ asttokens==3.0.0
+ astunparse==1.6.3
+ async-lru==2.0.5
+ attrs==25.3.0
+ azure-cognitiveservices-search-imagesearch==2.0.1
+ azure-common==1.1.28
+ azure-core==1.34.0
+ azure-mgmt-core==1.5.0
+ babel==2.17.0
+ backcall==0.2.0
+ beartype==0.21.0
+ beautifulsoup4==4.13.4
+ bleach==6.2.0
+ blessed==1.21.0
+ blis==1.3.0
+ catalogue==2.0.10
+ certifi==2025.4.26
+ cffi==1.17.1
+ charset-normalizer==3.4.2
+ click==8.2.1
+ cloudpathlib==0.21.1
+ cloudpickle==3.1.1
+ comm==0.2.2
+ confection==0.1.5
+ contourpy==1.3.2
+ cycler==0.12.1
+ cymem==2.0.11
+ datasets==3.6.0
+ debugpy==1.8.14
+ decorator==5.2.1
+ defusedxml==0.7.1
+ dill==0.3.8
+ docopt==0.6.2
+ execnb==0.1.14
+ executing==2.2.0
+ fastai==2.8.2
+ fastapi==0.115.12
+ fastbook==0.0.29
+ fastcore==1.8.2
+ fastdownload==0.0.7
+ fastjsonschema==2.21.1
+ fastprogress==1.0.3
+ fasttransform==0.0.2
+ ffmpy==0.6.0
+ filelock==3.18.0
+ fonttools==4.58.1
+ fqdn==1.5.1
+ frozenlist==1.6.0
+ fsspec==2025.3.0
+ gdown==5.2.0
+ ghapi==1.0.6
+ gpustat==1.1.1
+ gradio==5.33.1
+ gradio_client==1.10.3
+ graphviz==0.20.3
+ groovy==0.1.2
+ grpcio==1.72.1
+ h11==0.16.0
+ h5py==3.13.0
+ hf-xet==1.1.2
+ httpcore==1.0.9
+ httpx==0.28.1
+ huggingface-hub==0.32.2
+ idna==3.10
+ importlib_metadata==8.7.0
+ ipykernel==6.29.5
+ ipython==8.12.3
+ ipython-genutils==0.2.0
+ ipython_pygments_lexers==1.1.1
+ ipywidgets==7.8.5
+ isodate==0.7.2
+ isoduration==20.11.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ joblib==1.5.1
+ json5==0.12.0
+ jsonpointer==3.0.0
+ jsonschema==4.24.0
+ jsonschema-specifications==2025.4.1
+ jupyter-events==0.12.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.3
+ jupyter_core==5.8.1
+ jupyter_server==2.16.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.4.3
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.3
+ jupyterlab_widgets==1.1.11
+ kaggle==1.7.4.5
+ kagglehub==0.3.12
+ kiwisolver==1.4.8
+ langcodes==3.5.0
+ language_data==1.3.0
+ marisa-trie==1.2.1
+ Markdown==3.8
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
  matplotlib==3.10.3
- numpy==2.3.0
- pandas==2.3.0
- Pillow==11.2.1
- scikit_learn==1.7.0
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mistune==3.1.3
+ mpmath==1.3.0
+ msrest==0.7.1
+ multidict==6.4.4
+ multiprocess==0.70.16
+ murmurhash==1.0.13
+ nbclient==0.10.2
+ nbconvert==7.16.6
+ nbdev==2.4.2
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ notebook==7.4.3
+ notebook_shim==0.2.4
+ numpy==2.2.6
+ nvidia-cublas-cu12==12.6.4.1
+ nvidia-cuda-cupti-cu12==12.6.80
+ nvidia-cuda-nvrtc-cu12==12.6.77
+ nvidia-cuda-runtime-cu12==12.6.77
+ nvidia-cudnn-cu12==9.5.1.17
+ nvidia-cufft-cu12==11.3.0.4
+ nvidia-cufile-cu12==1.11.1.6
+ nvidia-curand-cu12==10.3.7.77
+ nvidia-cusolver-cu12==11.7.1.2
+ nvidia-cusparse-cu12==12.5.4.2
+ nvidia-cusparselt-cu12==0.6.3
+ nvidia-ml-py==12.575.51
+ nvidia-nccl-cu12==2.26.2
+ nvidia-nvjitlink-cu12==12.6.85
+ nvidia-nvtx-cu12==12.6.77
+ oauthlib==3.2.2
+ opencv-python==4.11.0.86
+ opencv-python-headless==4.11.0.86
+ orjson==3.10.18
+ overrides==7.7.0
+ packaging==25.0
+ pandas==2.2.3
+ pandocfilters==1.5.1
+ parso==0.8.4
+ pexpect==4.9.0
+ pickleshare==0.7.5
+ pillow==11.2.1
+ pipreqs==0.5.0
+ platformdirs==4.3.8
+ plum-dispatch==2.5.7
+ preshed==3.0.10
+ prometheus_client==0.22.0
+ prompt_toolkit==3.0.51
+ propcache==0.3.1
+ protobuf==6.31.1
+ psutil==7.0.0
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==20.0.0
+ pycparser==2.22
+ pydantic==2.11.5
+ pydantic_core==2.33.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ pyparsing==3.2.3
+ PySocks==1.7.1
+ python-dateutil==2.9.0.post0
+ python-json-logger==3.3.0
+ python-multipart==0.0.20
+ python-slugify==8.0.4
+ pytz==2025.2
+ PyYAML==6.0.2
+ pyzmq==26.4.0
+ referencing==0.36.2
+ regex==2024.11.6
+ requests==2.32.3
+ requests-oauthlib==2.0.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==14.0.0
+ rpds-py==0.25.1
+ ruff==0.11.13
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.3
+ semantic-version==2.10.0
+ Send2Trash==1.8.3
+ sentencepiece==0.2.0
+ setuptools==80.9.0
+ shellingham==1.5.4
+ simsimd==6.4.9
+ six==1.17.0
+ smart-open==7.1.0
+ sniffio==1.3.1
+ soupsieve==2.7
+ spacy==3.8.7
+ spacy-legacy==3.0.12
+ spacy-loggers==1.0.5
+ srsly==2.5.1
+ stack-data==0.6.3
+ starlette==0.46.2
+ stringzilla==3.12.5
+ sympy==1.14.0
+ tensorboard==2.19.0
+ tensorboard-data-server==0.7.2
+ tensordict==0.8.3
+ terminado==0.18.1
+ text-unidecode==1.3
+ thinc==8.3.6
+ threadpoolctl==3.6.0
+ tinycss2==1.4.0
+ tokenizers==0.21.1
+ tomlkit==0.13.3
  torch==2.7.0
  torchvision==0.22.0
+ tornado==6.5.1
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.52.3
+ triton==3.3.0
+ typer==0.16.0
+ types-python-dateutil==2.9.0.20250516
+ typing-inspection==0.4.1
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ uri-template==1.3.0
+ urllib3==2.4.0
+ uvicorn==0.34.3
+ wasabi==1.1.3
+ watchdog==6.0.0
+ wcwidth==0.2.13
+ weasel==0.4.1
+ webcolors==24.11.1
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ websockets==15.0.1
+ Werkzeug==3.1.3
+ wheel==0.45.1
+ widgetsnbextension==3.6.10
+ wrapt==1.17.2
+ xxhash==3.5.0
+ yarg==0.1.9
+ yarl==1.20.0
+ zipp==3.22.0