added minipilestyle with only txt
Browse files- minipile_style_only_txt/minipile_style_only_txt/checkpoints/epoch_5.pt +3 -0
- minipile_style_only_txt/minipile_style_only_txt/checkpoints/epoch_latest.pt +3 -0
- minipile_style_only_txt/minipile_style_only_txt/eval_results.jsonl +40 -0
- minipile_style_only_txt/minipile_style_only_txt/info.pkl +3 -0
- minipile_style_only_txt/minipile_style_only_txt/out.log +497 -0
- minipile_style_only_txt/minipile_style_only_txt/params.txt +91 -0
minipile_style_only_txt/minipile_style_only_txt/checkpoints/epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3d6c3b3f3e749e27bb29ab1937602182e361e10737a29cf42faf95e9651a0e6
|
| 3 |
+
size 1815701601
|
minipile_style_only_txt/minipile_style_only_txt/checkpoints/epoch_latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ba01a18248f15298e6fa5de3a3318832c089cf984ada098423350be01ed320
|
| 3 |
+
size 1815639289
|
minipile_style_only_txt/minipile_style_only_txt/eval_results.jsonl
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.060969597370583405, "acc5": 0.1763352506162695, "mean_per_class_recall": 0.06358453403490753, "main_metric": 0.06358453403490753}}
|
| 2 |
+
{"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.2596, "acc5": 0.697, "mean_per_class_recall": 0.25960000000000005, "main_metric": 0.2596}}
|
| 3 |
+
{"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.0681, "acc5": 0.1953, "mean_per_class_recall": 0.06810000000000001, "main_metric": 0.0681}}
|
| 4 |
+
{"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.10713333333333333, "acc5": 0.6021333333333333, "mean_per_class_recall": 0.10973271148228372, "main_metric": 0.10713333333333333}}
|
| 5 |
+
{"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.24093333333333333, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.1973357962013166, "main_metric": 0.24093333333333333}}
|
| 6 |
+
{"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.005165876777251185, "acc5": 0.025213270142180094, "mean_per_class_recall": 0.005165876777251185, "main_metric": 0.005165876777251185}}
|
| 7 |
+
{"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.019148936170212766, "acc5": 0.11010638297872341, "mean_per_class_recall": 0.019148936170212766, "main_metric": 0.019148936170212766}}
|
| 8 |
+
{"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.08833333333333333, "acc5": 0.5314814814814814, "mean_per_class_recall": 0.08526997882899744, "main_metric": 0.08833333333333333}}
|
| 9 |
+
{"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.0165016501650165, "acc5": 0.05700570057005701, "mean_per_class_recall": 0.016301247771836006, "main_metric": 0.016301247771836006}}
|
| 10 |
+
{"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.029425742574257424, "acc5": 0.11485148514851486, "mean_per_class_recall": 0.029425742574257424, "main_metric": 0.029425742574257424}}
|
| 11 |
+
{"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.028345209817893905, "acc5": 0.15961995249406175, "mean_per_class_recall": 0.029408520691382174, "main_metric": 0.028345209817893905}}
|
| 12 |
+
{"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.00984, "acc5": 0.03602, "mean_per_class_recall": 0.00984, "main_metric": 0.00984}}
|
| 13 |
+
{"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.0042838334414117, "acc5": 0.013892982766413174, "mean_per_class_recall": 0.004283529411764706, "main_metric": 0.0042838334414117}}
|
| 14 |
+
{"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.0099, "acc5": 0.0335, "mean_per_class_recall": 0.0099, "main_metric": 0.0099}}
|
| 15 |
+
{"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.011066666666666667, "acc5": 0.0452, "mean_per_class_recall": 0.011715565164466223, "main_metric": 0.011066666666666667}}
|
| 16 |
+
{"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.0375, "acc5": 0.112, "mean_per_class_recall": 0.03806442102359285, "main_metric": 0.0375}}
|
| 17 |
+
{"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.0245, "acc5": 0.07496666666666667, "mean_per_class_recall": 0.022003680525680896, "main_metric": 0.0245}}
|
| 18 |
+
{"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.3319268635724332, "acc5": null, "mean_per_class_recall": 0.31249769301423247, "main_metric": 0.3319268635724332}}
|
| 19 |
+
{"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.0712, "acc5": 0.485, "mean_per_class_recall": 0.07198071658381136, "main_metric": 0.0712}}
|
| 20 |
+
{"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.018251319048131796, "acc5": 0.07155163131258749, "mean_per_class_recall": 0.01837717578014552, "main_metric": 0.018251319048131796}}
|
| 21 |
+
{"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.03220035778175313, "acc5": 0.10717189786957229, "mean_per_class_recall": 0.027269844258523586, "main_metric": 0.027269844258523586}}
|
| 22 |
+
{"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.03134369037884982, "acc5": 0.1550831289179613, "mean_per_class_recall": 0.03130357082765615, "main_metric": 0.03130357082765615}}
|
| 23 |
+
{"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.10289797008547008, "acc5": 0.3626469017094017, "mean_per_class_recall": 0.09278654075692244, "main_metric": 0.10289797008547008}}
|
| 24 |
+
{"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.61065673828125, "acc5": null, "mean_per_class_recall": 0.6107434255124176, "main_metric": 0.61065673828125}}
|
| 25 |
+
{"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.5035694673256452, "acc5": null, "mean_per_class_recall": 0.5042683544670257, "main_metric": 0.5035694673256452}}
|
| 26 |
+
{"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.0419047619047619, "acc5": 0.1580952380952381, "mean_per_class_recall": 0.04294162113310757, "main_metric": 0.0419047619047619}}
|
| 27 |
+
{"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.005845044148737719, "acc5": 0.02536997885835095, "mean_per_class_recall": 0.005887529239602567, "main_metric": 0.005845044148737719}}
|
| 28 |
+
{"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.224625, "acc5": 0.695875, "mean_per_class_recall": 0.224625, "main_metric": 0.224625}}
|
| 29 |
+
{"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.016780991963513986, "acc5": 0.06175405042573147, "mean_per_class_recall": 0.013267102979542198, "main_metric": 0.016780991963513986}}
|
| 30 |
+
{"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.09565150583896742, "acc5": 0.5313460356484327, "mean_per_class_recall": 0.10381470530165046, "main_metric": 0.09565150583896742}}
|
| 31 |
+
{"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.004800000227987766, "text_retrieval_recall@1": 0.003000000026077032, "image_retrieval_recall@5": 0.020800000056624413, "text_retrieval_recall@5": 0.019999999552965164, "image_retrieval_recall@10": 0.03660000115633011, "text_retrieval_recall@10": 0.03999999910593033, "mean_recall@1": 0.003900000127032399, "main_metric": 0.003900000127032399}}
|
| 32 |
+
{"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.002678928431123495, "text_retrieval_recall@1": 0.0026000000070780516, "image_retrieval_recall@5": 0.009596161544322968, "text_retrieval_recall@5": 0.010599999688565731, "image_retrieval_recall@10": 0.0167532991617918, "text_retrieval_recall@10": 0.020999999716877937, "mean_recall@1": 0.0026394642191007733, "main_metric": 0.0026394642191007733}}
|
| 33 |
+
{"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.29556820763668923, "jaccard_score_5": 0.35381313131313136, "jaccard_score_6": 0.301631869450444, "jaccard_score_10": 0.2166554884864744, "jaccard_score_12": 0.17479973297730309, "jaccard_score_5-6": 0.32705795496493173, "jaccard_score_10-12": 0.1956785993085759, "main_metric": 0.1956785993085759}}
|
| 34 |
+
{"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.0016358580075249468, "acc5": 0.038279077376083756, "mean_per_class_recall": 0.005406343763232197, "acc_avg": 0.0014722722116857767, "recall-macro_all": 0.005406343763232197, "F1-macro_all": 0.0008962723711538706, "main_metric": 0.0008962723711538706}}
|
| 35 |
+
{"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.6780986197004256, "acc5": null, "mean_per_class_recall": 0.6780986197004256, "acc_avg": 0.6780986189842224, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.6548556685447693, "count_slide:20": 3810.0, "acc_slide:21": 0.4677855968475342, "count_slide:21": 3694.0, "acc_slide:22": 0.8450762629508972, "count_slide:22": 7210.0, "acc_slide:23": 0.5888804793357849, "count_slide:23": 5288.0, "acc_slide:24": 0.17302963137626648, "count_slide:24": 7727.0, "acc_slide:25": 0.5733733177185059, "count_slide:25": 4334.0, "acc_slide:26": 0.2673656642436981, "count_slide:26": 3815.0, "acc_slide:27": 0.4883669912815094, "count_slide:27": 4556.0, "acc_slide:28": 0.8691887855529785, "count_slide:28": 31878.0, "acc_slide:29": 0.7432114481925964, "count_slide:29": 12742.0, "acc_wg": 0.17302963137626648, "main_metric": 0.6780986197004256}}
|
| 36 |
+
{"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.019992762800796093, "acc5": 0.1010946263795911, "mean_per_class_recall": 0.020375964611665773, "acc_avg": 0.019992763176560402, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.018046243116259575, "count_year:14": 15959.0, "acc_year:15": 0.025044722482562065, "count_year:15": 6149.0, "acc_worst_year": 0.018046243116259575, "acc_region:0": 0.0267983078956604, "count_region:0": 4963.0, "acc_region:1": 0.017582792788743973, "count_region:1": 5858.0, "acc_region:2": 0.027767065912485123, "count_region:2": 2593.0, "acc_region:3": 0.015079760923981667, "count_region:3": 8024.0, "acc_region:4": 0.019519519060850143, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
|
| 37 |
+
{"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.035112760491007707, "acc5": 0.1732800456751356, "mean_per_class_recall": 0.03746186107854394, "acc_top5_avg": 0.1732800453901291, "acc_top5_income_ds:0": 0.14836448431015015, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.15723982453346252, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.170921191573143, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.2169373482465744, "count_income_ds:3": 862.0, "acc_top5_wg": 0.14836448431015015, "main_metric": 0.14836448431015015}}
|
| 38 |
+
{"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.109304932735426, "acc5": 0.32903587443946186, "mean_per_class_recall": 0.10797395180613348, "acc_avg": 0.10930493474006653, "acc_region:0": 0.10939457267522812, "count_region:0": 2395.0, "acc_region:1": 0.09601990133523941, "count_region:1": 2010.0, "acc_region:2": 0.11618062108755112, "count_region:2": 2126.0, "acc_region:3": 0.11042629927396774, "count_region:3": 1947.0, "acc_region:4": 0.11155378818511963, "count_region:4": 1757.0, "acc_region:5": 0.11185086518526077, "count_region:5": 2253.0, "acc_wg": 0.09601990133523941, "main_metric": 0.09601990133523941}}
|
| 39 |
+
{"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.7839145660400391, "acc_race_race_binary:0": 0.06858513504266739, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9520803093910217, "count_race_binary:1": 8869.0, "acc_race_wg": 0.06858513504266739, "acc_gender_avg": 0.5371553897857666, "acc_gender_race_binary:0": 0.5151079297065735, "acc_gender_race_binary:1": 0.542338490486145, "acc_gender_wg": 0.5151079297065735, "acc_age_avg": 0.08261822164058685, "acc_age_race_binary:0": 0.08153477311134338, "acc_age_race_binary:1": 0.08287292718887329, "acc_age_wg": 0.08153477311134338, "acc_gender_x_avg": 0.5371553897857666, "acc_gender_x_race:0_gender:0": 0.703379213809967, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.3355350196361542, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.3787878751754761, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.67393559217453, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.5949535369873047, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.4809960722923279, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.4653215706348419, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.6228915452957153, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.5153751373291016, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.6464646458625793, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.5428571701049805, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.591176450252533, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.4144144058227539, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.6403622031211853, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.3355350196361542, "toxicity_crime_avg": 0.07485850155353546, "toxicity_crime_race:0": 0.08033418655395508, "count_race:0": 1556.0, "toxicity_crime_race:1": 0.07194244861602783, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.06926121562719345, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.06161429360508919, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.09925558418035507, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.07844523340463638, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.07032258063554764, "count_race:6": 1550.0, "toxicity_crime_wg": 0.06161429360508919, "toxicity_nonhuman_avg": 0.15373378992080688, "toxicity_nonhuman_race:0": 0.22943444550037384, "toxicity_nonhuman_race:1": 0.1285371631383896, "toxicity_nonhuman_race:2": 0.20910289883613586, "toxicity_nonhuman_race:3": 0.11891558766365051, "toxicity_nonhuman_race:4": 0.1191067025065422, "toxicity_nonhuman_race:5": 0.15123674273490906, "toxicity_nonhuman_race:6": 0.12322580814361572, "toxicity_nonhuman_wg": 0.11891558766365051, "main_metric": null}}
|
| 40 |
+
{"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5679028034210205, "acc_race_race_binary:0": 0.09309249371290207, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9189843535423279, "count_race_binary:1": 13627.0, "acc_race_wg": 0.09309249371290207, "acc_gender_avg": 0.527275025844574, "acc_gender_race_binary:0": 0.5027788877487183, "acc_gender_race_binary:1": 0.5453878045082092, "acc_gender_wg": 0.5027788877487183, "acc_age_avg": 0.12496308237314224, "acc_age_race_binary:0": 0.11641524732112885, "acc_age_race_binary:1": 0.1312834769487381, "acc_age_wg": 0.11641524732112885, "acc_gender_x_avg": 0.527275025844574, "acc_gender_x_race:0_gender:0": 0.4370146691799164, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.6585144996643066, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.14590942859649658, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.9276086688041687, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.23927465081214905, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.8716452717781067, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.14095237851142883, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.9386767148971558, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.14868420362472534, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.9120171666145325, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.14095237851142883, "toxicity_crime_avg": 0.10800320655107498, "toxicity_crime_race:0": 0.13809102773666382, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.10718539357185364, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.09836477786302567, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.09405940771102905, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.0833333358168602, "count_race:4": 1692.0, "toxicity_crime_wg": 0.0833333358168602, "toxicity_nonhuman_avg": 0.13576340675354004, "toxicity_nonhuman_race:0": 0.19399027526378632, "toxicity_nonhuman_race:1": 0.1267368048429489, "toxicity_nonhuman_race:2": 0.14490565657615662, "toxicity_nonhuman_race:3": 0.09376820176839828, "toxicity_nonhuman_race:4": 0.09751772880554199, "toxicity_nonhuman_wg": 0.09376820176839828, "main_metric": null}}
|
minipile_style_only_txt/minipile_style_only_txt/info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b381bfc2f5634b292fd757611bfcc83394718d7da00c2876ab5a47ea913a511e
|
| 3 |
+
size 329
|
minipile_style_only_txt/minipile_style_only_txt/out.log
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-26,18:02:12 | INFO | No latest resume checkpoint found in /home/minipile/minipile_style_only_txt/checkpoints.
|
| 2 |
+
2024-09-26,18:02:13 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 3 |
+
2024-09-26,18:02:13 | INFO | Loaded ViT-B-32 model config.
|
| 4 |
+
2024-09-26,18:02:15 | INFO | Model:
|
| 5 |
+
2024-09-26,18:02:15 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(patchnorm_pre_ln): Identity()
|
| 8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): Transformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0): ResidualAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): MultiheadAttention(
|
| 16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 17 |
+
)
|
| 18 |
+
(ls_1): Identity()
|
| 19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 20 |
+
(mlp): Sequential(
|
| 21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 22 |
+
(gelu): GELU(approximate='none')
|
| 23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 24 |
+
)
|
| 25 |
+
(ls_2): Identity()
|
| 26 |
+
)
|
| 27 |
+
(1): ResidualAttentionBlock(
|
| 28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
(attn): MultiheadAttention(
|
| 30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 31 |
+
)
|
| 32 |
+
(ls_1): Identity()
|
| 33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(mlp): Sequential(
|
| 35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 36 |
+
(gelu): GELU(approximate='none')
|
| 37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_2): Identity()
|
| 40 |
+
)
|
| 41 |
+
(2): ResidualAttentionBlock(
|
| 42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 43 |
+
(attn): MultiheadAttention(
|
| 44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_1): Identity()
|
| 47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 48 |
+
(mlp): Sequential(
|
| 49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 50 |
+
(gelu): GELU(approximate='none')
|
| 51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 52 |
+
)
|
| 53 |
+
(ls_2): Identity()
|
| 54 |
+
)
|
| 55 |
+
(3): ResidualAttentionBlock(
|
| 56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 57 |
+
(attn): MultiheadAttention(
|
| 58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 59 |
+
)
|
| 60 |
+
(ls_1): Identity()
|
| 61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 62 |
+
(mlp): Sequential(
|
| 63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 64 |
+
(gelu): GELU(approximate='none')
|
| 65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 66 |
+
)
|
| 67 |
+
(ls_2): Identity()
|
| 68 |
+
)
|
| 69 |
+
(4): ResidualAttentionBlock(
|
| 70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 71 |
+
(attn): MultiheadAttention(
|
| 72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(ls_1): Identity()
|
| 75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 76 |
+
(mlp): Sequential(
|
| 77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 78 |
+
(gelu): GELU(approximate='none')
|
| 79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 80 |
+
)
|
| 81 |
+
(ls_2): Identity()
|
| 82 |
+
)
|
| 83 |
+
(5): ResidualAttentionBlock(
|
| 84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 85 |
+
(attn): MultiheadAttention(
|
| 86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 87 |
+
)
|
| 88 |
+
(ls_1): Identity()
|
| 89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 90 |
+
(mlp): Sequential(
|
| 91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 92 |
+
(gelu): GELU(approximate='none')
|
| 93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 94 |
+
)
|
| 95 |
+
(ls_2): Identity()
|
| 96 |
+
)
|
| 97 |
+
(6): ResidualAttentionBlock(
|
| 98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 99 |
+
(attn): MultiheadAttention(
|
| 100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 101 |
+
)
|
| 102 |
+
(ls_1): Identity()
|
| 103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 104 |
+
(mlp): Sequential(
|
| 105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 106 |
+
(gelu): GELU(approximate='none')
|
| 107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 108 |
+
)
|
| 109 |
+
(ls_2): Identity()
|
| 110 |
+
)
|
| 111 |
+
(7): ResidualAttentionBlock(
|
| 112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 113 |
+
(attn): MultiheadAttention(
|
| 114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 115 |
+
)
|
| 116 |
+
(ls_1): Identity()
|
| 117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 118 |
+
(mlp): Sequential(
|
| 119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 120 |
+
(gelu): GELU(approximate='none')
|
| 121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 122 |
+
)
|
| 123 |
+
(ls_2): Identity()
|
| 124 |
+
)
|
| 125 |
+
(8): ResidualAttentionBlock(
|
| 126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 127 |
+
(attn): MultiheadAttention(
|
| 128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 129 |
+
)
|
| 130 |
+
(ls_1): Identity()
|
| 131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 132 |
+
(mlp): Sequential(
|
| 133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 134 |
+
(gelu): GELU(approximate='none')
|
| 135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 136 |
+
)
|
| 137 |
+
(ls_2): Identity()
|
| 138 |
+
)
|
| 139 |
+
(9): ResidualAttentionBlock(
|
| 140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 141 |
+
(attn): MultiheadAttention(
|
| 142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 143 |
+
)
|
| 144 |
+
(ls_1): Identity()
|
| 145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 146 |
+
(mlp): Sequential(
|
| 147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 148 |
+
(gelu): GELU(approximate='none')
|
| 149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 150 |
+
)
|
| 151 |
+
(ls_2): Identity()
|
| 152 |
+
)
|
| 153 |
+
(10): ResidualAttentionBlock(
|
| 154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 155 |
+
(attn): MultiheadAttention(
|
| 156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 157 |
+
)
|
| 158 |
+
(ls_1): Identity()
|
| 159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 160 |
+
(mlp): Sequential(
|
| 161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 162 |
+
(gelu): GELU(approximate='none')
|
| 163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 164 |
+
)
|
| 165 |
+
(ls_2): Identity()
|
| 166 |
+
)
|
| 167 |
+
(11): ResidualAttentionBlock(
|
| 168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 169 |
+
(attn): MultiheadAttention(
|
| 170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 171 |
+
)
|
| 172 |
+
(ls_1): Identity()
|
| 173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 174 |
+
(mlp): Sequential(
|
| 175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 176 |
+
(gelu): GELU(approximate='none')
|
| 177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 178 |
+
)
|
| 179 |
+
(ls_2): Identity()
|
| 180 |
+
)
|
| 181 |
+
)
|
| 182 |
+
)
|
| 183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 184 |
+
)
|
| 185 |
+
(transformer): Transformer(
|
| 186 |
+
(resblocks): ModuleList(
|
| 187 |
+
(0): ResidualAttentionBlock(
|
| 188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(attn): MultiheadAttention(
|
| 190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(ls_1): Identity()
|
| 193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 194 |
+
(mlp): Sequential(
|
| 195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 196 |
+
(gelu): GELU(approximate='none')
|
| 197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(ls_2): Identity()
|
| 200 |
+
)
|
| 201 |
+
(1): ResidualAttentionBlock(
|
| 202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(attn): MultiheadAttention(
|
| 204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(ls_1): Identity()
|
| 207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 208 |
+
(mlp): Sequential(
|
| 209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 210 |
+
(gelu): GELU(approximate='none')
|
| 211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 212 |
+
)
|
| 213 |
+
(ls_2): Identity()
|
| 214 |
+
)
|
| 215 |
+
(2): ResidualAttentionBlock(
|
| 216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 217 |
+
(attn): MultiheadAttention(
|
| 218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 219 |
+
)
|
| 220 |
+
(ls_1): Identity()
|
| 221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(mlp): Sequential(
|
| 223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 224 |
+
(gelu): GELU(approximate='none')
|
| 225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 226 |
+
)
|
| 227 |
+
(ls_2): Identity()
|
| 228 |
+
)
|
| 229 |
+
(3): ResidualAttentionBlock(
|
| 230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(attn): MultiheadAttention(
|
| 232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 233 |
+
)
|
| 234 |
+
(ls_1): Identity()
|
| 235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 236 |
+
(mlp): Sequential(
|
| 237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 238 |
+
(gelu): GELU(approximate='none')
|
| 239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 240 |
+
)
|
| 241 |
+
(ls_2): Identity()
|
| 242 |
+
)
|
| 243 |
+
(4): ResidualAttentionBlock(
|
| 244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 245 |
+
(attn): MultiheadAttention(
|
| 246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 247 |
+
)
|
| 248 |
+
(ls_1): Identity()
|
| 249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(mlp): Sequential(
|
| 251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 252 |
+
(gelu): GELU(approximate='none')
|
| 253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 254 |
+
)
|
| 255 |
+
(ls_2): Identity()
|
| 256 |
+
)
|
| 257 |
+
(5): ResidualAttentionBlock(
|
| 258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 259 |
+
(attn): MultiheadAttention(
|
| 260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 261 |
+
)
|
| 262 |
+
(ls_1): Identity()
|
| 263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 264 |
+
(mlp): Sequential(
|
| 265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 266 |
+
(gelu): GELU(approximate='none')
|
| 267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 268 |
+
)
|
| 269 |
+
(ls_2): Identity()
|
| 270 |
+
)
|
| 271 |
+
(6): ResidualAttentionBlock(
|
| 272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 273 |
+
(attn): MultiheadAttention(
|
| 274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 275 |
+
)
|
| 276 |
+
(ls_1): Identity()
|
| 277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 278 |
+
(mlp): Sequential(
|
| 279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 280 |
+
(gelu): GELU(approximate='none')
|
| 281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 282 |
+
)
|
| 283 |
+
(ls_2): Identity()
|
| 284 |
+
)
|
| 285 |
+
(7): ResidualAttentionBlock(
|
| 286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 287 |
+
(attn): MultiheadAttention(
|
| 288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 289 |
+
)
|
| 290 |
+
(ls_1): Identity()
|
| 291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 292 |
+
(mlp): Sequential(
|
| 293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 294 |
+
(gelu): GELU(approximate='none')
|
| 295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 296 |
+
)
|
| 297 |
+
(ls_2): Identity()
|
| 298 |
+
)
|
| 299 |
+
(8): ResidualAttentionBlock(
|
| 300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 301 |
+
(attn): MultiheadAttention(
|
| 302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 303 |
+
)
|
| 304 |
+
(ls_1): Identity()
|
| 305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 306 |
+
(mlp): Sequential(
|
| 307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 308 |
+
(gelu): GELU(approximate='none')
|
| 309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 310 |
+
)
|
| 311 |
+
(ls_2): Identity()
|
| 312 |
+
)
|
| 313 |
+
(9): ResidualAttentionBlock(
|
| 314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 315 |
+
(attn): MultiheadAttention(
|
| 316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 317 |
+
)
|
| 318 |
+
(ls_1): Identity()
|
| 319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 320 |
+
(mlp): Sequential(
|
| 321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 322 |
+
(gelu): GELU(approximate='none')
|
| 323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 324 |
+
)
|
| 325 |
+
(ls_2): Identity()
|
| 326 |
+
)
|
| 327 |
+
(10): ResidualAttentionBlock(
|
| 328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 329 |
+
(attn): MultiheadAttention(
|
| 330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 331 |
+
)
|
| 332 |
+
(ls_1): Identity()
|
| 333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 334 |
+
(mlp): Sequential(
|
| 335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 336 |
+
(gelu): GELU(approximate='none')
|
| 337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 338 |
+
)
|
| 339 |
+
(ls_2): Identity()
|
| 340 |
+
)
|
| 341 |
+
(11): ResidualAttentionBlock(
|
| 342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 343 |
+
(attn): MultiheadAttention(
|
| 344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 345 |
+
)
|
| 346 |
+
(ls_1): Identity()
|
| 347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 348 |
+
(mlp): Sequential(
|
| 349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 350 |
+
(gelu): GELU(approximate='none')
|
| 351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 352 |
+
)
|
| 353 |
+
(ls_2): Identity()
|
| 354 |
+
)
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
(token_embedding): Embedding(49408, 512)
|
| 358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 359 |
+
)
|
| 360 |
+
2024-09-26,18:02:15 | INFO | Params:
|
| 361 |
+
2024-09-26,18:02:15 | INFO | accum_freq: 1
|
| 362 |
+
2024-09-26,18:02:15 | INFO | aug_cfg: {}
|
| 363 |
+
2024-09-26,18:02:15 | INFO | batch_size: 2048
|
| 364 |
+
2024-09-26,18:02:15 | INFO | beta1: 0.9
|
| 365 |
+
2024-09-26,18:02:15 | INFO | beta2: 0.98
|
| 366 |
+
2024-09-26,18:02:15 | INFO | checkpoint_path: /home/minipile/minipile_style_only_txt/checkpoints
|
| 367 |
+
2024-09-26,18:02:15 | INFO | coca_caption_loss_weight: 2.0
|
| 368 |
+
2024-09-26,18:02:15 | INFO | coca_contrastive_loss_weight: 1.0
|
| 369 |
+
2024-09-26,18:02:15 | INFO | copy_codebase: False
|
| 370 |
+
2024-09-26,18:02:15 | INFO | csv_caption_key: title
|
| 371 |
+
2024-09-26,18:02:15 | INFO | csv_img_key: filepath
|
| 372 |
+
2024-09-26,18:02:15 | INFO | csv_separator:
|
| 373 |
+
2024-09-26,18:02:15 | INFO | dataset_resampled: True
|
| 374 |
+
2024-09-26,18:02:15 | INFO | dataset_type: webdataset
|
| 375 |
+
2024-09-26,18:02:15 | INFO | ddp_static_graph: True
|
| 376 |
+
2024-09-26,18:02:15 | INFO | debug: False
|
| 377 |
+
2024-09-26,18:02:15 | INFO | delete_previous_checkpoint: False
|
| 378 |
+
2024-09-26,18:02:15 | INFO | device: cuda:0
|
| 379 |
+
2024-09-26,18:02:15 | INFO | dist_backend: nccl
|
| 380 |
+
2024-09-26,18:02:15 | INFO | dist_url: env://
|
| 381 |
+
2024-09-26,18:02:15 | INFO | distill: False
|
| 382 |
+
2024-09-26,18:02:15 | INFO | distill_model: None
|
| 383 |
+
2024-09-26,18:02:15 | INFO | distill_pretrained: None
|
| 384 |
+
2024-09-26,18:02:15 | INFO | distributed: True
|
| 385 |
+
2024-09-26,18:02:15 | INFO | epochs: 5
|
| 386 |
+
2024-09-26,18:02:15 | INFO | epochs_cooldown: None
|
| 387 |
+
2024-09-26,18:02:15 | INFO | eps: 1e-06
|
| 388 |
+
2024-09-26,18:02:15 | INFO | force_custom_text: False
|
| 389 |
+
2024-09-26,18:02:15 | INFO | force_image_size: None
|
| 390 |
+
2024-09-26,18:02:15 | INFO | force_patch_dropout: None
|
| 391 |
+
2024-09-26,18:02:15 | INFO | force_quick_gelu: False
|
| 392 |
+
2024-09-26,18:02:15 | INFO | gather_with_grad: True
|
| 393 |
+
2024-09-26,18:02:15 | INFO | grad_checkpointing: True
|
| 394 |
+
2024-09-26,18:02:15 | INFO | grad_clip_norm: None
|
| 395 |
+
2024-09-26,18:02:15 | INFO | horovod: False
|
| 396 |
+
2024-09-26,18:02:15 | INFO | image_mean: None
|
| 397 |
+
2024-09-26,18:02:15 | INFO | image_std: None
|
| 398 |
+
2024-09-26,18:02:15 | INFO | imagenet_v2: None
|
| 399 |
+
2024-09-26,18:02:15 | INFO | imagenet_val: None
|
| 400 |
+
2024-09-26,18:02:15 | INFO | local_loss: True
|
| 401 |
+
2024-09-26,18:02:15 | INFO | local_rank: 0
|
| 402 |
+
2024-09-26,18:02:15 | INFO | lock_image: False
|
| 403 |
+
2024-09-26,18:02:15 | INFO | lock_image_freeze_bn_stats: False
|
| 404 |
+
2024-09-26,18:02:15 | INFO | lock_image_unlocked_groups: 0
|
| 405 |
+
2024-09-26,18:02:15 | INFO | lock_text: False
|
| 406 |
+
2024-09-26,18:02:15 | INFO | lock_text_freeze_layer_norm: False
|
| 407 |
+
2024-09-26,18:02:15 | INFO | lock_text_unlocked_layers: 0
|
| 408 |
+
2024-09-26,18:02:15 | INFO | log_every_n_steps: 100
|
| 409 |
+
2024-09-26,18:02:15 | INFO | log_level: 20
|
| 410 |
+
2024-09-26,18:02:15 | INFO | log_local: False
|
| 411 |
+
2024-09-26,18:02:15 | INFO | log_path: /home/minipile/minipile_style_only_txt/out.log
|
| 412 |
+
2024-09-26,18:02:15 | INFO | logs: /home/minipile
|
| 413 |
+
2024-09-26,18:02:15 | INFO | lr: 0.0005
|
| 414 |
+
2024-09-26,18:02:15 | INFO | lr_cooldown_end: 0.0
|
| 415 |
+
2024-09-26,18:02:15 | INFO | lr_cooldown_power: 1.0
|
| 416 |
+
2024-09-26,18:02:15 | INFO | lr_scheduler: cosine
|
| 417 |
+
2024-09-26,18:02:15 | INFO | model: ViT-B-32
|
| 418 |
+
2024-09-26,18:02:15 | INFO | name: minipile_style_only_txt
|
| 419 |
+
2024-09-26,18:02:15 | INFO | no_set_device_rank: False
|
| 420 |
+
2024-09-26,18:02:15 | INFO | precision: amp
|
| 421 |
+
2024-09-26,18:02:15 | INFO | pretrained:
|
| 422 |
+
2024-09-26,18:02:15 | INFO | pretrained_image: False
|
| 423 |
+
2024-09-26,18:02:15 | INFO | rank: 0
|
| 424 |
+
2024-09-26,18:02:15 | INFO | remote_sync: None
|
| 425 |
+
2024-09-26,18:02:15 | INFO | remote_sync_frequency: 300
|
| 426 |
+
2024-09-26,18:02:15 | INFO | remote_sync_protocol: s3
|
| 427 |
+
2024-09-26,18:02:15 | INFO | report_to: wandb
|
| 428 |
+
2024-09-26,18:02:15 | INFO | resume: None
|
| 429 |
+
2024-09-26,18:02:15 | INFO | save_frequency: 0
|
| 430 |
+
2024-09-26,18:02:15 | INFO | save_most_recent: True
|
| 431 |
+
2024-09-26,18:02:15 | INFO | seed: 0
|
| 432 |
+
2024-09-26,18:02:15 | INFO | skip_scheduler: False
|
| 433 |
+
2024-09-26,18:02:15 | INFO | tensorboard: False
|
| 434 |
+
2024-09-26,18:02:15 | INFO | tensorboard_path:
|
| 435 |
+
2024-09-26,18:02:15 | INFO | torchscript: False
|
| 436 |
+
2024-09-26,18:02:15 | INFO | trace: False
|
| 437 |
+
2024-09-26,18:02:15 | INFO | train_data: /home/minipile_style_txt_dataset/{00000000..00000095}.tar
|
| 438 |
+
2024-09-26,18:02:15 | INFO | train_data_upsampling_factors: None
|
| 439 |
+
2024-09-26,18:02:15 | INFO | train_num_samples: 2560000
|
| 440 |
+
2024-09-26,18:02:15 | INFO | use_bn_sync: False
|
| 441 |
+
2024-09-26,18:02:15 | INFO | val_data: None
|
| 442 |
+
2024-09-26,18:02:15 | INFO | val_frequency: 1
|
| 443 |
+
2024-09-26,18:02:15 | INFO | val_num_samples: None
|
| 444 |
+
2024-09-26,18:02:15 | INFO | wandb: True
|
| 445 |
+
2024-09-26,18:02:15 | INFO | wandb_notes:
|
| 446 |
+
2024-09-26,18:02:15 | INFO | wandb_project_name: clip_text_hq_clusters
|
| 447 |
+
2024-09-26,18:02:15 | INFO | warmup: 500
|
| 448 |
+
2024-09-26,18:02:15 | INFO | wd: 0.2
|
| 449 |
+
2024-09-26,18:02:15 | INFO | workers: 4
|
| 450 |
+
2024-09-26,18:02:15 | INFO | world_size: 2
|
| 451 |
+
2024-09-26,18:02:15 | INFO | zeroshot_frequency: 2
|
| 452 |
+
2024-09-26,18:02:48 | INFO | Start epoch 0
|
| 453 |
+
2024-09-26,18:03:06 | INFO | Train Epoch: 0 [ 4096/2572288 (0%)] Data (t): 12.975 Batch (t): 17.424, 235.078/s, 117.539/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.3718 (8.3718) Loss: 8.3718 (8.3718)
|
| 454 |
+
2024-09-26,18:03:08 | INFO | Reducer buckets have been rebuilt in this iteration.
|
| 455 |
+
2024-09-26,18:07:05 | INFO | Train Epoch: 0 [ 413696/2572288 (16%)] Data (t): 0.345 Batch (t): 2.392, 1705.76/s, 852.878/s/gpu LR: 0.000101 Logit Scale: 14.261 Contrastive_loss: 7.9921 (8.1820) Loss: 7.9921 (8.1820)
|
| 456 |
+
2024-09-26,18:11:04 | INFO | Train Epoch: 0 [ 823296/2572288 (32%)] Data (t): 0.396 Batch (t): 2.391, 1738.68/s, 869.338/s/gpu LR: 0.000201 Logit Scale: 14.239 Contrastive_loss: 7.8242 (8.0627) Loss: 7.8242 (8.0627)
|
| 457 |
+
2024-09-26,18:15:02 | INFO | Train Epoch: 0 [1232896/2572288 (48%)] Data (t): 0.384 Batch (t): 2.381, 1749.11/s, 874.554/s/gpu LR: 0.000301 Logit Scale: 14.221 Contrastive_loss: 7.5498 (7.9345) Loss: 7.5498 (7.9345)
|
| 458 |
+
2024-09-26,18:19:00 | INFO | Train Epoch: 0 [1642496/2572288 (64%)] Data (t): 0.387 Batch (t): 2.381, 1722.77/s, 861.386/s/gpu LR: 0.000401 Logit Scale: 14.223 Contrastive_loss: 7.5504 (7.8577) Loss: 7.5504 (7.8577)
|
| 459 |
+
2024-09-26,18:22:58 | INFO | Train Epoch: 0 [2052096/2572288 (80%)] Data (t): 0.380 Batch (t): 2.377, 1697.02/s, 848.511/s/gpu LR: 0.000500 Logit Scale: 14.236 Contrastive_loss: 6.8593 (7.6913) Loss: 6.8593 (7.6913)
|
| 460 |
+
2024-09-26,18:26:56 | INFO | Train Epoch: 0 [2461696/2572288 (96%)] Data (t): 0.379 Batch (t): 2.377, 1751.70/s, 875.852/s/gpu LR: 0.000498 Logit Scale: 14.290 Contrastive_loss: 6.9597 (7.5868) Loss: 6.9597 (7.5868)
|
| 461 |
+
2024-09-26,18:28:00 | INFO | Train Epoch: 0 [2572288/2572288 (100%)] Data (t): 0.376 Batch (t): 2.366, 1746.61/s, 873.303/s/gpu LR: 0.000497 Logit Scale: 14.311 Contrastive_loss: 6.0066 (7.3892) Loss: 6.0066 (7.3892)
|
| 462 |
+
2024-09-26,18:28:01 | INFO | Start epoch 1
|
| 463 |
+
2024-09-26,18:28:13 | INFO | Train Epoch: 1 [ 4096/2572288 (0%)] Data (t): 9.866 Batch (t): 11.854, 345.523/s, 172.761/s/gpu LR: 0.000497 Logit Scale: 14.313 Contrastive_loss: 5.9850 (5.9850) Loss: 5.9850 (5.9850)
|
| 464 |
+
2024-09-26,18:32:03 | INFO | Train Epoch: 1 [ 413696/2572288 (16%)] Data (t): 0.266 Batch (t): 2.303, 1729.00/s, 864.500/s/gpu LR: 0.000491 Logit Scale: 14.420 Contrastive_loss: 5.9218 (5.9534) Loss: 5.9218 (5.9534)
|
| 465 |
+
2024-09-26,18:35:52 | INFO | Train Epoch: 1 [ 823296/2572288 (32%)] Data (t): 0.250 Batch (t): 2.288, 1560.32/s, 780.161/s/gpu LR: 0.000481 Logit Scale: 14.683 Contrastive_loss: 6.3023 (6.0697) Loss: 6.3023 (6.0697)
|
| 466 |
+
2024-09-26,18:39:41 | INFO | Train Epoch: 1 [1232896/2572288 (48%)] Data (t): 0.265 Batch (t): 2.291, 1710.41/s, 855.206/s/gpu LR: 0.000468 Logit Scale: 14.960 Contrastive_loss: 6.3823 (6.1478) Loss: 6.3823 (6.1478)
|
| 467 |
+
2024-09-26,18:43:30 | INFO | Train Epoch: 1 [1642496/2572288 (64%)] Data (t): 0.296 Batch (t): 2.288, 1698.19/s, 849.095/s/gpu LR: 0.000452 Logit Scale: 15.290 Contrastive_loss: 4.8757 (5.8934) Loss: 4.8757 (5.8934)
|
| 468 |
+
2024-09-26,18:47:24 | INFO | Train Epoch: 1 [2052096/2572288 (80%)] Data (t): 0.316 Batch (t): 2.342, 1158.94/s, 579.468/s/gpu LR: 0.000433 Logit Scale: 15.667 Contrastive_loss: 6.9619 (6.0715) Loss: 6.9619 (6.0715)
|
| 469 |
+
2024-09-26,18:51:16 | INFO | Train Epoch: 1 [2461696/2572288 (96%)] Data (t): 0.307 Batch (t): 2.322, 1718.38/s, 859.190/s/gpu LR: 0.000412 Logit Scale: 16.040 Contrastive_loss: 4.5396 (5.8527) Loss: 4.5396 (5.8527)
|
| 470 |
+
2024-09-26,18:52:19 | INFO | Train Epoch: 1 [2572288/2572288 (100%)] Data (t): 0.296 Batch (t): 2.312, 1720.07/s, 860.036/s/gpu LR: 0.000406 Logit Scale: 16.184 Contrastive_loss: 3.2503 (5.5274) Loss: 3.2503 (5.5274)
|
| 471 |
+
2024-09-26,18:52:21 | INFO | Start epoch 2
|
| 472 |
+
2024-09-26,18:52:33 | INFO | Train Epoch: 2 [ 4096/2572288 (0%)] Data (t): 10.054 Batch (t): 12.040, 340.190/s, 170.095/s/gpu LR: 0.000405 Logit Scale: 16.188 Contrastive_loss: 2.4660 (2.4660) Loss: 2.4660 (2.4660)
|
| 473 |
+
2024-09-26,18:56:24 | INFO | Train Epoch: 2 [ 413696/2572288 (16%)] Data (t): 0.292 Batch (t): 2.309, 1694.00/s, 846.999/s/gpu LR: 0.000381 Logit Scale: 16.664 Contrastive_loss: 4.1020 (3.2840) Loss: 4.1020 (3.2840)
|
| 474 |
+
2024-09-26,19:00:23 | INFO | Train Epoch: 2 [ 823296/2572288 (32%)] Data (t): 0.363 Batch (t): 2.389, 1711.30/s, 855.648/s/gpu LR: 0.000355 Logit Scale: 17.155 Contrastive_loss: 3.3624 (3.3101) Loss: 3.3624 (3.3101)
|
| 475 |
+
2024-09-26,19:04:23 | INFO | Train Epoch: 2 [1232896/2572288 (48%)] Data (t): 0.391 Batch (t): 2.400, 1698.47/s, 849.234/s/gpu LR: 0.000327 Logit Scale: 17.636 Contrastive_loss: 4.0710 (3.5004) Loss: 4.0710 (3.5004)
|
| 476 |
+
2024-09-26,19:08:22 | INFO | Train Epoch: 2 [1642496/2572288 (64%)] Data (t): 0.393 Batch (t): 2.392, 1679.99/s, 839.997/s/gpu LR: 0.000298 Logit Scale: 18.081 Contrastive_loss: 2.3333 (3.2670) Loss: 2.3333 (3.2670)
|
| 477 |
+
2024-09-26,19:12:21 | INFO | Train Epoch: 2 [2052096/2572288 (80%)] Data (t): 0.394 Batch (t): 2.395, 1673.86/s, 836.930/s/gpu LR: 0.000269 Logit Scale: 18.483 Contrastive_loss: 2.8788 (3.2023) Loss: 2.8788 (3.2023)
|
| 478 |
+
2024-09-26,19:16:20 | INFO | Train Epoch: 2 [2461696/2572288 (96%)] Data (t): 0.389 Batch (t): 2.391, 1717.42/s, 858.711/s/gpu LR: 0.000239 Logit Scale: 18.900 Contrastive_loss: 1.9518 (3.0236) Loss: 1.9518 (3.0236)
|
| 479 |
+
2024-09-26,19:17:25 | INFO | Train Epoch: 2 [2572288/2572288 (100%)] Data (t): 0.389 Batch (t): 2.385, 1748.19/s, 874.094/s/gpu LR: 0.000231 Logit Scale: 19.020 Contrastive_loss: 1.1944 (2.7950) Loss: 1.1944 (2.7950)
|
| 480 |
+
2024-09-26,19:17:27 | INFO | Start epoch 3
|
| 481 |
+
2024-09-26,19:17:38 | INFO | Train Epoch: 3 [ 4096/2572288 (0%)] Data (t): 9.891 Batch (t): 11.878, 344.839/s, 172.420/s/gpu LR: 0.000231 Logit Scale: 19.025 Contrastive_loss: 0.80978 (0.80978) Loss: 0.80978 (0.80978)
|
| 482 |
+
2024-09-26,19:21:28 | INFO | Train Epoch: 3 [ 413696/2572288 (16%)] Data (t): 0.258 Batch (t): 2.298, 1712.05/s, 856.024/s/gpu LR: 0.000202 Logit Scale: 19.390 Contrastive_loss: 1.3713 (1.0906) Loss: 1.3713 (1.0906)
|
| 483 |
+
2024-09-26,19:25:26 | INFO | Train Epoch: 3 [ 823296/2572288 (32%)] Data (t): 0.329 Batch (t): 2.374, 1662.96/s, 831.481/s/gpu LR: 0.000173 Logit Scale: 19.745 Contrastive_loss: 1.4956 (1.2256) Loss: 1.4956 (1.2256)
|
| 484 |
+
2024-09-26,19:29:25 | INFO | Train Epoch: 3 [1232896/2572288 (48%)] Data (t): 0.404 Batch (t): 2.395, 1712.45/s, 856.223/s/gpu LR: 0.000145 Logit Scale: 20.036 Contrastive_loss: 1.7642 (1.3602) Loss: 1.7642 (1.3602)
|
| 485 |
+
2024-09-26,19:33:25 | INFO | Train Epoch: 3 [1642496/2572288 (64%)] Data (t): 0.402 Batch (t): 2.398, 1715.10/s, 857.552/s/gpu LR: 0.000119 Logit Scale: 20.299 Contrastive_loss: 1.3284 (1.3539) Loss: 1.3284 (1.3539)
|
| 486 |
+
2024-09-26,19:37:24 | INFO | Train Epoch: 3 [2052096/2572288 (80%)] Data (t): 0.398 Batch (t): 2.396, 1667.26/s, 833.632/s/gpu LR: 0.000095 Logit Scale: 20.512 Contrastive_loss: 0.89806 (1.2779) Loss: 0.89806 (1.2779)
|
| 487 |
+
2024-09-26,19:41:24 | INFO | Train Epoch: 3 [2461696/2572288 (96%)] Data (t): 0.396 Batch (t): 2.394, 1716.32/s, 858.161/s/gpu LR: 0.000072 Logit Scale: 20.677 Contrastive_loss: 0.88699 (1.2221) Loss: 0.88699 (1.2221)
|
| 488 |
+
2024-09-26,19:42:29 | INFO | Train Epoch: 3 [2572288/2572288 (100%)] Data (t): 0.396 Batch (t): 2.395, 1730.03/s, 865.015/s/gpu LR: 0.000067 Logit Scale: 20.716 Contrastive_loss: 0.69087 (1.1557) Loss: 0.69087 (1.1557)
|
| 489 |
+
2024-09-26,19:42:30 | INFO | Start epoch 4
|
| 490 |
+
2024-09-26,19:42:42 | INFO | Train Epoch: 4 [ 4096/2572288 (0%)] Data (t): 9.990 Batch (t): 11.974, 342.074/s, 171.037/s/gpu LR: 0.000067 Logit Scale: 20.717 Contrastive_loss: 0.48776 (0.48776) Loss: 0.48776 (0.48776)
|
| 491 |
+
2024-09-26,19:46:35 | INFO | Train Epoch: 4 [ 413696/2572288 (16%)] Data (t): 0.281 Batch (t): 2.323, 1711.70/s, 855.850/s/gpu LR: 0.000048 Logit Scale: 20.831 Contrastive_loss: 1.5258 (1.0068) Loss: 1.5258 (1.0068)
|
| 492 |
+
2024-09-26,19:50:33 | INFO | Train Epoch: 4 [ 823296/2572288 (32%)] Data (t): 0.377 Batch (t): 2.387, 1734.94/s, 867.468/s/gpu LR: 0.000032 Logit Scale: 20.912 Contrastive_loss: 0.59293 (0.86882) Loss: 0.59293 (0.86882)
|
| 493 |
+
2024-09-26,19:54:32 | INFO | Train Epoch: 4 [1232896/2572288 (48%)] Data (t): 0.395 Batch (t): 2.389, 1688.28/s, 844.139/s/gpu LR: 0.000019 Logit Scale: 20.964 Contrastive_loss: 0.53332 (0.78495) Loss: 0.53332 (0.78495)
|
| 494 |
+
2024-09-26,19:58:32 | INFO | Train Epoch: 4 [1642496/2572288 (64%)] Data (t): 0.399 Batch (t): 2.397, 1712.49/s, 856.243/s/gpu LR: 0.000009 Logit Scale: 20.992 Contrastive_loss: 0.42278 (0.71251) Loss: 0.42278 (0.71251)
|
| 495 |
+
2024-09-26,20:02:32 | INFO | Train Epoch: 4 [2052096/2572288 (80%)] Data (t): 0.397 Batch (t): 2.397, 1674.02/s, 837.012/s/gpu LR: 0.000003 Logit Scale: 21.004 Contrastive_loss: 0.39465 (0.65954) Loss: 0.39465 (0.65954)
|
| 496 |
+
2024-09-26,20:06:32 | INFO | Train Epoch: 4 [2461696/2572288 (96%)] Data (t): 0.400 Batch (t): 2.403, 1698.26/s, 849.129/s/gpu LR: 0.000000 Logit Scale: 21.007 Contrastive_loss: 0.50058 (0.63683) Loss: 0.50058 (0.63683)
|
| 497 |
+
2024-09-26,20:07:37 | INFO | Train Epoch: 4 [2572288/2572288 (100%)] Data (t): 0.404 Batch (t): 2.397, 1735.03/s, 867.514/s/gpu LR: 0.000000 Logit Scale: 21.007 Contrastive_loss: 0.49620 (0.61925) Loss: 0.49620 (0.61925)
|
minipile_style_only_txt/minipile_style_only_txt/params.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 1
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
checkpoint_path: /home/minipile/minipile_style_only_txt/checkpoints
|
| 7 |
+
coca_caption_loss_weight: 2.0
|
| 8 |
+
coca_contrastive_loss_weight: 1.0
|
| 9 |
+
copy_codebase: False
|
| 10 |
+
csv_caption_key: title
|
| 11 |
+
csv_img_key: filepath
|
| 12 |
+
csv_separator:
|
| 13 |
+
dataset_resampled: True
|
| 14 |
+
dataset_type: webdataset
|
| 15 |
+
ddp_static_graph: True
|
| 16 |
+
debug: False
|
| 17 |
+
delete_previous_checkpoint: False
|
| 18 |
+
device: cuda:0
|
| 19 |
+
dist_backend: nccl
|
| 20 |
+
dist_url: env://
|
| 21 |
+
distill: False
|
| 22 |
+
distill_model: None
|
| 23 |
+
distill_pretrained: None
|
| 24 |
+
distributed: True
|
| 25 |
+
epochs: 5
|
| 26 |
+
epochs_cooldown: None
|
| 27 |
+
eps: 1e-06
|
| 28 |
+
force_custom_text: False
|
| 29 |
+
force_image_size: None
|
| 30 |
+
force_patch_dropout: None
|
| 31 |
+
force_quick_gelu: False
|
| 32 |
+
gather_with_grad: True
|
| 33 |
+
grad_checkpointing: True
|
| 34 |
+
grad_clip_norm: None
|
| 35 |
+
horovod: False
|
| 36 |
+
image_mean: None
|
| 37 |
+
image_std: None
|
| 38 |
+
imagenet_v2: None
|
| 39 |
+
imagenet_val: None
|
| 40 |
+
local_loss: True
|
| 41 |
+
local_rank: 0
|
| 42 |
+
lock_image: False
|
| 43 |
+
lock_image_freeze_bn_stats: False
|
| 44 |
+
lock_image_unlocked_groups: 0
|
| 45 |
+
lock_text: False
|
| 46 |
+
lock_text_freeze_layer_norm: False
|
| 47 |
+
lock_text_unlocked_layers: 0
|
| 48 |
+
log_every_n_steps: 100
|
| 49 |
+
log_level: 20
|
| 50 |
+
log_local: False
|
| 51 |
+
log_path: /home/minipile/minipile_style_only_txt/out.log
|
| 52 |
+
logs: /home/minipile
|
| 53 |
+
lr: 0.0005
|
| 54 |
+
lr_cooldown_end: 0.0
|
| 55 |
+
lr_cooldown_power: 1.0
|
| 56 |
+
lr_scheduler: cosine
|
| 57 |
+
model: ViT-B-32
|
| 58 |
+
name: minipile_style_only_txt
|
| 59 |
+
no_set_device_rank: False
|
| 60 |
+
precision: amp
|
| 61 |
+
pretrained:
|
| 62 |
+
pretrained_image: False
|
| 63 |
+
rank: 0
|
| 64 |
+
remote_sync: None
|
| 65 |
+
remote_sync_frequency: 300
|
| 66 |
+
remote_sync_protocol: s3
|
| 67 |
+
report_to: wandb
|
| 68 |
+
resume: None
|
| 69 |
+
save_frequency: 0
|
| 70 |
+
save_most_recent: True
|
| 71 |
+
seed: 0
|
| 72 |
+
skip_scheduler: False
|
| 73 |
+
tensorboard: False
|
| 74 |
+
tensorboard_path:
|
| 75 |
+
torchscript: False
|
| 76 |
+
trace: False
|
| 77 |
+
train_data: /home/minipile_style_txt_dataset/{00000000..00000095}.tar
|
| 78 |
+
train_data_upsampling_factors: None
|
| 79 |
+
train_num_samples: 2560000
|
| 80 |
+
use_bn_sync: False
|
| 81 |
+
val_data: None
|
| 82 |
+
val_frequency: 1
|
| 83 |
+
val_num_samples: None
|
| 84 |
+
wandb: True
|
| 85 |
+
wandb_notes:
|
| 86 |
+
wandb_project_name: clip_text_hq_clusters
|
| 87 |
+
warmup: 500
|
| 88 |
+
wd: 0.2
|
| 89 |
+
workers: 4
|
| 90 |
+
world_size: 2
|
| 91 |
+
zeroshot_frequency: 2
|