apoorvrajdev committed on
Commit
540e3d5
·
1 Parent(s): 50ea9ed

feat(evaluation): add stabilized training results (greedy + beam)

Browse files
results/history.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 4.689607620239258,
4
+ 4.012580871582031,
5
+ 3.859469175338745,
6
+ 3.7521610260009766,
7
+ 3.658834457397461,
8
+ 3.5711705684661865,
9
+ 3.4886834621429443,
10
+ 3.4167635440826416,
11
+ 3.3627333641052246,
12
+ 3.3309690952301025
13
+ ],
14
+ "acc": [
15
+ 0.35664623975753784,
16
+ 0.4205029308795929,
17
+ 0.4372301995754242,
18
+ 0.45031386613845825,
19
+ 0.4619741439819336,
20
+ 0.4737516939640045,
21
+ 0.48592403531074524,
22
+ 0.49649009108543396,
23
+ 0.5055381655693054,
24
+ 0.5104578137397766
25
+ ],
26
+ "val_loss": [
27
+ 3.9543943405151367,
28
+ 3.813389539718628,
29
+ 3.750491142272949,
30
+ 3.715622901916504,
31
+ 3.6967527866363525,
32
+ 3.669666290283203,
33
+ 3.658308744430542,
34
+ 3.6533355712890625,
35
+ 3.6508076190948486,
36
+ 3.647895574569702
37
+ ],
38
+ "val_acc": [
39
+ 0.4293306767940521,
40
+ 0.4465056359767914,
41
+ 0.4552256464958191,
42
+ 0.4590519964694977,
43
+ 0.46400371193885803,
44
+ 0.468044251203537,
45
+ 0.47079238295555115,
46
+ 0.47307583689689636,
47
+ 0.47429201006889343,
48
+ 0.4751632809638977
49
+ ]
50
+ }
results/stabilized-beam-w4-lp07-rp12/diagnostics.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/stabilized-beam-w4-lp07-rp12/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_examples": 500,
3
+ "bleu1": 41.92993161784126,
4
+ "bleu2": 25.407254877413084,
5
+ "bleu3": 16.009033427971406,
6
+ "bleu4": 10.388352276291533,
7
+ "rouge_l": 36.84029057315173,
8
+ "meteor": 15.56136011285077,
9
+ "cider": 0.8260395027190287,
10
+ "errors": {}
11
+ }
results/stabilized-beam-w4-lp07-rp12/predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/stabilized-beam-w4-lp07-rp12/qualitative.jsonl ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000381195.jpg", "prediction": "a close up of broccoli on a plate", "references": ["a bowl with broccoli next to a bowl with a lentil in it"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 3.8902180856807296, "sentence_rouge_l": 28.57142857142857, "flags": []}
2
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000065965.jpg", "prediction": "a man holding a banana in his mouth", "references": ["woman wearing a banana mask on her head", "a woman wearing an odd banana mask on her head"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 11.044795567078939, "sentence_rouge_l": 25.0, "flags": []}
3
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000014774.jpg", "prediction": "a bathroom with a toilet and a sink", "references": ["a house plant on a sink in a bathroom", "a bathroom with a green plant and a toilet that has its top off", "a plant is setting on the counter in this bathroom"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 43.147305290109884, "sentence_rouge_l": 54.54545454545454, "flags": []}
4
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000440844.jpg", "prediction": "a man holding a tennis racquet on a tennis court", "references": ["two children playing tennis on the same team", "two people standing on a tennis court playing a game"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 27.77619034011791, "sentence_rouge_l": 40.00000000000001, "flags": []}
5
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000163506.jpg", "prediction": "a group of people standing on top of a snow covered slope", "references": ["a group of men standing next to each other in the snow", "a couple of people on skis stand on some snow"], "length_tokens": 12, "longest_repeat_run": 1, "sentence_bleu4": 17.242221289766626, "sentence_rouge_l": 54.54545454545454, "flags": []}
6
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000145409.jpg", "prediction": "a group of people flying kites in the sky", "references": ["a grassy with cars and kites flying in the sky"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 18.88588859215946, "sentence_rouge_l": 52.63157894736842, "flags": []}
7
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000132540.jpg", "prediction": "a living room filled with furniture and a flat screen tv", "references": ["a television is turned on in a living room"], "length_tokens": 11, "longest_repeat_run": 1, "sentence_bleu4": 14.991106946711685, "sentence_rouge_l": 30.0, "flags": []}
8
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000082881.jpg", "prediction": "a man riding skis down a snow covered slope", "references": ["two people ski over a snow covered slope"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 29.84745896009822, "sentence_rouge_l": 58.82352941176471, "flags": []}
9
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000437710.jpg", "prediction": "a birthday cake with a candle on top of it", "references": ["a white and red frosted cake fashioned to look like the united states", "the welcome home cake is decorated with red lettering and a flag", "a soldier is welcomed home with a celebratory cake"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 9.980099403873663, "sentence_rouge_l": 31.57894736842105, "flags": []}
10
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000061262.jpg", "prediction": "a polar bear laying on a rock in the water", "references": ["a polar bear in the water playing with a giant disc", "polar bear hanging onto a floating device in water"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 27.301208627090666, "sentence_rouge_l": 57.14285714285713, "flags": []}
11
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000402020.jpg", "prediction": "a sandwich on a plate on a table", "references": ["a sandwich on a plate and full wine glass are under blurry lights", "salad and lettuce sitting on a plate on a dining room table", "a white plate with a ham sandwich on it"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 74.20884818558928, "sentence_rouge_l": 60.0, "flags": []}
12
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000440283.jpg", "prediction": "a large building with a clock on it", "references": ["a clock tower with a grassy field in the foreground", "a tall clock tower in a field next to a building", "two clocks on a tower near a building with american flags"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 14.898573527259423, "sentence_rouge_l": 33.33333333333333, "flags": []}
13
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000529522.jpg", "prediction": "a man riding a wave on top of a surfboard", "references": ["two surfers on boards at a wave park"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 9.287528999566801, "sentence_rouge_l": 22.222222222222225, "flags": []}
14
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000326712.jpg", "prediction": "a group of people flying kites in the sky", "references": ["a row of green white and orange kites in the sky", "a very long line of pretty kites in the sky", "the large kite is flying in the clear book sky"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 30.719343730842187, "sentence_rouge_l": 63.1578947368421, "flags": []}
15
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000051652.jpg", "prediction": "a close up of a street sign on a pole", "references": ["a small sign on the side of a road", "a close up of a street sign that is not in english", "black and white metal sign pointing to the enclosure"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 68.65890479690394, "sentence_rouge_l": 63.63636363636365, "flags": []}
16
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000352936.jpg", "prediction": "a wooden bench sitting in the middle of a park", "references": ["this bird waited all day to get a seat on the bench"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 4.521356896113449, "sentence_rouge_l": 18.18181818181818, "flags": []}
17
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000252596.jpg", "prediction": "a person riding a skateboard down a street", "references": ["a person skateboards down a street that has greenery on either side"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 15.678003406168177, "sentence_rouge_l": 60.0, "flags": []}
18
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000018773.jpg", "prediction": "a hot dog with mustard and ketchup on it", "references": ["a hot dog on top of a bun covered in ketchup"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 16.14682615668325, "sentence_rouge_l": 40.0, "flags": []}
19
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000017667.jpg", "prediction": "a red and white boat on a lake", "references": ["a long red boat traveling under a bridge on a river", "a boat close to the bank and under a bridge"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 11.320467831315831, "sentence_rouge_l": 52.63157894736842, "flags": []}
20
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000056028.jpg", "prediction": "a couple of cows that are standing in the grass", "references": ["an elderly man and a mature woman standing in a cow pasture", "two people are standing in a field with cows"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 17.747405280050266, "sentence_rouge_l": 31.57894736842105, "flags": []}
21
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000129379.jpg", "prediction": "a woman sitting on a bench talking on a cell phone", "references": ["a woman sitting on a cement wall talking on a cell phone", "a woman on a sidewalk using a phone"], "length_tokens": 11, "longest_repeat_run": 1, "sentence_bleu4": 64.07117598241614, "sentence_rouge_l": 86.95652173913044, "flags": []}
22
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000137730.jpg", "prediction": "a couple of zebra standing next to each other", "references": ["two zebras standing in a meadow on a sunny day"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 5.0735520042259505, "sentence_rouge_l": 21.05263157894737, "flags": []}
23
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000302666.jpg", "prediction": "a cat sitting on top of a toilet next to a sink", "references": ["a grey cat looking at the waters in the toilet pit"], "length_tokens": 12, "longest_repeat_run": 1, "sentence_bleu4": 4.456882760699064, "sentence_rouge_l": 26.08695652173913, "flags": []}
24
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000360371.jpg", "prediction": "a white toilet sitting in a bathroom next to a sink", "references": ["a toilet sitting in a bathroom next to a scale", "a tan toilet sitting by a mint green wall"], "length_tokens": 11, "longest_repeat_run": 1, "sentence_bleu4": 69.89307622784945, "sentence_rouge_l": 85.71428571428572, "flags": []}
25
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000015667.jpg", "prediction": "a red fire hydrant on the side of a street", "references": ["a yellow fire hydrant near the curb of a street"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 21.36435031981171, "sentence_rouge_l": 70.0, "flags": []}
26
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000336027.jpg", "prediction": "a man with a beard and a woman standing in a kitchen", "references": ["a man making a surprised face is getting a hair cut", "a man is getting a hair cut with eyeglasses"], "length_tokens": 12, "longest_repeat_run": 1, "sentence_bleu4": 8.51659301881964, "sentence_rouge_l": 34.78260869565217, "flags": []}
27
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000117759.jpg", "prediction": "a little girl holding a toothbrush in her mouth", "references": ["a woman looking in a mirror and blowdrying her hair"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 6.033504141761816, "sentence_rouge_l": 31.57894736842105, "flags": []}
28
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000425135.jpg", "prediction": "a bathroom with a toilet and a sink", "references": ["a bathroom with two sinks a bathtub and a shower with lots of lighting from the windows"], "length_tokens": 8, "longest_repeat_run": 1, "sentence_bleu4": 8.783184684610797, "sentence_rouge_l": 48.0, "flags": []}
29
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000387515.jpg", "prediction": "a bus is parked on the side of a road", "references": ["the bus is driving on the city street", "a blue and white city bus traveling down the street"], "length_tokens": 10, "longest_repeat_run": 1, "sentence_bleu4": 12.549310621989482, "sentence_rouge_l": 44.44444444444445, "flags": []}
30
+ {"image": "/kaggle/input/datasets/awsaf49/coco-2017-dataset/coco2017/train2017/000000416246.jpg", "prediction": "a stop sign on the side of a road", "references": ["the stop sign is posted along the deserted road"], "length_tokens": 9, "longest_repeat_run": 1, "sentence_bleu4": 11.339582221952005, "sentence_rouge_l": 44.44444444444444, "flags": []}
results/stabilized-beam-w4-lp07-rp12/report.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Evaluation run — inceptionv3-transformer-stabilized
2
+
3
+ - Decode strategy: `beam`
4
+ - Weights: `models/v1.0.0/model.h5`
5
+ - Tokenizer dir: `models/v1.0.0`
6
+ - Samples: **500**
7
+ - Timestamp (UTC): 2026-06-01T13:49:29.575579+00:00
8
+ - Beam width: 4
9
+ - Length penalty: 0.7
10
+ - Repetition penalty: 1.2
11
+
12
+ ## Metrics
13
+
14
+ | Metric | Value |
15
+ |---|---|
16
+ | BLEU-1 | 41.93 |
17
+ | BLEU-2 | 25.41 |
18
+ | BLEU-3 | 16.01 |
19
+ | BLEU-4 | 10.39 |
20
+ | ROUGE-L | 36.84 |
21
+ | METEOR | 15.56 |
22
+ | CIDEr | 0.83 |
results/stabilized-beam-w4-lp07-rp12/run_meta.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "inceptionv3-transformer-stabilized",
3
+ "decode_strategy": "beam",
4
+ "weights_path": "models/v1.0.0/model.h5",
5
+ "tokenizer_dir": "models/v1.0.0",
6
+ "n_samples": 500,
7
+ "max_length": 40,
8
+ "beam_width": 4,
9
+ "length_penalty": 0.7,
10
+ "repetition_penalty": 1.2,
11
+ "timestamp_utc": "2026-06-01T13:49:29.575579+00:00"
12
+ }
results/stabilized-greedy/diagnostics.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/stabilized-greedy/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_examples": 500,
3
+ "bleu1": 42.196743776968695,
4
+ "bleu2": 26.0895810249633,
5
+ "bleu3": 16.519095802602447,
6
+ "bleu4": 10.565569657054366,
7
+ "rouge_l": 37.571303951091465,
8
+ "meteor": 15.447442354081048,
9
+ "cider": 0.7891216278499353,
10
+ "errors": {}
11
+ }
results/stabilized-greedy/predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/stabilized-greedy/report.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Evaluation run — inceptionv3-transformer-stabilized
2
+
3
+ - Decode strategy: `greedy`
4
+ - Weights: `models/v1.0.0/model.h5`
5
+ - Tokenizer dir: `models/v1.0.0`
6
+ - Samples: **500**
7
+ - Timestamp (UTC): 2026-06-01T13:37:57.408004+00:00
8
+ - Repetition penalty: 1.0
9
+
10
+ ## Metrics
11
+
12
+ | Metric | Value |
13
+ |---|---|
14
+ | BLEU-1 | 42.20 |
15
+ | BLEU-2 | 26.09 |
16
+ | BLEU-3 | 16.52 |
17
+ | BLEU-4 | 10.57 |
18
+ | ROUGE-L | 37.57 |
19
+ | METEOR | 15.45 |
20
+ | CIDEr | 0.79 |
results/stabilized-greedy/run_meta.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "inceptionv3-transformer-stabilized",
3
+ "decode_strategy": "greedy",
4
+ "weights_path": "models/v1.0.0/model.h5",
5
+ "tokenizer_dir": "models/v1.0.0",
6
+ "n_samples": 500,
7
+ "max_length": 40,
8
+ "beam_width": null,
9
+ "length_penalty": null,
10
+ "repetition_penalty": 1.0,
11
+ "timestamp_utc": "2026-06-01T13:37:57.408004+00:00"
12
+ }