CodeGoat24 commited on
Commit
605c7ec
·
verified ·
1 Parent(s): e38f467

Update leaderboard_data.json

Browse files
Files changed (1) hide show
  1. leaderboard_data.json +431 -832
leaderboard_data.json CHANGED
@@ -6,19 +6,16 @@
6
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
7
  "open_source": true,
8
  "release_date": "2025-11",
9
-
10
  "Overall": 74.18,
11
- "Style": 91.70,
12
  "World Knowledge": 90.98,
13
-
14
  "Attribute-Overall": 76.92,
15
  "Quantity": 75.69,
16
  "Expression": 66.03,
17
  "Material": 88.21,
18
  "Size": 77.78,
19
- "Shape": 60.00,
20
  "Color": 94.17,
21
-
22
  "Action-Overall": 74.71,
23
  "Hand": 71.15,
24
  "Full body": 79.89,
@@ -26,28 +23,22 @@
26
  "Non Contact": 69.39,
27
  "Contact": 72.02,
28
  "State": 76.42,
29
-
30
  "Relationship-Overall": 72.08,
31
- "Composition": 75.00,
32
  "Similarity": 61.11,
33
  "Inclusion": 77.17,
34
  "Comparison": 73.44,
35
-
36
  "Compound-Overall": 65.85,
37
  "Imagination": 69.39,
38
- "Feature matching": 62.44,
39
-
40
  "Grammar-Overall": 65.51,
41
  "Pronoun Reference": 79.04,
42
  "Consistency": 64.35,
43
  "Negation": 52.31,
44
-
45
  "Layout-Overall": 80.97,
46
  "2D": 82.72,
47
  "3D": 79.17,
48
-
49
  "Logical Reasoning": 50.69,
50
-
51
  "Text": 72.41
52
  },
53
  {
@@ -56,11 +47,9 @@
56
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
57
  "open_source": true,
58
  "release_date": "2025-11",
59
-
60
  "Overall": 81.44,
61
- "Style": 95.70,
62
- "World Knowledge": 93.20,
63
-
64
  "Attribute-Overall": 90.49,
65
  "Quantity": 86.81,
66
  "Expression": 83.97,
@@ -68,36 +57,29 @@
68
  "Size": 89.58,
69
  "Shape": 86.25,
70
  "Color": 100.0,
71
-
72
  "Action-Overall": 87.55,
73
  "Hand": 87.18,
74
- "Full body": 91.30,
75
- "Animal": 87.50,
76
  "Non Contact": 82.14,
77
- "Contact": 86.90,
78
  "State": 90.09,
79
-
80
  "Relationship-Overall": 89.34,
81
  "Composition": 94.26,
82
  "Similarity": 82.78,
83
  "Inclusion": 93.48,
84
  "Comparison": 81.25,
85
-
86
  "Compound-Overall": 84.02,
87
  "Imagination": 86.73,
88
  "Feature matching": 81.25,
89
-
90
- "Grammar-Overall": 76.20,
91
  "Pronoun Reference": 90.81,
92
  "Consistency": 82.41,
93
  "Negation": 55.77,
94
-
95
  "Layout-Overall": 90.49,
96
  "2D": 91.54,
97
  "3D": 89.39,
98
-
99
  "Logical Reasoning": 68.35,
100
-
101
  "Text": 39.08
102
  },
103
  {
@@ -106,13 +88,9 @@
106
  "hf": "-",
107
  "open_source": false,
108
  "release_date": "2025-11",
109
-
110
  "Overall": 93.82,
111
-
112
- "Style": 99.50,
113
-
114
  "World Knowledge": 97.47,
115
-
116
  "Attribute-Overall": 94.55,
117
  "Quantity": 90.97,
118
  "Expression": 96.15,
@@ -120,87 +98,71 @@
120
  "Size": 95.14,
121
  "Shape": 91.25,
122
  "Color": 98.33,
123
-
124
  "Action-Overall": 94.96,
125
  "Hand": 94.23,
126
  "Full body": 94.57,
127
  "Animal": 97.06,
128
  "Non Contact": 92.35,
129
  "Contact": 95.24,
130
- "State": 96.70,
131
-
132
  "Relationship-Overall": 96.07,
133
  "Composition": 96.96,
134
  "Similarity": 91.67,
135
  "Inclusion": 97.83,
136
  "Comparison": 97.66,
137
-
138
- "Compound-Overall": 94.20,
139
  "Imagination": 96.68,
140
  "Feature matching": 91.67,
141
-
142
  "Grammar-Overall": 89.04,
143
  "Pronoun Reference": 94.49,
144
  "Consistency": 90.74,
145
  "Negation": 81.92,
146
-
147
- "Layout-Overall": 94.40,
148
  "2D": 96.32,
149
  "3D": 92.42,
150
-
151
  "Logical Reasoning": 82.34,
152
-
153
  "Text": 95.69
154
- },
155
  {
156
  "model": "wan2.5-t2i-preview",
157
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
158
  "hf": "-",
159
  "open_source": false,
160
  "release_date": "2025-09",
161
-
162
- "Overall": 78.40,
163
- "Style": 93.30,
164
- "World Knowledge": 93.51,
165
-
166
- "Attribute-Overall": 83.65,
167
- "Quantity": 78.47,
168
  "Expression": 75.64,
169
- "Material": 90.09,
170
  "Size": 84.72,
171
- "Shape": 76.88,
172
- "Color": 96.67,
173
-
174
- "Action-Overall": 76.62,
175
- "Hand": 73.72,
176
- "Full body": 72.28,
177
  "Animal": 81.62,
178
- "Non Contact": 77.04,
179
- "Contact": 73.81,
180
- "State": 81.13,
181
-
182
- "Relationship-Overall": 81.85,
183
- "Composition": 80.07,
184
- "Similarity": 73.33,
185
- "Inclusion": 88.04,
186
- "Comparison": 89.06,
187
-
188
  "Compound-Overall": 78.74,
189
- "Imagination": 84.95,
190
- "Feature matching": 72.40,
191
-
192
- "Grammar-Overall": 72.58,
193
- "Pronoun Reference": 82.72,
194
- "Consistency": 70.37,
195
- "Negation": 63.67,
196
-
197
- "Layout-Overall": 75.93,
198
- "2D": 76.10,
199
- "3D": 75.76,
200
-
201
- "Logical Reasoning": 63.64,
202
-
203
- "Text": 64.22
204
  },
205
  {
206
  "model": "Echo-4o",
@@ -208,48 +170,39 @@
208
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
209
  "open_source": true,
210
  "release_date": "2025-8",
211
-
212
- "Overall": 72.40,
213
- "Style": 92.80,
214
  "World Knowledge": 87.66,
215
-
216
  "Attribute-Overall": 84.29,
217
  "Quantity": 72.92,
218
  "Expression": 77.56,
219
  "Material": 89.15,
220
  "Size": 88.19,
221
- "Shape": 80.00,
222
  "Color": 99.17,
223
-
224
  "Action-Overall": 76.05,
225
  "Hand": 73.08,
226
  "Full body": 83.15,
227
  "Animal": 85.29,
228
- "Non Contact": 75.00,
229
  "Contact": 65.48,
230
  "State": 75.47,
231
-
232
  "Relationship-Overall": 82.23,
233
  "Composition": 85.81,
234
- "Similarity": 75.00,
235
  "Inclusion": 88.04,
236
  "Comparison": 75.78,
237
-
238
  "Compound-Overall": 77.96,
239
  "Imagination": 82.91,
240
  "Feature matching": 72.92,
241
-
242
- "Grammar-Overall": 75.40,
243
  "Pronoun Reference": 80.15,
244
  "Consistency": 77.31,
245
  "Negation": 68.85,
246
-
247
  "Layout-Overall": 83.02,
248
  "2D": 84.19,
249
  "3D": 81.82,
250
-
251
  "Logical Reasoning": 56.82,
252
-
253
  "Text": 7.76
254
  },
255
  {
@@ -258,13 +211,9 @@
258
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
259
  "open_source": true,
260
  "release_date": "2024-09",
261
-
262
  "Overall": 33.91,
263
-
264
  "Style": 78.08,
265
-
266
  "World Knowledge": 55.54,
267
-
268
  "Attribute-Overall": 38.29,
269
  "Quantity": 27.78,
270
  "Expression": 30.13,
@@ -272,7 +221,6 @@
272
  "Size": 32.64,
273
  "Shape": 27.67,
274
  "Color": 71.67,
275
-
276
  "Action-Overall": 31.18,
277
  "Hand": 16.67,
278
  "Full body": 36.96,
@@ -280,29 +228,23 @@
280
  "Non Contact": 26.02,
281
  "Contact": 17.86,
282
  "State": 40.57,
283
-
284
  "Relationship-Overall": 36.68,
285
  "Composition": 43.58,
286
  "Similarity": 31.67,
287
  "Inclusion": 38.04,
288
  "Comparison": 25.78,
289
-
290
  "Compound-Overall": 21.65,
291
  "Imagination": 29.85,
292
  "Feature matching": 13.28,
293
-
294
  "Grammar-Overall": 41.31,
295
  "Pronoun Reference": 41.91,
296
  "Consistency": 38.89,
297
  "Negation": 42.69,
298
-
299
  "Layout-Overall": 22.43,
300
  "2D": 17.71,
301
  "3D": 27.27,
302
-
303
- "Logical Reasoning": 13.90,
304
-
305
- "Text": 0.00
306
  },
307
  {
308
  "model": "UniWorld-V1",
@@ -310,19 +252,16 @@
310
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
311
  "open_source": true,
312
  "release_date": "2025-06",
313
-
314
  "Overall": 15.21,
315
- "Style": 49.40,
316
  "World Knowledge": 16.61,
317
-
318
  "Attribute-Overall": 15.06,
319
  "Quantity": 14.58,
320
  "Expression": 19.87,
321
  "Material": 8.02,
322
  "Size": 13.19,
323
- "Shape": 5.00,
324
- "Color": 37.50,
325
-
326
  "Action-Overall": 14.64,
327
  "Hand": 9.62,
328
  "Full body": 17.93,
@@ -330,28 +269,22 @@
330
  "Non Contact": 9.69,
331
  "Contact": 6.55,
332
  "State": 24.06,
333
-
334
- "Relationship-Overall": 11.80,
335
  "Composition": 16.55,
336
  "Similarity": 6.67,
337
- "Inclusion": 12.50,
338
  "Comparison": 7.03,
339
-
340
  "Compound-Overall": 4.38,
341
  "Imagination": 6.63,
342
  "Feature matching": 2.08,
343
-
344
  "Grammar-Overall": 27.81,
345
  "Pronoun Reference": 19.85,
346
- "Consistency": 16.20,
347
  "Negation": 45.77,
348
-
349
  "Layout-Overall": 9.14,
350
  "2D": 8.09,
351
  "3D": 10.23,
352
-
353
  "Logical Reasoning": 2.95,
354
-
355
  "Text": 0.29
356
  },
357
  {
@@ -360,19 +293,16 @@
360
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
361
  "open_source": true,
362
  "release_date": "2025-09",
363
-
364
  "Overall": 58.35,
365
- "Style": 80.90,
366
  "World Knowledge": 69.46,
367
-
368
  "Attribute-Overall": 75.64,
369
- "Quantity": 62.50,
370
  "Expression": 71.79,
371
  "Material": 77.83,
372
  "Size": 78.47,
373
- "Shape": 70.00,
374
  "Color": 96.67,
375
-
376
  "Action-Overall": 61.12,
377
  "Hand": 42.95,
378
  "Full body": 61.41,
@@ -380,29 +310,23 @@
380
  "Non Contact": 58.67,
381
  "Contact": 51.79,
382
  "State": 74.06,
383
-
384
  "Relationship-Overall": 67.13,
385
  "Composition": 68.58,
386
  "Similarity": 62.78,
387
  "Inclusion": 76.09,
388
  "Comparison": 57.03,
389
-
390
  "Compound-Overall": 56.06,
391
- "Imagination": 56.96,
392
  "Feature matching": 52.34,
393
-
394
  "Grammar-Overall": 64.84,
395
- "Pronoun Reference": 76.10,
396
  "Consistency": 70.37,
397
  "Negation": 48.46,
398
-
399
  "Layout-Overall": 69.22,
400
  "2D": 73.53,
401
  "3D": 64.77,
402
-
403
  "Logical Reasoning": 39.09,
404
-
405
- "Text": 0.00
406
  },
407
  {
408
  "model": "MMaDA",
@@ -410,19 +334,16 @@
410
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
411
  "open_source": true,
412
  "release_date": "2025-05",
413
-
414
- "Overall": 44.00,
415
- "Style": 78.20,
416
  "World Knowledge": 52.06,
417
-
418
  "Attribute-Overall": 55.24,
419
  "Quantity": 52.78,
420
  "Expression": 33.97,
421
  "Material": 58.49,
422
  "Size": 61.11,
423
- "Shape": 45.00,
424
  "Color": 86.67,
425
-
426
  "Action-Overall": 43.44,
427
  "Hand": 24.36,
428
  "Full body": 54.35,
@@ -430,29 +351,23 @@
430
  "Non Contact": 31.63,
431
  "Contact": 29.17,
432
  "State": 67.92,
433
-
434
  "Relationship-Overall": 56.22,
435
- "Composition": 59.80,
436
  "Similarity": 52.22,
437
  "Inclusion": 60.87,
438
  "Comparison": 46.88,
439
-
440
  "Compound-Overall": 32.86,
441
  "Imagination": 39.29,
442
- "Feature matching": 26.30,
443
-
444
  "Grammar-Overall": 58.56,
445
  "Pronoun Reference": 59.93,
446
- "Consistency": 46.30,
447
  "Negation": 67.31,
448
-
449
  "Layout-Overall": 37.31,
450
  "2D": 38.97,
451
  "3D": 35.61,
452
-
453
  "Logical Reasoning": 26.14,
454
-
455
- "Text": 0.00
456
  },
457
  {
458
  "model": "OmniGen2",
@@ -460,48 +375,39 @@
460
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
461
  "open_source": true,
462
  "release_date": "2025-06",
463
-
464
- "Overall": 63.20,
465
- "Style": 93.00,
466
  "World Knowledge": 86.39,
467
-
468
  "Attribute-Overall": 75.43,
469
  "Quantity": 67.36,
470
  "Expression": 69.87,
471
- "Material": 78.30,
472
  "Size": 77.78,
473
  "Shape": 68.75,
474
  "Color": 93.33,
475
-
476
  "Action-Overall": 66.54,
477
- "Hand": 64.10,
478
  "Full body": 69.57,
479
  "Animal": 74.26,
480
  "Non Contact": 61.73,
481
  "Contact": 55.95,
482
  "State": 73.58,
483
-
484
  "Relationship-Overall": 70.69,
485
  "Composition": 77.03,
486
  "Similarity": 66.67,
487
  "Inclusion": 71.74,
488
  "Comparison": 60.16,
489
-
490
  "Compound-Overall": 59.92,
491
  "Imagination": 66.33,
492
  "Feature matching": 53.39,
493
-
494
  "Grammar-Overall": 65.64,
495
  "Pronoun Reference": 71.69,
496
- "Consistency": 71.30,
497
  "Negation": 54.62,
498
-
499
  "Layout-Overall": 69.96,
500
  "2D": 76.84,
501
  "3D": 62.88,
502
-
503
  "Logical Reasoning": 44.09,
504
-
505
  "Text": 0.29
506
  },
507
  {
@@ -510,99 +416,80 @@
510
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
511
  "open_source": true,
512
  "release_date": "2025-09",
513
-
514
- "Overall": 58.50,
515
- "Style": 94.40,
516
- "World Knowledge": 86.55,
517
-
518
- "Attribute-Overall": 63.89,
519
- "Quantity": 56.94,
520
- "Expression": 66.03,
521
- "Material": 73.58,
522
- "Size": 65.28,
523
- "Shape": 38.75,
524
- "Color": 84.17,
525
-
526
- "Action-Overall": 63.12,
527
- "Hand": 42.31,
528
- "Full body": 75.00,
529
- "Animal": 80.88,
530
- "Non Contact": 61.22,
531
- "Contact": 44.05,
532
- "State": 73.58,
533
-
534
- "Relationship-Overall": 67.39,
535
- "Composition": 72.64,
536
- "Similarity": 61.67,
537
- "Inclusion": 69.57,
538
- "Comparison": 60.16,
539
-
540
- "Compound-Overall": 51.55,
541
- "Imagination": 63.52,
542
- "Feature matching": 39.32,
543
-
544
- "Grammar-Overall": 59.00,
545
- "Pronoun Reference": 64.34,
546
- "Consistency": 60.19,
547
- "Negation": 52.69,
548
-
549
- "Layout-Overall": 60.45,
550
- "2D": 61.76,
551
- "3D": 59.09,
552
-
553
- "Logical Reasoning": 38.64,
554
-
555
- "Text": 0.00
556
-
557
  },
558
- {
559
  "model": "X-Omni",
560
  "link": "https://arxiv.org/pdf/2507.22058",
561
  "hf": "https://huggingface.co/X-Omni/X-Omni-Zh",
562
  "open_source": true,
563
  "release_date": "2025-08",
564
-
565
  "Overall": 53.69,
566
  "Style": 70.07,
567
  "World Knowledge": 71.52,
568
-
569
  "Attribute-Overall": 63.85,
570
  "Quantity": 61.81,
571
  "Expression": 52.56,
572
  "Material": 63.51,
573
  "Size": 67.36,
574
- "Shape": 57.50,
575
  "Color": 85.83,
576
-
577
  "Action-Overall": 58.37,
578
  "Hand": 48.72,
579
  "Full body": 68.48,
580
  "Animal": 63.97,
581
- "Non Contact": 56.53,
582
  "Contact": 43.45,
583
  "State": 66.51,
584
-
585
  "Relationship-Overall": 59.77,
586
  "Composition": 60.14,
587
- "Similarity": 60.00,
588
- "Inclusion": 62.50,
589
  "Comparison": 54.69,
590
-
591
  "Compound-Overall": 41.75,
592
  "Imagination": 48.72,
593
  "Feature matching": 34.64,
594
-
595
  "Grammar-Overall": 56.28,
596
  "Pronoun Reference": 63.97,
597
- "Consistency": 53.70,
598
  "Negation": 50.38,
599
-
600
  "Layout-Overall": 59.51,
601
  "2D": 66.91,
602
  "3D": 51.89,
603
-
604
  "Logical Reasoning": 34.77,
605
-
606
  "Text": 20.98
607
  },
608
  {
@@ -611,48 +498,39 @@
611
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
612
  "open_source": true,
613
  "release_date": "2025-05",
614
-
615
  "Overall": 65.69,
616
- "Style": 92.30,
617
  "World Knowledge": 86.71,
618
-
619
  "Attribute-Overall": 75.21,
620
  "Quantity": 64.58,
621
  "Expression": 63.46,
622
  "Material": 83.49,
623
  "Size": 79.86,
624
  "Shape": 66.25,
625
- "Color": 95.00,
626
-
627
  "Action-Overall": 65.78,
628
  "Hand": 61.54,
629
  "Full body": 63.59,
630
  "Animal": 75.74,
631
  "Non Contact": 65.31,
632
- "Contact": 61.90,
633
  "State": 67.92,
634
-
635
  "Relationship-Overall": 75.38,
636
- "Composition": 77.70,
637
  "Similarity": 67.78,
638
  "Inclusion": 82.07,
639
  "Comparison": 71.09,
640
-
641
  "Compound-Overall": 69.85,
642
  "Imagination": 79.59,
643
- "Feature matching": 59.90,
644
-
645
  "Grammar-Overall": 69.52,
646
  "Pronoun Reference": 73.16,
647
- "Consistency": 75.00,
648
  "Negation": 61.15,
649
-
650
  "Layout-Overall": 77.61,
651
  "2D": 82.72,
652
  "3D": 72.35,
653
-
654
  "Logical Reasoning": 37.95,
655
-
656
  "Text": 6.61
657
  },
658
  {
@@ -661,19 +539,16 @@
661
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
662
  "open_source": true,
663
  "release_date": "2025-05",
664
-
665
  "Overall": 50.65,
666
- "Style": 83.30,
667
  "World Knowledge": 78.32,
668
-
669
  "Attribute-Overall": 62.18,
670
  "Quantity": 69.44,
671
  "Expression": 45.51,
672
  "Material": 55.66,
673
  "Size": 70.14,
674
- "Shape": 55.00,
675
  "Color": 86.67,
676
-
677
  "Action-Overall": 53.71,
678
  "Hand": 44.23,
679
  "Full body": 57.61,
@@ -681,29 +556,23 @@
681
  "Non Contact": 53.06,
682
  "Contact": 47.62,
683
  "State": 61.32,
684
-
685
  "Relationship-Overall": 57.23,
686
  "Composition": 57.77,
687
  "Similarity": 52.78,
688
  "Inclusion": 63.04,
689
  "Comparison": 53.91,
690
-
691
  "Compound-Overall": 34.54,
692
  "Imagination": 38.01,
693
  "Feature matching": 30.99,
694
-
695
  "Grammar-Overall": 53.88,
696
  "Pronoun Reference": 62.13,
697
  "Consistency": 51.85,
698
  "Negation": 46.92,
699
-
700
- "Layout-Overall": 59.70,
701
- "2D": 63.60,
702
  "3D": 55.68,
703
-
704
  "Logical Reasoning": 23.64,
705
-
706
- "Text": 0.00
707
  },
708
  {
709
  "model": "Hunyuan-Image-2.1",
@@ -711,18 +580,16 @@
711
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
712
  "open_source": true,
713
  "release_date": "2025-09",
714
-
715
  "Overall": 77.76,
716
- "Style": 92.20,
717
  "World Knowledge": 90.51,
718
  "Attribute-Overall": 84.19,
719
- "Quantity": 87.50,
720
  "Expression": 80.77,
721
  "Material": 82.55,
722
  "Size": 86.11,
723
- "Shape": 75.00,
724
- "Color": 97.50,
725
-
726
  "Action-Overall": 80.51,
727
  "Hand": 76.28,
728
  "Full body": 84.24,
@@ -730,48 +597,40 @@
730
  "Non Contact": 78.06,
731
  "Contact": 79.17,
732
  "State": 80.66,
733
-
734
  "Relationship-Overall": 82.74,
735
  "Composition": 80.74,
736
  "Similarity": 80.56,
737
- "Inclusion": 87.50,
738
  "Comparison": 83.59,
739
-
740
  "Compound-Overall": 70.62,
741
  "Imagination": 71.68,
742
  "Feature matching": 69.53,
743
-
744
- "Grammar-Overall": 61.50,
745
  "Pronoun Reference": 80.15,
746
  "Consistency": 67.13,
747
  "Negation": 37.31,
748
-
749
  "Layout-Overall": 85.45,
750
  "2D": 88.24,
751
  "3D": 82.58,
752
-
753
  "Logical Reasoning": 50.23,
754
-
755
- "Text": 79.60
756
- },
757
  {
758
- "model": "BLIP3-o",
759
  "link": "https://arxiv.org/pdf/2505.09568",
760
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
761
  "open_source": true,
762
  "release_date": "2025-08",
763
-
764
  "Overall": 59.25,
765
- "Style": 92.60,
766
  "World Knowledge": 81.17,
767
  "Attribute-Overall": 66.56,
768
  "Quantity": 57.64,
769
  "Expression": 65.38,
770
  "Material": 67.92,
771
  "Size": 77.08,
772
- "Shape": 47.50,
773
  "Color": 89.17,
774
-
775
  "Action-Overall": 64.35,
776
  "Hand": 57.69,
777
  "Full body": 73.37,
@@ -779,40 +638,33 @@
779
  "Non Contact": 59.18,
780
  "Contact": 55.95,
781
  "State": 70.28,
782
-
783
  "Relationship-Overall": 65.36,
784
  "Composition": 69.26,
785
  "Similarity": 58.33,
786
  "Inclusion": 63.04,
787
  "Comparison": 69.53,
788
-
789
- "Compound-Overall": 51.80,
790
  "Imagination": 61.99,
791
  "Feature matching": 41.41,
792
-
793
  "Grammar-Overall": 63.37,
794
  "Pronoun Reference": 70.22,
795
  "Consistency": 57.41,
796
  "Negation": 61.16,
797
-
798
  "Layout-Overall": 65.67,
799
  "2D": 69.12,
800
  "3D": 62.12,
801
-
802
  "Logical Reasoning": 41.59,
803
-
804
- "Text": 0.00
805
  },
806
  {
807
- "model": "BLIP3-o-Next",
808
  "link": "https://arxiv.org/pdf/2505.09568",
809
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
810
  "open_source": true,
811
  "release_date": "2025-08",
812
-
813
  "Overall": 44.48,
814
- "Style": 74.60,
815
- "World Knowledge": 50.00,
816
  "Attribute-Overall": 55.98,
817
  "Quantity": 44.44,
818
  "Expression": 57.69,
@@ -820,37 +672,30 @@
820
  "Size": 63.89,
821
  "Shape": 48.12,
822
  "Color": 68.33,
823
-
824
  "Action-Overall": 47.62,
825
  "Hand": 37.82,
826
  "Full body": 61.41,
827
  "Animal": 45.59,
828
  "Non Contact": 45.41,
829
- "Contact": 36.90,
830
  "State": 54.72,
831
-
832
  "Relationship-Overall": 53.55,
833
  "Composition": 54.05,
834
  "Similarity": 48.33,
835
- "Inclusion": 50.00,
836
  "Comparison": 64.84,
837
-
838
  "Compound-Overall": 26.55,
839
  "Imagination": 32.14,
840
  "Feature matching": 20.83,
841
-
842
  "Grammar-Overall": 54.14,
843
  "Pronoun Reference": 65.07,
844
  "Consistency": 49.54,
845
  "Negation": 46.54,
846
-
847
  "Layout-Overall": 54.85,
848
  "2D": 58.82,
849
  "3D": 50.76,
850
-
851
- "Logical Reasoning": 27.50,
852
-
853
- "Text": 0.00
854
  },
855
  {
856
  "model": "Janus-flow",
@@ -858,51 +703,40 @@
858
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
859
  "open_source": true,
860
  "release_date": "2024-11",
861
-
862
  "Overall": 20.93,
863
-
864
- "Style": 58.50,
865
-
866
  "World Knowledge": 18.67,
867
-
868
  "Attribute-Overall": 19.23,
869
  "Quantity": 22.92,
870
- "Expression": 10.90,
871
- "Material": 21.70,
872
  "Size": 24.31,
873
  "Shape": 8.12,
874
- "Color": 30.00,
875
-
876
  "Action-Overall": 22.05,
877
  "Hand": 4.49,
878
  "Full body": 31.52,
879
  "Animal": 22.06,
880
- "Non Contact": 14.80,
881
  "Contact": 19.05,
882
  "State": 35.85,
883
-
884
  "Relationship-Overall": 19.54,
885
  "Composition": 23.65,
886
  "Similarity": 16.11,
887
  "Inclusion": 20.11,
888
  "Comparison": 14.06,
889
-
890
- "Compound-Overall": 10.70,
891
  "Imagination": 19.13,
892
  "Feature matching": 2.08,
893
-
894
  "Grammar-Overall": 35.03,
895
  "Pronoun Reference": 32.72,
896
  "Consistency": 16.67,
897
  "Negation": 52.69,
898
-
899
  "Layout-Overall": 14.93,
900
  "2D": 12.13,
901
- "3D": 17.80,
902
-
903
  "Logical Reasoning": 10.68,
904
-
905
- "Text": 0.00
906
  },
907
  {
908
  "model": "CogView4",
@@ -910,48 +744,40 @@
910
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
911
  "open_source": true,
912
  "release_date": "2024-03",
913
-
914
  "Overall": 55.14,
915
- "Style": 82.40,
916
  "World Knowledge": 84.18,
917
  "Attribute-Overall": 63.35,
918
  "Quantity": 68.75,
919
  "Expression": 44.87,
920
- "Material": 56.60,
921
  "Size": 72.92,
922
  "Shape": 53.75,
923
  "Color": 94.17,
924
-
925
  "Action-Overall": 61.69,
926
  "Hand": 61.54,
927
- "Full body": 66.30,
928
  "Animal": 64.71,
929
  "Non Contact": 52.04,
930
  "Contact": 54.76,
931
  "State": 70.28,
932
-
933
  "Relationship-Overall": 61.68,
934
  "Composition": 61.82,
935
  "Similarity": 62.22,
936
  "Inclusion": 63.59,
937
  "Comparison": 57.81,
938
-
939
  "Compound-Overall": 45.75,
940
  "Imagination": 51.02,
941
  "Feature matching": 40.36,
942
-
943
  "Grammar-Overall": 54.55,
944
  "Pronoun Reference": 67.65,
945
  "Consistency": 57.41,
946
  "Negation": 38.46,
947
-
948
- "Layout-Overall": 65.30,
949
- "2D": 75.00,
950
- "3D": 55.30,
951
-
952
  "Logical Reasoning": 30.23,
953
-
954
- "Text": 2.30
955
  },
956
  {
957
  "model": "Janus",
@@ -959,51 +785,40 @@
959
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
960
  "open_source": true,
961
  "release_date": "2024-10",
962
-
963
  "Overall": 30.98,
964
-
965
- "Style": 78.10,
966
-
967
  "World Knowledge": 27.85,
968
-
969
  "Attribute-Overall": 30.88,
970
  "Quantity": 29.17,
971
  "Expression": 17.31,
972
  "Material": 35.85,
973
  "Size": 45.83,
974
  "Shape": 14.37,
975
- "Color": 17.31,
976
-
977
  "Action-Overall": 31.37,
978
- "Hand": 14.10,
979
  "Full body": 38.59,
980
  "Animal": 42.65,
981
  "Non Contact": 24.49,
982
  "Contact": 23.21,
983
- "State": 43.40,
984
-
985
  "Relationship-Overall": 30.58,
986
  "Composition": 32.43,
987
  "Similarity": 32.22,
988
  "Inclusion": 27.72,
989
  "Comparison": 28.12,
990
-
991
  "Compound-Overall": 17.53,
992
  "Imagination": 25.26,
993
  "Feature matching": 9.64,
994
-
995
- "Grammar-Overall": 48.40,
996
  "Pronoun Reference": 48.53,
997
  "Consistency": 33.33,
998
  "Negation": 60.77,
999
-
1000
  "Layout-Overall": 31.72,
1001
  "2D": 31.25,
1002
- "3D": 32.20,
1003
-
1004
  "Logical Reasoning": 13.41,
1005
-
1006
- "Text": 0.00
1007
  },
1008
  {
1009
  "model": "Janus-Pro",
@@ -1011,9 +826,8 @@
1011
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
1012
  "open_source": true,
1013
  "release_date": "2025-01",
1014
-
1015
  "Overall": 30.83,
1016
- "Style": 75.60,
1017
  "World Knowledge": 39.08,
1018
  "Attribute-Overall": 33.12,
1019
  "Quantity": 24.31,
@@ -1021,8 +835,7 @@
1021
  "Material": 43.87,
1022
  "Size": 45.14,
1023
  "Shape": 18.75,
1024
- "Color": 47.50,
1025
-
1026
  "Action-Overall": 26.33,
1027
  "Hand": 13.46,
1028
  "Full body": 26.09,
@@ -1030,41 +843,33 @@
1030
  "Non Contact": 22.45,
1031
  "Contact": 20.83,
1032
  "State": 38.68,
1033
-
1034
  "Relationship-Overall": 32.74,
1035
  "Composition": 38.85,
1036
  "Similarity": 35.56,
1037
  "Inclusion": 26.09,
1038
  "Comparison": 24.22,
1039
-
1040
  "Compound-Overall": 24.48,
1041
  "Imagination": 33.42,
1042
  "Feature matching": 15.36,
1043
-
1044
  "Grammar-Overall": 36.63,
1045
  "Pronoun Reference": 36.76,
1046
  "Consistency": 31.94,
1047
  "Negation": 40.38,
1048
-
1049
  "Layout-Overall": 30.04,
1050
  "2D": 29.78,
1051
- "3D": 30.30,
1052
-
1053
  "Logical Reasoning": 10.23,
1054
-
1055
- "Text": 0.00
1056
  },
1057
- {
1058
  "model": "Kolors",
1059
  "link": "https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf",
1060
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
1061
  "open_source": true,
1062
  "release_date": "2024-7",
1063
-
1064
- "Overall": 58.80,
1065
- "Style": 85.20,
1066
  "World Knowledge": 86.23,
1067
-
1068
  "Attribute-Overall": 69.34,
1069
  "Quantity": 70.14,
1070
  "Expression": 51.92,
@@ -1072,36 +877,29 @@
1072
  "Size": 77.78,
1073
  "Shape": 56.25,
1074
  "Color": 91.67,
1075
-
1076
  "Action-Overall": 65.02,
1077
  "Hand": 58.33,
1078
  "Full body": 59.24,
1079
  "Animal": 71.32,
1080
  "Non Contact": 63.78,
1081
- "Contact": 57.54,
1082
  "State": 77.83,
1083
-
1084
  "Relationship-Overall": 67.13,
1085
  "Composition": 71.96,
1086
  "Similarity": 69.44,
1087
  "Inclusion": 67.39,
1088
  "Comparison": 52.34,
1089
-
1090
- "Compound-Overall": 66.03,
1091
- "Imagination": 64.80,
1092
  "Feature matching": 45.05,
1093
-
1094
  "Grammar-Overall": 56.68,
1095
  "Pronoun Reference": 67.28,
1096
  "Consistency": 59.26,
1097
  "Negation": 43.46,
1098
-
1099
  "Layout-Overall": 62.31,
1100
  "2D": 58.82,
1101
  "3D": 65.91,
1102
-
1103
  "Logical Reasoning": 36.14,
1104
-
1105
  "Text": 4.89
1106
  },
1107
  {
@@ -1110,21 +908,16 @@
1110
  "hf": "-",
1111
  "open_source": false,
1112
  "release_date": "2025-09",
1113
-
1114
  "Overall": 87.31,
1115
-
1116
- "Style": 99.00,
1117
-
1118
  "World Knowledge": 94.94,
1119
-
1120
  "Attribute-Overall": 90.06,
1121
  "Quantity": 86.81,
1122
- "Expression": 85.90,
1123
  "Material": 97.64,
1124
  "Size": 86.81,
1125
  "Shape": 83.12,
1126
  "Color": 99.17,
1127
-
1128
  "Action-Overall": 87.55,
1129
  "Hand": 82.69,
1130
  "Full body": 90.22,
@@ -1132,80 +925,63 @@
1132
  "Non Contact": 84.69,
1133
  "Contact": 82.74,
1134
  "State": 92.45,
1135
-
1136
  "Relationship-Overall": 88.58,
1137
  "Composition": 85.14,
1138
  "Similarity": 84.44,
1139
  "Inclusion": 95.65,
1140
  "Comparison": 92.19,
1141
-
1142
  "Compound-Overall": 81.57,
1143
- "Imagination": 85.20,
1144
  "Feature matching": 77.86,
1145
-
1146
  "Grammar-Overall": 78.48,
1147
  "Pronoun Reference": 89.71,
1148
- "Consistency": 75.00,
1149
  "Negation": 69.62,
1150
-
1151
- "Layout-Overall": 90.30,
1152
  "2D": 90.81,
1153
  "3D": 89.77,
1154
-
1155
  "Logical Reasoning": 68.64,
1156
-
1157
  "Text": 93.97
1158
- },
1159
- {
1160
  "model": "Imagen-4.0-generate-preview-06-06",
1161
  "link": "https://deepmind.google/models/imagen/",
1162
  "hf": "-",
1163
  "open_source": false,
1164
  "release_date": "2025-01",
1165
-
1166
  "Overall": 79.52,
1167
-
1168
- "Style": 97.50,
1169
-
1170
  "World Knowledge": 96.84,
1171
-
1172
  "Attribute-Overall": 86.22,
1173
  "Quantity": 83.33,
1174
  "Expression": 77.56,
1175
  "Material": 92.92,
1176
  "Size": 93.75,
1177
- "Shape": 72.50,
1178
  "Color": 98.33,
1179
-
1180
- "Action-Overall": 90.40,
1181
- "Hand": 89.10,
1182
  "Full body": 89.67,
1183
  "Animal": 93.38,
1184
  "Non Contact": 86.73,
1185
  "Contact": 90.48,
1186
- "State": 93.40,
1187
-
1188
  "Relationship-Overall": 90.74,
1189
  "Composition": 91.55,
1190
  "Similarity": 83.33,
1191
  "Inclusion": 94.57,
1192
  "Comparison": 93.75,
1193
-
1194
- "Compound-Overall": 85.70,
1195
- "Imagination": 92.60,
1196
  "Feature matching": 78.65,
1197
-
1198
  "Grammar-Overall": 82.89,
1199
  "Pronoun Reference": 92.65,
1200
  "Consistency": 82.87,
1201
  "Negation": 72.69,
1202
-
1203
  "Layout-Overall": 89.18,
1204
  "2D": 91.54,
1205
  "3D": 86.74,
1206
-
1207
  "Logical Reasoning": 73.18,
1208
-
1209
  "Text": 2.59
1210
  },
1211
  {
@@ -1214,271 +990,214 @@
1214
  "hf": "-",
1215
  "open_source": false,
1216
  "release_date": "2024-11",
1217
-
1218
  "Overall": 54.93,
1219
-
1220
  "Style": 64.75,
1221
-
1222
  "World Knowledge": 71.05,
1223
-
1224
  "Attribute-Overall": 60.43,
1225
  "Quantity": 54.29,
1226
  "Expression": 46.05,
1227
- "Material": 72.60,
1228
  "Size": 57.64,
1229
  "Shape": 50.62,
1230
- "Color": 81.90,
1231
-
1232
  "Action-Overall": 60.42,
1233
  "Hand": 52.63,
1234
  "Full body": 65.22,
1235
- "Animal": 75.00,
1236
  "Non Contact": 51.56,
1237
  "Contact": 54.37,
1238
  "State": 65.09,
1239
-
1240
- "Relationship-Overall": 65.90,
1241
  "Composition": 66.89,
1242
  "Similarity": 51.11,
1243
  "Inclusion": 74.43,
1244
  "Comparison": 72.66,
1245
-
1246
- "Compound-Overall": 61.00,
1247
  "Imagination": 68.22,
1248
  "Feature matching": 53.49,
1249
-
1250
  "Grammar-Overall": 58.38,
1251
  "Pronoun Reference": 55.38,
1252
  "Consistency": 55.09,
1253
  "Negation": 64.29,
1254
-
1255
  "Layout-Overall": 64.71,
1256
  "2D": 59.93,
1257
  "3D": 69.62,
1258
-
1259
  "Logical Reasoning": 42.03,
1260
-
1261
  "Text": 0.59
1262
  },
1263
- {
1264
  "model": "HiDream_v2L",
1265
  "link": "https://hidreamai.com/doc/txt2img/request",
1266
  "hf": "-",
1267
  "open_source": false,
1268
  "release_date": "2025-07",
1269
-
1270
- "Overall": 59.95,
1271
-
1272
- "Style": 89.34,
1273
-
1274
- "World Knowledge": 91.02,
1275
-
1276
  "Attribute-Overall": 67.87,
1277
  "Quantity": 71.43,
1278
- "Expression": 42.31,
1279
- "Material": 70.59,
1280
- "Size": 70.00,
1281
- "Shape": 64.52,
1282
  "Color": 94.17,
1283
-
1284
- "Action-Overall": 64.90,
1285
- "Hand": 48.72,
1286
- "Full body": 65.22,
1287
- "Animal": 75.00,
1288
- "Non Contact": 71.88,
1289
- "Contact": 55.95,
1290
- "State": 71.15,
1291
-
1292
- "Relationship-Overall": 72.67,
1293
- "Composition": 78.82,
1294
- "Similarity": 65.00,
1295
- "Inclusion": 75.56,
1296
- "Comparison": 65.32,
1297
-
1298
- "Compound-Overall": 53.19,
1299
- "Imagination": 62.63,
1300
- "Feature matching": 43.55,
1301
-
1302
- "Grammar-Overall": 62.57,
1303
- "Pronoun Reference": 75.38,
1304
  "Consistency": 68.75,
1305
- "Negation": 44.53,
1306
-
1307
- "Layout-Overall": 64.77,
1308
- "2D": 66.29,
1309
- "3D": 63.26,
1310
-
1311
- "Logical Reasoning": 32.01,
1312
-
1313
- "Text": 1.16
1314
- },
1315
- {
1316
  "model": "FLUX-kontext-pro",
1317
  "link": "https://bfl.ai/models/flux-kontext",
1318
  "hf": "-",
1319
  "open_source": false,
1320
  "release_date": "2025-05",
1321
-
1322
  "Overall": 1.27,
1323
  "Style": 6.93,
1324
- "World Knowledge": 0.00,
1325
-
1326
  "Attribute-Overall": 0.11,
1327
  "Quantity": 0.69,
1328
- "Expression": 0.00,
1329
- "Material": 0.00,
1330
- "Size": 0.00,
1331
- "Shape": 0.00,
1332
- "Color": 0.00,
1333
-
1334
  "Action-Overall": 0.29,
1335
- "Hand": 0.00,
1336
- "Full body": 0.00,
1337
- "Animal": 0.00,
1338
- "Non Contact": 0.00,
1339
- "Contact": 0.00,
1340
  "State": 1.47,
1341
-
1342
  "Relationship-Overall": 0.13,
1343
  "Composition": 0.34,
1344
- "Similarity": 0.00,
1345
- "Inclusion": 0.00,
1346
- "Comparison": 0.00,
1347
-
1348
- "Compound-Overall": 0.00,
1349
- "Imagination": 0.00,
1350
- "Feature matching": 0.00,
1351
-
1352
  "Grammar-Overall": 5.24,
1353
- "Pronoun Reference": 0.00,
1354
  "Consistency": 2.31,
1355
  "Negation": 13.28,
1356
-
1357
- "Layout-Overall": 0.00,
1358
- "2D": 0.00,
1359
- "3D": 0.00,
1360
-
1361
- "Logical Reasoning": 0.00,
1362
-
1363
- "Text": 0.00
1364
- },
1365
- {
1366
  "model": "FLUX-pro-1.1-Ultra",
1367
  "link": "https://bfl.ai/",
1368
  "hf": "-",
1369
  "open_source": false,
1370
  "release_date": "2024-11",
1371
-
1372
  "Overall": 1.31,
1373
-
1374
  "Style": 7.63,
1375
-
1376
  "World Knowledge": 0.32,
1377
-
1378
  "Attribute-Overall": 0.32,
1379
- "Quantity": 0.00,
1380
- "Expression": 0.00,
1381
  "Material": 0.94,
1382
- "Size": 0.00,
1383
- "Shape": 0.00,
1384
  "Color": 0.83,
1385
-
1386
- "Action-Overall": 0.00,
1387
- "Hand": 0.00,
1388
- "Full body": 0.00,
1389
- "Animal": 0.00,
1390
- "Non Contact": 0.00,
1391
- "Contact": 0.00,
1392
- "State": 0.00,
1393
-
1394
  "Relationship-Overall": 0.26,
1395
  "Composition": 0.34,
1396
  "Similarity": 0.57,
1397
- "Inclusion": 0.00,
1398
- "Comparison": 0.00,
1399
-
1400
- "Compound-Overall": 0.00,
1401
- "Imagination": 0.00,
1402
- "Feature matching": 0.00,
1403
-
1404
- "Grammar-Overall": 4.30,
1405
  "Pronoun Reference": 0.37,
1406
  "Consistency": 1.39,
1407
  "Negation": 10.94,
1408
-
1409
  "Layout-Overall": 0.19,
1410
  "2D": 0.37,
1411
- "3D": 0.00,
1412
-
1413
- "Logical Reasoning": 0.00,
1414
-
1415
- "Text": 0.00
1416
- },
1417
- {
1418
  "model": "DALL-E-3",
1419
  "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/",
1420
  "hf": "-",
1421
  "open_source": false,
1422
  "release_date": "2023-09",
1423
-
1424
  "Overall": 67.93,
1425
-
1426
- "Style": 95.90,
1427
-
1428
  "World Knowledge": 93.04,
1429
-
1430
  "Attribute-Overall": 78.42,
1431
  "Quantity": 60.42,
1432
  "Expression": 68.59,
1433
  "Material": 91.04,
1434
  "Size": 90.28,
1435
- "Shape": 65.00,
1436
  "Color": 94.17,
1437
-
1438
  "Action-Overall": 72.24,
1439
  "Hand": 69.87,
1440
  "Full body": 77.17,
1441
  "Animal": 82.35,
1442
  "Non Contact": 66.33,
1443
- "Contact": 61.90,
1444
  "State": 76.89,
1445
-
1446
  "Relationship-Overall": 79.95,
1447
  "Composition": 81.76,
1448
  "Similarity": 77.78,
1449
- "Inclusion": 87.50,
1450
  "Comparison": 67.97,
1451
-
1452
  "Compound-Overall": 72.94,
1453
  "Imagination": 82.14,
1454
  "Feature matching": 63.54,
1455
-
1456
  "Grammar-Overall": 71.52,
1457
  "Pronoun Reference": 79.78,
1458
  "Consistency": 76.39,
1459
  "Negation": 58.85,
1460
-
1461
- "Layout-Overall": 62.50,
1462
  "2D": 54.41,
1463
  "3D": 70.83,
1464
-
1465
  "Logical Reasoning": 51.59,
1466
-
1467
  "Text": 1.15
1468
- },
1469
- {
1470
  "model": "Qwen-Image",
1471
  "link": "https://arxiv.org/pdf/2508.02324",
1472
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
1473
  "open_source": true,
1474
  "release_date": "2025-08",
1475
-
1476
  "Overall": 81.04,
1477
-
1478
- "Style": 95.50,
1479
-
1480
  "World Knowledge": 92.41,
1481
-
1482
  "Attribute-Overall": 91.88,
1483
  "Quantity": 88.89,
1484
  "Expression": 91.03,
@@ -1486,211 +1205,170 @@
1486
  "Size": 90.28,
1487
  "Shape": 86.25,
1488
  "Color": 98.33,
1489
-
1490
  "Action-Overall": 85.74,
1491
  "Hand": 83.33,
1492
- "Full body": 87.50,
1493
  "Animal": 89.71,
1494
  "Non Contact": 81.63,
1495
  "Contact": 82.14,
1496
  "State": 90.09,
1497
-
1498
  "Relationship-Overall": 82.99,
1499
  "Composition": 85.47,
1500
  "Similarity": 73.33,
1501
  "Inclusion": 90.76,
1502
  "Comparison": 79.69,
1503
-
1504
  "Compound-Overall": 76.16,
1505
- "Imagination": 80.10,
1506
  "Feature matching": 72.14,
1507
-
1508
  "Grammar-Overall": 62.83,
1509
  "Pronoun Reference": 83.46,
1510
  "Consistency": 74.07,
1511
  "Negation": 31.92,
1512
-
1513
  "Layout-Overall": 82.65,
1514
  "2D": 84.93,
1515
- "3D": 80.30,
1516
-
1517
  "Logical Reasoning": 57.73,
1518
-
1519
  "Text": 82.47
1520
- },
1521
- {
1522
  "model": "wan2.2-t2i-plus",
1523
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
1524
  "hf": "-",
1525
  "open_source": false,
1526
  "release_date": "2025-07",
1527
-
1528
  "Overall": 66.96,
1529
  "Style": 91.06,
1530
  "World Knowledge": 84.39,
1531
-
1532
  "Attribute-Overall": 73.93,
1533
- "Quantity": 75.00,
1534
  "Expression": 67.31,
1535
  "Material": 74.06,
1536
  "Size": 74.31,
1537
  "Shape": 66.25,
1538
  "Color": 90.83,
1539
-
1540
  "Action-Overall": 72.52,
1541
  "Hand": 69.23,
1542
- "Full body": 80.00,
1543
  "Animal": 84.56,
1544
  "Non Contact": 65.31,
1545
- "Contact": 61.90,
1546
  "State": 75.94,
1547
-
1548
  "Relationship-Overall": 76.78,
1549
  "Composition": 71.28,
1550
  "Similarity": 72.78,
1551
  "Inclusion": 85.87,
1552
  "Comparison": 82.03,
1553
-
1554
  "Compound-Overall": 64.77,
1555
  "Imagination": 74.23,
1556
- "Feature matching": 55.00,
1557
-
1558
  "Grammar-Overall": 70.59,
1559
  "Pronoun Reference": 77.21,
1560
  "Consistency": 63.43,
1561
  "Negation": 69.62,
1562
-
1563
  "Layout-Overall": 71.83,
1564
  "2D": 73.16,
1565
  "3D": 70.45,
1566
-
1567
  "Logical Reasoning": 51.82,
1568
-
1569
  "Text": 11.92
1570
- },
1571
- {
1572
  "model": "FLUX.1-dev",
1573
  "link": "https://bfl.ai/blog/24-08-01-bfl",
1574
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1575
  "open_source": true,
1576
  "release_date": "2024-08",
1577
-
1578
  "Overall": 2.74,
1579
- "Style": 10.50,
1580
  "World Knowledge": 0.63,
1581
-
1582
  "Attribute-Overall": 0.43,
1583
- "Quantity": 0.00,
1584
  "Expression": 1.92,
1585
  "Material": 0.47,
1586
- "Size": 0.00,
1587
- "Shape": 0.00,
1588
- "Color": 0.00,
1589
-
1590
  "Action-Overall": 0.95,
1591
  "Hand": 1.92,
1592
  "Full body": 0.54,
1593
  "Animal": 0.74,
1594
- "Non Contact": 0.00,
1595
- "Contact": 0.00,
1596
  "State": 2.36,
1597
-
1598
  "Relationship-Overall": 0.13,
1599
  "Composition": 0.34,
1600
- "Similarity": 0.00,
1601
- "Inclusion": 0.00,
1602
- "Comparison": 0.00,
1603
-
1604
  "Compound-Overall": 0.13,
1605
  "Imagination": 0.26,
1606
- "Feature matching": 0.00,
1607
-
1608
  "Grammar-Overall": 7.75,
1609
  "Pronoun Reference": 0.74,
1610
  "Consistency": 1.39,
1611
  "Negation": 20.38,
1612
-
1613
  "Layout-Overall": 0.19,
1614
- "2D": 0.00,
1615
  "3D": 0.38,
1616
-
1617
- "Logical Reasoning": 0.00,
1618
-
1619
- "Text": 0.00
1620
  },
1621
- {
1622
  "model": "Nano Banana",
1623
  "link": "https://ainanobanana.io/",
1624
  "hf": "-",
1625
  "open_source": false,
1626
  "release_date": "2025-08",
1627
-
1628
- "Overall": 80.91,
1629
-
1630
- "Style": 99.27,
1631
-
1632
- "World Knowledge": 96.47,
1633
-
1634
- "Attribute-Overall": 87.76,
1635
- "Quantity": 81.62,
1636
- "Expression": 80.79,
1637
- "Material": 89.66,
1638
  "Size": 95.74,
1639
- "Shape": 82.05,
1640
  "Color": 98.33,
1641
-
1642
- "Action-Overall": 86.99,
1643
- "Hand": 86.54,
1644
- "Full body": 91.38,
1645
- "Animal": 90.44,
1646
- "Non Contact": 81.96,
1647
- "Contact": 81.44,
1648
- "State": 90.64,
1649
-
1650
- "Relationship-Overall": 91.39,
1651
- "Composition": 92.33,
1652
- "Similarity": 83.89,
1653
- "Inclusion": 93.44,
1654
- "Comparison": 96.88,
1655
-
1656
- "Compound-Overall": 86.89,
1657
- "Imagination": 90.40,
1658
- "Feature matching": 83.42,
1659
-
1660
- "Grammar-Overall": 83.33,
1661
- "Pronoun Reference": 87.27,
1662
- "Consistency": 84.69,
1663
- "Negation": 78.12,
1664
-
1665
- "Layout-Overall": 88.80,
1666
- "2D": 91.82,
1667
- "3D": 85.66,
1668
-
1669
- "Logical Reasoning": 76.10,
1670
-
1671
- "Text": 12.06
1672
- },
1673
- {
1674
  "model": "Hunyuan-DiT",
1675
  "link": "https://arxiv.org/pdf/2405.08748",
1676
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1677
  "open_source": true,
1678
  "release_date": "2024-05",
1679
-
1680
  "Overall": 53.36,
1681
-
1682
- "Style": 92.50,
1683
-
1684
  "World Knowledge": 84.97,
1685
-
1686
  "Attribute-Overall": 62.93,
1687
  "Quantity": 63.19,
1688
  "Expression": 46.15,
1689
  "Material": 72.17,
1690
  "Size": 63.89,
1691
  "Shape": 49.38,
1692
- "Color": 85.00,
1693
-
1694
  "Action-Overall": 57.22,
1695
  "Hand": 45.51,
1696
  "Full body": 67.93,
@@ -1698,41 +1376,33 @@
1698
  "Non Contact": 48.47,
1699
  "Contact": 47.02,
1700
  "State": 69.81,
1701
-
1702
  "Relationship-Overall": 59.39,
1703
  "Composition": 65.88,
1704
  "Similarity": 64.44,
1705
  "Inclusion": 56.52,
1706
  "Comparison": 41.41,
1707
-
1708
  "Compound-Overall": 44.59,
1709
  "Imagination": 52.04,
1710
  "Feature matching": 36.98,
1711
-
1712
  "Grammar-Overall": 54.68,
1713
  "Pronoun Reference": 59.93,
1714
  "Consistency": 62.04,
1715
  "Negation": 43.08,
1716
-
1717
  "Layout-Overall": 47.76,
1718
  "2D": 39.71,
1719
  "3D": 56.06,
1720
-
1721
  "Logical Reasoning": 29.55,
1722
-
1723
- "Text": 0.00
1724
- },
1725
- {
1726
  "model": "Recraft",
1727
  "link": "https://www.recraft.ai/docs#generate-image",
1728
  "hf": "-",
1729
  "open_source": false,
1730
  "release_date": "2024-12",
1731
-
1732
  "Overall": 57.67,
1733
- "Style": 87.70,
1734
  "World Knowledge": 90.03,
1735
-
1736
  "Attribute-Overall": 69.34,
1737
  "Quantity": 66.67,
1738
  "Expression": 59.62,
@@ -1740,7 +1410,6 @@
1740
  "Size": 73.61,
1741
  "Shape": 61.25,
1742
  "Color": 95.83,
1743
-
1744
  "Action-Overall": 63.88,
1745
  "Hand": 50.64,
1746
  "Full body": 72.28,
@@ -1748,95 +1417,74 @@
1748
  "Non Contact": 63.78,
1749
  "Contact": 45.24,
1750
  "State": 72.17,
1751
-
1752
  "Relationship-Overall": 64.47,
1753
  "Composition": 65.54,
1754
  "Similarity": 58.89,
1755
  "Inclusion": 65.22,
1756
  "Comparison": 68.75,
1757
-
1758
  "Compound-Overall": 43.94,
1759
  "Imagination": 45.92,
1760
  "Feature matching": 41.93,
1761
-
1762
  "Grammar-Overall": 60.56,
1763
  "Pronoun Reference": 62.87,
1764
  "Consistency": 59.26,
1765
  "Negation": 59.23,
1766
-
1767
- "Layout-Overall": 58.40,
1768
  "2D": 55.15,
1769
  "3D": 61.74,
1770
-
1771
  "Logical Reasoning": 34.09,
1772
-
1773
  "Text": 4.31
1774
- },
1775
- {
1776
  "model": "Imagen-3.0-generate-002",
1777
  "link": "https://arxiv.org/pdf/2408.07009",
1778
  "hf": "-",
1779
  "open_source": false,
1780
  "release_date": "2025-02",
1781
-
1782
- "Overall": 13.62,
1783
-
1784
- "Style": 8.30,
1785
-
1786
- "World Knowledge": 0.00,
1787
-
1788
- "Attribute-Overall": 0.00,
1789
- "Quantity": 0.00,
1790
- "Expression": 0.00,
1791
- "Material": 0.00,
1792
- "Size": 0.00,
1793
- "Shape": 0.00,
1794
- "Color": 0.00,
1795
-
1796
  "Action-Overall": 0.38,
1797
- "Hand": 0.00,
1798
  "Full body": 0.54,
1799
- "Animal": 0.00,
1800
- "Non Contact": 0.00,
1801
- "Contact": 0.00,
1802
  "State": 1.42,
1803
-
1804
  "Relationship-Overall": 0.13,
1805
  "Composition": 0.34,
1806
- "Similarity": 0.00,
1807
- "Inclusion": 0.00,
1808
- "Comparison": 0.00,
1809
-
1810
- "Compound-Overall": 0.00,
1811
- "Imagination": 0.00,
1812
- "Feature matching": 0.00,
1813
-
1814
  "Grammar-Overall": 4.81,
1815
- "Pronoun Reference": 0.00,
1816
  "Consistency": 0.46,
1817
  "Negation": 13.46,
1818
-
1819
- "Layout-Overall": 0.00,
1820
- "2D": 0.00,
1821
- "3D": 0.00,
1822
-
1823
- "Logical Reasoning": 0.00,
1824
-
1825
- "Text": 0.00
1826
- },
1827
- {
1828
  "model": "Imagen-4.0-Fast-preview-06-06",
1829
  "link": "https://deepmind.google/models/imagen/",
1830
  "hf": "-",
1831
  "open_source": false,
1832
  "release_date": "2025-06",
1833
-
1834
- "Overall": 71.60,
1835
-
1836
- "Style": 93.30,
1837
-
1838
- "World Knowledge": 91.30,
1839
-
1840
  "Attribute-Overall": 80.98,
1841
  "Quantity": 76.39,
1842
  "Expression": 66.03,
@@ -1844,49 +1492,40 @@
1844
  "Size": 88.19,
1845
  "Shape": 78.75,
1846
  "Color": 95.83,
1847
-
1848
  "Action-Overall": 79.28,
1849
  "Hand": 74.36,
1850
  "Full body": 79.35,
1851
  "Animal": 83.82,
1852
  "Non Contact": 73.47,
1853
- "Contact": 75.60,
1854
  "State": 88.21,
1855
-
1856
  "Relationship-Overall": 82.49,
1857
  "Composition": 82.09,
1858
  "Similarity": 78.33,
1859
  "Inclusion": 88.04,
1860
  "Comparison": 81.25,
1861
-
1862
  "Compound-Overall": 73.97,
1863
  "Imagination": 83.67,
1864
  "Feature matching": 64.06,
1865
-
1866
  "Grammar-Overall": 77.41,
1867
  "Pronoun Reference": 83.82,
1868
  "Consistency": 78.24,
1869
- "Negation": 70.00,
1870
-
1871
  "Layout-Overall": 78.73,
1872
  "2D": 80.51,
1873
  "3D": 76.89,
1874
-
1875
  "Logical Reasoning": 54.77,
1876
-
1877
  "Text": 3.74
1878
  },
1879
- {
1880
  "model": "FLUX-kontext-max",
1881
  "link": "https://bfl.ai/models/flux-kontext",
1882
  "hf": "-",
1883
  "open_source": false,
1884
  "release_date": "2025-05",
1885
-
1886
  "Overall": 71.85,
1887
  "Style": 96.38,
1888
  "World Knowledge": 92.83,
1889
-
1890
  "Attribute-Overall": 76.41,
1891
  "Quantity": 65.97,
1892
  "Expression": 69.44,
@@ -1894,141 +1533,112 @@
1894
  "Size": 84.72,
1895
  "Shape": 66.67,
1896
  "Color": 93.33,
1897
-
1898
  "Action-Overall": 78.59,
1899
  "Hand": 76.32,
1900
  "Full body": 83.15,
1901
  "Animal": 83.33,
1902
- "Non Contact": 69.90,
1903
  "Contact": 73.17,
1904
  "State": 85.78,
1905
-
1906
  "Relationship-Overall": 83.97,
1907
  "Composition": 85.14,
1908
  "Similarity": 74.43,
1909
  "Inclusion": 91.67,
1910
  "Comparison": 83.59,
1911
-
1912
  "Compound-Overall": 75.13,
1913
  "Imagination": 82.65,
1914
  "Feature matching": 67.12,
1915
-
1916
  "Grammar-Overall": 75.68,
1917
  "Pronoun Reference": 79.85,
1918
  "Consistency": 75.46,
1919
  "Negation": 71.48,
1920
-
1921
  "Layout-Overall": 81.34,
1922
  "2D": 81.62,
1923
  "3D": 81.06,
1924
-
1925
  "Logical Reasoning": 56.48,
1926
-
1927
  "Text": 1.72
1928
- },
1929
- {
1930
  "model": "Seedream-3.0",
1931
  "link": "https://www.byteplus.com/en/product/Seedream",
1932
  "hf": "-",
1933
  "open_source": false,
1934
  "release_date": "2025-06",
1935
-
1936
  "Overall": 81.68,
1937
-
1938
- "Style": 97.50,
1939
-
1940
  "World Knowledge": 93.99,
1941
-
1942
  "Attribute-Overall": 88.03,
1943
  "Quantity": 84.03,
1944
  "Expression": 82.69,
1945
  "Material": 94.34,
1946
  "Size": 89.58,
1947
- "Shape": 80.00,
1948
- "Color": 97.50,
1949
-
1950
  "Action-Overall": 86.98,
1951
  "Hand": 85.26,
1952
  "Full body": 90.76,
1953
  "Animal": 89.71,
1954
- "Non Contact": 85.20,
1955
  "Contact": 80.36,
1956
  "State": 90.09,
1957
-
1958
  "Relationship-Overall": 84.39,
1959
  "Composition": 86.82,
1960
  "Similarity": 74.44,
1961
  "Inclusion": 90.22,
1962
  "Comparison": 84.38,
1963
-
1964
  "Compound-Overall": 76.68,
1965
  "Imagination": 82.14,
1966
  "Feature matching": 71.09,
1967
-
1968
  "Grammar-Overall": 67.25,
1969
  "Pronoun Reference": 84.19,
1970
  "Consistency": 79.17,
1971
  "Negation": 39.62,
1972
-
1973
  "Layout-Overall": 84.14,
1974
  "2D": 89.34,
1975
  "3D": 78.79,
1976
-
1977
  "Logical Reasoning": 59.09,
1978
-
1979
  "Text": 78.74
1980
- },
1981
- {
1982
  "model": "Imagen-4.0-Ultra-preview-06-06",
1983
  "link": "https://deepmind.google/models/imagen/",
1984
  "hf": "-",
1985
  "open_source": false,
1986
  "release_date": "2025-06",
1987
-
1988
- "Overall": 83.21,
1989
-
1990
- "Style": 98.90,
1991
-
1992
- "World Knowledge": 97.94,
1993
-
1994
- "Attribute-Overall": 90.71,
1995
- "Quantity": 88.89,
1996
- "Expression": 79.49,
1997
- "Material": 94.81,
1998
- "Size": 93.75,
1999
- "Shape": 88.12,
2000
  "Color": 100.0,
2001
-
2002
- "Action-Overall": 93.82,
2003
  "Hand": 94.87,
2004
- "Full body": 92.93,
2005
- "Animal": 95.59,
2006
- "Non Contact": 87.76,
2007
- "Contact": 95.24,
2008
- "State": 97.17,
2009
-
2010
- "Relationship-Overall": 92.13,
2011
- "Composition": 91.22,
2012
  "Similarity": 87.22,
2013
- "Inclusion": 97.83,
2014
- "Comparison": 92.97,
2015
-
2016
  "Compound-Overall": 89.95,
2017
- "Imagination": 94.90,
2018
- "Feature matching": 84.90,
2019
-
2020
- "Grammar-Overall": 87.43,
2021
- "Pronoun Reference": 93.01,
2022
- "Consistency": 85.65,
2023
- "Negation": 83.08,
2024
-
2025
- "Layout-Overall": 92.16,
2026
- "2D": 93.75,
2027
- "3D": 90.53,
2028
-
2029
- "Logical Reasoning": 79.32,
2030
-
2031
- "Text": 9.77
2032
  },
2033
  {
2034
  "model": "GPT-4o",
@@ -2036,13 +1646,9 @@
2036
  "hf": "-",
2037
  "open_source": false,
2038
  "release_date": "2025-03",
2039
-
2040
  "Overall": 91.02,
2041
-
2042
  "Style": 99.39,
2043
-
2044
  "World Knowledge": 98.72,
2045
-
2046
  "Attribute-Overall": 94.99,
2047
  "Quantity": 93.62,
2048
  "Expression": 94.59,
@@ -2050,7 +1656,6 @@
2050
  "Size": 93.06,
2051
  "Shape": 92.95,
2052
  "Color": 100.0,
2053
-
2054
  "Action-Overall": 92.34,
2055
  "Hand": 94.08,
2056
  "Full body": 97.28,
@@ -2058,29 +1663,23 @@
2058
  "Non Contact": 90.31,
2059
  "Contact": 88.34,
2060
  "State": 92.65,
2061
-
2062
  "Relationship-Overall": 95.77,
2063
- "Composition": 97.30,
2064
  "Similarity": 93.18,
2065
  "Inclusion": 96.69,
2066
  "Comparison": 94.53,
2067
-
2068
  "Compound-Overall": 93.91,
2069
  "Imagination": 95.92,
2070
  "Feature matching": 91.74,
2071
-
2072
  "Grammar-Overall": 91.02,
2073
  "Pronoun Reference": 95.15,
2074
  "Consistency": 89.35,
2075
  "Negation": 88.05,
2076
-
2077
  "Layout-Overall": 89.27,
2078
  "2D": 89.18,
2079
  "3D": 89.35,
2080
-
2081
  "Logical Reasoning": 91.44,
2082
-
2083
  "Text": 63.37
2084
  }
2085
  ]
2086
- }
 
6
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
7
  "open_source": true,
8
  "release_date": "2025-11",
 
9
  "Overall": 74.18,
10
+ "Style": 91.7,
11
  "World Knowledge": 90.98,
 
12
  "Attribute-Overall": 76.92,
13
  "Quantity": 75.69,
14
  "Expression": 66.03,
15
  "Material": 88.21,
16
  "Size": 77.78,
17
+ "Shape": 60.0,
18
  "Color": 94.17,
 
19
  "Action-Overall": 74.71,
20
  "Hand": 71.15,
21
  "Full body": 79.89,
 
23
  "Non Contact": 69.39,
24
  "Contact": 72.02,
25
  "State": 76.42,
 
26
  "Relationship-Overall": 72.08,
27
+ "Composition": 75.0,
28
  "Similarity": 61.11,
29
  "Inclusion": 77.17,
30
  "Comparison": 73.44,
 
31
  "Compound-Overall": 65.85,
32
  "Imagination": 69.39,
33
+ "Feature matching": 62.24,
 
34
  "Grammar-Overall": 65.51,
35
  "Pronoun Reference": 79.04,
36
  "Consistency": 64.35,
37
  "Negation": 52.31,
 
38
  "Layout-Overall": 80.97,
39
  "2D": 82.72,
40
  "3D": 79.17,
 
41
  "Logical Reasoning": 50.69,
 
42
  "Text": 72.41
43
  },
44
  {
 
47
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
48
  "open_source": true,
49
  "release_date": "2025-11",
 
50
  "Overall": 81.44,
51
+ "Style": 95.7,
52
+ "World Knowledge": 93.2,
 
53
  "Attribute-Overall": 90.49,
54
  "Quantity": 86.81,
55
  "Expression": 83.97,
 
57
  "Size": 89.58,
58
  "Shape": 86.25,
59
  "Color": 100.0,
 
60
  "Action-Overall": 87.55,
61
  "Hand": 87.18,
62
+ "Full body": 91.3,
63
+ "Animal": 87.5,
64
  "Non Contact": 82.14,
65
+ "Contact": 86.9,
66
  "State": 90.09,
 
67
  "Relationship-Overall": 89.34,
68
  "Composition": 94.26,
69
  "Similarity": 82.78,
70
  "Inclusion": 93.48,
71
  "Comparison": 81.25,
 
72
  "Compound-Overall": 84.02,
73
  "Imagination": 86.73,
74
  "Feature matching": 81.25,
75
+ "Grammar-Overall": 76.2,
 
76
  "Pronoun Reference": 90.81,
77
  "Consistency": 82.41,
78
  "Negation": 55.77,
 
79
  "Layout-Overall": 90.49,
80
  "2D": 91.54,
81
  "3D": 89.39,
 
82
  "Logical Reasoning": 68.35,
 
83
  "Text": 39.08
84
  },
85
  {
 
88
  "hf": "-",
89
  "open_source": false,
90
  "release_date": "2025-11",
 
91
  "Overall": 93.82,
92
+ "Style": 99.5,
 
 
93
  "World Knowledge": 97.47,
 
94
  "Attribute-Overall": 94.55,
95
  "Quantity": 90.97,
96
  "Expression": 96.15,
 
98
  "Size": 95.14,
99
  "Shape": 91.25,
100
  "Color": 98.33,
 
101
  "Action-Overall": 94.96,
102
  "Hand": 94.23,
103
  "Full body": 94.57,
104
  "Animal": 97.06,
105
  "Non Contact": 92.35,
106
  "Contact": 95.24,
107
+ "State": 96.7,
 
108
  "Relationship-Overall": 96.07,
109
  "Composition": 96.96,
110
  "Similarity": 91.67,
111
  "Inclusion": 97.83,
112
  "Comparison": 97.66,
113
+ "Compound-Overall": 94.2,
 
114
  "Imagination": 96.68,
115
  "Feature matching": 91.67,
 
116
  "Grammar-Overall": 89.04,
117
  "Pronoun Reference": 94.49,
118
  "Consistency": 90.74,
119
  "Negation": 81.92,
120
+ "Layout-Overall": 94.4,
 
121
  "2D": 96.32,
122
  "3D": 92.42,
 
123
  "Logical Reasoning": 82.34,
 
124
  "Text": 95.69
125
+ },
126
  {
127
  "model": "wan2.5-t2i-preview",
128
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
129
  "hf": "-",
130
  "open_source": false,
131
  "release_date": "2025-09",
132
+ "Overall": 78.86,
133
+ "Style": 93.8,
134
+ "World Knowledge": 93.04,
135
+ "Attribute-Overall": 83.97,
136
+ "Quantity": 79.86,
 
 
137
  "Expression": 75.64,
138
+ "Material": 91.04,
139
  "Size": 84.72,
140
+ "Shape": 75.62,
141
+ "Color": 97.5,
142
+ "Action-Overall": 76.33,
143
+ "Hand": 72.44,
144
+ "Full body": 76.09,
 
145
  "Animal": 81.62,
146
+ "Non Contact": 72.45,
147
+ "Contact": 75.0,
148
+ "State": 80.66,
149
+ "Relationship-Overall": 84.14,
150
+ "Composition": 83.78,
151
+ "Similarity": 75.56,
152
+ "Inclusion": 88.59,
153
+ "Comparison": 90.62,
 
 
154
  "Compound-Overall": 78.74,
155
+ "Imagination": 84.69,
156
+ "Feature matching": 72.66,
157
+ "Grammar-Overall": 72.45,
158
+ "Pronoun Reference": 83.09,
159
+ "Consistency": 68.52,
160
+ "Negation": 64.45,
161
+ "Layout-Overall": 76.12,
162
+ "2D": 77.94,
163
+ "3D": 74.24,
164
+ "Logical Reasoning": 63.99,
165
+ "Text": 65.98
 
 
 
 
166
  },
167
  {
168
  "model": "Echo-4o",
 
170
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
171
  "open_source": true,
172
  "release_date": "2025-8",
173
+ "Overall": 72.4,
174
+ "Style": 92.8,
 
175
  "World Knowledge": 87.66,
 
176
  "Attribute-Overall": 84.29,
177
  "Quantity": 72.92,
178
  "Expression": 77.56,
179
  "Material": 89.15,
180
  "Size": 88.19,
181
+ "Shape": 80.0,
182
  "Color": 99.17,
 
183
  "Action-Overall": 76.05,
184
  "Hand": 73.08,
185
  "Full body": 83.15,
186
  "Animal": 85.29,
187
+ "Non Contact": 75.0,
188
  "Contact": 65.48,
189
  "State": 75.47,
 
190
  "Relationship-Overall": 82.23,
191
  "Composition": 85.81,
192
+ "Similarity": 75.0,
193
  "Inclusion": 88.04,
194
  "Comparison": 75.78,
 
195
  "Compound-Overall": 77.96,
196
  "Imagination": 82.91,
197
  "Feature matching": 72.92,
198
+ "Grammar-Overall": 75.4,
 
199
  "Pronoun Reference": 80.15,
200
  "Consistency": 77.31,
201
  "Negation": 68.85,
 
202
  "Layout-Overall": 83.02,
203
  "2D": 84.19,
204
  "3D": 81.82,
 
205
  "Logical Reasoning": 56.82,
 
206
  "Text": 7.76
207
  },
208
  {
 
211
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
212
  "open_source": true,
213
  "release_date": "2024-09",
 
214
  "Overall": 33.91,
 
215
  "Style": 78.08,
 
216
  "World Knowledge": 55.54,
 
217
  "Attribute-Overall": 38.29,
218
  "Quantity": 27.78,
219
  "Expression": 30.13,
 
221
  "Size": 32.64,
222
  "Shape": 27.67,
223
  "Color": 71.67,
 
224
  "Action-Overall": 31.18,
225
  "Hand": 16.67,
226
  "Full body": 36.96,
 
228
  "Non Contact": 26.02,
229
  "Contact": 17.86,
230
  "State": 40.57,
 
231
  "Relationship-Overall": 36.68,
232
  "Composition": 43.58,
233
  "Similarity": 31.67,
234
  "Inclusion": 38.04,
235
  "Comparison": 25.78,
 
236
  "Compound-Overall": 21.65,
237
  "Imagination": 29.85,
238
  "Feature matching": 13.28,
 
239
  "Grammar-Overall": 41.31,
240
  "Pronoun Reference": 41.91,
241
  "Consistency": 38.89,
242
  "Negation": 42.69,
 
243
  "Layout-Overall": 22.43,
244
  "2D": 17.71,
245
  "3D": 27.27,
246
+ "Logical Reasoning": 13.9,
247
+ "Text": 0.0
 
 
248
  },
249
  {
250
  "model": "UniWorld-V1",
 
252
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
253
  "open_source": true,
254
  "release_date": "2025-06",
 
255
  "Overall": 15.21,
256
+ "Style": 49.4,
257
  "World Knowledge": 16.61,
 
258
  "Attribute-Overall": 15.06,
259
  "Quantity": 14.58,
260
  "Expression": 19.87,
261
  "Material": 8.02,
262
  "Size": 13.19,
263
+ "Shape": 5.0,
264
+ "Color": 37.5,
 
265
  "Action-Overall": 14.64,
266
  "Hand": 9.62,
267
  "Full body": 17.93,
 
269
  "Non Contact": 9.69,
270
  "Contact": 6.55,
271
  "State": 24.06,
272
+ "Relationship-Overall": 11.8,
 
273
  "Composition": 16.55,
274
  "Similarity": 6.67,
275
+ "Inclusion": 12.5,
276
  "Comparison": 7.03,
 
277
  "Compound-Overall": 4.38,
278
  "Imagination": 6.63,
279
  "Feature matching": 2.08,
 
280
  "Grammar-Overall": 27.81,
281
  "Pronoun Reference": 19.85,
282
+ "Consistency": 16.2,
283
  "Negation": 45.77,
 
284
  "Layout-Overall": 9.14,
285
  "2D": 8.09,
286
  "3D": 10.23,
 
287
  "Logical Reasoning": 2.95,
 
288
  "Text": 0.29
289
  },
290
  {
 
293
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
294
  "open_source": true,
295
  "release_date": "2025-09",
 
296
  "Overall": 58.35,
297
+ "Style": 80.9,
298
  "World Knowledge": 69.46,
 
299
  "Attribute-Overall": 75.64,
300
+ "Quantity": 62.5,
301
  "Expression": 71.79,
302
  "Material": 77.83,
303
  "Size": 78.47,
304
+ "Shape": 70.0,
305
  "Color": 96.67,
 
306
  "Action-Overall": 61.12,
307
  "Hand": 42.95,
308
  "Full body": 61.41,
 
310
  "Non Contact": 58.67,
311
  "Contact": 51.79,
312
  "State": 74.06,
 
313
  "Relationship-Overall": 67.13,
314
  "Composition": 68.58,
315
  "Similarity": 62.78,
316
  "Inclusion": 76.09,
317
  "Comparison": 57.03,
 
318
  "Compound-Overall": 56.06,
319
+ "Imagination": 59.69,
320
  "Feature matching": 52.34,
 
321
  "Grammar-Overall": 64.84,
322
+ "Pronoun Reference": 76.1,
323
  "Consistency": 70.37,
324
  "Negation": 48.46,
 
325
  "Layout-Overall": 69.22,
326
  "2D": 73.53,
327
  "3D": 64.77,
 
328
  "Logical Reasoning": 39.09,
329
+ "Text": 0.0
 
330
  },
331
  {
332
  "model": "MMaDA",
 
334
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
335
  "open_source": true,
336
  "release_date": "2025-05",
337
+ "Overall": 44.0,
338
+ "Style": 78.2,
 
339
  "World Knowledge": 52.06,
 
340
  "Attribute-Overall": 55.24,
341
  "Quantity": 52.78,
342
  "Expression": 33.97,
343
  "Material": 58.49,
344
  "Size": 61.11,
345
+ "Shape": 45.0,
346
  "Color": 86.67,
 
347
  "Action-Overall": 43.44,
348
  "Hand": 24.36,
349
  "Full body": 54.35,
 
351
  "Non Contact": 31.63,
352
  "Contact": 29.17,
353
  "State": 67.92,
 
354
  "Relationship-Overall": 56.22,
355
+ "Composition": 59.8,
356
  "Similarity": 52.22,
357
  "Inclusion": 60.87,
358
  "Comparison": 46.88,
 
359
  "Compound-Overall": 32.86,
360
  "Imagination": 39.29,
361
+ "Feature matching": 26.3,
 
362
  "Grammar-Overall": 58.56,
363
  "Pronoun Reference": 59.93,
364
+ "Consistency": 46.3,
365
  "Negation": 67.31,
 
366
  "Layout-Overall": 37.31,
367
  "2D": 38.97,
368
  "3D": 35.61,
 
369
  "Logical Reasoning": 26.14,
370
+ "Text": 0.0
 
371
  },
372
  {
373
  "model": "OmniGen2",
 
375
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
376
  "open_source": true,
377
  "release_date": "2025-06",
378
+ "Overall": 63.2,
379
+ "Style": 93.0,
 
380
  "World Knowledge": 86.39,
 
381
  "Attribute-Overall": 75.43,
382
  "Quantity": 67.36,
383
  "Expression": 69.87,
384
+ "Material": 78.3,
385
  "Size": 77.78,
386
  "Shape": 68.75,
387
  "Color": 93.33,
 
388
  "Action-Overall": 66.54,
389
+ "Hand": 64.1,
390
  "Full body": 69.57,
391
  "Animal": 74.26,
392
  "Non Contact": 61.73,
393
  "Contact": 55.95,
394
  "State": 73.58,
 
395
  "Relationship-Overall": 70.69,
396
  "Composition": 77.03,
397
  "Similarity": 66.67,
398
  "Inclusion": 71.74,
399
  "Comparison": 60.16,
 
400
  "Compound-Overall": 59.92,
401
  "Imagination": 66.33,
402
  "Feature matching": 53.39,
 
403
  "Grammar-Overall": 65.64,
404
  "Pronoun Reference": 71.69,
405
+ "Consistency": 71.3,
406
  "Negation": 54.62,
 
407
  "Layout-Overall": 69.96,
408
  "2D": 76.84,
409
  "3D": 62.88,
 
410
  "Logical Reasoning": 44.09,
 
411
  "Text": 0.29
412
  },
413
  {
 
416
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
417
  "open_source": true,
418
  "release_date": "2025-09",
419
+ "Overall": 56.77,
420
+ "Style": 94.9,
421
+ "World Knowledge": 87.34,
422
+ "Attribute-Overall": 64.32,
423
+ "Quantity": 62.5,
424
+ "Expression": 71.79,
425
+ "Material": 68.4,
426
+ "Size": 63.89,
427
+ "Shape": 36.88,
428
+ "Color": 86.67,
429
+ "Action-Overall": 57.13,
430
+ "Hand": 37.18,
431
+ "Full body": 69.02,
432
+ "Animal": 76.47,
433
+ "Non Contact": 57.14,
434
+ "Contact": 39.29,
435
+ "State": 63.21,
436
+ "Relationship-Overall": 61.8,
437
+ "Composition": 68.58,
438
+ "Similarity": 57.78,
439
+ "Inclusion": 60.33,
440
+ "Comparison": 53.91,
441
+ "Compound-Overall": 46.78,
442
+ "Imagination": 58.16,
443
+ "Feature matching": 35.16,
444
+ "Grammar-Overall": 60.83,
445
+ "Pronoun Reference": 66.91,
446
+ "Consistency": 62.5,
447
+ "Negation": 53.08,
448
+ "Layout-Overall": 60.26,
449
+ "2D": 63.24,
450
+ "3D": 57.2,
451
+ "Logical Reasoning": 34.32,
452
+ "Text": 0.0
 
 
 
 
 
 
 
 
 
 
453
  },
454
+ {
455
  "model": "X-Omni",
456
  "link": "https://arxiv.org/pdf/2507.22058",
457
  "hf": "https://huggingface.co/X-Omni/X-Omni-Zh",
458
  "open_source": true,
459
  "release_date": "2025-08",
 
460
  "Overall": 53.69,
461
  "Style": 70.07,
462
  "World Knowledge": 71.52,
 
463
  "Attribute-Overall": 63.85,
464
  "Quantity": 61.81,
465
  "Expression": 52.56,
466
  "Material": 63.51,
467
  "Size": 67.36,
468
+ "Shape": 57.5,
469
  "Color": 85.83,
 
470
  "Action-Overall": 58.37,
471
  "Hand": 48.72,
472
  "Full body": 68.48,
473
  "Animal": 63.97,
474
+ "Non Contact": 56.63,
475
  "Contact": 43.45,
476
  "State": 66.51,
 
477
  "Relationship-Overall": 59.77,
478
  "Composition": 60.14,
479
+ "Similarity": 60.0,
480
+ "Inclusion": 62.5,
481
  "Comparison": 54.69,
 
482
  "Compound-Overall": 41.75,
483
  "Imagination": 48.72,
484
  "Feature matching": 34.64,
 
485
  "Grammar-Overall": 56.28,
486
  "Pronoun Reference": 63.97,
487
+ "Consistency": 53.7,
488
  "Negation": 50.38,
 
489
  "Layout-Overall": 59.51,
490
  "2D": 66.91,
491
  "3D": 51.89,
 
492
  "Logical Reasoning": 34.77,
 
493
  "Text": 20.98
494
  },
495
  {
 
498
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
499
  "open_source": true,
500
  "release_date": "2025-05",
 
501
  "Overall": 65.69,
502
+ "Style": 92.3,
503
  "World Knowledge": 86.71,
 
504
  "Attribute-Overall": 75.21,
505
  "Quantity": 64.58,
506
  "Expression": 63.46,
507
  "Material": 83.49,
508
  "Size": 79.86,
509
  "Shape": 66.25,
510
+ "Color": 95.0,
 
511
  "Action-Overall": 65.78,
512
  "Hand": 61.54,
513
  "Full body": 63.59,
514
  "Animal": 75.74,
515
  "Non Contact": 65.31,
516
+ "Contact": 61.9,
517
  "State": 67.92,
 
518
  "Relationship-Overall": 75.38,
519
+ "Composition": 77.7,
520
  "Similarity": 67.78,
521
  "Inclusion": 82.07,
522
  "Comparison": 71.09,
 
523
  "Compound-Overall": 69.85,
524
  "Imagination": 79.59,
525
+ "Feature matching": 59.9,
 
526
  "Grammar-Overall": 69.52,
527
  "Pronoun Reference": 73.16,
528
+ "Consistency": 75.0,
529
  "Negation": 61.15,
 
530
  "Layout-Overall": 77.61,
531
  "2D": 82.72,
532
  "3D": 72.35,
 
533
  "Logical Reasoning": 37.95,
 
534
  "Text": 6.61
535
  },
536
  {
 
539
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
540
  "open_source": true,
541
  "release_date": "2025-05",
 
542
  "Overall": 50.65,
543
+ "Style": 83.3,
544
  "World Knowledge": 78.32,
 
545
  "Attribute-Overall": 62.18,
546
  "Quantity": 69.44,
547
  "Expression": 45.51,
548
  "Material": 55.66,
549
  "Size": 70.14,
550
+ "Shape": 55.0,
551
  "Color": 86.67,
 
552
  "Action-Overall": 53.71,
553
  "Hand": 44.23,
554
  "Full body": 57.61,
 
556
  "Non Contact": 53.06,
557
  "Contact": 47.62,
558
  "State": 61.32,
 
559
  "Relationship-Overall": 57.23,
560
  "Composition": 57.77,
561
  "Similarity": 52.78,
562
  "Inclusion": 63.04,
563
  "Comparison": 53.91,
 
564
  "Compound-Overall": 34.54,
565
  "Imagination": 38.01,
566
  "Feature matching": 30.99,
 
567
  "Grammar-Overall": 53.88,
568
  "Pronoun Reference": 62.13,
569
  "Consistency": 51.85,
570
  "Negation": 46.92,
571
+ "Layout-Overall": 59.7,
572
+ "2D": 63.6,
 
573
  "3D": 55.68,
 
574
  "Logical Reasoning": 23.64,
575
+ "Text": 0.0
 
576
  },
577
  {
578
  "model": "Hunyuan-Image-2.1",
 
580
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
581
  "open_source": true,
582
  "release_date": "2025-09",
 
583
  "Overall": 77.76,
584
+ "Style": 92.2,
585
  "World Knowledge": 90.51,
586
  "Attribute-Overall": 84.19,
587
+ "Quantity": 87.5,
588
  "Expression": 80.77,
589
  "Material": 82.55,
590
  "Size": 86.11,
591
+ "Shape": 75.0,
592
+ "Color": 97.5,
 
593
  "Action-Overall": 80.51,
594
  "Hand": 76.28,
595
  "Full body": 84.24,
 
597
  "Non Contact": 78.06,
598
  "Contact": 79.17,
599
  "State": 80.66,
 
600
  "Relationship-Overall": 82.74,
601
  "Composition": 80.74,
602
  "Similarity": 80.56,
603
+ "Inclusion": 87.5,
604
  "Comparison": 83.59,
 
605
  "Compound-Overall": 70.62,
606
  "Imagination": 71.68,
607
  "Feature matching": 69.53,
608
+ "Grammar-Overall": 61.5,
 
609
  "Pronoun Reference": 80.15,
610
  "Consistency": 67.13,
611
  "Negation": 37.31,
 
612
  "Layout-Overall": 85.45,
613
  "2D": 88.24,
614
  "3D": 82.58,
 
615
  "Logical Reasoning": 50.23,
616
+ "Text": 79.6
617
+ },
 
618
  {
619
+ "model": "BLIP3-o",
620
  "link": "https://arxiv.org/pdf/2505.09568",
621
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
622
  "open_source": true,
623
  "release_date": "2025-08",
 
624
  "Overall": 59.25,
625
+ "Style": 92.6,
626
  "World Knowledge": 81.17,
627
  "Attribute-Overall": 66.56,
628
  "Quantity": 57.64,
629
  "Expression": 65.38,
630
  "Material": 67.92,
631
  "Size": 77.08,
632
+ "Shape": 47.5,
633
  "Color": 89.17,
 
634
  "Action-Overall": 64.35,
635
  "Hand": 57.69,
636
  "Full body": 73.37,
 
638
  "Non Contact": 59.18,
639
  "Contact": 55.95,
640
  "State": 70.28,
 
641
  "Relationship-Overall": 65.36,
642
  "Composition": 69.26,
643
  "Similarity": 58.33,
644
  "Inclusion": 63.04,
645
  "Comparison": 69.53,
646
+ "Compound-Overall": 51.8,
 
647
  "Imagination": 61.99,
648
  "Feature matching": 41.41,
 
649
  "Grammar-Overall": 63.37,
650
  "Pronoun Reference": 70.22,
651
  "Consistency": 57.41,
652
  "Negation": 61.16,
 
653
  "Layout-Overall": 65.67,
654
  "2D": 69.12,
655
  "3D": 62.12,
 
656
  "Logical Reasoning": 41.59,
657
+ "Text": 0.0
 
658
  },
659
  {
660
+ "model": "BLIP3-o-Next",
661
  "link": "https://arxiv.org/pdf/2505.09568",
662
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
663
  "open_source": true,
664
  "release_date": "2025-08",
 
665
  "Overall": 44.48,
666
+ "Style": 74.6,
667
+ "World Knowledge": 50.0,
668
  "Attribute-Overall": 55.98,
669
  "Quantity": 44.44,
670
  "Expression": 57.69,
 
672
  "Size": 63.89,
673
  "Shape": 48.12,
674
  "Color": 68.33,
 
675
  "Action-Overall": 47.62,
676
  "Hand": 37.82,
677
  "Full body": 61.41,
678
  "Animal": 45.59,
679
  "Non Contact": 45.41,
680
+ "Contact": 36.9,
681
  "State": 54.72,
 
682
  "Relationship-Overall": 53.55,
683
  "Composition": 54.05,
684
  "Similarity": 48.33,
685
+ "Inclusion": 50.0,
686
  "Comparison": 64.84,
 
687
  "Compound-Overall": 26.55,
688
  "Imagination": 32.14,
689
  "Feature matching": 20.83,
 
690
  "Grammar-Overall": 54.14,
691
  "Pronoun Reference": 65.07,
692
  "Consistency": 49.54,
693
  "Negation": 46.54,
 
694
  "Layout-Overall": 54.85,
695
  "2D": 58.82,
696
  "3D": 50.76,
697
+ "Logical Reasoning": 27.5,
698
+ "Text": 0.0
 
 
699
  },
700
  {
701
  "model": "Janus-flow",
 
703
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
704
  "open_source": true,
705
  "release_date": "2024-11",
 
706
  "Overall": 20.93,
707
+ "Style": 58.5,
 
 
708
  "World Knowledge": 18.67,
 
709
  "Attribute-Overall": 19.23,
710
  "Quantity": 22.92,
711
+ "Expression": 10.9,
712
+ "Material": 21.7,
713
  "Size": 24.31,
714
  "Shape": 8.12,
715
+ "Color": 30.0,
 
716
  "Action-Overall": 22.05,
717
  "Hand": 4.49,
718
  "Full body": 31.52,
719
  "Animal": 22.06,
720
+ "Non Contact": 14.8,
721
  "Contact": 19.05,
722
  "State": 35.85,
 
723
  "Relationship-Overall": 19.54,
724
  "Composition": 23.65,
725
  "Similarity": 16.11,
726
  "Inclusion": 20.11,
727
  "Comparison": 14.06,
728
+ "Compound-Overall": 10.7,
 
729
  "Imagination": 19.13,
730
  "Feature matching": 2.08,
 
731
  "Grammar-Overall": 35.03,
732
  "Pronoun Reference": 32.72,
733
  "Consistency": 16.67,
734
  "Negation": 52.69,
 
735
  "Layout-Overall": 14.93,
736
  "2D": 12.13,
737
+ "3D": 17.8,
 
738
  "Logical Reasoning": 10.68,
739
+ "Text": 0.0
 
740
  },
741
  {
742
  "model": "CogView4",
 
744
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
745
  "open_source": true,
746
  "release_date": "2024-03",
 
747
  "Overall": 55.14,
748
+ "Style": 82.4,
749
  "World Knowledge": 84.18,
750
  "Attribute-Overall": 63.35,
751
  "Quantity": 68.75,
752
  "Expression": 44.87,
753
+ "Material": 56.6,
754
  "Size": 72.92,
755
  "Shape": 53.75,
756
  "Color": 94.17,
 
757
  "Action-Overall": 61.69,
758
  "Hand": 61.54,
759
+ "Full body": 66.3,
760
  "Animal": 64.71,
761
  "Non Contact": 52.04,
762
  "Contact": 54.76,
763
  "State": 70.28,
 
764
  "Relationship-Overall": 61.68,
765
  "Composition": 61.82,
766
  "Similarity": 62.22,
767
  "Inclusion": 63.59,
768
  "Comparison": 57.81,
 
769
  "Compound-Overall": 45.75,
770
  "Imagination": 51.02,
771
  "Feature matching": 40.36,
 
772
  "Grammar-Overall": 54.55,
773
  "Pronoun Reference": 67.65,
774
  "Consistency": 57.41,
775
  "Negation": 38.46,
776
+ "Layout-Overall": 65.3,
777
+ "2D": 75.0,
778
+ "3D": 55.3,
 
 
779
  "Logical Reasoning": 30.23,
780
+ "Text": 2.3
 
781
  },
782
  {
783
  "model": "Janus",
 
785
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
786
  "open_source": true,
787
  "release_date": "2024-10",
 
788
  "Overall": 30.98,
789
+ "Style": 78.1,
 
 
790
  "World Knowledge": 27.85,
 
791
  "Attribute-Overall": 30.88,
792
  "Quantity": 29.17,
793
  "Expression": 17.31,
794
  "Material": 35.85,
795
  "Size": 45.83,
796
  "Shape": 14.37,
797
+ "Color": 45.83,
 
798
  "Action-Overall": 31.37,
799
+ "Hand": 14.1,
800
  "Full body": 38.59,
801
  "Animal": 42.65,
802
  "Non Contact": 24.49,
803
  "Contact": 23.21,
804
+ "State": 43.4,
 
805
  "Relationship-Overall": 30.58,
806
  "Composition": 32.43,
807
  "Similarity": 32.22,
808
  "Inclusion": 27.72,
809
  "Comparison": 28.12,
 
810
  "Compound-Overall": 17.53,
811
  "Imagination": 25.26,
812
  "Feature matching": 9.64,
813
+ "Grammar-Overall": 48.4,
 
814
  "Pronoun Reference": 48.53,
815
  "Consistency": 33.33,
816
  "Negation": 60.77,
 
817
  "Layout-Overall": 31.72,
818
  "2D": 31.25,
819
+ "3D": 32.2,
 
820
  "Logical Reasoning": 13.41,
821
+ "Text": 0.0
 
822
  },
823
  {
824
  "model": "Janus-Pro",
 
826
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
827
  "open_source": true,
828
  "release_date": "2025-01",
 
829
  "Overall": 30.83,
830
+ "Style": 75.6,
831
  "World Knowledge": 39.08,
832
  "Attribute-Overall": 33.12,
833
  "Quantity": 24.31,
 
835
  "Material": 43.87,
836
  "Size": 45.14,
837
  "Shape": 18.75,
838
+ "Color": 47.5,
 
839
  "Action-Overall": 26.33,
840
  "Hand": 13.46,
841
  "Full body": 26.09,
 
843
  "Non Contact": 22.45,
844
  "Contact": 20.83,
845
  "State": 38.68,
 
846
  "Relationship-Overall": 32.74,
847
  "Composition": 38.85,
848
  "Similarity": 35.56,
849
  "Inclusion": 26.09,
850
  "Comparison": 24.22,
 
851
  "Compound-Overall": 24.48,
852
  "Imagination": 33.42,
853
  "Feature matching": 15.36,
 
854
  "Grammar-Overall": 36.63,
855
  "Pronoun Reference": 36.76,
856
  "Consistency": 31.94,
857
  "Negation": 40.38,
 
858
  "Layout-Overall": 30.04,
859
  "2D": 29.78,
860
+ "3D": 30.3,
 
861
  "Logical Reasoning": 10.23,
862
+ "Text": 0.0
 
863
  },
864
+ {
865
  "model": "Kolors",
866
  "link": "https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf",
867
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
868
  "open_source": true,
869
  "release_date": "2024-7",
870
+ "Overall": 58.8,
871
+ "Style": 85.2,
 
872
  "World Knowledge": 86.23,
 
873
  "Attribute-Overall": 69.34,
874
  "Quantity": 70.14,
875
  "Expression": 51.92,
 
877
  "Size": 77.78,
878
  "Shape": 56.25,
879
  "Color": 91.67,
 
880
  "Action-Overall": 65.02,
881
  "Hand": 58.33,
882
  "Full body": 59.24,
883
  "Animal": 71.32,
884
  "Non Contact": 63.78,
885
+ "Contact": 57.74,
886
  "State": 77.83,
 
887
  "Relationship-Overall": 67.13,
888
  "Composition": 71.96,
889
  "Similarity": 69.44,
890
  "Inclusion": 67.39,
891
  "Comparison": 52.34,
892
+ "Compound-Overall": 55.03,
893
+ "Imagination": 64.8,
 
894
  "Feature matching": 45.05,
 
895
  "Grammar-Overall": 56.68,
896
  "Pronoun Reference": 67.28,
897
  "Consistency": 59.26,
898
  "Negation": 43.46,
 
899
  "Layout-Overall": 62.31,
900
  "2D": 58.82,
901
  "3D": 65.91,
 
902
  "Logical Reasoning": 36.14,
 
903
  "Text": 4.89
904
  },
905
  {
 
908
  "hf": "-",
909
  "open_source": false,
910
  "release_date": "2025-09",
 
911
  "Overall": 87.31,
912
+ "Style": 99.0,
 
 
913
  "World Knowledge": 94.94,
 
914
  "Attribute-Overall": 90.06,
915
  "Quantity": 86.81,
916
+ "Expression": 85.9,
917
  "Material": 97.64,
918
  "Size": 86.81,
919
  "Shape": 83.12,
920
  "Color": 99.17,
 
921
  "Action-Overall": 87.55,
922
  "Hand": 82.69,
923
  "Full body": 90.22,
 
925
  "Non Contact": 84.69,
926
  "Contact": 82.74,
927
  "State": 92.45,
 
928
  "Relationship-Overall": 88.58,
929
  "Composition": 85.14,
930
  "Similarity": 84.44,
931
  "Inclusion": 95.65,
932
  "Comparison": 92.19,
 
933
  "Compound-Overall": 81.57,
934
+ "Imagination": 85.2,
935
  "Feature matching": 77.86,
 
936
  "Grammar-Overall": 78.48,
937
  "Pronoun Reference": 89.71,
938
+ "Consistency": 75.0,
939
  "Negation": 69.62,
940
+ "Layout-Overall": 90.3,
 
941
  "2D": 90.81,
942
  "3D": 89.77,
 
943
  "Logical Reasoning": 68.64,
 
944
  "Text": 93.97
945
+ },
946
+ {
947
  "model": "Imagen-4.0-generate-preview-06-06",
948
  "link": "https://deepmind.google/models/imagen/",
949
  "hf": "-",
950
  "open_source": false,
951
  "release_date": "2025-01",
 
952
  "Overall": 79.52,
953
+ "Style": 97.5,
 
 
954
  "World Knowledge": 96.84,
 
955
  "Attribute-Overall": 86.22,
956
  "Quantity": 83.33,
957
  "Expression": 77.56,
958
  "Material": 92.92,
959
  "Size": 93.75,
960
+ "Shape": 72.5,
961
  "Color": 98.33,
962
+ "Action-Overall": 90.4,
963
+ "Hand": 89.1,
 
964
  "Full body": 89.67,
965
  "Animal": 93.38,
966
  "Non Contact": 86.73,
967
  "Contact": 90.48,
968
+ "State": 93.4,
 
969
  "Relationship-Overall": 90.74,
970
  "Composition": 91.55,
971
  "Similarity": 83.33,
972
  "Inclusion": 94.57,
973
  "Comparison": 93.75,
974
+ "Compound-Overall": 85.7,
975
+ "Imagination": 92.6,
 
976
  "Feature matching": 78.65,
 
977
  "Grammar-Overall": 82.89,
978
  "Pronoun Reference": 92.65,
979
  "Consistency": 82.87,
980
  "Negation": 72.69,
 
981
  "Layout-Overall": 89.18,
982
  "2D": 91.54,
983
  "3D": 86.74,
 
984
  "Logical Reasoning": 73.18,
 
985
  "Text": 2.59
986
  },
987
  {
 
990
  "hf": "-",
991
  "open_source": false,
992
  "release_date": "2024-11",
 
993
  "Overall": 54.93,
 
994
  "Style": 64.75,
 
995
  "World Knowledge": 71.05,
 
996
  "Attribute-Overall": 60.43,
997
  "Quantity": 54.29,
998
  "Expression": 46.05,
999
+ "Material": 72.6,
1000
  "Size": 57.64,
1001
  "Shape": 50.62,
1002
+ "Color": 81.9,
 
1003
  "Action-Overall": 60.42,
1004
  "Hand": 52.63,
1005
  "Full body": 65.22,
1006
+ "Animal": 75.0,
1007
  "Non Contact": 51.56,
1008
  "Contact": 54.37,
1009
  "State": 65.09,
1010
+ "Relationship-Overall": 65.9,
 
1011
  "Composition": 66.89,
1012
  "Similarity": 51.11,
1013
  "Inclusion": 74.43,
1014
  "Comparison": 72.66,
1015
+ "Compound-Overall": 61.0,
 
1016
  "Imagination": 68.22,
1017
  "Feature matching": 53.49,
 
1018
  "Grammar-Overall": 58.38,
1019
  "Pronoun Reference": 55.38,
1020
  "Consistency": 55.09,
1021
  "Negation": 64.29,
 
1022
  "Layout-Overall": 64.71,
1023
  "2D": 59.93,
1024
  "3D": 69.62,
 
1025
  "Logical Reasoning": 42.03,
 
1026
  "Text": 0.59
1027
  },
1028
+ {
1029
  "model": "HiDream_v2L",
1030
  "link": "https://hidreamai.com/doc/txt2img/request",
1031
  "hf": "-",
1032
  "open_source": false,
1033
  "release_date": "2025-07",
1034
+ "Overall": 59.73,
1035
+ "Style": 89.55,
1036
+ "World Knowledge": 91.36,
 
 
 
 
1037
  "Attribute-Overall": 67.87,
1038
  "Quantity": 71.43,
1039
+ "Expression": 43.59,
1040
+ "Material": 68.14,
1041
+ "Size": 72.86,
1042
+ "Shape": 63.87,
1043
  "Color": 94.17,
1044
+ "Action-Overall": 64.52,
1045
+ "Hand": 47.44,
1046
+ "Full body": 66.85,
1047
+ "Animal": 70.45,
1048
+ "Non Contact": 67.71,
1049
+ "Contact": 58.33,
1050
+ "State": 73.56,
1051
+ "Relationship-Overall": 72.15,
1052
+ "Composition": 80.56,
1053
+ "Similarity": 63.89,
1054
+ "Inclusion": 76.67,
1055
+ "Comparison": 58.06,
1056
+ "Compound-Overall": 51.33,
1057
+ "Imagination": 59.47,
1058
+ "Feature matching": 43.01,
1059
+ "Grammar-Overall": 62.02,
1060
+ "Pronoun Reference": 72.69,
 
 
 
 
1061
  "Consistency": 68.75,
1062
+ "Negation": 45.7,
1063
+ "Layout-Overall": 65.53,
1064
+ "2D": 64.77,
1065
+ "3D": 66.29,
1066
+ "Logical Reasoning": 31.54,
1067
+ "Text": 1.45
1068
+ },
1069
+ {
 
 
 
1070
  "model": "FLUX-kontext-pro",
1071
  "link": "https://bfl.ai/models/flux-kontext",
1072
  "hf": "-",
1073
  "open_source": false,
1074
  "release_date": "2025-05",
 
1075
  "Overall": 1.27,
1076
  "Style": 6.93,
1077
+ "World Knowledge": 0.0,
 
1078
  "Attribute-Overall": 0.11,
1079
  "Quantity": 0.69,
1080
+ "Expression": 0.0,
1081
+ "Material": 0.0,
1082
+ "Size": 0.0,
1083
+ "Shape": 0.0,
1084
+ "Color": 0.0,
 
1085
  "Action-Overall": 0.29,
1086
+ "Hand": 0.0,
1087
+ "Full body": 0.0,
1088
+ "Animal": 0.0,
1089
+ "Non Contact": 0.0,
1090
+ "Contact": 0.0,
1091
  "State": 1.47,
 
1092
  "Relationship-Overall": 0.13,
1093
  "Composition": 0.34,
1094
+ "Similarity": 0.0,
1095
+ "Inclusion": 0.0,
1096
+ "Comparison": 0.0,
1097
+ "Compound-Overall": 0.0,
1098
+ "Imagination": 0.0,
1099
+ "Feature matching": 0.0,
 
 
1100
  "Grammar-Overall": 5.24,
1101
+ "Pronoun Reference": 0.0,
1102
  "Consistency": 2.31,
1103
  "Negation": 13.28,
1104
+ "Layout-Overall": 0.0,
1105
+ "2D": 0.0,
1106
+ "3D": 0.0,
1107
+ "Logical Reasoning": 0.0,
1108
+ "Text": 0.0
1109
+ },
1110
+ {
 
 
 
1111
  "model": "FLUX-pro-1.1-Ultra",
1112
  "link": "https://bfl.ai/",
1113
  "hf": "-",
1114
  "open_source": false,
1115
  "release_date": "2024-11",
 
1116
  "Overall": 1.31,
 
1117
  "Style": 7.63,
 
1118
  "World Knowledge": 0.32,
 
1119
  "Attribute-Overall": 0.32,
1120
+ "Quantity": 0.0,
1121
+ "Expression": 0.0,
1122
  "Material": 0.94,
1123
+ "Size": 0.0,
1124
+ "Shape": 0.0,
1125
  "Color": 0.83,
1126
+ "Action-Overall": 0.0,
1127
+ "Hand": 0.0,
1128
+ "Full body": 0.0,
1129
+ "Animal": 0.0,
1130
+ "Non Contact": 0.0,
1131
+ "Contact": 0.0,
1132
+ "State": 0.0,
 
 
1133
  "Relationship-Overall": 0.26,
1134
  "Composition": 0.34,
1135
  "Similarity": 0.57,
1136
+ "Inclusion": 0.0,
1137
+ "Comparison": 0.0,
1138
+ "Compound-Overall": 0.0,
1139
+ "Imagination": 0.0,
1140
+ "Feature matching": 0.0,
1141
+ "Grammar-Overall": 4.3,
 
 
1142
  "Pronoun Reference": 0.37,
1143
  "Consistency": 1.39,
1144
  "Negation": 10.94,
 
1145
  "Layout-Overall": 0.19,
1146
  "2D": 0.37,
1147
+ "3D": 0.0,
1148
+ "Logical Reasoning": 0.0,
1149
+ "Text": 0.0
1150
+ },
1151
+ {
 
 
1152
  "model": "DALL-E-3",
1153
  "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/",
1154
  "hf": "-",
1155
  "open_source": false,
1156
  "release_date": "2023-09",
 
1157
  "Overall": 67.93,
1158
+ "Style": 95.9,
 
 
1159
  "World Knowledge": 93.04,
 
1160
  "Attribute-Overall": 78.42,
1161
  "Quantity": 60.42,
1162
  "Expression": 68.59,
1163
  "Material": 91.04,
1164
  "Size": 90.28,
1165
+ "Shape": 65.0,
1166
  "Color": 94.17,
 
1167
  "Action-Overall": 72.24,
1168
  "Hand": 69.87,
1169
  "Full body": 77.17,
1170
  "Animal": 82.35,
1171
  "Non Contact": 66.33,
1172
+ "Contact": 61.9,
1173
  "State": 76.89,
 
1174
  "Relationship-Overall": 79.95,
1175
  "Composition": 81.76,
1176
  "Similarity": 77.78,
1177
+ "Inclusion": 87.5,
1178
  "Comparison": 67.97,
 
1179
  "Compound-Overall": 72.94,
1180
  "Imagination": 82.14,
1181
  "Feature matching": 63.54,
 
1182
  "Grammar-Overall": 71.52,
1183
  "Pronoun Reference": 79.78,
1184
  "Consistency": 76.39,
1185
  "Negation": 58.85,
1186
+ "Layout-Overall": 62.5,
 
1187
  "2D": 54.41,
1188
  "3D": 70.83,
 
1189
  "Logical Reasoning": 51.59,
 
1190
  "Text": 1.15
1191
+ },
1192
+ {
1193
  "model": "Qwen-Image",
1194
  "link": "https://arxiv.org/pdf/2508.02324",
1195
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
1196
  "open_source": true,
1197
  "release_date": "2025-08",
 
1198
  "Overall": 81.04,
1199
+ "Style": 95.5,
 
 
1200
  "World Knowledge": 92.41,
 
1201
  "Attribute-Overall": 91.88,
1202
  "Quantity": 88.89,
1203
  "Expression": 91.03,
 
1205
  "Size": 90.28,
1206
  "Shape": 86.25,
1207
  "Color": 98.33,
 
1208
  "Action-Overall": 85.74,
1209
  "Hand": 83.33,
1210
+ "Full body": 87.5,
1211
  "Animal": 89.71,
1212
  "Non Contact": 81.63,
1213
  "Contact": 82.14,
1214
  "State": 90.09,
 
1215
  "Relationship-Overall": 82.99,
1216
  "Composition": 85.47,
1217
  "Similarity": 73.33,
1218
  "Inclusion": 90.76,
1219
  "Comparison": 79.69,
 
1220
  "Compound-Overall": 76.16,
1221
+ "Imagination": 80.1,
1222
  "Feature matching": 72.14,
 
1223
  "Grammar-Overall": 62.83,
1224
  "Pronoun Reference": 83.46,
1225
  "Consistency": 74.07,
1226
  "Negation": 31.92,
 
1227
  "Layout-Overall": 82.65,
1228
  "2D": 84.93,
1229
+ "3D": 80.3,
 
1230
  "Logical Reasoning": 57.73,
 
1231
  "Text": 82.47
1232
+ },
1233
+ {
1234
  "model": "wan2.2-t2i-plus",
1235
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
1236
  "hf": "-",
1237
  "open_source": false,
1238
  "release_date": "2025-07",
 
1239
  "Overall": 66.96,
1240
  "Style": 91.06,
1241
  "World Knowledge": 84.39,
 
1242
  "Attribute-Overall": 73.93,
1243
+ "Quantity": 75.0,
1244
  "Expression": 67.31,
1245
  "Material": 74.06,
1246
  "Size": 74.31,
1247
  "Shape": 66.25,
1248
  "Color": 90.83,
 
1249
  "Action-Overall": 72.52,
1250
  "Hand": 69.23,
1251
+ "Full body": 80.0,
1252
  "Animal": 84.56,
1253
  "Non Contact": 65.31,
1254
+ "Contact": 61.9,
1255
  "State": 75.94,
 
1256
  "Relationship-Overall": 76.78,
1257
  "Composition": 71.28,
1258
  "Similarity": 72.78,
1259
  "Inclusion": 85.87,
1260
  "Comparison": 82.03,
 
1261
  "Compound-Overall": 64.77,
1262
  "Imagination": 74.23,
1263
+ "Feature matching": 55.0,
 
1264
  "Grammar-Overall": 70.59,
1265
  "Pronoun Reference": 77.21,
1266
  "Consistency": 63.43,
1267
  "Negation": 69.62,
 
1268
  "Layout-Overall": 71.83,
1269
  "2D": 73.16,
1270
  "3D": 70.45,
 
1271
  "Logical Reasoning": 51.82,
 
1272
  "Text": 11.92
1273
+ },
1274
+ {
1275
  "model": "FLUX.1-dev",
1276
  "link": "https://bfl.ai/blog/24-08-01-bfl",
1277
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1278
  "open_source": true,
1279
  "release_date": "2024-08",
 
1280
  "Overall": 2.74,
1281
+ "Style": 10.5,
1282
  "World Knowledge": 0.63,
 
1283
  "Attribute-Overall": 0.43,
1284
+ "Quantity": 0.0,
1285
  "Expression": 1.92,
1286
  "Material": 0.47,
1287
+ "Size": 0.0,
1288
+ "Shape": 0.0,
1289
+ "Color": 0.0,
 
1290
  "Action-Overall": 0.95,
1291
  "Hand": 1.92,
1292
  "Full body": 0.54,
1293
  "Animal": 0.74,
1294
+ "Non Contact": 0.0,
1295
+ "Contact": 0.0,
1296
  "State": 2.36,
 
1297
  "Relationship-Overall": 0.13,
1298
  "Composition": 0.34,
1299
+ "Similarity": 0.0,
1300
+ "Inclusion": 0.0,
1301
+ "Comparison": 0.0,
 
1302
  "Compound-Overall": 0.13,
1303
  "Imagination": 0.26,
1304
+ "Feature matching": 0.0,
 
1305
  "Grammar-Overall": 7.75,
1306
  "Pronoun Reference": 0.74,
1307
  "Consistency": 1.39,
1308
  "Negation": 20.38,
 
1309
  "Layout-Overall": 0.19,
1310
+ "2D": 0.0,
1311
  "3D": 0.38,
1312
+ "Logical Reasoning": 0.0,
1313
+ "Text": 0.0
 
 
1314
  },
1315
+ {
1316
  "model": "Nano Banana",
1317
  "link": "https://ainanobanana.io/",
1318
  "hf": "-",
1319
  "open_source": false,
1320
  "release_date": "2025-08",
1321
+ "Overall": 80.45,
1322
+ "Style": 98.95,
1323
+ "World Knowledge": 96.32,
1324
+ "Attribute-Overall": 88.31,
1325
+ "Quantity": 83.09,
1326
+ "Expression": 82.78,
1327
+ "Material": 91.13,
 
 
 
 
1328
  "Size": 95.74,
1329
+ "Shape": 80.13,
1330
  "Color": 98.33,
1331
+ "Action-Overall": 86.03,
1332
+ "Hand": 83.33,
1333
+ "Full body": 89.14,
1334
+ "Animal": 89.71,
1335
+ "Non Contact": 78.87,
1336
+ "Contact": 82.63,
1337
+ "State": 92.61,
1338
+ "Relationship-Overall": 90.87,
1339
+ "Composition": 90.94,
1340
+ "Similarity": 83.33,
1341
+ "Inclusion": 94.54,
1342
+ "Comparison": 96.09,
1343
+ "Compound-Overall": 86.09,
1344
+ "Imagination": 88.53,
1345
+ "Feature matching": 83.68,
1346
+ "Grammar-Overall": 83.9,
1347
+ "Pronoun Reference": 89.18,
1348
+ "Consistency": 85.17,
1349
+ "Negation": 77.34,
1350
+ "Layout-Overall": 89.75,
1351
+ "2D": 92.19,
1352
+ "3D": 87.21,
1353
+ "Logical Reasoning": 77.26,
1354
+ "Text": 7.06
1355
+ },
1356
+ {
 
 
 
 
 
 
 
1357
  "model": "Hunyuan-DiT",
1358
  "link": "https://arxiv.org/pdf/2405.08748",
1359
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1360
  "open_source": true,
1361
  "release_date": "2024-05",
 
1362
  "Overall": 53.36,
1363
+ "Style": 92.5,
 
 
1364
  "World Knowledge": 84.97,
 
1365
  "Attribute-Overall": 62.93,
1366
  "Quantity": 63.19,
1367
  "Expression": 46.15,
1368
  "Material": 72.17,
1369
  "Size": 63.89,
1370
  "Shape": 49.38,
1371
+ "Color": 85.0,
 
1372
  "Action-Overall": 57.22,
1373
  "Hand": 45.51,
1374
  "Full body": 67.93,
 
1376
  "Non Contact": 48.47,
1377
  "Contact": 47.02,
1378
  "State": 69.81,
 
1379
  "Relationship-Overall": 59.39,
1380
  "Composition": 65.88,
1381
  "Similarity": 64.44,
1382
  "Inclusion": 56.52,
1383
  "Comparison": 41.41,
 
1384
  "Compound-Overall": 44.59,
1385
  "Imagination": 52.04,
1386
  "Feature matching": 36.98,
 
1387
  "Grammar-Overall": 54.68,
1388
  "Pronoun Reference": 59.93,
1389
  "Consistency": 62.04,
1390
  "Negation": 43.08,
 
1391
  "Layout-Overall": 47.76,
1392
  "2D": 39.71,
1393
  "3D": 56.06,
 
1394
  "Logical Reasoning": 29.55,
1395
+ "Text": 0.0
1396
+ },
1397
+ {
 
1398
  "model": "Recraft",
1399
  "link": "https://www.recraft.ai/docs#generate-image",
1400
  "hf": "-",
1401
  "open_source": false,
1402
  "release_date": "2024-12",
 
1403
  "Overall": 57.67,
1404
+ "Style": 87.7,
1405
  "World Knowledge": 90.03,
 
1406
  "Attribute-Overall": 69.34,
1407
  "Quantity": 66.67,
1408
  "Expression": 59.62,
 
1410
  "Size": 73.61,
1411
  "Shape": 61.25,
1412
  "Color": 95.83,
 
1413
  "Action-Overall": 63.88,
1414
  "Hand": 50.64,
1415
  "Full body": 72.28,
 
1417
  "Non Contact": 63.78,
1418
  "Contact": 45.24,
1419
  "State": 72.17,
 
1420
  "Relationship-Overall": 64.47,
1421
  "Composition": 65.54,
1422
  "Similarity": 58.89,
1423
  "Inclusion": 65.22,
1424
  "Comparison": 68.75,
 
1425
  "Compound-Overall": 43.94,
1426
  "Imagination": 45.92,
1427
  "Feature matching": 41.93,
 
1428
  "Grammar-Overall": 60.56,
1429
  "Pronoun Reference": 62.87,
1430
  "Consistency": 59.26,
1431
  "Negation": 59.23,
1432
+ "Layout-Overall": 58.4,
 
1433
  "2D": 55.15,
1434
  "3D": 61.74,
 
1435
  "Logical Reasoning": 34.09,
 
1436
  "Text": 4.31
1437
+ },
1438
+ {
1439
  "model": "Imagen-3.0-generate-002",
1440
  "link": "https://arxiv.org/pdf/2408.07009",
1441
  "hf": "-",
1442
  "open_source": false,
1443
  "release_date": "2025-02",
1444
+ "Overall": 1.36,
1445
+ "Style": 8.3,
1446
+ "World Knowledge": 0.0,
1447
+ "Attribute-Overall": 0.0,
1448
+ "Quantity": 0.0,
1449
+ "Expression": 0.0,
1450
+ "Material": 0.0,
1451
+ "Size": 0.0,
1452
+ "Shape": 0.0,
1453
+ "Color": 0.0,
 
 
 
 
 
1454
  "Action-Overall": 0.38,
1455
+ "Hand": 0.0,
1456
  "Full body": 0.54,
1457
+ "Animal": 0.0,
1458
+ "Non Contact": 0.0,
1459
+ "Contact": 0.0,
1460
  "State": 1.42,
 
1461
  "Relationship-Overall": 0.13,
1462
  "Composition": 0.34,
1463
+ "Similarity": 0.0,
1464
+ "Inclusion": 0.0,
1465
+ "Comparison": 0.0,
1466
+ "Compound-Overall": 0.0,
1467
+ "Imagination": 0.0,
1468
+ "Feature matching": 0.0,
 
 
1469
  "Grammar-Overall": 4.81,
1470
+ "Pronoun Reference": 0.0,
1471
  "Consistency": 0.46,
1472
  "Negation": 13.46,
1473
+ "Layout-Overall": 0.0,
1474
+ "2D": 0.0,
1475
+ "3D": 0.0,
1476
+ "Logical Reasoning": 0.0,
1477
+ "Text": 0.0
1478
+ },
1479
+ {
 
 
 
1480
  "model": "Imagen-4.0-Fast-preview-06-06",
1481
  "link": "https://deepmind.google/models/imagen/",
1482
  "hf": "-",
1483
  "open_source": false,
1484
  "release_date": "2025-06",
1485
+ "Overall": 71.6,
1486
+ "Style": 93.3,
1487
+ "World Knowledge": 91.3,
 
 
 
 
1488
  "Attribute-Overall": 80.98,
1489
  "Quantity": 76.39,
1490
  "Expression": 66.03,
 
1492
  "Size": 88.19,
1493
  "Shape": 78.75,
1494
  "Color": 95.83,
 
1495
  "Action-Overall": 79.28,
1496
  "Hand": 74.36,
1497
  "Full body": 79.35,
1498
  "Animal": 83.82,
1499
  "Non Contact": 73.47,
1500
+ "Contact": 75.6,
1501
  "State": 88.21,
 
1502
  "Relationship-Overall": 82.49,
1503
  "Composition": 82.09,
1504
  "Similarity": 78.33,
1505
  "Inclusion": 88.04,
1506
  "Comparison": 81.25,
 
1507
  "Compound-Overall": 73.97,
1508
  "Imagination": 83.67,
1509
  "Feature matching": 64.06,
 
1510
  "Grammar-Overall": 77.41,
1511
  "Pronoun Reference": 83.82,
1512
  "Consistency": 78.24,
1513
+ "Negation": 70.0,
 
1514
  "Layout-Overall": 78.73,
1515
  "2D": 80.51,
1516
  "3D": 76.89,
 
1517
  "Logical Reasoning": 54.77,
 
1518
  "Text": 3.74
1519
  },
1520
+ {
1521
  "model": "FLUX-kontext-max",
1522
  "link": "https://bfl.ai/models/flux-kontext",
1523
  "hf": "-",
1524
  "open_source": false,
1525
  "release_date": "2025-05",
 
1526
  "Overall": 71.85,
1527
  "Style": 96.38,
1528
  "World Knowledge": 92.83,
 
1529
  "Attribute-Overall": 76.41,
1530
  "Quantity": 65.97,
1531
  "Expression": 69.44,
 
1533
  "Size": 84.72,
1534
  "Shape": 66.67,
1535
  "Color": 93.33,
 
1536
  "Action-Overall": 78.59,
1537
  "Hand": 76.32,
1538
  "Full body": 83.15,
1539
  "Animal": 83.33,
1540
+ "Non Contact": 69.9,
1541
  "Contact": 73.17,
1542
  "State": 85.78,
 
1543
  "Relationship-Overall": 83.97,
1544
  "Composition": 85.14,
1545
  "Similarity": 74.43,
1546
  "Inclusion": 91.67,
1547
  "Comparison": 83.59,
 
1548
  "Compound-Overall": 75.13,
1549
  "Imagination": 82.65,
1550
  "Feature matching": 67.12,
 
1551
  "Grammar-Overall": 75.68,
1552
  "Pronoun Reference": 79.85,
1553
  "Consistency": 75.46,
1554
  "Negation": 71.48,
 
1555
  "Layout-Overall": 81.34,
1556
  "2D": 81.62,
1557
  "3D": 81.06,
 
1558
  "Logical Reasoning": 56.48,
 
1559
  "Text": 1.72
1560
+ },
1561
+ {
1562
  "model": "Seedream-3.0",
1563
  "link": "https://www.byteplus.com/en/product/Seedream",
1564
  "hf": "-",
1565
  "open_source": false,
1566
  "release_date": "2025-06",
 
1567
  "Overall": 81.68,
1568
+ "Style": 97.5,
 
 
1569
  "World Knowledge": 93.99,
 
1570
  "Attribute-Overall": 88.03,
1571
  "Quantity": 84.03,
1572
  "Expression": 82.69,
1573
  "Material": 94.34,
1574
  "Size": 89.58,
1575
+ "Shape": 80.0,
1576
+ "Color": 97.5,
 
1577
  "Action-Overall": 86.98,
1578
  "Hand": 85.26,
1579
  "Full body": 90.76,
1580
  "Animal": 89.71,
1581
+ "Non Contact": 85.2,
1582
  "Contact": 80.36,
1583
  "State": 90.09,
 
1584
  "Relationship-Overall": 84.39,
1585
  "Composition": 86.82,
1586
  "Similarity": 74.44,
1587
  "Inclusion": 90.22,
1588
  "Comparison": 84.38,
 
1589
  "Compound-Overall": 76.68,
1590
  "Imagination": 82.14,
1591
  "Feature matching": 71.09,
 
1592
  "Grammar-Overall": 67.25,
1593
  "Pronoun Reference": 84.19,
1594
  "Consistency": 79.17,
1595
  "Negation": 39.62,
 
1596
  "Layout-Overall": 84.14,
1597
  "2D": 89.34,
1598
  "3D": 78.79,
 
1599
  "Logical Reasoning": 59.09,
 
1600
  "Text": 78.74
1601
+ },
1602
+ {
1603
  "model": "Imagen-4.0-Ultra-preview-06-06",
1604
  "link": "https://deepmind.google/models/imagen/",
1605
  "hf": "-",
1606
  "open_source": false,
1607
  "release_date": "2025-06",
1608
+ "Overall": 83.08,
1609
+ "Style": 99.2,
1610
+ "World Knowledge": 97.63,
1611
+ "Attribute-Overall": 91.13,
1612
+ "Quantity": 89.58,
1613
+ "Expression": 80.13,
1614
+ "Material": 93.4,
1615
+ "Size": 94.44,
1616
+ "Shape": 90.62,
 
 
 
 
1617
  "Color": 100.0,
1618
+ "Action-Overall": 93.54,
 
1619
  "Hand": 94.87,
1620
+ "Full body": 91.85,
1621
+ "Animal": 96.32,
1622
+ "Non Contact": 88.78,
1623
+ "Contact": 93.45,
1624
+ "State": 96.7,
1625
+ "Relationship-Overall": 92.89,
1626
+ "Composition": 91.89,
 
1627
  "Similarity": 87.22,
1628
+ "Inclusion": 98.37,
1629
+ "Comparison": 95.31,
 
1630
  "Compound-Overall": 89.95,
1631
+ "Imagination": 94.9,
1632
+ "Feature matching": 84.9,
1633
+ "Grammar-Overall": 88.64,
1634
+ "Pronoun Reference": 94.85,
1635
+ "Consistency": 87.96,
1636
+ "Negation": 82.69,
1637
+ "Layout-Overall": 91.04,
1638
+ "2D": 92.65,
1639
+ "3D": 89.39,
1640
+ "Logical Reasoning": 79.55,
1641
+ "Text": 7.18
 
 
 
 
1642
  },
1643
  {
1644
  "model": "GPT-4o",
 
1646
  "hf": "-",
1647
  "open_source": false,
1648
  "release_date": "2025-03",
 
1649
  "Overall": 91.02,
 
1650
  "Style": 99.39,
 
1651
  "World Knowledge": 98.72,
 
1652
  "Attribute-Overall": 94.99,
1653
  "Quantity": 93.62,
1654
  "Expression": 94.59,
 
1656
  "Size": 93.06,
1657
  "Shape": 92.95,
1658
  "Color": 100.0,
 
1659
  "Action-Overall": 92.34,
1660
  "Hand": 94.08,
1661
  "Full body": 97.28,
 
1663
  "Non Contact": 90.31,
1664
  "Contact": 88.34,
1665
  "State": 92.65,
 
1666
  "Relationship-Overall": 95.77,
1667
+ "Composition": 97.3,
1668
  "Similarity": 93.18,
1669
  "Inclusion": 96.69,
1670
  "Comparison": 94.53,
 
1671
  "Compound-Overall": 93.91,
1672
  "Imagination": 95.92,
1673
  "Feature matching": 91.74,
 
1674
  "Grammar-Overall": 91.02,
1675
  "Pronoun Reference": 95.15,
1676
  "Consistency": 89.35,
1677
  "Negation": 88.05,
 
1678
  "Layout-Overall": 89.27,
1679
  "2D": 89.18,
1680
  "3D": 89.35,
 
1681
  "Logical Reasoning": 91.44,
 
1682
  "Text": 63.37
1683
  }
1684
  ]
1685
+ }