CodeGoat24 commited on
Commit
adfb6fc
·
verified ·
1 Parent(s): 72732d7

Update leaderboard_data.json

Browse files
Files changed (1) hide show
  1. leaderboard_data.json +1127 -1622
leaderboard_data.json CHANGED
@@ -6,48 +6,39 @@
6
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
7
  "open_source": true,
8
  "release_date": "2025-11",
9
-
10
- "Overall": 71.40,
11
- "Style": 90.00,
12
  "World Knowledge": 92.25,
13
-
14
  "Attribute-Overall": 74.57,
15
- "Quantity": 75.00,
16
  "Expression": 58.97,
17
  "Material": 79.25,
18
  "Size": 77.78,
19
  "Shape": 64.38,
20
  "Color": 95.83,
21
-
22
- "Action-Overall": 69.30,
23
  "Hand": 62.82,
24
  "Full body": 73.37,
25
  "Animal": 78.68,
26
- "Non Contact": 69.90,
27
  "Contact": 61.31,
28
  "State": 70.28,
29
-
30
  "Relationship-Overall": 71.57,
31
  "Composition": 75.68,
32
- "Similarity": 65.00,
33
  "Inclusion": 75.54,
34
  "Comparison": 65.62,
35
-
36
  "Compound-Overall": 63.02,
37
  "Imagination": 64.29,
38
  "Feature matching": 61.72,
39
-
40
  "Grammar-Overall": 64.57,
41
  "Pronoun Reference": 79.78,
42
  "Consistency": 62.04,
43
  "Negation": 50.77,
44
-
45
  "Layout-Overall": 78.36,
46
  "2D": 83.09,
47
  "3D": 73.48,
48
-
49
  "Logical Reasoning": 39.68,
50
-
51
  "Text": 70.69
52
  },
53
  {
@@ -56,11 +47,9 @@
56
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
57
  "open_source": true,
58
  "release_date": "2025-11",
59
-
60
  "Overall": 84.76,
61
- "Style": 96.60,
62
  "World Knowledge": 95.41,
63
-
64
  "Attribute-Overall": 87.39,
65
  "Quantity": 73.61,
66
  "Expression": 73.72,
@@ -68,7 +57,6 @@
68
  "Size": 91.67,
69
  "Shape": 88.12,
70
  "Color": 100.0,
71
-
72
  "Action-Overall": 82.22,
73
  "Hand": 74.36,
74
  "Full body": 86.41,
@@ -76,28 +64,22 @@
76
  "Non Contact": 82.14,
77
  "Contact": 80.95,
78
  "State": 84.43,
79
-
80
  "Relationship-Overall": 87.31,
81
  "Composition": 93.24,
82
  "Similarity": 77.78,
83
  "Inclusion": 88.59,
84
  "Comparison": 85.16,
85
-
86
  "Compound-Overall": 83.51,
87
  "Imagination": 84.95,
88
  "Feature matching": 82.03,
89
-
90
  "Grammar-Overall": 77.41,
91
- "Pronoun Reference": 88.60,
92
- "Consistency": 78.70,
93
  "Negation": 64.62,
94
-
95
  "Layout-Overall": 89.55,
96
  "2D": 87.87,
97
  "3D": 91.29,
98
-
99
  "Logical Reasoning": 62.84,
100
-
101
  "Text": 85.34
102
  },
103
  {
@@ -106,50 +88,39 @@
106
  "hf": "-",
107
  "open_source": false,
108
  "release_date": "2025-11",
109
-
110
  "Overall": 92.72,
111
-
112
- "Style": 99.30,
113
-
114
  "World Knowledge": 97.47,
115
-
116
  "Attribute-Overall": 91.95,
117
  "Quantity": 90.28,
118
  "Expression": 85.53,
119
  "Material": 97.64,
120
  "Size": 93.75,
121
- "Shape": 85.00,
122
  "Color": 99.17,
123
-
124
  "Action-Overall": 91.38,
125
  "Hand": 89.47,
126
  "Full body": 91.11,
127
  "Animal": 90.44,
128
- "Non Contact": 89.80,
129
  "Contact": 94.05,
130
  "State": 92.92,
131
-
132
  "Relationship-Overall": 95.43,
133
  "Composition": 96.96,
134
  "Similarity": 96.11,
135
  "Inclusion": 92.39,
136
  "Comparison": 95.31,
137
-
138
  "Compound-Overall": 92.91,
139
  "Imagination": 95.15,
140
  "Feature matching": 90.62,
141
-
142
  "Grammar-Overall": 89.59,
143
  "Pronoun Reference": 94.49,
144
  "Consistency": 87.96,
145
  "Negation": 85.71,
146
-
147
  "Layout-Overall": 93.28,
148
  "2D": 92.65,
149
  "3D": 93.94,
150
-
151
  "Logical Reasoning": 80.24,
152
-
153
  "Text": 95.65
154
  },
155
  {
@@ -158,49 +129,40 @@
158
  "hf": "-",
159
  "open_source": false,
160
  "release_date": "2025-09",
161
-
162
- "Overall": 78.17,
163
- "Style": 93.15,
164
- "World Knowledge": 95.22,
165
-
166
- "Attribute-Overall": 81.06,
167
- "Quantity": 75.00,
168
- "Expression": 67.95,
169
  "Material": 91.04,
170
- "Size": 85.29,
171
- "Shape": 77.50,
172
- "Color": 87.50,
173
-
174
- "Action-Overall": 74.23,
175
- "Hand": 61.18,
176
- "Full body": 75.00,
177
- "Animal": 76.47,
178
- "Non Contact": 75.00,
179
  "Contact": 72.02,
180
- "State": 82.55,
181
-
182
- "Relationship-Overall": 82.23,
183
- "Composition": 85.14,
184
- "Similarity": 75.00,
185
- "Inclusion": 82.07,
186
- "Comparison": 85.94,
187
-
188
- "Compound-Overall": 76.23,
189
- "Imagination": 79.38,
190
- "Feature matching": 73.04,
191
-
192
- "Grammar-Overall": 73.59,
193
- "Pronoun Reference": 84.07,
194
- "Consistency": 73.15,
195
- "Negation": 63.08,
196
-
197
- "Layout-Overall": 77.61,
198
  "2D": 75.74,
199
- "3D": 79.55,
200
-
201
- "Logical Reasoning": 56.36,
202
-
203
- "Text": 71.97
204
  },
205
  {
206
  "model": "Echo-4o",
@@ -208,11 +170,9 @@
208
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
209
  "open_source": true,
210
  "release_date": "2025-8",
211
-
212
  "Overall": 69.12,
213
- "Style": 92.20,
214
  "World Knowledge": 90.51,
215
-
216
  "Attribute-Overall": 79.06,
217
  "Quantity": 70.14,
218
  "Expression": 71.15,
@@ -220,49 +180,40 @@
220
  "Size": 83.33,
221
  "Shape": 68.75,
222
  "Color": 98.33,
223
-
224
  "Action-Overall": 68.92,
225
  "Hand": 66.03,
226
- "Full body": 66.30,
227
  "Animal": 77.94,
228
  "Non Contact": 67.86,
229
  "Contact": 59.52,
230
  "State": 75.94,
231
-
232
  "Relationship-Overall": 76.52,
233
  "Composition": 81.76,
234
  "Similarity": 70.56,
235
  "Inclusion": 77.72,
236
  "Comparison": 71.09,
237
-
238
  "Compound-Overall": 71.78,
239
  "Imagination": 76.79,
240
  "Feature matching": 66.67,
241
-
242
  "Grammar-Overall": 75.13,
243
  "Pronoun Reference": 80.51,
244
  "Consistency": 74.54,
245
- "Negation": 70.00,
246
-
247
  "Layout-Overall": 82.28,
248
  "2D": 87.13,
249
  "3D": 77.27,
250
-
251
  "Logical Reasoning": 44.77,
252
-
253
  "Text": 10.06
254
- },
255
  {
256
  "model": "UniWorld-V1",
257
  "link": "https://arxiv.org/pdf/2506.03147",
258
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
259
  "open_source": true,
260
  "release_date": "2025-06",
261
-
262
  "Overall": 63.11,
263
- "Style": 91.10,
264
  "World Knowledge": 82.91,
265
-
266
  "Attribute-Overall": 70.62,
267
  "Quantity": 70.14,
268
  "Expression": 64.74,
@@ -270,36 +221,29 @@
270
  "Size": 72.22,
271
  "Shape": 66.25,
272
  "Color": 99.17,
273
-
274
  "Action-Overall": 67.21,
275
  "Hand": 55.13,
276
  "Full body": 72.28,
277
  "Animal": 73.53,
278
  "Non Contact": 63.78,
279
- "Contact": 61.90,
280
- "State": 75.00,
281
-
282
  "Relationship-Overall": 67.13,
283
- "Composition": 72.30,
284
  "Similarity": 63.33,
285
  "Inclusion": 64.67,
286
  "Comparison": 64.06,
287
-
288
  "Compound-Overall": 54.51,
289
  "Imagination": 58.16,
290
  "Feature matching": 50.78,
291
-
292
  "Grammar-Overall": 63.77,
293
  "Pronoun Reference": 74.26,
294
  "Consistency": 64.35,
295
  "Negation": 52.31,
296
-
297
  "Layout-Overall": 69.03,
298
- "2D": 73.90,
299
  "3D": 64.02,
300
-
301
  "Logical Reasoning": 38.41,
302
-
303
  "Text": 26.44
304
  },
305
  {
@@ -308,11 +252,9 @@
308
  "hf": "stabilityai/stable-diffusion-3.5-medium",
309
  "open_source": true,
310
  "release_date": "2024-10",
311
-
312
  "Overall": 60.71,
313
- "Style": 89.80,
314
  "World Knowledge": 84.34,
315
-
316
  "Attribute-Overall": 66.99,
317
  "Quantity": 59.72,
318
  "Expression": 51.92,
@@ -320,36 +262,29 @@
320
  "Size": 70.83,
321
  "Shape": 63.75,
322
  "Color": 93.33,
323
-
324
  "Action-Overall": 60.65,
325
- "Hand": 50.00,
326
  "Full body": 63.04,
327
  "Animal": 69.12,
328
  "Non Contact": 55.61,
329
  "Contact": 52.98,
330
- "State": 71.70,
331
-
332
  "Relationship-Overall": 68.78,
333
  "Composition": 74.66,
334
  "Similarity": 61.67,
335
  "Inclusion": 73.37,
336
  "Comparison": 58.59,
337
-
338
  "Compound-Overall": 53.35,
339
  "Imagination": 58.16,
340
  "Feature matching": 48.44,
341
-
342
  "Grammar-Overall": 59.89,
343
  "Pronoun Reference": 73.53,
344
  "Consistency": 61.57,
345
  "Negation": 44.23,
346
-
347
  "Layout-Overall": 70.34,
348
  "2D": 72.06,
349
  "3D": 68.56,
350
-
351
  "Logical Reasoning": 37.73,
352
-
353
  "Text": 15.23
354
  },
355
  {
@@ -358,48 +293,39 @@
358
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
359
  "open_source": true,
360
  "release_date": "2025-09",
361
-
362
  "Overall": 71.12,
363
- "Style": 89.70,
364
  "World Knowledge": 90.03,
365
-
366
  "Attribute-Overall": 81.62,
367
  "Quantity": 69.44,
368
- "Expression": 85.90,
369
- "Material": 81.60,
370
  "Size": 76.39,
371
- "Shape": 80.00,
372
  "Color": 99.17,
373
-
374
  "Action-Overall": 73.76,
375
- "Hand": 64.10,
376
- "Full body": 78.80,
377
  "Animal": 75.74,
378
  "Non Contact": 73.98,
379
  "Contact": 64.88,
380
  "State": 82.08,
381
-
382
  "Relationship-Overall": 78.43,
383
  "Composition": 83.45,
384
  "Similarity": 74.44,
385
  "Inclusion": 81.52,
386
  "Comparison": 67.97,
387
-
388
  "Compound-Overall": 73.32,
389
  "Imagination": 78.83,
390
  "Feature matching": 67.71,
391
-
392
  "Grammar-Overall": 70.45,
393
  "Pronoun Reference": 81.99,
394
  "Consistency": 77.78,
395
  "Negation": 52.31,
396
-
397
  "Layout-Overall": 82.84,
398
  "2D": 84.93,
399
  "3D": 80.68,
400
-
401
  "Logical Reasoning": 45.45,
402
-
403
  "Text": 25.57
404
  },
405
  {
@@ -408,11 +334,9 @@
408
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
409
  "open_source": true,
410
  "release_date": "2025-05",
411
-
412
  "Overall": 41.35,
413
- "Style": 82.40,
414
  "World Knowledge": 56.65,
415
-
416
  "Attribute-Overall": 48.93,
417
  "Quantity": 45.83,
418
  "Expression": 29.49,
@@ -420,36 +344,29 @@
420
  "Size": 49.31,
421
  "Shape": 44.38,
422
  "Color": 74.17,
423
-
424
  "Action-Overall": 37.83,
425
  "Hand": 15.38,
426
  "Full body": 40.22,
427
  "Animal": 52.94,
428
  "Non Contact": 33.16,
429
- "Contact": 25.60,
430
- "State": 56.60,
431
-
432
  "Relationship-Overall": 50.25,
433
  "Composition": 55.07,
434
  "Similarity": 57.22,
435
  "Inclusion": 47.28,
436
  "Comparison": 33.59,
437
-
438
  "Compound-Overall": 32.35,
439
  "Imagination": 40.56,
440
  "Feature matching": 23.96,
441
-
442
  "Grammar-Overall": 55.75,
443
  "Pronoun Reference": 59.19,
444
  "Consistency": 40.28,
445
- "Negation": 65.00,
446
-
447
  "Layout-Overall": 30.22,
448
  "2D": 30.15,
449
- "3D": 30.30,
450
-
451
  "Logical Reasoning": 17.95,
452
-
453
  "Text": 1.15
454
  },
455
  {
@@ -458,19 +375,16 @@
458
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
459
  "open_source": true,
460
  "release_date": "2025-06",
461
-
462
  "Overall": 63.09,
463
- "Style": 91.90,
464
  "World Knowledge": 86.39,
465
-
466
  "Attribute-Overall": 72.12,
467
  "Quantity": 67.36,
468
  "Expression": 73.08,
469
  "Material": 66.04,
470
  "Size": 72.22,
471
  "Shape": 66.25,
472
- "Color": 95.00,
473
-
474
  "Action-Overall": 62.83,
475
  "Hand": 55.77,
476
  "Full body": 69.02,
@@ -478,28 +392,22 @@
478
  "Non Contact": 62.24,
479
  "Contact": 54.17,
480
  "State": 66.51,
481
-
482
  "Relationship-Overall": 68.27,
483
  "Composition": 68.24,
484
  "Similarity": 67.78,
485
- "Inclusion": 71.20,
486
  "Comparison": 64.84,
487
-
488
  "Compound-Overall": 56.31,
489
  "Imagination": 62.24,
490
  "Feature matching": 50.26,
491
-
492
  "Grammar-Overall": 59.89,
493
  "Pronoun Reference": 71.32,
494
  "Consistency": 60.65,
495
  "Negation": 47.31,
496
-
497
  "Layout-Overall": 71.64,
498
  "2D": 78.31,
499
  "3D": 64.77,
500
-
501
- "Logical Reasoning": 32.50,
502
-
503
  "Text": 29.02
504
  },
505
  {
@@ -508,9 +416,8 @@
508
  "hf": "https://huggingface.co/FoundationVision/Infinity/tree/main/infinity_8b_512x512_weights",
509
  "open_source": true,
510
  "release_date": "2024-12",
511
-
512
  "Overall": 59.81,
513
- "Style": 90.80,
514
  "World Knowledge": 87.97,
515
  "Attribute-Overall": 68.06,
516
  "Quantity": 66.67,
@@ -519,99 +426,81 @@
519
  "Size": 77.78,
520
  "Shape": 58.75,
521
  "Color": 93.33,
522
-
523
  "Action-Overall": 60.17,
524
  "Hand": 55.13,
525
  "Full body": 65.22,
526
  "Animal": 72.06,
527
  "Non Contact": 58.16,
528
- "Contact": 49.40,
529
  "State": 62.26,
530
-
531
  "Relationship-Overall": 69.16,
532
  "Composition": 73.31,
533
- "Similarity": 65.00,
534
  "Inclusion": 67.39,
535
  "Comparison": 67.97,
536
-
537
  "Compound-Overall": 51.42,
538
  "Imagination": 55.87,
539
  "Feature matching": 46.88,
540
-
541
  "Grammar-Overall": 60.16,
542
  "Pronoun Reference": 73.16,
543
  "Consistency": 65.74,
544
  "Negation": 41.92,
545
-
546
- "Layout-Overall": 66.60,
547
  "2D": 71.69,
548
  "3D": 61.36,
549
-
550
  "Logical Reasoning": 31.36,
551
-
552
  "Text": 12.36
553
  },
554
- {
555
  "model": "OneCAT",
556
  "link": "https://arxiv.org/pdf/2509.03498",
557
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
558
  "open_source": true,
559
  "release_date": "2025-09",
560
-
561
  "Overall": 58.28,
562
- "Style": 93.30,
563
  "World Knowledge": 82.28,
564
-
565
  "Attribute-Overall": 63.46,
566
  "Quantity": 59.42,
567
  "Expression": 58.33,
568
  "Material": 67.45,
569
  "Size": 65.97,
570
- "Shape": 42.50,
571
- "Color": 92.50,
572
-
573
  "Action-Overall": 58.56,
574
- "Hand": 35.90,
575
  "Full body": 65.22,
576
  "Animal": 69.12,
577
  "Non Contact": 57.65,
578
  "Contact": 48.81,
579
  "State": 71.23,
580
-
581
  "Relationship-Overall": 68.15,
582
  "Composition": 78.04,
583
  "Similarity": 69.44,
584
- "Inclusion": 62.50,
585
  "Comparison": 51.56,
586
-
587
  "Compound-Overall": 56.96,
588
  "Imagination": 66.33,
589
- "Feature matching": 47.40,
590
-
591
  "Grammar-Overall": 60.83,
592
  "Pronoun Reference": 70.59,
593
  "Consistency": 59.72,
594
  "Negation": 51.54,
595
-
596
  "Layout-Overall": 64.74,
597
  "2D": 64.34,
598
  "3D": 65.15,
599
-
600
  "Logical Reasoning": 33.41,
601
-
602
  "Text": 1.15
603
  },
604
- {
605
  "model": "X-Omni",
606
  "link": "https://arxiv.org/pdf/2507.22058",
607
  "hf": "https://huggingface.co/X-Omni/X-Omni-En",
608
  "open_source": true,
609
  "release_date": "2025-08",
610
-
611
  "Overall": 53.77,
612
- "Style": 72.70,
613
  "World Knowledge": 76.27,
614
-
615
  "Attribute-Overall": 60.04,
616
  "Quantity": 63.19,
617
  "Expression": 53.21,
@@ -619,37 +508,30 @@
619
  "Size": 55.56,
620
  "Shape": 53.75,
621
  "Color": 80.83,
622
-
623
  "Action-Overall": 54.47,
624
  "Hand": 46.79,
625
  "Full body": 56.52,
626
- "Animal": 62.50,
627
  "Non Contact": 56.63,
628
  "Contact": 42.26,
629
  "State": 60.85,
630
-
631
- "Relationship-Overall": 56.60,
632
  "Composition": 61.82,
633
  "Similarity": 56.11,
634
  "Inclusion": 51.09,
635
  "Comparison": 53.12,
636
-
637
  "Compound-Overall": 41.75,
638
  "Imagination": 47.45,
639
  "Feature matching": 35.94,
640
-
641
  "Grammar-Overall": 59.09,
642
  "Pronoun Reference": 66.91,
643
  "Consistency": 54.17,
644
- "Negation": 55.00,
645
-
646
  "Layout-Overall": 62.69,
647
  "2D": 69.49,
648
  "3D": 55.68,
649
-
650
  "Logical Reasoning": 29.09,
651
-
652
- "Text": 25.00
653
  },
654
  {
655
  "model": "FLUX.1-Krea-dev",
@@ -657,19 +539,16 @@
657
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev",
658
  "open_source": true,
659
  "release_date": "2025-08",
660
-
661
  "Overall": 69.88,
662
- "Style": 88.70,
663
  "World Knowledge": 92.56,
664
-
665
  "Attribute-Overall": 75.96,
666
  "Quantity": 70.83,
667
- "Expression": 60.90,
668
  "Material": 77.36,
669
  "Size": 79.17,
670
  "Shape": 73.12,
671
  "Color": 99.17,
672
-
673
  "Action-Overall": 71.01,
674
  "Hand": 64.74,
675
  "Full body": 70.11,
@@ -677,28 +556,22 @@
677
  "Non Contact": 72.96,
678
  "Contact": 67.26,
679
  "State": 73.11,
680
-
681
  "Relationship-Overall": 73.98,
682
  "Composition": 76.35,
683
  "Similarity": 66.11,
684
  "Inclusion": 77.17,
685
- "Comparison": 75.00,
686
-
687
  "Compound-Overall": 64.43,
688
  "Imagination": 67.35,
689
  "Feature matching": 61.46,
690
-
691
  "Grammar-Overall": 63.37,
692
  "Pronoun Reference": 77.21,
693
  "Consistency": 67.13,
694
  "Negation": 45.77,
695
-
696
  "Layout-Overall": 84.14,
697
  "2D": 86.76,
698
  "3D": 81.44,
699
-
700
  "Logical Reasoning": 39.77,
701
-
702
  "Text": 44.83
703
  },
704
  {
@@ -707,7 +580,6 @@
707
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
708
  "open_source": true,
709
  "release_date": "2025-09",
710
-
711
  "Overall": 74.64,
712
  "Style": 90.88,
713
  "World Knowledge": 92.06,
@@ -718,47 +590,39 @@
718
  "Size": 78.47,
719
  "Shape": 68.12,
720
  "Color": 99.17,
721
-
722
  "Action-Overall": 77.81,
723
- "Hand": 75.00,
724
  "Full body": 80.98,
725
  "Animal": 82.35,
726
  "Non Contact": 73.71,
727
  "Contact": 72.02,
728
  "State": 82.55,
729
-
730
  "Relationship-Overall": 77.54,
731
  "Composition": 78.38,
732
  "Similarity": 70.56,
733
  "Inclusion": 84.78,
734
- "Comparison": 75.00,
735
-
736
  "Compound-Overall": 64.82,
737
  "Imagination": 64.54,
738
- "Feature matching": 65.10,
739
-
740
  "Grammar-Overall": 62.83,
741
  "Pronoun Reference": 77.94,
742
- "Consistency": 66.20,
743
  "Negation": 44.23,
744
-
745
  "Layout-Overall": 84.14,
746
  "2D": 86.76,
747
  "3D": 81.44,
748
-
749
  "Logical Reasoning": 46.59,
750
-
751
  "Text": 70.11
752
- },
753
- {
754
  "model": "BLIP3-o-Next",
755
  "link": "https://arxiv.org/pdf/2505.09568",
756
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
757
  "open_source": true,
758
  "release_date": "2025-08",
759
-
760
  "Overall": 65.15,
761
- "Style": 91.00,
762
  "World Knowledge": 86.71,
763
  "Attribute-Overall": 70.94,
764
  "Quantity": 67.36,
@@ -766,154 +630,123 @@
766
  "Material": 70.28,
767
  "Size": 76.39,
768
  "Shape": 60.62,
769
- "Color": 80.00,
770
-
771
  "Action-Overall": 66.83,
772
  "Hand": 57.69,
773
- "Full body": 75.00,
774
  "Animal": 73.53,
775
  "Non Contact": 67.35,
776
  "Contact": 57.74,
777
  "State": 68.87,
778
-
779
- "Relationship-Overall": 73.60,
780
  "Composition": 76.01,
781
- "Similarity": 65.00,
782
  "Inclusion": 77.17,
783
- "Comparison": 75.00,
784
-
785
  "Compound-Overall": 64.82,
786
  "Imagination": 73.72,
787
  "Feature matching": 55.73,
788
-
789
  "Grammar-Overall": 68.05,
790
  "Pronoun Reference": 76.47,
791
  "Consistency": 67.13,
792
- "Negation": 60.00,
793
-
794
  "Layout-Overall": 76.31,
795
  "2D": 80.15,
796
  "3D": 72.35,
797
-
798
  "Logical Reasoning": 48.64,
799
-
800
- "Text": 4.60
801
- },
802
- {
803
  "model": "Kolors",
804
  "link": "https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf",
805
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
806
  "open_source": true,
807
  "release_date": "2024-7",
808
-
809
- "Overall": 45.47,
810
- "Style": 84.40,
811
  "World Knowledge": 77.22,
812
-
813
  "Attribute-Overall": 54.17,
814
- "Quantity": 62.50,
815
  "Expression": 33.33,
816
  "Material": 51.89,
817
- "Size": 62.50,
818
  "Shape": 40.62,
819
  "Color": 83.33,
820
-
821
- "Action-Overall": 48.00,
822
  "Hand": 42.95,
823
  "Full body": 42.39,
824
  "Animal": 56.62,
825
  "Non Contact": 45.92,
826
  "Contact": 39.88,
827
  "State": 59.43,
828
-
829
  "Relationship-Overall": 52.79,
830
  "Composition": 55.41,
831
  "Similarity": 53.89,
832
  "Inclusion": 51.63,
833
  "Comparison": 46.88,
834
-
835
  "Compound-Overall": 33.63,
836
  "Imagination": 41.33,
837
  "Feature matching": 25.78,
838
-
839
  "Grammar-Overall": 46.66,
840
  "Pronoun Reference": 56.62,
841
  "Consistency": 47.22,
842
  "Negation": 35.77,
843
-
844
  "Layout-Overall": 42.91,
845
  "2D": 43.01,
846
- "3D": 42.80,
847
-
848
  "Logical Reasoning": 19.77,
849
-
850
  "Text": 1.15
851
  },
852
- {
853
  "model": "Seedream-4.0",
854
  "link": "https://www.volcengine.com/docs/82379/1541523",
855
  "hf": "-",
856
  "open_source": false,
857
  "release_date": "2025-09",
858
-
859
- "Overall": 87.35,
860
-
861
- "Style": 98.80,
862
-
863
  "World Knowledge": 95.41,
864
-
865
  "Attribute-Overall": 88.57,
866
  "Quantity": 86.81,
867
- "Expression": 85.90,
868
  "Material": 97.17,
869
  "Size": 84.03,
870
  "Shape": 76.88,
871
  "Color": 100.0,
872
-
873
  "Action-Overall": 85.65,
874
  "Hand": 77.56,
875
- "Full body": 87.50,
876
  "Animal": 88.24,
877
- "Non Contact": 80.10,
878
  "Contact": 83.93,
879
  "State": 94.81,
880
-
881
  "Relationship-Overall": 87.69,
882
  "Composition": 88.18,
883
  "Similarity": 80.56,
884
  "Inclusion": 94.02,
885
- "Comparison": 87.50,
886
-
887
  "Compound-Overall": 86.08,
888
  "Imagination": 88.27,
889
  "Feature matching": 83.85,
890
-
891
  "Grammar-Overall": 78.88,
892
  "Pronoun Reference": 84.93,
893
  "Consistency": 79.17,
894
  "Negation": 72.31,
895
-
896
  "Layout-Overall": 90.67,
897
  "2D": 90.81,
898
  "3D": 90.53,
899
-
900
  "Logical Reasoning": 67.73,
901
-
902
  "Text": 93.97
903
- },
904
- {
905
  "model": "Imagen-4.0-generate-preview-06-06",
906
  "link": "https://deepmind.google/models/imagen/",
907
  "hf": "-",
908
  "open_source": false,
909
  "release_date": "2025-01",
910
-
911
  "Overall": 85.84,
912
-
913
- "Style": 97.80,
914
-
915
  "World Knowledge": 96.36,
916
-
917
  "Attribute-Overall": 84.94,
918
  "Quantity": 84.03,
919
  "Expression": 76.92,
@@ -921,193 +754,153 @@
921
  "Size": 89.58,
922
  "Shape": 71.88,
923
  "Color": 98.33,
924
-
925
- "Action-Overall": 88.40,
926
  "Hand": 86.54,
927
  "Full body": 94.02,
928
  "Animal": 88.97,
929
  "Non Contact": 85.71,
930
  "Contact": 83.33,
931
  "State": 91.04,
932
-
933
  "Relationship-Overall": 89.34,
934
  "Composition": 93.58,
935
  "Similarity": 78.89,
936
  "Inclusion": 95.11,
937
  "Comparison": 85.94,
938
-
939
  "Compound-Overall": 85.31,
940
  "Imagination": 90.31,
941
  "Feature matching": 80.21,
942
-
943
  "Grammar-Overall": 79.68,
944
  "Pronoun Reference": 86.76,
945
  "Consistency": 77.31,
946
  "Negation": 74.23,
947
-
948
  "Layout-Overall": 88.81,
949
  "2D": 88.24,
950
  "3D": 89.39,
951
-
952
  "Logical Reasoning": 70.45,
953
-
954
- "Text": 77.30
955
  },
956
- {
957
  "model": "Runway-Gen4-Image",
958
  "link": "https://docs.dev.runwayml.com/api/#tag/Start-generating/paths/~1v1~1text_to_image/post",
959
  "hf": "-",
960
  "open_source": false,
961
  "release_date": "2024-11",
962
-
963
- "Overall": 69.75,
964
-
965
- "Style": 93.44,
966
-
967
  "World Knowledge": 90.36,
968
-
969
- "Attribute-Overall": 74.03,
970
- "Quantity": 72.86,
971
- "Expression": 51.97,
972
- "Material": 89.42,
973
- "Size": 68.06,
974
- "Shape": 65.62,
975
- "Color": 95.00,
976
-
977
- "Action-Overall": 70.21,
978
- "Hand": 62.18,
979
- "Full body": 79.35,
980
- "Animal": 82.35,
981
- "Non Contact": 66.15,
982
- "Contact": 60.37,
983
- "State": 71.70,
984
-
985
- "Relationship-Overall": 72.56,
986
- "Composition": 74.32,
987
- "Similarity": 62.22,
988
- "Inclusion": 77.84,
989
- "Comparison": 75.78,
990
-
991
- "Compound-Overall": 67.76,
992
- "Imagination": 71.65,
993
- "Feature matching": 63.71,
994
-
995
- "Grammar-Overall": 70.08,
996
  "Pronoun Reference": 71.21,
997
- "Consistency": 67.59,
998
- "Negation": 71.03,
999
-
1000
- "Layout-Overall": 76.33,
1001
- "2D": 77.61,
1002
- "3D": 75.00,
1003
-
1004
- "Logical Reasoning": 49.31,
1005
-
1006
- "Text": 33.43
1007
  },
1008
- {
1009
  "model": "Nano Banana",
1010
  "link": "https://ainanobanana.io/",
1011
  "hf": "-",
1012
  "open_source": false,
1013
  "release_date": "2025-08",
1014
-
1015
- "Overall": 87.45,
1016
-
1017
- "Style": 98.87,
1018
-
1019
- "World Knowledge": 96.32,
1020
-
1021
- "Attribute-Overall": 87.84,
1022
- "Quantity": 85.00,
1023
- "Expression": 83.33,
1024
- "Material": 88.50,
1025
- "Size": 95.74,
1026
- "Shape": 78.21,
1027
- "Color": 99.17,
1028
-
1029
- "Action-Overall": 86.83,
1030
- "Hand": 82.05,
1031
- "Full body": 93.41,
1032
- "Animal": 86.03,
1033
- "Non Contact": 82.47,
1034
- "Contact": 83.33,
1035
- "State": 91.98,
1036
-
1037
- "Relationship-Overall": 92.00,
1038
- "Composition": 94.76,
1039
- "Similarity": 86.52,
1040
- "Inclusion": 91.26,
1041
- "Comparison": 94.53,
1042
-
1043
- "Compound-Overall": 87.83,
1044
- "Imagination": 89.66,
1045
- "Feature matching": 86.02,
1046
-
1047
- "Grammar-Overall": 83.36,
1048
- "Pronoun Reference": 90.71,
1049
- "Consistency": 82.08,
1050
- "Negation": 76.59,
1051
-
1052
- "Layout-Overall": 91.96,
1053
- "2D": 92.65,
1054
- "3D": 91.25,
1055
-
1056
- "Logical Reasoning": 74.26,
1057
-
1058
- "Text": 75.22
1059
  },
1060
- {
1061
  "model": "Stability-AI-stable-image-ultra",
1062
  "link": "https://platform.stability.ai/docs/api-reference#tag/Generate/paths/~1v2beta~1stable-image~1generate~1ultra/post",
1063
  "hf": "-",
1064
  "open_source": false,
1065
  "release_date": "2024-06",
1066
-
1067
- "Overall": 61.96,
1068
-
1069
- "Style": 87.20,
1070
-
1071
- "World Knowledge": 87.18,
1072
-
1073
- "Attribute-Overall": 66.35,
1074
  "Quantity": 67.36,
1075
- "Expression": 48.08,
1076
- "Material": 64.15,
1077
  "Size": 69.44,
1078
- "Shape": 64.38,
1079
  "Color": 91.67,
1080
-
1081
- "Action-Overall": 59.22,
1082
- "Hand": 55.77,
1083
- "Full body": 58.15,
1084
- "Animal": 63.24,
1085
- "Non Contact": 61.22,
1086
  "Contact": 51.79,
1087
- "State": 64.15,
1088
-
1089
- "Relationship-Overall": 69.04,
1090
- "Composition": 72.64,
1091
  "Similarity": 66.67,
1092
- "Inclusion": 70.11,
1093
- "Comparison": 62.50,
1094
-
1095
- "Compound-Overall": 54.25,
1096
- "Imagination": 60.97,
1097
- "Feature matching": 47.40,
1098
-
1099
- "Grammar-Overall": 61.10,
1100
- "Pronoun Reference": 78.68,
1101
- "Consistency": 58.33,
1102
- "Negation": 45.00,
1103
-
1104
- "Layout-Overall": 64.55,
1105
- "2D": 67.28,
1106
- "3D": 61.74,
1107
-
1108
- "Logical Reasoning": 31.59,
1109
-
1110
- "Text": 39.08
1111
  },
1112
  {
1113
  "model": "HiDream_v2L",
@@ -1115,50 +908,39 @@
1115
  "hf": "-",
1116
  "open_source": false,
1117
  "release_date": "2025-07",
1118
-
1119
- "Overall": 61.64,
1120
-
1121
- "Style": 87.99,
1122
-
1123
  "World Knowledge": 89.62,
1124
-
1125
- "Attribute-Overall": 64.38,
1126
- "Quantity": 65.71,
1127
- "Expression": 44.87,
1128
  "Material": 57.82,
1129
  "Size": 74.26,
1130
- "Shape": 59.87,
1131
- "Color": 94.92,
1132
-
1133
- "Action-Overall": 59.50,
1134
- "Hand": 51.28,
1135
- "Full body": 58.56,
1136
- "Animal": 67.65,
1137
- "Non Contact": 61.98,
1138
- "Contact": 51.52,
1139
- "State": 65.09,
1140
-
1141
- "Relationship-Overall": 66.62,
1142
- "Composition": 71.23,
1143
- "Similarity": 64.20,
1144
  "Inclusion": 65.93,
1145
- "Comparison": 60.32,
1146
-
1147
  "Compound-Overall": 49.28,
1148
  "Imagination": 53.75,
1149
  "Feature matching": 44.76,
1150
-
1151
- "Grammar-Overall": 58.86,
1152
- "Pronoun Reference": 72.35,
1153
- "Consistency": 60.00,
1154
- "Negation": 44.23,
1155
-
1156
- "Layout-Overall": 69.06,
1157
- "2D": 70.41,
1158
- "3D": 67.68,
1159
-
1160
- "Logical Reasoning": 26.73,
1161
-
1162
  "Text": 44.31
1163
  },
1164
  {
@@ -1167,251 +949,204 @@
1167
  "hf": "-",
1168
  "open_source": false,
1169
  "release_date": "2025-01",
1170
-
1171
- "Overall": 77.75,
1172
-
1173
- "Style": 92.00,
1174
-
1175
- "World Knowledge": 94.78,
1176
-
1177
- "Attribute-Overall": 83.65,
1178
  "Quantity": 77.08,
1179
- "Expression": 75.00,
1180
- "Material": 85.85,
1181
  "Size": 89.58,
1182
- "Shape": 78.75,
1183
- "Color": 98.33,
1184
-
1185
  "Action-Overall": 79.85,
1186
- "Hand": 73.72,
1187
- "Full body": 84.24,
1188
- "Animal": 81.62,
1189
- "Non Contact": 76.53,
1190
- "Contact": 76.79,
1191
- "State": 84.91,
1192
-
1193
- "Relationship-Overall": 82.36,
1194
- "Composition": 83.45,
1195
- "Similarity": 73.89,
1196
- "Inclusion": 89.13,
1197
- "Comparison": 82.03,
1198
-
1199
- "Compound-Overall": 74.10,
1200
- "Imagination": 80.10,
1201
- "Feature matching": 67.97,
1202
-
1203
- "Grammar-Overall": 76.74,
1204
- "Pronoun Reference": 86.03,
1205
- "Consistency": 75.00,
1206
- "Negation": 68.46,
1207
-
1208
- "Layout-Overall": 86.19,
1209
- "2D": 88.24,
1210
- "3D": 84.09,
1211
-
1212
- "Logical Reasoning": 56.36,
1213
-
1214
- "Text": 51.44
1215
  },
1216
- {
1217
  "model": "Recraft",
1218
  "link": "https://www.recraft.ai/docs#generate-image",
1219
  "hf": "-",
1220
  "open_source": false,
1221
  "release_date": "2024-12",
1222
-
1223
- "Overall": 62.63,
1224
- "Style": 87.20,
1225
- "World Knowledge": 90.19,
1226
-
1227
- "Attribute-Overall": 68.16,
1228
- "Quantity": 68.06,
1229
- "Expression": 56.41,
1230
- "Material": 70.75,
1231
  "Size": 65.97,
1232
- "Shape": 57.50,
1233
- "Color": 95.83,
1234
-
1235
- "Action-Overall": 60.55,
1236
- "Hand": 50.00,
1237
  "Full body": 70.65,
1238
- "Animal": 76.47,
1239
- "Non Contact": 55.61,
1240
- "Contact": 48.81,
1241
- "State": 63.21,
1242
-
1243
  "Relationship-Overall": 62.56,
1244
- "Composition": 64.53,
1245
- "Similarity": 59.44,
1246
- "Inclusion": 59.24,
1247
- "Comparison": 67.19,
1248
-
1249
- "Compound-Overall": 44.85,
1250
- "Imagination": 43.37,
1251
- "Feature matching": 46.35,
1252
-
1253
  "Grammar-Overall": 63.64,
1254
- "Pronoun Reference": 73.16,
1255
- "Consistency": 58.33,
1256
- "Negation": 58.08,
1257
-
1258
- "Layout-Overall": 57.84,
1259
- "2D": 58.82,
1260
- "3D": 56.82,
1261
-
1262
- "Logical Reasoning": 29.55,
1263
-
1264
- "Text": 61.78
1265
- },
1266
- {
1267
  "model": "FLUX-kontext-max",
1268
  "link": "https://bfl.ai/models/flux-kontext",
1269
  "hf": "-",
1270
  "open_source": false,
1271
  "release_date": "2025-05",
1272
-
1273
- "Overall": 80.00,
1274
- "Style": 96.59,
1275
- "World Knowledge": 94.19,
1276
-
1277
  "Attribute-Overall": 80.93,
1278
- "Quantity": 75.69,
1279
- "Expression": 74.32,
1280
- "Material": 82.55,
1281
- "Size": 86.81,
1282
- "Shape": 74.38,
1283
  "Color": 94.17,
1284
-
1285
- "Action-Overall": 77.38,
1286
  "Hand": 67.95,
1287
- "Full body": 83.15,
1288
  "Animal": 77.94,
1289
- "Non Contact": 77.04,
1290
- "Contact": 70.83,
1291
  "State": 84.43,
1292
-
1293
- "Relationship-Overall": 85.08,
1294
- "Composition": 87.50,
1295
- "Similarity": 78.89,
1296
- "Inclusion": 90.00,
1297
- "Comparison": 81.25,
1298
-
1299
- "Compound-Overall": 78.99,
1300
- "Imagination": 83.93,
1301
- "Feature matching": 73.96,
1302
-
1303
- "Grammar-Overall": 78.53,
1304
- "Pronoun Reference": 84.23,
1305
- "Consistency": 78.70,
1306
- "Negation": 72.69,
1307
-
1308
  "Layout-Overall": 85.04,
1309
  "2D": 86.74,
1310
- "3D": 88.33,
1311
-
1312
- "Logical Reasoning": 61.36,
1313
-
1314
- "Text": 61.92
1315
- },
1316
- {
1317
  "model": "FLUX-kontext-pro",
1318
  "link": "https://bfl.ai/models/flux-kontext",
1319
  "hf": "-",
1320
  "open_source": false,
1321
  "release_date": "2025-05",
1322
-
1323
- "Overall": 75.84,
1324
- "Style": 94.78,
1325
- "World Knowledge": 91.61,
1326
-
1327
- "Attribute-Overall": 79.20,
1328
- "Quantity": 75.00,
1329
- "Expression": 71.62,
1330
- "Material": 76.89,
1331
- "Size": 84.72,
1332
- "Shape": 74.38,
1333
- "Color": 97.50,
1334
-
1335
- "Action-Overall": 77.66,
1336
- "Hand": 75.00,
1337
- "Full body": 79.35,
1338
- "Animal": 80.88,
1339
- "Non Contact": 71.94,
1340
- "Contact": 73.21,
1341
- "State": 84.91,
1342
-
1343
- "Relationship-Overall": 79.34,
1344
- "Composition": 81.42,
1345
- "Similarity": 75.56,
1346
- "Inclusion": 83.33,
1347
- "Comparison": 74.22,
1348
-
1349
- "Compound-Overall": 72.68,
1350
- "Imagination": 75.00,
1351
- "Feature matching": 70.31,
1352
-
1353
- "Grammar-Overall": 72.69,
1354
- "Pronoun Reference": 84.23,
1355
- "Consistency": 76.85,
1356
- "Negation": 57.69,
1357
-
1358
- "Layout-Overall": 84.47,
1359
- "2D": 85.98,
1360
- "3D": 82.95,
1361
-
1362
- "Logical Reasoning": 55.68,
1363
-
1364
- "Text": 50.29
1365
- },
1366
- {
1367
  "model": "wan2.2-t2i-plus",
1368
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
1369
  "hf": "-",
1370
  "open_source": false,
1371
  "release_date": "2025-07",
1372
-
1373
- "Overall": 64.82,
1374
- "Style": 91.10,
1375
- "World Knowledge": 87.34,
1376
-
1377
- "Attribute-Overall": 70.19,
1378
- "Quantity": 76.39,
1379
  "Expression": 55.77,
1380
- "Material": 66.51,
1381
- "Size": 71.53,
1382
- "Shape": 64.38,
1383
- "Color": 94.17,
1384
-
1385
- "Action-Overall": 68.00,
1386
- "Hand": 58.33,
1387
- "Full body": 75.82,
1388
  "Animal": 69.12,
1389
  "Non Contact": 68.88,
1390
- "Contact": 57.74,
1391
- "State": 75.00,
1392
-
1393
- "Relationship-Overall": 73.03,
1394
  "Composition": 70.27,
1395
- "Similarity": 67.98,
1396
- "Inclusion": 77.72,
1397
- "Comparison": 76.69,
1398
-
1399
- "Compound-Overall": 61.37,
1400
- "Imagination": 66.92,
1401
- "Feature matching": 55.73,
1402
-
1403
- "Grammar-Overall": 66.53,
1404
- "Pronoun Reference": 73.90,
1405
- "Consistency": 56.74,
1406
- "Negation": 66.92,
1407
-
1408
- "Layout-Overall": 74.77,
1409
- "2D": 77.49,
1410
  "3D": 71.97,
1411
-
1412
  "Logical Reasoning": 42.05,
1413
-
1414
- "Text": 13.83
1415
  },
1416
  {
1417
  "model": "FLUX.1-dev",
@@ -1419,48 +1154,39 @@
1419
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1420
  "open_source": true,
1421
  "release_date": "2024-08",
1422
-
1423
- "Overall": 61.30,
1424
- "Style": 83.90,
1425
- "World Knowledge": 88.92,
1426
-
1427
- "Attribute-Overall": 67.84,
1428
- "Quantity": 72.22,
1429
- "Expression": 53.85,
1430
  "Material": 58.96,
1431
- "Size": 75.00,
1432
- "Shape": 65.00,
1433
- "Color": 91.67,
1434
-
1435
- "Action-Overall": 62.17,
1436
- "Hand": 51.28,
1437
- "Full body": 67.39,
1438
- "Animal": 69.85,
1439
- "Non Contact": 59.69,
1440
- "Contact": 58.93,
1441
- "State": 65.57,
1442
-
1443
- "Relationship-Overall": 67.26,
1444
- "Composition": 62.50,
1445
- "Similarity": 66.67,
1446
  "Inclusion": 72.83,
1447
- "Comparison": 62.50,
1448
-
1449
- "Compound-Overall": 47.04,
1450
- "Imagination": 47.96,
1451
- "Feature matching": 46.09,
1452
-
1453
- "Grammar-Overall": 60.96,
1454
- "Pronoun Reference": 73.16,
1455
- "Consistency": 63.43,
1456
- "Negation": 46.15,
1457
-
1458
- "Layout-Overall": 71.83,
1459
- "2D": 74.26,
1460
- "3D": 69.32,
1461
-
1462
- "Logical Reasoning": 30.91,
1463
-
1464
  "Text": 32.18
1465
  },
1466
  {
@@ -1469,49 +1195,40 @@
1469
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
1470
  "open_source": true,
1471
  "release_date": "2025-05",
1472
-
1473
- "Overall": 71.81,
1474
- "Style": 92.50,
1475
- "World Knowledge": 94.15,
1476
-
1477
- "Attribute-Overall": 72.97,
1478
  "Quantity": 73.61,
1479
- "Expression": 59.62,
1480
  "Material": 72.17,
1481
  "Size": 79.17,
1482
- "Shape": 61.88,
1483
  "Color": 98.33,
1484
-
1485
- "Action-Overall": 73.00,
1486
- "Hand": 62.18,
1487
  "Full body": 76.09,
1488
- "Animal": 73.53,
1489
- "Non Contact": 74.49,
1490
- "Contact": 70.24,
1491
  "State": 78.77,
1492
-
1493
- "Relationship-Overall": 75.38,
1494
- "Composition": 79.05,
1495
- "Similarity": 68.33,
1496
  "Inclusion": 78.26,
1497
- "Comparison": 72.66,
1498
-
1499
- "Compound-Overall": 62.63,
1500
- "Imagination": 64.29,
1501
- "Feature matching": 60.94,
1502
-
1503
- "Grammar-Overall": 63.24,
1504
- "Pronoun Reference": 83.09,
1505
- "Consistency": 65.74,
1506
- "Negation": 40.38,
1507
-
1508
- "Layout-Overall": 78.17,
1509
  "2D": 82.72,
1510
- "3D": 73.48,
1511
-
1512
- "Logical Reasoning": 41.14,
1513
-
1514
- "Text": 64.94
1515
  },
1516
  {
1517
  "model": "Pref-GRPO",
@@ -1519,49 +1236,40 @@
1519
  "hf": "https://huggingface.co/CodeGoat24/FLUX.1-dev-PrefGRPO",
1520
  "open_source": true,
1521
  "release_date": "2025-08",
1522
-
1523
- "Overall": 69.46,
1524
- "Style": 88.40,
1525
- "World Knowledge": 90.35,
1526
-
1527
- "Attribute-Overall": 75.00,
1528
- "Quantity": 71.53,
1529
- "Expression": 60.90,
1530
- "Material": 73.11,
1531
- "Size": 77.08,
1532
- "Shape": 74.38,
1533
- "Color": 99.17,
1534
-
1535
- "Action-Overall": 69.77,
1536
- "Hand": 60.90,
1537
- "Full body": 72.28,
1538
- "Animal": 77.21,
1539
- "Non Contact": 68.37,
1540
- "Contact": 64.88,
1541
- "State": 74.53,
1542
-
1543
- "Relationship-Overall": 76.52,
1544
- "Composition": 81.42,
1545
  "Similarity": 76.67,
1546
- "Inclusion": 76.09,
1547
  "Comparison": 65.62,
1548
-
1549
- "Compound-Overall": 63.27,
1550
- "Imagination": 65.56,
1551
- "Feature matching": 60.94,
1552
-
1553
- "Grammar-Overall": 62.43,
1554
- "Pronoun Reference": 79.04,
1555
- "Consistency": 66.20,
1556
- "Negation": 41.92,
1557
-
1558
- "Layout-Overall": 77.61,
1559
- "2D": 82.35,
1560
- "3D": 72.73,
1561
-
1562
- "Logical Reasoning": 47.13,
1563
-
1564
- "Text": 47.13
1565
  },
1566
  {
1567
  "model": "SD-3.5-Large",
@@ -1569,49 +1277,40 @@
1569
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large",
1570
  "open_source": true,
1571
  "release_date": "2024-10",
1572
-
1573
- "Overall": 62.99,
1574
- "Style": 88.60,
1575
- "World Knowledge": 88.92,
1576
-
1577
- "Attribute-Overall": 68.59,
1578
- "Quantity": 71.53,
1579
- "Expression": 51.92,
1580
- "Material": 68.87,
1581
- "Size": 68.06,
1582
- "Shape": 65.62,
1583
- "Color": 90.83,
1584
-
1585
- "Action-Overall": 62.17,
1586
- "Hand": 57.05,
1587
- "Full body": 61.96,
1588
- "Animal": 63.24,
1589
- "Non Contact": 62.24,
1590
- "Contact": 59.52,
1591
- "State": 67.45,
1592
-
1593
- "Relationship-Overall": 69.80,
1594
- "Composition": 75.34,
1595
- "Similarity": 68.33,
1596
- "Inclusion": 68.48,
1597
- "Comparison": 60.94,
1598
-
1599
- "Compound-Overall": 58.76,
1600
- "Imagination": 64.80,
1601
- "Feature matching": 52.60,
1602
-
1603
- "Grammar-Overall": 58.96,
1604
- "Pronoun Reference": 74.63,
1605
- "Consistency": 61.11,
1606
- "Negation": 40.77,
1607
-
1608
- "Layout-Overall": 69.03,
1609
  "2D": 70.96,
1610
- "3D": 67.05,
1611
-
1612
- "Logical Reasoning": 32.27,
1613
-
1614
- "Text": 32.76
1615
  },
1616
  {
1617
  "model": "Janus-Pro",
@@ -1619,48 +1318,40 @@
1619
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
1620
  "open_source": true,
1621
  "release_date": "2025-01",
1622
-
1623
- "Overall": 61.61,
1624
- "Style": 90.80,
1625
- "World Knowledge": 86.71,
1626
- "Attribute-Overall": 67.74,
1627
  "Quantity": 56.25,
1628
- "Expression": 55.77,
1629
- "Material": 71.70,
1630
  "Size": 73.61,
1631
  "Shape": 61.88,
1632
  "Color": 90.83,
1633
-
1634
- "Action-Overall": 64.26,
1635
- "Hand": 50.64,
1636
- "Full body": 63.04,
1637
- "Animal": 75.00,
1638
- "Non Contact": 62.24,
1639
- "Contact": 56.55,
1640
- "State": 76.42,
1641
-
1642
- "Relationship-Overall": 68.40,
1643
  "Composition": 76.01,
1644
- "Similarity": 56.11,
1645
- "Inclusion": 75.00,
1646
- "Comparison": 58.59,
1647
-
1648
- "Compound-Overall": 62.11,
1649
- "Imagination": 69.64,
1650
- "Feature matching": 54.43,
1651
-
1652
- "Grammar-Overall": 64.44,
1653
- "Pronoun Reference": 75.37,
1654
- "Consistency": 66.20,
1655
- "Negation": 51.54,
1656
-
1657
- "Layout-Overall": 72.01,
1658
  "2D": 74.63,
1659
- "3D": 69.32,
1660
-
1661
- "Logical Reasoning": 37.05,
1662
-
1663
- "Text": 2.59
1664
  },
1665
  {
1666
  "model": "Show-o2",
@@ -1668,47 +1359,39 @@
1668
  "hf": "https://huggingface.co/showlab/show-o2-7B",
1669
  "open_source": true,
1670
  "release_date": "2025-06",
1671
-
1672
- "Overall": 62.73,
1673
- "Style": 87.20,
1674
- "World Knowledge": 86.08,
1675
- "Attribute-Overall": 70.51,
1676
  "Quantity": 59.03,
1677
- "Expression": 63.46,
1678
- "Material": 73.58,
1679
- "Size": 72.92,
1680
- "Shape": 63.12,
1681
- "Color": 95.00,
1682
-
1683
- "Action-Overall": 69.58,
1684
- "Hand": 56.41,
1685
- "Full body": 77.72,
1686
- "Animal": 72.79,
1687
- "Non Contact": 70.41,
1688
- "Contact": 52.38,
1689
- "State": 83.02,
1690
-
1691
- "Relationship-Overall": 70.18,
1692
- "Composition": 79.05,
1693
- "Similarity": 61.11,
1694
- "Inclusion": 70.11,
1695
- "Comparison": 62.50,
1696
-
1697
- "Compound-Overall": 64.69,
1698
- "Imagination": 69.90,
1699
- "Feature matching": 59.38,
1700
-
1701
- "Grammar-Overall": 61.63,
1702
- "Pronoun Reference": 75.37,
1703
- "Consistency": 65.28,
1704
- "Negation": 44.23,
1705
-
1706
- "Layout-Overall": 75.37,
1707
- "2D": 77.94,
1708
- "3D": 72.73,
1709
-
1710
- "Logical Reasoning": 40.91,
1711
-
1712
  "Text": 1.15
1713
  },
1714
  {
@@ -1717,47 +1400,39 @@
1717
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
1718
  "open_source": true,
1719
  "release_date": "2025-05",
1720
-
1721
- "Overall": 61.53,
1722
- "Style": 90.20,
1723
- "World Knowledge": 85.60,
1724
- "Attribute-Overall": 67.74,
1725
- "Quantity": 59.03,
1726
- "Expression": 50.00,
1727
- "Material": 72.64,
1728
- "Size": 76.39,
1729
- "Shape": 59.38,
1730
- "Color": 93.33,
1731
-
1732
- "Action-Overall": 61.98,
1733
- "Hand": 52.56,
1734
- "Full body": 60.87,
1735
- "Animal": 69.12,
1736
- "Non Contact": 62.24,
1737
- "Contact": 58.93,
1738
- "State": 67.45,
1739
-
1740
- "Relationship-Overall": 70.69,
1741
- "Composition": 76.35,
1742
- "Similarity": 70.56,
1743
- "Inclusion": 69.57,
1744
- "Comparison": 59.38,
1745
-
1746
- "Compound-Overall": 58.12,
1747
- "Imagination": 67.35,
1748
- "Feature matching": 48.70,
1749
-
1750
- "Grammar-Overall": 66.44,
1751
- "Pronoun Reference": 71.69,
1752
- "Consistency": 68.52,
1753
- "Negation": 59.23,
1754
-
1755
- "Layout-Overall": 76.49,
1756
- "2D": 79.04,
1757
- "3D": 73.86,
1758
-
1759
- "Logical Reasoning": 30.23,
1760
-
1761
  "Text": 7.76
1762
  },
1763
  {
@@ -1766,47 +1441,39 @@
1766
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-Model-8B",
1767
  "open_source": true,
1768
  "release_date": "2025-05",
1769
-
1770
- "Overall": 59.87,
1771
- "Style": 92.80,
1772
- "World Knowledge": 80.22,
1773
- "Attribute-Overall": 63.89,
1774
- "Quantity": 51.39,
1775
- "Expression": 60.26,
1776
- "Material": 64.62,
1777
- "Size": 75.00,
1778
- "Shape": 54.37,
1779
- "Color": 81.67,
1780
-
1781
- "Action-Overall": 63.97,
1782
- "Hand": 58.33,
1783
- "Full body": 70.11,
1784
- "Animal": 70.59,
1785
- "Non Contact": 60.20,
1786
- "Contact": 51.79,
1787
- "State": 71.70,
1788
-
1789
- "Relationship-Overall": 66.50,
1790
- "Composition": 70.61,
1791
- "Similarity": 60.00,
1792
- "Inclusion": 67.39,
1793
  "Comparison": 64.84,
1794
-
1795
- "Compound-Overall": 53.74,
1796
- "Imagination": 61.73,
1797
  "Feature matching": 45.57,
1798
-
1799
- "Grammar-Overall": 68.58,
1800
- "Pronoun Reference": 79.04,
1801
- "Consistency": 61.11,
1802
- "Negation": 63.85,
1803
-
1804
- "Layout-Overall": 68.47,
1805
- "2D": 72.79,
1806
- "3D": 64.02,
1807
-
1808
- "Logical Reasoning": 39.55,
1809
-
1810
  "Text": 1.15
1811
  },
1812
  {
@@ -1815,48 +1482,40 @@
1815
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
1816
  "open_source": true,
1817
  "release_date": "2024-03",
1818
-
1819
- "Overall": 56.30,
1820
- "Style": 82.00,
1821
- "World Knowledge": 83.07,
1822
- "Attribute-Overall": 63.25,
1823
- "Quantity": 71.53,
1824
- "Expression": 44.23,
1825
- "Material": 55.19,
1826
- "Size": 72.22,
1827
- "Shape": 57.50,
1828
- "Color": 89.17,
1829
-
1830
- "Action-Overall": 57.51,
1831
- "Hand": 53.85,
1832
  "Full body": 59.78,
1833
- "Animal": 68.38,
1834
- "Non Contact": 50.51,
1835
- "Contact": 51.19,
1836
- "State": 62.74,
1837
-
1838
- "Relationship-Overall": 62.44,
1839
- "Composition": 60.47,
1840
- "Similarity": 60.00,
1841
- "Inclusion": 69.57,
1842
- "Comparison": 60.16,
1843
-
1844
- "Compound-Overall": 44.72,
1845
- "Imagination": 47.19,
1846
- "Feature matching": 42.19,
1847
-
1848
  "Grammar-Overall": 54.81,
1849
  "Pronoun Reference": 69.49,
1850
- "Consistency": 56.02,
1851
- "Negation": 38.46,
1852
-
1853
- "Layout-Overall": 69.22,
1854
- "2D": 77.21,
1855
  "3D": 60.98,
1856
-
1857
- "Logical Reasoning": 28.18,
1858
-
1859
- "Text": 17.82
1860
  },
1861
  {
1862
  "model": "Hunyuan-DiT",
@@ -1864,51 +1523,40 @@
1864
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1865
  "open_source": true,
1866
  "release_date": "2024-05",
1867
-
1868
- "Overall": 51.38,
1869
-
1870
- "Style": 94.10,
1871
-
1872
- "World Knowledge": 80.70,
1873
-
1874
- "Attribute-Overall": 62.71,
1875
- "Quantity": 67.36,
1876
- "Expression": 44.23,
1877
- "Material": 71.70,
1878
- "Size": 61.81,
1879
- "Shape": 47.50,
1880
  "Color": 86.67,
1881
-
1882
- "Action-Overall": 49.05,
1883
- "Hand": 35.90,
1884
- "Full body": 54.89,
1885
- "Animal": 54.41,
1886
- "Non Contact": 46.94,
1887
- "Contact": 35.71,
1888
- "State": 62.74,
1889
-
1890
- "Relationship-Overall": 59.64,
1891
- "Composition": 60.14,
1892
- "Similarity": 64.44,
1893
- "Inclusion": 60.33,
1894
  "Comparison": 50.78,
1895
-
1896
- "Compound-Overall": 41.62,
1897
- "Imagination": 46.68,
1898
- "Feature matching": 36.46,
1899
-
1900
- "Grammar-Overall": 55.48,
1901
- "Pronoun Reference": 62.87,
1902
- "Consistency": 57.87,
1903
- "Negation": 45.77,
1904
-
1905
- "Layout-Overall": 44.78,
1906
- "2D": 39.34,
1907
- "3D": 50.38,
1908
-
1909
- "Logical Reasoning": 24.55,
1910
-
1911
- "Text": 1.15
1912
  },
1913
  {
1914
  "model": "Janus",
@@ -1916,50 +1564,39 @@
1916
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
1917
  "open_source": true,
1918
  "release_date": "2024-10",
1919
-
1920
- "Overall": 51.23,
1921
-
1922
- "Style": 89.90,
1923
-
1924
- "World Knowledge": 73.58,
1925
-
1926
- "Attribute-Overall": 54.81,
1927
- "Quantity": 37.50,
1928
- "Expression": 37.82,
1929
- "Material": 58.96,
1930
- "Size": 65.97,
1931
- "Shape": 47.50,
1932
- "Color": 86.67,
1933
-
1934
- "Action-Overall": 50.38,
1935
- "Hand": 32.69,
1936
  "Full body": 51.63,
1937
- "Animal": 61.76,
1938
- "Non Contact": 48.47,
1939
- "Contact": 38.10,
1940
- "State": 66.51,
1941
-
1942
- "Relationship-Overall": 55.08,
1943
- "Composition": 56.76,
1944
- "Similarity": 53.89,
1945
- "Inclusion": 59.24,
1946
- "Comparison": 46.88,
1947
-
1948
- "Compound-Overall": 46.65,
1949
- "Imagination": 58.16,
1950
- "Feature matching": 34.90,
1951
-
1952
- "Grammar-Overall": 59.09,
1953
- "Pronoun Reference": 66.18,
1954
- "Consistency": 51.39,
1955
- "Negation": 58.08,
1956
-
1957
- "Layout-Overall": 54.85,
1958
- "2D": 57.72,
1959
- "3D": 51.89,
1960
-
1961
- "Logical Reasoning": 26.82,
1962
-
1963
  "Text": 1.15
1964
  },
1965
  {
@@ -1968,51 +1605,40 @@
1968
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
1969
  "open_source": true,
1970
  "release_date": "2024-11",
1971
-
1972
- "Overall": 46.39,
1973
-
1974
- "Style": 86.20,
1975
-
1976
- "World Knowledge": 62.50,
1977
-
1978
- "Attribute-Overall": 47.97,
1979
- "Quantity": 43.06,
1980
- "Expression": 30.77,
1981
- "Material": 55.19,
1982
  "Size": 55.56,
1983
- "Shape": 30.00,
1984
- "Color": 78.33,
1985
-
1986
- "Action-Overall": 43.35,
1987
- "Hand": 23.08,
1988
- "Full body": 48.37,
1989
- "Animal": 58.82,
1990
- "Non Contact": 36.73,
1991
- "Contact": 36.31,
1992
- "State": 55.66,
1993
-
1994
- "Relationship-Overall": 50.00,
1995
- "Composition": 59.80,
1996
- "Similarity": 38.89,
1997
- "Inclusion": 51.63,
1998
- "Comparison": 40.62,
1999
-
2000
- "Compound-Overall": 45.10,
2001
- "Imagination": 57.65,
2002
- "Feature matching": 32.29,
2003
-
2004
- "Grammar-Overall": 60.29,
2005
- "Pronoun Reference": 66.18,
2006
- "Consistency": 48.61,
2007
- "Negation": 63.85,
2008
-
2009
- "Layout-Overall": 46.46,
2010
- "2D": 49.26,
2011
- "3D": 43.56,
2012
-
2013
- "Logical Reasoning": 21.14,
2014
-
2015
- "Text": 0.86
2016
  },
2017
  {
2018
  "model": "Emu3",
@@ -2020,50 +1646,39 @@
2020
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
2021
  "open_source": true,
2022
  "release_date": "2024-09",
2023
-
2024
- "Overall": 46.02,
2025
-
2026
- "Style": 86.80,
2027
-
2028
- "World Knowledge": 77.06,
2029
-
2030
- "Attribute-Overall": 51.39,
2031
- "Quantity": 44.44,
2032
  "Expression": 45.51,
2033
- "Material": 53.77,
2034
- "Size": 43.06,
2035
  "Shape": 46.25,
2036
- "Color": 80.00,
2037
-
2038
- "Action-Overall": 40.11,
2039
- "Hand": 25.00,
2040
- "Full body": 47.28,
2041
- "Animal": 50.74,
2042
- "Non Contact": 35.20,
2043
- "Contact": 27.98,
2044
- "State": 52.36,
2045
-
2046
- "Relationship-Overall": 49.75,
2047
- "Composition": 56.76,
2048
- "Similarity": 46.67,
2049
- "Inclusion": 48.37,
2050
- "Comparison": 39.84,
2051
-
2052
- "Compound-Overall": 36.86,
2053
  "Imagination": 41.33,
2054
- "Feature matching": 32.29,
2055
-
2056
- "Grammar-Overall": 52.94,
2057
- "Pronoun Reference": 59.56,
2058
- "Consistency": 53.70,
2059
- "Negation": 45.38,
2060
-
2061
- "Layout-Overall": 44.78,
2062
- "2D": 45.22,
2063
- "3D": 44.32,
2064
-
2065
  "Logical Reasoning": 19.32,
2066
-
2067
  "Text": 1.15
2068
  },
2069
  {
@@ -2072,50 +1687,39 @@
2072
  "hf": "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
2073
  "open_source": true,
2074
  "release_date": "2024-02",
2075
-
2076
- "Overall": 45.61,
2077
-
2078
- "Style": 89.50,
2079
-
2080
- "World Knowledge": 76.11,
2081
-
2082
- "Attribute-Overall": 52.78,
2083
- "Quantity": 58.33,
2084
- "Expression": 43.59,
2085
- "Material": 57.08,
2086
- "Size": 44.44,
2087
- "Shape": 41.25,
2088
- "Color": 75.83,
2089
-
2090
- "Action-Overall": 42.68,
2091
  "Hand": 28.85,
2092
- "Full body": 50.00,
2093
- "Animal": 52.21,
2094
- "Non Contact": 35.20,
2095
- "Contact": 29.17,
2096
- "State": 58.02,
2097
-
2098
- "Relationship-Overall": 51.52,
2099
- "Composition": 60.14,
2100
- "Similarity": 49.44,
2101
- "Inclusion": 48.37,
2102
- "Comparison": 39.06,
2103
-
2104
- "Compound-Overall": 35.44,
2105
- "Imagination": 43.88,
2106
- "Feature matching": 26.82,
2107
-
2108
- "Grammar-Overall": 53.21,
2109
  "Pronoun Reference": 58.82,
2110
- "Consistency": 50.00,
2111
- "Negation": 50.00,
2112
-
2113
- "Layout-Overall": 37.13,
2114
- "2D": 34.56,
2115
- "3D": 39.77,
2116
-
2117
- "Logical Reasoning": 16.59,
2118
-
2119
  "Text": 1.15
2120
  },
2121
  {
@@ -2124,51 +1728,40 @@
2124
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
2125
  "open_source": true,
2126
  "release_date": "2023-07",
2127
-
2128
- "Overall": 39.75,
2129
-
2130
- "Style": 87.40,
2131
-
2132
- "World Knowledge": 72.63,
2133
-
2134
- "Attribute-Overall": 44.34,
2135
- "Quantity": 44.44,
2136
- "Expression": 25.00,
2137
- "Material": 52.83,
2138
- "Size": 44.44,
2139
- "Shape": 33.75,
2140
- "Color": 68.33,
2141
-
2142
- "Action-Overall": 34.22,
2143
- "Hand": 19.23,
2144
- "Full body": 35.33,
2145
- "Animal": 43.38,
2146
- "Non Contact": 26.53,
2147
- "Contact": 24.40,
2148
- "State": 53.30,
2149
-
2150
- "Relationship-Overall": 44.92,
2151
  "Composition": 53.72,
2152
- "Similarity": 38.33,
2153
- "Inclusion": 39.67,
2154
- "Comparison": 41.41,
2155
-
2156
- "Compound-Overall": 26.68,
2157
- "Imagination": 33.93,
2158
- "Feature matching": 19.27,
2159
-
2160
- "Grammar-Overall": 47.33,
2161
- "Pronoun Reference": 50.37,
2162
  "Consistency": 42.59,
2163
- "Negation": 48.08,
2164
-
2165
  "Layout-Overall": 29.85,
2166
- "2D": 26.47,
2167
- "3D": 33.33,
2168
-
2169
- "Logical Reasoning": 9.55,
2170
-
2171
- "Text": 1.15
2172
  },
2173
  {
2174
  "model": "GPT-4o",
@@ -2176,50 +1769,39 @@
2176
  "hf": "-",
2177
  "open_source": false,
2178
  "release_date": "2025-03",
2179
-
2180
- "Overall": 92.77,
2181
-
2182
- "Style": 98.57,
2183
-
2184
- "World Knowledge": 98.87,
2185
-
2186
- "Attribute-Overall": 93.59,
2187
- "Quantity": 90.00,
2188
- "Expression": 94.70,
2189
- "Material": 94.20,
2190
- "Size": 91.61,
2191
- "Shape": 92.50,
2192
  "Color": 99.17,
2193
-
2194
- "Action-Overall": 90.79,
2195
- "Hand": 89.74,
2196
- "Full body": 92.22,
2197
- "Animal": 87.12,
2198
- "Non Contact": 90.43,
2199
- "Contact": 89.82,
2200
- "State": 93.75,
2201
-
2202
- "Relationship-Overall": 94.97,
2203
- "Composition": 96.23,
2204
- "Similarity": 95.00,
2205
  "Inclusion": 94.89,
2206
- "Comparison": 92.19,
2207
-
2208
- "Compound-Overall": 93.55,
2209
- "Imagination": 95.64,
2210
- "Feature matching": 91.40,
2211
-
2212
- "Grammar-Overall": 91.76,
2213
- "Pronoun Reference": 92.91,
2214
- "Consistency": 91.67,
2215
- "Negation": 90.57,
2216
-
2217
  "Layout-Overall": 91.35,
2218
- "2D": 91.04,
2219
- "3D": 91.67,
2220
-
2221
- "Logical Reasoning": 84.97,
2222
-
2223
  "Text": 89.24
2224
  },
2225
  {
@@ -2228,51 +1810,40 @@
2228
  "hf": "-",
2229
  "open_source": false,
2230
  "release_date": "2025-06",
2231
-
2232
- "Overall": 91.54,
2233
-
2234
- "Style": 99.20,
2235
-
2236
- "World Knowledge": 97.47,
2237
-
2238
- "Attribute-Overall": 92.52,
2239
- "Quantity": 93.06,
2240
- "Expression": 81.41,
2241
- "Material": 94.34,
2242
- "Size": 95.83,
2243
- "Shape": 91.88,
2244
  "Color": 100.0,
2245
-
2246
- "Action-Overall": 92.20,
2247
- "Hand": 90.38,
2248
- "Full body": 93.44,
2249
- "Animal": 91.91,
2250
- "Non Contact": 90.31,
2251
- "Contact": 89.29,
2252
- "State": 96.70,
2253
-
2254
- "Relationship-Overall": 93.02,
2255
- "Composition": 95.27,
2256
- "Similarity": 84.44,
2257
- "Inclusion": 98.37,
2258
- "Comparison": 92.19,
2259
-
2260
  "Compound-Overall": 91.37,
2261
  "Imagination": 92.86,
2262
  "Feature matching": 89.84,
2263
-
2264
- "Grammar-Overall": 87.97,
2265
- "Pronoun Reference": 94.12,
2266
- "Consistency": 87.04,
2267
- "Negation": 82.31,
2268
-
2269
- "Layout-Overall": 93.10,
2270
- "2D": 92.65,
2271
- "3D": 93.56,
2272
-
2273
- "Logical Reasoning": 79.55,
2274
-
2275
- "Text": 89.08
2276
  },
2277
  {
2278
  "model": "Seedream-3.0",
@@ -2280,51 +1851,40 @@
2280
  "hf": "-",
2281
  "open_source": false,
2282
  "release_date": "2025-06",
2283
-
2284
- "Overall": 78.95,
2285
-
2286
- "Style": 98.10,
2287
-
2288
- "World Knowledge": 95.25,
2289
-
2290
- "Attribute-Overall": 85.58,
2291
- "Quantity": 80.56,
2292
- "Expression": 82.05,
2293
- "Material": 90.57,
2294
- "Size": 85.42,
2295
- "Shape": 78.12,
2296
- "Color": 97.50,
2297
-
2298
- "Action-Overall": 82.98,
2299
- "Hand": 75.00,
2300
- "Full body": 89.67,
2301
- "Animal": 85.29,
2302
- "Non Contact": 75.51,
2303
- "Contact": 80.95,
2304
- "State": 90.09,
2305
-
2306
- "Relationship-Overall": 80.84,
2307
- "Composition": 82.77,
2308
- "Similarity": 73.89,
2309
- "Inclusion": 84.24,
2310
- "Comparison": 81.25,
2311
-
2312
- "Compound-Overall": 73.84,
2313
- "Imagination": 78.57,
2314
- "Feature matching": 69.01,
2315
-
2316
- "Grammar-Overall": 61.36,
2317
- "Pronoun Reference": 79.78,
2318
- "Consistency": 69.91,
2319
- "Negation": 35.00,
2320
-
2321
- "Layout-Overall": 87.31,
2322
- "2D": 86.76,
2323
- "3D": 87.88,
2324
-
2325
- "Logical Reasoning": 52.73,
2326
-
2327
- "Text": 71.55
2328
  },
2329
  {
2330
  "model": "Imagen-3.0-generate-002",
@@ -2332,51 +1892,40 @@
2332
  "hf": "-",
2333
  "open_source": false,
2334
  "release_date": "2025-02",
2335
-
2336
- "Overall": 71.85,
2337
-
2338
- "Style": 89.25,
2339
-
2340
- "World Knowledge": 94.75,
2341
-
2342
- "Attribute-Overall": 77.33,
2343
- "Quantity": 75.78,
2344
- "Expression": 64.67,
2345
- "Material": 80.66,
2346
- "Size": 82.84,
2347
- "Shape": 70.00,
2348
- "Color": 93.10,
2349
-
2350
- "Action-Overall": 81.46,
2351
- "Hand": 80.00,
2352
  "Full body": 83.89,
2353
- "Animal": 85.29,
2354
- "Non Contact": 77.37,
2355
- "Contact": 74.40,
2356
- "State": 87.38,
2357
-
2358
- "Relationship-Overall": 82.86,
2359
- "Composition": 83.90,
2360
- "Similarity": 73.33,
2361
- "Inclusion": 88.64,
2362
- "Comparison": 83.90,
2363
-
2364
- "Compound-Overall": 71.71,
2365
- "Imagination": 79.23,
2366
- "Feature matching": 64.06,
2367
-
2368
- "Grammar-Overall": 69.84,
2369
- "Pronoun Reference": 79.04,
2370
- "Consistency": 70.75,
2371
- "Negation": 59.13,
2372
-
2373
- "Layout-Overall": 81.34,
2374
- "2D": 82.72,
2375
- "3D": 79.92,
2376
-
2377
- "Logical Reasoning": 48.36,
2378
-
2379
- "Text": 21.55
2380
  },
2381
  {
2382
  "model": "DALL-E-3",
@@ -2384,51 +1933,40 @@
2384
  "hf": "-",
2385
  "open_source": false,
2386
  "release_date": "2023-09",
2387
-
2388
- "Overall": 69.18,
2389
-
2390
- "Style": 95.06,
2391
-
2392
- "World Knowledge": 93.51,
2393
-
2394
- "Attribute-Overall": 75.97,
2395
- "Quantity": 62.14,
2396
- "Expression": 59.87,
2397
- "Material": 87.74,
2398
- "Size": 87.50,
2399
- "Shape": 65.00,
2400
- "Color": 92.50,
2401
-
2402
- "Action-Overall": 69.83,
2403
- "Hand": 60.90,
2404
- "Full body": 75.00,
2405
- "Animal": 76.47,
2406
- "Non Contact": 66.84,
2407
- "Contact": 63.41,
2408
- "State": 75.47,
2409
-
2410
- "Relationship-Overall": 78.06,
2411
- "Composition": 82.43,
2412
- "Similarity": 69.44,
2413
- "Inclusion": 87.78,
2414
- "Comparison": 66.41,
2415
-
2416
- "Compound-Overall": 70.60,
2417
- "Imagination": 76.79,
2418
- "Feature matching": 64.21,
2419
-
2420
- "Grammar-Overall": 68.07,
2421
- "Pronoun Reference": 74.24,
2422
- "Consistency": 74.07,
2423
- "Negation": 56.64,
2424
-
2425
- "Layout-Overall": 66.67,
2426
- "2D": 57.72,
2427
- "3D": 76.17,
2428
-
2429
- "Logical Reasoning": 48.18,
2430
-
2431
- "Text": 25.86
2432
  },
2433
  {
2434
  "model": "FLUX-pro-1.1-Ultra",
@@ -2436,51 +1974,40 @@
2436
  "hf": "-",
2437
  "open_source": false,
2438
  "release_date": "2024-11",
2439
-
2440
- "Overall": 70.67,
2441
-
2442
- "Style": 90.60,
2443
-
2444
- "World Knowledge": 91.61,
2445
-
2446
- "Attribute-Overall": 76.50,
2447
- "Quantity": 75.69,
2448
- "Expression": 59.62,
2449
- "Material": 78.77,
2450
- "Size": 77.78,
2451
- "Shape": 74.38,
2452
- "Color": 96.67,
2453
-
2454
- "Action-Overall": 76.50,
2455
- "Hand": 57.69,
2456
- "Full body": 68.48,
2457
- "Animal": 77.21,
2458
- "Non Contact": 76.53,
2459
- "Contact": 64.29,
2460
- "State": 76.89,
2461
-
2462
- "Relationship-Overall": 77.54,
2463
- "Composition": 80.41,
2464
- "Similarity": 72.78,
2465
- "Inclusion": 82.07,
2466
- "Comparison": 71.09,
2467
-
2468
- "Compound-Overall": 67.78,
2469
- "Imagination": 74.74,
2470
- "Feature matching": 60.68,
2471
-
2472
- "Grammar-Overall": 70.05,
2473
- "Pronoun Reference": 84.56,
2474
- "Consistency": 68.98,
2475
- "Negation": 55.77,
2476
-
2477
- "Layout-Overall": 81.53,
2478
- "2D": 80.15,
2479
- "3D": 82.95,
2480
-
2481
- "Logical Reasoning": 43.18,
2482
-
2483
- "Text": 37.36
2484
  },
2485
  {
2486
  "model": "Keling-Ketu",
@@ -2488,50 +2015,39 @@
2488
  "hf": "-",
2489
  "open_source": false,
2490
  "release_date": "2025-04",
2491
-
2492
- "Overall": 65.93,
2493
-
2494
- "Style": 92.27,
2495
-
2496
- "World Knowledge": 86.62,
2497
-
2498
- "Attribute-Overall": 71.66,
2499
- "Quantity": 75.00,
2500
- "Expression": 56.41,
2501
- "Material": 78.77,
2502
- "Size": 79.17,
2503
- "Shape": 53.12,
2504
- "Color": 91.38,
2505
-
2506
- "Action-Overall": 68.73,
2507
- "Hand": 54.49,
2508
- "Full body": 76.09,
2509
- "Animal": 72.79,
2510
- "Non Contact": 69.90,
2511
- "Contact": 58.93,
2512
- "State": 76.89,
2513
-
2514
- "Relationship-Overall": 70.94,
2515
- "Composition": 68.92,
2516
- "Similarity": 70.56,
2517
- "Inclusion": 74.46,
2518
- "Comparison": 71.09,
2519
-
2520
- "Compound-Overall": 60.81,
2521
- "Imagination": 66.24,
2522
- "Feature matching": 55.26,
2523
-
2524
- "Grammar-Overall": 71.26,
2525
- "Pronoun Reference": 77.21,
2526
- "Consistency": 67.59,
2527
- "Negation": 68.08,
2528
-
2529
- "Layout-Overall": 77.23,
2530
- "2D": 80.97,
2531
- "3D": 73.36,
2532
-
2533
- "Logical Reasoning": 43.75,
2534
-
2535
  "Text": 16.03
2536
  },
2537
  {
@@ -2540,51 +2056,40 @@
2540
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
2541
  "open_source": true,
2542
  "release_date": "2025-08",
2543
-
2544
- "Overall": 78.81,
2545
-
2546
- "Style": 95.10,
2547
-
2548
- "World Knowledge": 94.30,
2549
-
2550
- "Attribute-Overall": 87.61,
2551
- "Quantity": 81.94,
2552
- "Expression": 84.62,
2553
  "Material": 91.98,
2554
- "Size": 84.03,
2555
- "Shape": 84.38,
2556
  "Color": 99.17,
2557
-
2558
- "Action-Overall": 84.13,
2559
- "Hand": 82.05,
2560
- "Full body": 88.59,
2561
- "Animal": 88.24,
2562
- "Non Contact": 80.61,
2563
- "Contact": 77.38,
2564
- "State": 87.74,
2565
-
2566
- "Relationship-Overall": 79.70,
2567
- "Composition": 81.76,
2568
- "Similarity": 67.78,
2569
  "Inclusion": 86.96,
2570
- "Comparison": 81.25,
2571
-
2572
- "Compound-Overall": 73.32,
2573
  "Imagination": 73.21,
2574
- "Feature matching": 73.44,
2575
-
2576
- "Grammar-Overall": 60.29,
2577
- "Pronoun Reference": 83.82,
2578
  "Consistency": 70.37,
2579
- "Negation": 27.31,
2580
-
2581
- "Layout-Overall": 85.52,
2582
- "2D": 86.40,
2583
- "3D": 85.23,
2584
-
2585
- "Logical Reasoning": 53.64,
2586
-
2587
- "Text": 76.14
2588
  }
2589
  ]
2590
- }
 
6
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
7
  "open_source": true,
8
  "release_date": "2025-11",
9
+ "Overall": 71.4,
10
+ "Style": 90.0,
 
11
  "World Knowledge": 92.25,
 
12
  "Attribute-Overall": 74.57,
13
+ "Quantity": 75.0,
14
  "Expression": 58.97,
15
  "Material": 79.25,
16
  "Size": 77.78,
17
  "Shape": 64.38,
18
  "Color": 95.83,
19
+ "Action-Overall": 69.3,
 
20
  "Hand": 62.82,
21
  "Full body": 73.37,
22
  "Animal": 78.68,
23
+ "Non Contact": 69.9,
24
  "Contact": 61.31,
25
  "State": 70.28,
 
26
  "Relationship-Overall": 71.57,
27
  "Composition": 75.68,
28
+ "Similarity": 65.0,
29
  "Inclusion": 75.54,
30
  "Comparison": 65.62,
 
31
  "Compound-Overall": 63.02,
32
  "Imagination": 64.29,
33
  "Feature matching": 61.72,
 
34
  "Grammar-Overall": 64.57,
35
  "Pronoun Reference": 79.78,
36
  "Consistency": 62.04,
37
  "Negation": 50.77,
 
38
  "Layout-Overall": 78.36,
39
  "2D": 83.09,
40
  "3D": 73.48,
 
41
  "Logical Reasoning": 39.68,
 
42
  "Text": 70.69
43
  },
44
  {
 
47
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
48
  "open_source": true,
49
  "release_date": "2025-11",
 
50
  "Overall": 84.76,
51
+ "Style": 96.6,
52
  "World Knowledge": 95.41,
 
53
  "Attribute-Overall": 87.39,
54
  "Quantity": 73.61,
55
  "Expression": 73.72,
 
57
  "Size": 91.67,
58
  "Shape": 88.12,
59
  "Color": 100.0,
 
60
  "Action-Overall": 82.22,
61
  "Hand": 74.36,
62
  "Full body": 86.41,
 
64
  "Non Contact": 82.14,
65
  "Contact": 80.95,
66
  "State": 84.43,
 
67
  "Relationship-Overall": 87.31,
68
  "Composition": 93.24,
69
  "Similarity": 77.78,
70
  "Inclusion": 88.59,
71
  "Comparison": 85.16,
 
72
  "Compound-Overall": 83.51,
73
  "Imagination": 84.95,
74
  "Feature matching": 82.03,
 
75
  "Grammar-Overall": 77.41,
76
+ "Pronoun Reference": 88.6,
77
+ "Consistency": 78.7,
78
  "Negation": 64.62,
 
79
  "Layout-Overall": 89.55,
80
  "2D": 87.87,
81
  "3D": 91.29,
 
82
  "Logical Reasoning": 62.84,
 
83
  "Text": 85.34
84
  },
85
  {
 
88
  "hf": "-",
89
  "open_source": false,
90
  "release_date": "2025-11",
 
91
  "Overall": 92.72,
92
+ "Style": 99.3,
 
 
93
  "World Knowledge": 97.47,
 
94
  "Attribute-Overall": 91.95,
95
  "Quantity": 90.28,
96
  "Expression": 85.53,
97
  "Material": 97.64,
98
  "Size": 93.75,
99
+ "Shape": 85.0,
100
  "Color": 99.17,
 
101
  "Action-Overall": 91.38,
102
  "Hand": 89.47,
103
  "Full body": 91.11,
104
  "Animal": 90.44,
105
+ "Non Contact": 89.8,
106
  "Contact": 94.05,
107
  "State": 92.92,
 
108
  "Relationship-Overall": 95.43,
109
  "Composition": 96.96,
110
  "Similarity": 96.11,
111
  "Inclusion": 92.39,
112
  "Comparison": 95.31,
 
113
  "Compound-Overall": 92.91,
114
  "Imagination": 95.15,
115
  "Feature matching": 90.62,
 
116
  "Grammar-Overall": 89.59,
117
  "Pronoun Reference": 94.49,
118
  "Consistency": 87.96,
119
  "Negation": 85.71,
 
120
  "Layout-Overall": 93.28,
121
  "2D": 92.65,
122
  "3D": 93.94,
 
123
  "Logical Reasoning": 80.24,
 
124
  "Text": 95.65
125
  },
126
  {
 
129
  "hf": "-",
130
  "open_source": false,
131
  "release_date": "2025-09",
132
+ "Overall": 77.87,
133
+ "Style": 92.64,
134
+ "World Knowledge": 94.75,
135
+ "Attribute-Overall": 81.49,
136
+ "Quantity": 75.0,
137
+ "Expression": 70.51,
 
 
138
  "Material": 91.04,
139
+ "Size": 83.09,
140
+ "Shape": 78.75,
141
+ "Color": 88.33,
142
+ "Action-Overall": 74.14,
143
+ "Hand": 59.87,
144
+ "Full body": 74.46,
145
+ "Animal": 77.94,
146
+ "Non Contact": 76.04,
 
147
  "Contact": 72.02,
148
+ "State": 81.6,
149
+ "Relationship-Overall": 81.98,
150
+ "Composition": 85.47,
151
+ "Similarity": 74.44,
152
+ "Inclusion": 81.52,
153
+ "Comparison": 85.16,
154
+ "Compound-Overall": 75.45,
155
+ "Imagination": 78.09,
156
+ "Feature matching": 72.77,
157
+ "Grammar-Overall": 72.79,
158
+ "Pronoun Reference": 83.7,
159
+ "Consistency": 72.69,
160
+ "Negation": 61.54,
161
+ "Layout-Overall": 76.87,
 
 
 
 
162
  "2D": 75.74,
163
+ "3D": 78.03,
164
+ "Logical Reasoning": 55.5,
165
+ "Text": 73.12
 
 
166
  },
167
  {
168
  "model": "Echo-4o",
 
170
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
171
  "open_source": true,
172
  "release_date": "2025-8",
 
173
  "Overall": 69.12,
174
+ "Style": 92.2,
175
  "World Knowledge": 90.51,
 
176
  "Attribute-Overall": 79.06,
177
  "Quantity": 70.14,
178
  "Expression": 71.15,
 
180
  "Size": 83.33,
181
  "Shape": 68.75,
182
  "Color": 98.33,
 
183
  "Action-Overall": 68.92,
184
  "Hand": 66.03,
185
+ "Full body": 66.3,
186
  "Animal": 77.94,
187
  "Non Contact": 67.86,
188
  "Contact": 59.52,
189
  "State": 75.94,
 
190
  "Relationship-Overall": 76.52,
191
  "Composition": 81.76,
192
  "Similarity": 70.56,
193
  "Inclusion": 77.72,
194
  "Comparison": 71.09,
 
195
  "Compound-Overall": 71.78,
196
  "Imagination": 76.79,
197
  "Feature matching": 66.67,
 
198
  "Grammar-Overall": 75.13,
199
  "Pronoun Reference": 80.51,
200
  "Consistency": 74.54,
201
+ "Negation": 70.0,
 
202
  "Layout-Overall": 82.28,
203
  "2D": 87.13,
204
  "3D": 77.27,
 
205
  "Logical Reasoning": 44.77,
 
206
  "Text": 10.06
207
+ },
208
  {
209
  "model": "UniWorld-V1",
210
  "link": "https://arxiv.org/pdf/2506.03147",
211
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
212
  "open_source": true,
213
  "release_date": "2025-06",
 
214
  "Overall": 63.11,
215
+ "Style": 91.1,
216
  "World Knowledge": 82.91,
 
217
  "Attribute-Overall": 70.62,
218
  "Quantity": 70.14,
219
  "Expression": 64.74,
 
221
  "Size": 72.22,
222
  "Shape": 66.25,
223
  "Color": 99.17,
 
224
  "Action-Overall": 67.21,
225
  "Hand": 55.13,
226
  "Full body": 72.28,
227
  "Animal": 73.53,
228
  "Non Contact": 63.78,
229
+ "Contact": 61.9,
230
+ "State": 75.0,
 
231
  "Relationship-Overall": 67.13,
232
+ "Composition": 72.3,
233
  "Similarity": 63.33,
234
  "Inclusion": 64.67,
235
  "Comparison": 64.06,
 
236
  "Compound-Overall": 54.51,
237
  "Imagination": 58.16,
238
  "Feature matching": 50.78,
 
239
  "Grammar-Overall": 63.77,
240
  "Pronoun Reference": 74.26,
241
  "Consistency": 64.35,
242
  "Negation": 52.31,
 
243
  "Layout-Overall": 69.03,
244
+ "2D": 73.9,
245
  "3D": 64.02,
 
246
  "Logical Reasoning": 38.41,
 
247
  "Text": 26.44
248
  },
249
  {
 
252
  "hf": "stabilityai/stable-diffusion-3.5-medium",
253
  "open_source": true,
254
  "release_date": "2024-10",
 
255
  "Overall": 60.71,
256
+ "Style": 89.8,
257
  "World Knowledge": 84.34,
 
258
  "Attribute-Overall": 66.99,
259
  "Quantity": 59.72,
260
  "Expression": 51.92,
 
262
  "Size": 70.83,
263
  "Shape": 63.75,
264
  "Color": 93.33,
 
265
  "Action-Overall": 60.65,
266
+ "Hand": 50.0,
267
  "Full body": 63.04,
268
  "Animal": 69.12,
269
  "Non Contact": 55.61,
270
  "Contact": 52.98,
271
+ "State": 71.7,
 
272
  "Relationship-Overall": 68.78,
273
  "Composition": 74.66,
274
  "Similarity": 61.67,
275
  "Inclusion": 73.37,
276
  "Comparison": 58.59,
 
277
  "Compound-Overall": 53.35,
278
  "Imagination": 58.16,
279
  "Feature matching": 48.44,
 
280
  "Grammar-Overall": 59.89,
281
  "Pronoun Reference": 73.53,
282
  "Consistency": 61.57,
283
  "Negation": 44.23,
 
284
  "Layout-Overall": 70.34,
285
  "2D": 72.06,
286
  "3D": 68.56,
 
287
  "Logical Reasoning": 37.73,
 
288
  "Text": 15.23
289
  },
290
  {
 
293
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
294
  "open_source": true,
295
  "release_date": "2025-09",
 
296
  "Overall": 71.12,
297
+ "Style": 89.7,
298
  "World Knowledge": 90.03,
 
299
  "Attribute-Overall": 81.62,
300
  "Quantity": 69.44,
301
+ "Expression": 85.9,
302
+ "Material": 81.6,
303
  "Size": 76.39,
304
+ "Shape": 80.0,
305
  "Color": 99.17,
 
306
  "Action-Overall": 73.76,
307
+ "Hand": 64.1,
308
+ "Full body": 78.8,
309
  "Animal": 75.74,
310
  "Non Contact": 73.98,
311
  "Contact": 64.88,
312
  "State": 82.08,
 
313
  "Relationship-Overall": 78.43,
314
  "Composition": 83.45,
315
  "Similarity": 74.44,
316
  "Inclusion": 81.52,
317
  "Comparison": 67.97,
 
318
  "Compound-Overall": 73.32,
319
  "Imagination": 78.83,
320
  "Feature matching": 67.71,
 
321
  "Grammar-Overall": 70.45,
322
  "Pronoun Reference": 81.99,
323
  "Consistency": 77.78,
324
  "Negation": 52.31,
 
325
  "Layout-Overall": 82.84,
326
  "2D": 84.93,
327
  "3D": 80.68,
 
328
  "Logical Reasoning": 45.45,
 
329
  "Text": 25.57
330
  },
331
  {
 
334
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
335
  "open_source": true,
336
  "release_date": "2025-05",
 
337
  "Overall": 41.35,
338
+ "Style": 82.4,
339
  "World Knowledge": 56.65,
 
340
  "Attribute-Overall": 48.93,
341
  "Quantity": 45.83,
342
  "Expression": 29.49,
 
344
  "Size": 49.31,
345
  "Shape": 44.38,
346
  "Color": 74.17,
 
347
  "Action-Overall": 37.83,
348
  "Hand": 15.38,
349
  "Full body": 40.22,
350
  "Animal": 52.94,
351
  "Non Contact": 33.16,
352
+ "Contact": 25.6,
353
+ "State": 56.6,
 
354
  "Relationship-Overall": 50.25,
355
  "Composition": 55.07,
356
  "Similarity": 57.22,
357
  "Inclusion": 47.28,
358
  "Comparison": 33.59,
 
359
  "Compound-Overall": 32.35,
360
  "Imagination": 40.56,
361
  "Feature matching": 23.96,
 
362
  "Grammar-Overall": 55.75,
363
  "Pronoun Reference": 59.19,
364
  "Consistency": 40.28,
365
+ "Negation": 65.0,
 
366
  "Layout-Overall": 30.22,
367
  "2D": 30.15,
368
+ "3D": 30.3,
 
369
  "Logical Reasoning": 17.95,
 
370
  "Text": 1.15
371
  },
372
  {
 
375
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
376
  "open_source": true,
377
  "release_date": "2025-06",
 
378
  "Overall": 63.09,
379
+ "Style": 91.9,
380
  "World Knowledge": 86.39,
 
381
  "Attribute-Overall": 72.12,
382
  "Quantity": 67.36,
383
  "Expression": 73.08,
384
  "Material": 66.04,
385
  "Size": 72.22,
386
  "Shape": 66.25,
387
+ "Color": 95.0,
 
388
  "Action-Overall": 62.83,
389
  "Hand": 55.77,
390
  "Full body": 69.02,
 
392
  "Non Contact": 62.24,
393
  "Contact": 54.17,
394
  "State": 66.51,
 
395
  "Relationship-Overall": 68.27,
396
  "Composition": 68.24,
397
  "Similarity": 67.78,
398
+ "Inclusion": 71.2,
399
  "Comparison": 64.84,
 
400
  "Compound-Overall": 56.31,
401
  "Imagination": 62.24,
402
  "Feature matching": 50.26,
 
403
  "Grammar-Overall": 59.89,
404
  "Pronoun Reference": 71.32,
405
  "Consistency": 60.65,
406
  "Negation": 47.31,
 
407
  "Layout-Overall": 71.64,
408
  "2D": 78.31,
409
  "3D": 64.77,
410
+ "Logical Reasoning": 32.5,
 
 
411
  "Text": 29.02
412
  },
413
  {
 
416
  "hf": "https://huggingface.co/FoundationVision/Infinity/tree/main/infinity_8b_512x512_weights",
417
  "open_source": true,
418
  "release_date": "2024-12",
 
419
  "Overall": 59.81,
420
+ "Style": 90.8,
421
  "World Knowledge": 87.97,
422
  "Attribute-Overall": 68.06,
423
  "Quantity": 66.67,
 
426
  "Size": 77.78,
427
  "Shape": 58.75,
428
  "Color": 93.33,
 
429
  "Action-Overall": 60.17,
430
  "Hand": 55.13,
431
  "Full body": 65.22,
432
  "Animal": 72.06,
433
  "Non Contact": 58.16,
434
+ "Contact": 49.4,
435
  "State": 62.26,
 
436
  "Relationship-Overall": 69.16,
437
  "Composition": 73.31,
438
+ "Similarity": 65.0,
439
  "Inclusion": 67.39,
440
  "Comparison": 67.97,
 
441
  "Compound-Overall": 51.42,
442
  "Imagination": 55.87,
443
  "Feature matching": 46.88,
 
444
  "Grammar-Overall": 60.16,
445
  "Pronoun Reference": 73.16,
446
  "Consistency": 65.74,
447
  "Negation": 41.92,
448
+ "Layout-Overall": 66.6,
 
449
  "2D": 71.69,
450
  "3D": 61.36,
 
451
  "Logical Reasoning": 31.36,
 
452
  "Text": 12.36
453
  },
454
+ {
455
  "model": "OneCAT",
456
  "link": "https://arxiv.org/pdf/2509.03498",
457
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
458
  "open_source": true,
459
  "release_date": "2025-09",
 
460
  "Overall": 58.28,
461
+ "Style": 93.3,
462
  "World Knowledge": 82.28,
 
463
  "Attribute-Overall": 63.46,
464
  "Quantity": 59.42,
465
  "Expression": 58.33,
466
  "Material": 67.45,
467
  "Size": 65.97,
468
+ "Shape": 42.5,
469
+ "Color": 92.5,
 
470
  "Action-Overall": 58.56,
471
+ "Hand": 35.9,
472
  "Full body": 65.22,
473
  "Animal": 69.12,
474
  "Non Contact": 57.65,
475
  "Contact": 48.81,
476
  "State": 71.23,
 
477
  "Relationship-Overall": 68.15,
478
  "Composition": 78.04,
479
  "Similarity": 69.44,
480
+ "Inclusion": 62.5,
481
  "Comparison": 51.56,
 
482
  "Compound-Overall": 56.96,
483
  "Imagination": 66.33,
484
+ "Feature matching": 47.4,
 
485
  "Grammar-Overall": 60.83,
486
  "Pronoun Reference": 70.59,
487
  "Consistency": 59.72,
488
  "Negation": 51.54,
 
489
  "Layout-Overall": 64.74,
490
  "2D": 64.34,
491
  "3D": 65.15,
 
492
  "Logical Reasoning": 33.41,
 
493
  "Text": 1.15
494
  },
495
+ {
496
  "model": "X-Omni",
497
  "link": "https://arxiv.org/pdf/2507.22058",
498
  "hf": "https://huggingface.co/X-Omni/X-Omni-En",
499
  "open_source": true,
500
  "release_date": "2025-08",
 
501
  "Overall": 53.77,
502
+ "Style": 72.7,
503
  "World Knowledge": 76.27,
 
504
  "Attribute-Overall": 60.04,
505
  "Quantity": 63.19,
506
  "Expression": 53.21,
 
508
  "Size": 55.56,
509
  "Shape": 53.75,
510
  "Color": 80.83,
 
511
  "Action-Overall": 54.47,
512
  "Hand": 46.79,
513
  "Full body": 56.52,
514
+ "Animal": 62.5,
515
  "Non Contact": 56.63,
516
  "Contact": 42.26,
517
  "State": 60.85,
518
+ "Relationship-Overall": 56.6,
 
519
  "Composition": 61.82,
520
  "Similarity": 56.11,
521
  "Inclusion": 51.09,
522
  "Comparison": 53.12,
 
523
  "Compound-Overall": 41.75,
524
  "Imagination": 47.45,
525
  "Feature matching": 35.94,
 
526
  "Grammar-Overall": 59.09,
527
  "Pronoun Reference": 66.91,
528
  "Consistency": 54.17,
529
+ "Negation": 55.0,
 
530
  "Layout-Overall": 62.69,
531
  "2D": 69.49,
532
  "3D": 55.68,
 
533
  "Logical Reasoning": 29.09,
534
+ "Text": 25.0
 
535
  },
536
  {
537
  "model": "FLUX.1-Krea-dev",
 
539
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev",
540
  "open_source": true,
541
  "release_date": "2025-08",
 
542
  "Overall": 69.88,
543
+ "Style": 88.7,
544
  "World Knowledge": 92.56,
 
545
  "Attribute-Overall": 75.96,
546
  "Quantity": 70.83,
547
+ "Expression": 60.9,
548
  "Material": 77.36,
549
  "Size": 79.17,
550
  "Shape": 73.12,
551
  "Color": 99.17,
 
552
  "Action-Overall": 71.01,
553
  "Hand": 64.74,
554
  "Full body": 70.11,
 
556
  "Non Contact": 72.96,
557
  "Contact": 67.26,
558
  "State": 73.11,
 
559
  "Relationship-Overall": 73.98,
560
  "Composition": 76.35,
561
  "Similarity": 66.11,
562
  "Inclusion": 77.17,
563
+ "Comparison": 75.0,
 
564
  "Compound-Overall": 64.43,
565
  "Imagination": 67.35,
566
  "Feature matching": 61.46,
 
567
  "Grammar-Overall": 63.37,
568
  "Pronoun Reference": 77.21,
569
  "Consistency": 67.13,
570
  "Negation": 45.77,
 
571
  "Layout-Overall": 84.14,
572
  "2D": 86.76,
573
  "3D": 81.44,
 
574
  "Logical Reasoning": 39.77,
 
575
  "Text": 44.83
576
  },
577
  {
 
580
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
581
  "open_source": true,
582
  "release_date": "2025-09",
 
583
  "Overall": 74.64,
584
  "Style": 90.88,
585
  "World Knowledge": 92.06,
 
590
  "Size": 78.47,
591
  "Shape": 68.12,
592
  "Color": 99.17,
 
593
  "Action-Overall": 77.81,
594
+ "Hand": 75.0,
595
  "Full body": 80.98,
596
  "Animal": 82.35,
597
  "Non Contact": 73.71,
598
  "Contact": 72.02,
599
  "State": 82.55,
 
600
  "Relationship-Overall": 77.54,
601
  "Composition": 78.38,
602
  "Similarity": 70.56,
603
  "Inclusion": 84.78,
604
+ "Comparison": 75.0,
 
605
  "Compound-Overall": 64.82,
606
  "Imagination": 64.54,
607
+ "Feature matching": 65.1,
 
608
  "Grammar-Overall": 62.83,
609
  "Pronoun Reference": 77.94,
610
+ "Consistency": 66.2,
611
  "Negation": 44.23,
 
612
  "Layout-Overall": 84.14,
613
  "2D": 86.76,
614
  "3D": 81.44,
 
615
  "Logical Reasoning": 46.59,
 
616
  "Text": 70.11
617
+ },
618
+ {
619
  "model": "BLIP3-o-Next",
620
  "link": "https://arxiv.org/pdf/2505.09568",
621
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
622
  "open_source": true,
623
  "release_date": "2025-08",
 
624
  "Overall": 65.15,
625
+ "Style": 91.0,
626
  "World Knowledge": 86.71,
627
  "Attribute-Overall": 70.94,
628
  "Quantity": 67.36,
 
630
  "Material": 70.28,
631
  "Size": 76.39,
632
  "Shape": 60.62,
633
+ "Color": 80.0,
 
634
  "Action-Overall": 66.83,
635
  "Hand": 57.69,
636
+ "Full body": 75.0,
637
  "Animal": 73.53,
638
  "Non Contact": 67.35,
639
  "Contact": 57.74,
640
  "State": 68.87,
641
+ "Relationship-Overall": 73.6,
 
642
  "Composition": 76.01,
643
+ "Similarity": 65.0,
644
  "Inclusion": 77.17,
645
+ "Comparison": 75.0,
 
646
  "Compound-Overall": 64.82,
647
  "Imagination": 73.72,
648
  "Feature matching": 55.73,
 
649
  "Grammar-Overall": 68.05,
650
  "Pronoun Reference": 76.47,
651
  "Consistency": 67.13,
652
+ "Negation": 60.0,
 
653
  "Layout-Overall": 76.31,
654
  "2D": 80.15,
655
  "3D": 72.35,
 
656
  "Logical Reasoning": 48.64,
657
+ "Text": 4.6
658
+ },
659
+ {
 
660
  "model": "Kolors",
661
  "link": "https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf",
662
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
663
  "open_source": true,
664
  "release_date": "2024-7",
665
+ "Overall": 46.07,
666
+ "Style": 84.4,
 
667
  "World Knowledge": 77.22,
 
668
  "Attribute-Overall": 54.17,
669
+ "Quantity": 62.5,
670
  "Expression": 33.33,
671
  "Material": 51.89,
672
+ "Size": 62.5,
673
  "Shape": 40.62,
674
  "Color": 83.33,
675
+ "Action-Overall": 48.0,
 
676
  "Hand": 42.95,
677
  "Full body": 42.39,
678
  "Animal": 56.62,
679
  "Non Contact": 45.92,
680
  "Contact": 39.88,
681
  "State": 59.43,
 
682
  "Relationship-Overall": 52.79,
683
  "Composition": 55.41,
684
  "Similarity": 53.89,
685
  "Inclusion": 51.63,
686
  "Comparison": 46.88,
 
687
  "Compound-Overall": 33.63,
688
  "Imagination": 41.33,
689
  "Feature matching": 25.78,
 
690
  "Grammar-Overall": 46.66,
691
  "Pronoun Reference": 56.62,
692
  "Consistency": 47.22,
693
  "Negation": 35.77,
 
694
  "Layout-Overall": 42.91,
695
  "2D": 43.01,
696
+ "3D": 42.8,
 
697
  "Logical Reasoning": 19.77,
 
698
  "Text": 1.15
699
  },
700
+ {
701
  "model": "Seedream-4.0",
702
  "link": "https://www.volcengine.com/docs/82379/1541523",
703
  "hf": "-",
704
  "open_source": false,
705
  "release_date": "2025-09",
706
+ "Overall": 87.34,
707
+ "Style": 98.8,
 
 
 
708
  "World Knowledge": 95.41,
 
709
  "Attribute-Overall": 88.57,
710
  "Quantity": 86.81,
711
+ "Expression": 85.9,
712
  "Material": 97.17,
713
  "Size": 84.03,
714
  "Shape": 76.88,
715
  "Color": 100.0,
 
716
  "Action-Overall": 85.65,
717
  "Hand": 77.56,
718
+ "Full body": 87.5,
719
  "Animal": 88.24,
720
+ "Non Contact": 80.1,
721
  "Contact": 83.93,
722
  "State": 94.81,
 
723
  "Relationship-Overall": 87.69,
724
  "Composition": 88.18,
725
  "Similarity": 80.56,
726
  "Inclusion": 94.02,
727
+ "Comparison": 87.5,
 
728
  "Compound-Overall": 86.08,
729
  "Imagination": 88.27,
730
  "Feature matching": 83.85,
 
731
  "Grammar-Overall": 78.88,
732
  "Pronoun Reference": 84.93,
733
  "Consistency": 79.17,
734
  "Negation": 72.31,
 
735
  "Layout-Overall": 90.67,
736
  "2D": 90.81,
737
  "3D": 90.53,
 
738
  "Logical Reasoning": 67.73,
 
739
  "Text": 93.97
740
+ },
741
+ {
742
  "model": "Imagen-4.0-generate-preview-06-06",
743
  "link": "https://deepmind.google/models/imagen/",
744
  "hf": "-",
745
  "open_source": false,
746
  "release_date": "2025-01",
 
747
  "Overall": 85.84,
748
+ "Style": 97.8,
 
 
749
  "World Knowledge": 96.36,
 
750
  "Attribute-Overall": 84.94,
751
  "Quantity": 84.03,
752
  "Expression": 76.92,
 
754
  "Size": 89.58,
755
  "Shape": 71.88,
756
  "Color": 98.33,
757
+ "Action-Overall": 88.4,
 
758
  "Hand": 86.54,
759
  "Full body": 94.02,
760
  "Animal": 88.97,
761
  "Non Contact": 85.71,
762
  "Contact": 83.33,
763
  "State": 91.04,
 
764
  "Relationship-Overall": 89.34,
765
  "Composition": 93.58,
766
  "Similarity": 78.89,
767
  "Inclusion": 95.11,
768
  "Comparison": 85.94,
 
769
  "Compound-Overall": 85.31,
770
  "Imagination": 90.31,
771
  "Feature matching": 80.21,
 
772
  "Grammar-Overall": 79.68,
773
  "Pronoun Reference": 86.76,
774
  "Consistency": 77.31,
775
  "Negation": 74.23,
 
776
  "Layout-Overall": 88.81,
777
  "2D": 88.24,
778
  "3D": 89.39,
 
779
  "Logical Reasoning": 70.45,
780
+ "Text": 77.3
 
781
  },
782
+ {
783
  "model": "Runway-Gen4-Image",
784
  "link": "https://docs.dev.runwayml.com/api/#tag/Start-generating/paths/~1v1~1text_to_image/post",
785
  "hf": "-",
786
  "open_source": false,
787
  "release_date": "2024-11",
788
+ "Overall": 69.55,
789
+ "Style": 93.85,
 
 
 
790
  "World Knowledge": 90.36,
791
+ "Attribute-Overall": 74.68,
792
+ "Quantity": 73.57,
793
+ "Expression": 56.58,
794
+ "Material": 87.5,
795
+ "Size": 69.44,
796
+ "Shape": 66.25,
797
+ "Color": 94.17,
798
+ "Action-Overall": 70.11,
799
+ "Hand": 58.33,
800
+ "Full body": 78.8,
801
+ "Animal": 79.41,
802
+ "Non Contact": 67.19,
803
+ "Contact": 62.2,
804
+ "State": 74.06,
805
+ "Relationship-Overall": 71.54,
806
+ "Composition": 73.99,
807
+ "Similarity": 61.11,
808
+ "Inclusion": 76.7,
809
+ "Comparison": 73.44,
810
+ "Compound-Overall": 66.58,
811
+ "Imagination": 70.36,
812
+ "Feature matching": 62.63,
813
+ "Grammar-Overall": 70.9,
 
 
 
 
 
814
  "Pronoun Reference": 71.21,
815
+ "Consistency": 68.06,
816
+ "Negation": 73.02,
817
+ "Layout-Overall": 75.38,
818
+ "2D": 75.0,
819
+ "3D": 75.77,
820
+ "Logical Reasoning": 48.39,
821
+ "Text": 33.72
 
 
 
822
  },
823
+ {
824
  "model": "Nano Banana",
825
  "link": "https://ainanobanana.io/",
826
  "hf": "-",
827
  "open_source": false,
828
  "release_date": "2025-08",
829
+ "Overall": 87.29,
830
+ "Style": 98.59,
831
+ "World Knowledge": 96.2,
832
+ "Attribute-Overall": 87.99,
833
+ "Quantity": 86.43,
834
+ "Expression": 80.77,
835
+ "Material": 88.46,
836
+ "Size": 95.83,
837
+ "Shape": 80.77,
838
+ "Color": 98.33,
839
+ "Action-Overall": 87.36,
840
+ "Hand": 80.13,
841
+ "Full body": 93.48,
842
+ "Animal": 88.24,
843
+ "Non Contact": 83.67,
844
+ "Contact": 80.95,
845
+ "State": 95.28,
846
+ "Relationship-Overall": 92.47,
847
+ "Composition": 93.49,
848
+ "Similarity": 86.67,
849
+ "Inclusion": 94.02,
850
+ "Comparison": 96.09,
851
+ "Compound-Overall": 88.34,
852
+ "Imagination": 90.21,
853
+ "Feature matching": 86.46,
854
+ "Grammar-Overall": 83.82,
855
+ "Pronoun Reference": 90.44,
856
+ "Consistency": 83.33,
857
+ "Negation": 77.31,
858
+ "Layout-Overall": 91.42,
859
+ "2D": 93.01,
860
+ "3D": 89.77,
861
+ "Logical Reasoning": 73.41,
862
+ "Text": 73.28
 
 
 
 
 
 
 
 
 
 
 
863
  },
864
+ {
865
  "model": "Stability-AI-stable-image-ultra",
866
  "link": "https://platform.stability.ai/docs/api-reference#tag/Generate/paths/~1v2beta~1stable-image~1generate~1ultra/post",
867
  "hf": "-",
868
  "open_source": false,
869
  "release_date": "2024-06",
870
+ "Overall": 61.98,
871
+ "Style": 86.9,
872
+ "World Knowledge": 87.5,
873
+ "Attribute-Overall": 66.99,
 
 
 
 
874
  "Quantity": 67.36,
875
+ "Expression": 49.36,
876
+ "Material": 63.21,
877
  "Size": 69.44,
878
+ "Shape": 68.12,
879
  "Color": 91.67,
880
+ "Action-Overall": 58.46,
881
+ "Hand": 55.13,
882
+ "Full body": 60.33,
883
+ "Animal": 64.71,
884
+ "Non Contact": 54.08,
 
885
  "Contact": 51.79,
886
+ "State": 64.62,
887
+ "Relationship-Overall": 69.29,
888
+ "Composition": 74.32,
 
889
  "Similarity": 66.67,
890
+ "Inclusion": 69.02,
891
+ "Comparison": 61.72,
892
+ "Compound-Overall": 54.38,
893
+ "Imagination": 61.99,
894
+ "Feature matching": 46.61,
895
+ "Grammar-Overall": 60.16,
896
+ "Pronoun Reference": 77.21,
897
+ "Consistency": 56.48,
898
+ "Negation": 45.38,
899
+ "Layout-Overall": 65.3,
900
+ "2D": 69.12,
901
+ "3D": 61.36,
902
+ "Logical Reasoning": 30.91,
903
+ "Text": 39.94
 
 
 
 
 
904
  },
905
  {
906
  "model": "HiDream_v2L",
 
908
  "hf": "-",
909
  "open_source": false,
910
  "release_date": "2025-07",
911
+ "Overall": 61.38,
912
+ "Style": 87.29,
 
 
 
913
  "World Knowledge": 89.62,
914
+ "Attribute-Overall": 63.4,
915
+ "Quantity": 67.14,
916
+ "Expression": 42.95,
 
917
  "Material": 57.82,
918
  "Size": 74.26,
919
+ "Shape": 57.32,
920
+ "Color": 91.53,
921
+ "Action-Overall": 60.36,
922
+ "Hand": 54.49,
923
+ "Full body": 59.12,
924
+ "Animal": 72.06,
925
+ "Non Contact": 60.94,
926
+ "Contact": 52.73,
927
+ "State": 63.68,
928
+ "Relationship-Overall": 66.75,
929
+ "Composition": 72.26,
930
+ "Similarity": 64.2,
 
 
931
  "Inclusion": 65.93,
932
+ "Comparison": 58.73,
 
933
  "Compound-Overall": 49.28,
934
  "Imagination": 53.75,
935
  "Feature matching": 44.76,
936
+ "Grammar-Overall": 60.08,
937
+ "Pronoun Reference": 74.62,
938
+ "Consistency": 60.48,
939
+ "Negation": 45.0,
940
+ "Layout-Overall": 67.36,
941
+ "2D": 68.91,
942
+ "3D": 65.78,
943
+ "Logical Reasoning": 25.35,
 
 
 
 
944
  "Text": 44.31
945
  },
946
  {
 
949
  "hf": "-",
950
  "open_source": false,
951
  "release_date": "2025-01",
952
+ "Overall": 77.37,
953
+ "Style": 92.7,
954
+ "World Knowledge": 94.94,
955
+ "Attribute-Overall": 83.23,
 
 
 
 
956
  "Quantity": 77.08,
957
+ "Expression": 73.08,
958
+ "Material": 85.38,
959
  "Size": 89.58,
960
+ "Shape": 79.38,
961
+ "Color": 97.5,
 
962
  "Action-Overall": 79.85,
963
+ "Hand": 74.36,
964
+ "Full body": 85.33,
965
+ "Animal": 85.29,
966
+ "Non Contact": 76.02,
967
+ "Contact": 75.6,
968
+ "State": 82.55,
969
+ "Relationship-Overall": 80.96,
970
+ "Composition": 82.09,
971
+ "Similarity": 73.33,
972
+ "Inclusion": 87.5,
973
+ "Comparison": 79.69,
974
+ "Compound-Overall": 72.94,
975
+ "Imagination": 78.06,
976
+ "Feature matching": 67.71,
977
+ "Grammar-Overall": 76.87,
978
+ "Pronoun Reference": 83.09,
979
+ "Consistency": 78.24,
980
+ "Negation": 69.23,
981
+ "Layout-Overall": 86.38,
982
+ "2D": 87.87,
983
+ "3D": 84.85,
984
+ "Logical Reasoning": 54.09,
985
+ "Text": 51.72
 
 
 
 
 
 
986
  },
987
+ {
988
  "model": "Recraft",
989
  "link": "https://www.recraft.ai/docs#generate-image",
990
  "hf": "-",
991
  "open_source": false,
992
  "release_date": "2024-12",
993
+ "Overall": 62.64,
994
+ "Style": 87.1,
995
+ "World Knowledge": 90.66,
996
+ "Attribute-Overall": 67.31,
997
+ "Quantity": 66.67,
998
+ "Expression": 57.69,
999
+ "Material": 67.92,
 
 
1000
  "Size": 65.97,
1001
+ "Shape": 56.88,
1002
+ "Color": 95.0,
1003
+ "Action-Overall": 60.84,
1004
+ "Hand": 50.0,
 
1005
  "Full body": 70.65,
1006
+ "Animal": 75.74,
1007
+ "Non Contact": 58.16,
1008
+ "Contact": 49.4,
1009
+ "State": 62.26,
 
1010
  "Relationship-Overall": 62.56,
1011
+ "Composition": 62.16,
1012
+ "Similarity": 55.56,
1013
+ "Inclusion": 63.59,
1014
+ "Comparison": 71.88,
1015
+ "Compound-Overall": 44.2,
1016
+ "Imagination": 42.6,
1017
+ "Feature matching": 45.83,
 
 
1018
  "Grammar-Overall": 63.64,
1019
+ "Pronoun Reference": 72.06,
1020
+ "Consistency": 56.94,
1021
+ "Negation": 60.38,
1022
+ "Layout-Overall": 59.7,
1023
+ "2D": 60.29,
1024
+ "3D": 59.09,
1025
+ "Logical Reasoning": 28.86,
1026
+ "Text": 61.49
1027
+ },
1028
+ {
 
 
 
1029
  "model": "FLUX-kontext-max",
1030
  "link": "https://bfl.ai/models/flux-kontext",
1031
  "hf": "-",
1032
  "open_source": false,
1033
  "release_date": "2025-05",
1034
+ "Overall": 80.08,
1035
+ "Style": 96.39,
1036
+ "World Knowledge": 93.71,
 
 
1037
  "Attribute-Overall": 80.93,
1038
+ "Quantity": 75.0,
1039
+ "Expression": 77.03,
1040
+ "Material": 80.66,
1041
+ "Size": 86.11,
1042
+ "Shape": 75.62,
1043
  "Color": 94.17,
1044
+ "Action-Overall": 76.62,
 
1045
  "Hand": 67.95,
1046
+ "Full body": 79.89,
1047
  "Animal": 77.94,
1048
+ "Non Contact": 75.0,
1049
+ "Contact": 72.02,
1050
  "State": 84.43,
1051
+ "Relationship-Overall": 83.29,
1052
+ "Composition": 85.14,
1053
+ "Similarity": 76.67,
1054
+ "Inclusion": 89.44,
1055
+ "Comparison": 79.69,
1056
+ "Compound-Overall": 78.22,
1057
+ "Imagination": 82.65,
1058
+ "Feature matching": 73.7,
1059
+ "Grammar-Overall": 78.67,
1060
+ "Pronoun Reference": 85.0,
1061
+ "Consistency": 77.31,
1062
+ "Negation": 73.46,
 
 
 
 
1063
  "Layout-Overall": 85.04,
1064
  "2D": 86.74,
1065
+ "3D": 83.33,
1066
+ "Logical Reasoning": 63.41,
1067
+ "Text": 64.53
1068
+ },
1069
+ {
 
 
1070
  "model": "FLUX-kontext-pro",
1071
  "link": "https://bfl.ai/models/flux-kontext",
1072
  "hf": "-",
1073
  "open_source": false,
1074
  "release_date": "2025-05",
1075
+ "Overall": 75.36,
1076
+ "Style": 94.08,
1077
+ "World Knowledge": 91.45,
1078
+ "Attribute-Overall": 78.66,
1079
+ "Quantity": 74.31,
1080
+ "Expression": 72.3,
1081
+ "Material": 77.36,
1082
+ "Size": 83.33,
1083
+ "Shape": 71.88,
1084
+ "Color": 97.5,
1085
+ "Action-Overall": 76.52,
1086
+ "Hand": 75.0,
1087
+ "Full body": 77.17,
1088
+ "Animal": 81.62,
1089
+ "Non Contact": 69.9,
1090
+ "Contact": 72.62,
1091
+ "State": 83.02,
1092
+ "Relationship-Overall": 78.83,
1093
+ "Composition": 81.76,
1094
+ "Similarity": 73.89,
1095
+ "Inclusion": 82.78,
1096
+ "Comparison": 73.44,
1097
+ "Compound-Overall": 74.1,
1098
+ "Imagination": 76.02,
1099
+ "Feature matching": 72.14,
1100
+ "Grammar-Overall": 71.6,
1101
+ "Pronoun Reference": 83.46,
1102
+ "Consistency": 75.0,
1103
+ "Negation": 56.92,
1104
+ "Layout-Overall": 83.14,
1105
+ "2D": 85.61,
1106
+ "3D": 80.68,
1107
+ "Logical Reasoning": 55.23,
1108
+ "Text": 50.0
1109
+ },
1110
+ {
 
 
 
 
 
 
 
 
 
1111
  "model": "wan2.2-t2i-plus",
1112
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
1113
  "hf": "-",
1114
  "open_source": false,
1115
  "release_date": "2025-07",
1116
+ "Overall": 64.71,
1117
+ "Style": 91.2,
1118
+ "World Knowledge": 86.71,
1119
+ "Attribute-Overall": 69.23,
1120
+ "Quantity": 75.0,
 
 
1121
  "Expression": 55.77,
1122
+ "Material": 65.57,
1123
+ "Size": 72.92,
1124
+ "Shape": 61.25,
1125
+ "Color": 92.5,
1126
+ "Action-Overall": 69.14,
1127
+ "Hand": 60.26,
1128
+ "Full body": 78.57,
 
1129
  "Animal": 69.12,
1130
  "Non Contact": 68.88,
1131
+ "Contact": 58.93,
1132
+ "State": 75.94,
1133
+ "Relationship-Overall": 72.77,
 
1134
  "Composition": 70.27,
1135
+ "Similarity": 69.66,
1136
+ "Inclusion": 75.54,
1137
+ "Comparison": 78.91,
1138
+ "Compound-Overall": 60.98,
1139
+ "Imagination": 67.95,
1140
+ "Feature matching": 53.91,
1141
+ "Grammar-Overall": 67.65,
1142
+ "Pronoun Reference": 76.84,
1143
+ "Consistency": 55.56,
1144
+ "Negation": 68.08,
1145
+ "Layout-Overall": 74.44,
1146
+ "2D": 76.84,
 
 
 
1147
  "3D": 71.97,
 
1148
  "Logical Reasoning": 42.05,
1149
+ "Text": 12.93
 
1150
  },
1151
  {
1152
  "model": "FLUX.1-dev",
 
1154
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1155
  "open_source": true,
1156
  "release_date": "2024-08",
1157
+ "Overall": 60.97,
1158
+ "Style": 85.0,
1159
+ "World Knowledge": 87.5,
1160
+ "Attribute-Overall": 67.2,
1161
+ "Quantity": 71.53,
1162
+ "Expression": 51.92,
 
 
1163
  "Material": 58.96,
1164
+ "Size": 74.31,
1165
+ "Shape": 65.62,
1166
+ "Color": 90.0,
1167
+ "Action-Overall": 62.26,
1168
+ "Hand": 50.0,
1169
+ "Full body": 69.02,
1170
+ "Animal": 69.12,
1171
+ "Non Contact": 60.2,
1172
+ "Contact": 61.9,
1173
+ "State": 63.21,
1174
+ "Relationship-Overall": 66.88,
1175
+ "Composition": 66.89,
1176
+ "Similarity": 65.56,
 
 
1177
  "Inclusion": 72.83,
1178
+ "Comparison": 60.16,
1179
+ "Compound-Overall": 45.75,
1180
+ "Imagination": 46.17,
1181
+ "Feature matching": 45.31,
1182
+ "Grammar-Overall": 62.3,
1183
+ "Pronoun Reference": 76.47,
1184
+ "Consistency": 61.57,
1185
+ "Negation": 48.08,
1186
+ "Layout-Overall": 70.9,
1187
+ "2D": 74.63,
1188
+ "3D": 67.05,
1189
+ "Logical Reasoning": 29.77,
 
 
 
 
 
1190
  "Text": 32.18
1191
  },
1192
  {
 
1195
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
1196
  "open_source": true,
1197
  "release_date": "2025-05",
1198
+ "Overall": 71.36,
1199
+ "Style": 92.3,
1200
+ "World Knowledge": 93.67,
1201
+ "Attribute-Overall": 73.4,
 
 
1202
  "Quantity": 73.61,
1203
+ "Expression": 61.54,
1204
  "Material": 72.17,
1205
  "Size": 79.17,
1206
+ "Shape": 62.5,
1207
  "Color": 98.33,
1208
+ "Action-Overall": 72.53,
1209
+ "Hand": 60.9,
 
1210
  "Full body": 76.09,
1211
+ "Animal": 74.26,
1212
+ "Non Contact": 73.98,
1213
+ "Contact": 68.45,
1214
  "State": 78.77,
1215
+ "Relationship-Overall": 74.24,
1216
+ "Composition": 76.69,
1217
+ "Similarity": 67.78,
 
1218
  "Inclusion": 78.26,
1219
+ "Comparison": 71.88,
1220
+ "Compound-Overall": 60.31,
1221
+ "Imagination": 61.99,
1222
+ "Feature matching": 58.59,
1223
+ "Grammar-Overall": 62.43,
1224
+ "Pronoun Reference": 81.62,
1225
+ "Consistency": 63.89,
1226
+ "Negation": 41.15,
1227
+ "Layout-Overall": 77.61,
 
 
 
1228
  "2D": 82.72,
1229
+ "3D": 72.35,
1230
+ "Logical Reasoning": 40.45,
1231
+ "Text": 66.67
 
 
1232
  },
1233
  {
1234
  "model": "Pref-GRPO",
 
1236
  "hf": "https://huggingface.co/CodeGoat24/FLUX.1-dev-PrefGRPO",
1237
  "open_source": true,
1238
  "release_date": "2025-08",
1239
+ "Overall": 68.41,
1240
+ "Style": 87.9,
1241
+ "World Knowledge": 86.08,
1242
+ "Attribute-Overall": 72.86,
1243
+ "Quantity": 68.75,
1244
+ "Expression": 59.62,
1245
+ "Material": 74.06,
1246
+ "Size": 76.39,
1247
+ "Shape": 65.62,
1248
+ "Color": 98.33,
1249
+ "Action-Overall": 69.2,
1250
+ "Hand": 57.69,
1251
+ "Full body": 72.83,
1252
+ "Animal": 75.0,
1253
+ "Non Contact": 70.41,
1254
+ "Contact": 64.29,
1255
+ "State": 73.58,
1256
+ "Relationship-Overall": 76.27,
1257
+ "Composition": 81.08,
 
 
 
 
1258
  "Similarity": 76.67,
1259
+ "Inclusion": 75.54,
1260
  "Comparison": 65.62,
1261
+ "Compound-Overall": 62.89,
1262
+ "Imagination": 66.58,
1263
+ "Feature matching": 59.11,
1264
+ "Grammar-Overall": 62.03,
1265
+ "Pronoun Reference": 77.94,
1266
+ "Consistency": 66.67,
1267
+ "Negation": 41.54,
1268
+ "Layout-Overall": 78.54,
1269
+ "2D": 83.82,
1270
+ "3D": 73.11,
1271
+ "Logical Reasoning": 40.68,
1272
+ "Text": 47.7
 
 
 
 
 
1273
  },
1274
  {
1275
  "model": "SD-3.5-Large",
 
1277
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large",
1278
  "open_source": true,
1279
  "release_date": "2024-10",
1280
+ "Overall": 62.89,
1281
+ "Style": 88.6,
1282
+ "World Knowledge": 89.72,
1283
+ "Attribute-Overall": 68.8,
1284
+ "Quantity": 69.44,
1285
+ "Expression": 51.28,
1286
+ "Material": 70.28,
1287
+ "Size": 70.83,
1288
+ "Shape": 64.38,
1289
+ "Color": 91.67,
1290
+ "Action-Overall": 61.98,
1291
+ "Hand": 57.69,
1292
+ "Full body": 63.04,
1293
+ "Animal": 62.5,
1294
+ "Non Contact": 59.69,
1295
+ "Contact": 58.93,
1296
+ "State": 68.4,
1297
+ "Relationship-Overall": 67.51,
1298
+ "Composition": 73.99,
1299
+ "Similarity": 65.0,
1300
+ "Inclusion": 66.3,
1301
+ "Comparison": 57.81,
1302
+ "Compound-Overall": 58.38,
1303
+ "Imagination": 68.37,
1304
+ "Feature matching": 48.18,
1305
+ "Grammar-Overall": 59.89,
1306
+ "Pronoun Reference": 77.21,
1307
+ "Consistency": 60.19,
1308
+ "Negation": 41.54,
1309
+ "Layout-Overall": 67.72,
 
 
 
 
 
 
 
1310
  "2D": 70.96,
1311
+ "3D": 64.39,
1312
+ "Logical Reasoning": 32.05,
1313
+ "Text": 34.2
 
 
1314
  },
1315
  {
1316
  "model": "Janus-Pro",
 
1318
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
1319
  "open_source": true,
1320
  "release_date": "2025-01",
1321
+ "Overall": 61.36,
1322
+ "Style": 90.4,
1323
+ "World Knowledge": 86.55,
1324
+ "Attribute-Overall": 68.59,
 
1325
  "Quantity": 56.25,
1326
+ "Expression": 57.69,
1327
+ "Material": 74.06,
1328
  "Size": 73.61,
1329
  "Shape": 61.88,
1330
  "Color": 90.83,
1331
+ "Action-Overall": 63.88,
1332
+ "Hand": 47.44,
1333
+ "Full body": 65.22,
1334
+ "Animal": 72.79,
1335
+ "Non Contact": 60.71,
1336
+ "Contact": 59.52,
1337
+ "State": 75.47,
1338
+ "Relationship-Overall": 69.54,
 
 
1339
  "Composition": 76.01,
1340
+ "Similarity": 58.33,
1341
+ "Inclusion": 73.91,
1342
+ "Comparison": 64.06,
1343
+ "Compound-Overall": 60.18,
1344
+ "Imagination": 67.35,
1345
+ "Feature matching": 52.86,
1346
+ "Grammar-Overall": 64.04,
1347
+ "Pronoun Reference": 76.1,
1348
+ "Consistency": 64.81,
1349
+ "Negation": 50.77,
1350
+ "Layout-Overall": 72.76,
 
 
 
1351
  "2D": 74.63,
1352
+ "3D": 70.83,
1353
+ "Logical Reasoning": 35.68,
1354
+ "Text": 2.01
 
 
1355
  },
1356
  {
1357
  "model": "Show-o2",
 
1359
  "hf": "https://huggingface.co/showlab/show-o2-7B",
1360
  "open_source": true,
1361
  "release_date": "2025-06",
1362
+ "Overall": 61.9,
1363
+ "Style": 87.4,
1364
+ "World Knowledge": 85.44,
1365
+ "Attribute-Overall": 69.87,
 
1366
  "Quantity": 59.03,
1367
+ "Expression": 64.1,
1368
+ "Material": 70.75,
1369
+ "Size": 74.31,
1370
+ "Shape": 61.25,
1371
+ "Color": 95.0,
1372
+ "Action-Overall": 69.01,
1373
+ "Hand": 54.49,
1374
+ "Full body": 75.0,
1375
+ "Animal": 75.0,
1376
+ "Non Contact": 72.45,
1377
+ "Contact": 50.6,
1378
+ "State": 82.08,
1379
+ "Relationship-Overall": 68.78,
1380
+ "Composition": 76.35,
1381
+ "Similarity": 60.56,
1382
+ "Inclusion": 71.2,
1383
+ "Comparison": 59.38,
1384
+ "Compound-Overall": 63.79,
1385
+ "Imagination": 66.84,
1386
+ "Feature matching": 60.68,
1387
+ "Grammar-Overall": 60.83,
1388
+ "Pronoun Reference": 77.57,
1389
+ "Consistency": 63.43,
1390
+ "Negation": 41.15,
1391
+ "Layout-Overall": 73.13,
1392
+ "2D": 75.37,
1393
+ "3D": 70.83,
1394
+ "Logical Reasoning": 39.55,
 
 
 
 
 
 
 
1395
  "Text": 1.15
1396
  },
1397
  {
 
1400
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
1401
  "open_source": true,
1402
  "release_date": "2025-05",
1403
+ "Overall": 60.66,
1404
+ "Style": 89.3,
1405
+ "World Knowledge": 84.81,
1406
+ "Attribute-Overall": 66.45,
1407
+ "Quantity": 61.11,
1408
+ "Expression": 46.15,
1409
+ "Material": 69.34,
1410
+ "Size": 78.47,
1411
+ "Shape": 55.62,
1412
+ "Color": 94.17,
1413
+ "Action-Overall": 60.74,
1414
+ "Hand": 54.49,
1415
+ "Full body": 59.78,
1416
+ "Animal": 68.38,
1417
+ "Non Contact": 58.16,
1418
+ "Contact": 52.98,
1419
+ "State": 69.81,
1420
+ "Relationship-Overall": 70.18,
1421
+ "Composition": 75.0,
1422
+ "Similarity": 68.33,
1423
+ "Inclusion": 71.2,
1424
+ "Comparison": 60.16,
1425
+ "Compound-Overall": 56.06,
1426
+ "Imagination": 63.52,
1427
+ "Feature matching": 48.44,
1428
+ "Grammar-Overall": 65.78,
1429
+ "Pronoun Reference": 73.53,
1430
+ "Consistency": 66.67,
1431
+ "Negation": 56.92,
1432
+ "Layout-Overall": 74.63,
1433
+ "2D": 77.94,
1434
+ "3D": 71.21,
1435
+ "Logical Reasoning": 30.91,
 
 
 
 
 
 
 
 
1436
  "Text": 7.76
1437
  },
1438
  {
 
1441
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-Model-8B",
1442
  "open_source": true,
1443
  "release_date": "2025-05",
1444
+ "Overall": 59.71,
1445
+ "Style": 91.4,
1446
+ "World Knowledge": 79.59,
1447
+ "Attribute-Overall": 63.68,
1448
+ "Quantity": 53.47,
1449
+ "Expression": 59.62,
1450
+ "Material": 63.21,
1451
+ "Size": 75.0,
1452
+ "Shape": 51.88,
1453
+ "Color": 84.17,
1454
+ "Action-Overall": 63.12,
1455
+ "Hand": 54.49,
1456
+ "Full body": 69.02,
1457
+ "Animal": 68.38,
1458
+ "Non Contact": 59.69,
1459
+ "Contact": 50.6,
1460
+ "State": 74.06,
1461
+ "Relationship-Overall": 67.89,
1462
+ "Composition": 73.99,
1463
+ "Similarity": 62.78,
1464
+ "Inclusion": 65.22,
 
 
 
1465
  "Comparison": 64.84,
1466
+ "Compound-Overall": 54.25,
1467
+ "Imagination": 62.76,
 
1468
  "Feature matching": 45.57,
1469
+ "Grammar-Overall": 67.65,
1470
+ "Pronoun Reference": 78.31,
1471
+ "Consistency": 59.72,
1472
+ "Negation": 63.08,
1473
+ "Layout-Overall": 68.1,
1474
+ "2D": 74.63,
1475
+ "3D": 61.36,
1476
+ "Logical Reasoning": 40.23,
 
 
 
 
1477
  "Text": 1.15
1478
  },
1479
  {
 
1482
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
1483
  "open_source": true,
1484
  "release_date": "2024-03",
1485
+ "Overall": 56.0,
1486
+ "Style": 80.8,
1487
+ "World Knowledge": 81.96,
1488
+ "Attribute-Overall": 63.14,
1489
+ "Quantity": 70.83,
1490
+ "Expression": 46.79,
1491
+ "Material": 55.66,
1492
+ "Size": 68.75,
1493
+ "Shape": 58.75,
1494
+ "Color": 87.5,
1495
+ "Action-Overall": 59.51,
1496
+ "Hand": 57.69,
 
 
1497
  "Full body": 59.78,
1498
+ "Animal": 69.85,
1499
+ "Non Contact": 52.55,
1500
+ "Contact": 53.57,
1501
+ "State": 65.09,
1502
+ "Relationship-Overall": 60.91,
1503
+ "Composition": 58.11,
1504
+ "Similarity": 60.0,
1505
+ "Inclusion": 66.3,
1506
+ "Comparison": 60.94,
1507
+ "Compound-Overall": 44.97,
1508
+ "Imagination": 49.23,
1509
+ "Feature matching": 40.62,
 
 
 
1510
  "Grammar-Overall": 54.81,
1511
  "Pronoun Reference": 69.49,
1512
+ "Consistency": 54.17,
1513
+ "Negation": 40.0,
1514
+ "Layout-Overall": 69.03,
1515
+ "2D": 76.84,
 
1516
  "3D": 60.98,
1517
+ "Logical Reasoning": 27.95,
1518
+ "Text": 16.95
 
 
1519
  },
1520
  {
1521
  "model": "Hunyuan-DiT",
 
1523
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1524
  "open_source": true,
1525
  "release_date": "2024-05",
1526
+ "Overall": 51.07,
1527
+ "Style": 92.9,
1528
+ "World Knowledge": 78.8,
1529
+ "Attribute-Overall": 63.14,
1530
+ "Quantity": 70.14,
1531
+ "Expression": 47.44,
1532
+ "Material": 69.34,
1533
+ "Size": 62.5,
1534
+ "Shape": 46.88,
 
 
 
 
1535
  "Color": 86.67,
1536
+ "Action-Overall": 48.95,
1537
+ "Hand": 35.9,
1538
+ "Full body": 53.8,
1539
+ "Animal": 56.62,
1540
+ "Non Contact": 43.37,
1541
+ "Contact": 37.5,
1542
+ "State": 63.68,
1543
+ "Relationship-Overall": 59.52,
1544
+ "Composition": 61.49,
1545
+ "Similarity": 62.78,
1546
+ "Inclusion": 59.24,
 
 
1547
  "Comparison": 50.78,
1548
+ "Compound-Overall": 41.11,
1549
+ "Imagination": 48.21,
1550
+ "Feature matching": 33.85,
1551
+ "Grammar-Overall": 56.28,
1552
+ "Pronoun Reference": 66.18,
1553
+ "Consistency": 56.94,
1554
+ "Negation": 45.38,
1555
+ "Layout-Overall": 45.15,
1556
+ "2D": 38.97,
1557
+ "3D": 51.52,
1558
+ "Logical Reasoning": 23.41,
1559
+ "Text": 1.44
 
 
 
 
 
1560
  },
1561
  {
1562
  "model": "Janus",
 
1564
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
1565
  "open_source": true,
1566
  "release_date": "2024-10",
1567
+ "Overall": 50.4,
1568
+ "Style": 90.1,
1569
+ "World Knowledge": 70.41,
1570
+ "Attribute-Overall": 54.7,
1571
+ "Quantity": 35.42,
1572
+ "Expression": 38.46,
1573
+ "Material": 61.79,
1574
+ "Size": 66.67,
1575
+ "Shape": 42.5,
1576
+ "Color": 88.33,
1577
+ "Action-Overall": 49.43,
1578
+ "Hand": 30.77,
 
 
 
 
 
1579
  "Full body": 51.63,
1580
+ "Animal": 62.5,
1581
+ "Non Contact": 44.9,
1582
+ "Contact": 35.71,
1583
+ "State": 67.92,
1584
+ "Relationship-Overall": 53.3,
1585
+ "Composition": 52.7,
1586
+ "Similarity": 51.67,
1587
+ "Inclusion": 58.15,
1588
+ "Comparison": 50.0,
1589
+ "Compound-Overall": 46.26,
1590
+ "Imagination": 56.89,
1591
+ "Feature matching": 35.42,
1592
+ "Grammar-Overall": 57.62,
1593
+ "Pronoun Reference": 65.07,
1594
+ "Consistency": 49.07,
1595
+ "Negation": 56.92,
1596
+ "Layout-Overall": 54.66,
1597
+ "2D": 55.51,
1598
+ "3D": 53.79,
1599
+ "Logical Reasoning": 26.36,
 
 
 
 
 
 
1600
  "Text": 1.15
1601
  },
1602
  {
 
1605
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
1606
  "open_source": true,
1607
  "release_date": "2024-11",
1608
+ "Overall": 45.15,
1609
+ "Style": 84.6,
1610
+ "World Knowledge": 59.65,
1611
+ "Attribute-Overall": 47.44,
1612
+ "Quantity": 44.44,
1613
+ "Expression": 31.41,
1614
+ "Material": 51.42,
 
 
 
 
1615
  "Size": 55.56,
1616
+ "Shape": 31.25,
1617
+ "Color": 76.67,
1618
+ "Action-Overall": 42.21,
1619
+ "Hand": 22.44,
1620
+ "Full body": 47.28,
1621
+ "Animal": 52.94,
1622
+ "Non Contact": 34.69,
1623
+ "Contact": 32.14,
1624
+ "State": 60.38,
1625
+ "Relationship-Overall": 48.48,
1626
+ "Composition": 56.08,
1627
+ "Similarity": 42.78,
1628
+ "Inclusion": 48.37,
1629
+ "Comparison": 39.06,
1630
+ "Compound-Overall": 43.3,
1631
+ "Imagination": 56.38,
1632
+ "Feature matching": 29.95,
1633
+ "Grammar-Overall": 58.82,
1634
+ "Pronoun Reference": 66.91,
1635
+ "Consistency": 47.69,
1636
+ "Negation": 59.62,
1637
+ "Layout-Overall": 43.84,
1638
+ "2D": 47.43,
1639
+ "3D": 40.15,
1640
+ "Logical Reasoning": 22.05,
1641
+ "Text": 1.15
 
 
 
 
 
 
 
1642
  },
1643
  {
1644
  "model": "Emu3",
 
1646
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
1647
  "open_source": true,
1648
  "release_date": "2024-09",
1649
+ "Overall": 45.42,
1650
+ "Style": 87.5,
1651
+ "World Knowledge": 76.42,
1652
+ "Attribute-Overall": 50.11,
1653
+ "Quantity": 42.36,
 
 
 
 
1654
  "Expression": 45.51,
1655
+ "Material": 52.83,
1656
+ "Size": 40.28,
1657
  "Shape": 46.25,
1658
+ "Color": 77.5,
1659
+ "Action-Overall": 40.4,
1660
+ "Hand": 23.08,
1661
+ "Full body": 49.46,
1662
+ "Animal": 54.41,
1663
+ "Non Contact": 34.69,
1664
+ "Contact": 29.17,
1665
+ "State": 50.47,
1666
+ "Relationship-Overall": 48.6,
1667
+ "Composition": 55.41,
1668
+ "Similarity": 44.44,
1669
+ "Inclusion": 46.74,
1670
+ "Comparison": 41.41,
1671
+ "Compound-Overall": 36.21,
 
 
 
1672
  "Imagination": 41.33,
1673
+ "Feature matching": 30.99,
1674
+ "Grammar-Overall": 50.67,
1675
+ "Pronoun Reference": 58.09,
1676
+ "Consistency": 49.07,
1677
+ "Negation": 44.23,
1678
+ "Layout-Overall": 43.84,
1679
+ "2D": 42.28,
1680
+ "3D": 45.45,
 
 
 
1681
  "Logical Reasoning": 19.32,
 
1682
  "Text": 1.15
1683
  },
1684
  {
 
1687
  "hf": "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
1688
  "open_source": true,
1689
  "release_date": "2024-02",
1690
+ "Overall": 44.74,
1691
+ "Style": 89.2,
1692
+ "World Knowledge": 74.84,
1693
+ "Attribute-Overall": 52.14,
1694
+ "Quantity": 60.42,
1695
+ "Expression": 44.23,
1696
+ "Material": 54.72,
1697
+ "Size": 47.22,
1698
+ "Shape": 38.75,
1699
+ "Color": 71.67,
1700
+ "Action-Overall": 41.63,
 
 
 
 
 
1701
  "Hand": 28.85,
1702
+ "Full body": 50.54,
1703
+ "Animal": 50.74,
1704
+ "Non Contact": 33.16,
1705
+ "Contact": 27.38,
1706
+ "State": 56.6,
1707
+ "Relationship-Overall": 51.65,
1708
+ "Composition": 61.15,
1709
+ "Similarity": 50.56,
1710
+ "Inclusion": 45.65,
1711
+ "Comparison": 39.84,
1712
+ "Compound-Overall": 35.7,
1713
+ "Imagination": 44.64,
1714
+ "Feature matching": 26.56,
1715
+ "Grammar-Overall": 51.2,
 
 
 
1716
  "Pronoun Reference": 58.82,
1717
+ "Consistency": 47.22,
1718
+ "Negation": 46.54,
1719
+ "Layout-Overall": 34.89,
1720
+ "2D": 30.51,
1721
+ "3D": 39.39,
1722
+ "Logical Reasoning": 15.0,
 
 
 
1723
  "Text": 1.15
1724
  },
1725
  {
 
1728
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
1729
  "open_source": true,
1730
  "release_date": "2023-07",
1731
+ "Overall": 39.61,
1732
+ "Style": 86.5,
1733
+ "World Knowledge": 72.94,
1734
+ "Attribute-Overall": 42.52,
1735
+ "Quantity": 42.36,
1736
+ "Expression": 25.0,
1737
+ "Material": 50.47,
1738
+ "Size": 41.67,
1739
+ "Shape": 31.87,
1740
+ "Color": 66.67,
1741
+ "Action-Overall": 33.46,
1742
+ "Hand": 19.87,
1743
+ "Full body": 36.96,
1744
+ "Animal": 41.91,
1745
+ "Non Contact": 26.02,
1746
+ "Contact": 21.43,
1747
+ "State": 51.42,
1748
+ "Relationship-Overall": 44.04,
 
 
 
 
 
 
1749
  "Composition": 53.72,
1750
+ "Similarity": 37.22,
1751
+ "Inclusion": 38.04,
1752
+ "Comparison": 39.84,
1753
+ "Compound-Overall": 25.0,
1754
+ "Imagination": 32.91,
1755
+ "Feature matching": 16.93,
1756
+ "Grammar-Overall": 48.13,
1757
+ "Pronoun Reference": 51.84,
 
 
1758
  "Consistency": 42.59,
1759
+ "Negation": 48.85,
 
1760
  "Layout-Overall": 29.85,
1761
+ "2D": 28.31,
1762
+ "3D": 31.44,
1763
+ "Logical Reasoning": 12.27,
1764
+ "Text": 1.44
 
 
1765
  },
1766
  {
1767
  "model": "GPT-4o",
 
1769
  "hf": "-",
1770
  "open_source": false,
1771
  "release_date": "2025-03",
1772
+ "Overall": 92.48,
1773
+ "Style": 98.98,
1774
+ "World Knowledge": 98.22,
1775
+ "Attribute-Overall": 94.01,
1776
+ "Quantity": 89.29,
1777
+ "Expression": 96.0,
1778
+ "Material": 94.66,
1779
+ "Size": 92.96,
1780
+ "Shape": 92.5,
 
 
 
 
1781
  "Color": 99.17,
1782
+ "Action-Overall": 90.78,
1783
+ "Hand": 88.46,
1784
+ "Full body": 93.33,
1785
+ "Animal": 87.88,
1786
+ "Non Contact": 92.02,
1787
+ "Contact": 89.16,
1788
+ "State": 92.31,
1789
+ "Relationship-Overall": 94.33,
1790
+ "Composition": 96.58,
1791
+ "Similarity": 91.11,
 
 
1792
  "Inclusion": 94.89,
1793
+ "Comparison": 92.97,
1794
+ "Compound-Overall": 92.89,
1795
+ "Imagination": 94.07,
1796
+ "Feature matching": 91.67,
1797
+ "Grammar-Overall": 91.21,
1798
+ "Pronoun Reference": 91.04,
1799
+ "Consistency": 93.06,
1800
+ "Negation": 89.75,
 
 
 
1801
  "Layout-Overall": 91.35,
1802
+ "2D": 92.16,
1803
+ "3D": 90.53,
1804
+ "Logical Reasoning": 83.79,
 
 
1805
  "Text": 89.24
1806
  },
1807
  {
 
1810
  "hf": "-",
1811
  "open_source": false,
1812
  "release_date": "2025-06",
1813
+ "Overall": 91.65,
1814
+ "Style": 99.1,
1815
+ "World Knowledge": 97.78,
1816
+ "Attribute-Overall": 92.09,
1817
+ "Quantity": 94.44,
1818
+ "Expression": 80.77,
1819
+ "Material": 95.28,
1820
+ "Size": 94.44,
1821
+ "Shape": 88.75,
 
 
 
 
1822
  "Color": 100.0,
1823
+ "Action-Overall": 92.1,
1824
+ "Hand": 89.74,
1825
+ "Full body": 93.41,
1826
+ "Animal": 93.38,
1827
+ "Non Contact": 88.78,
1828
+ "Contact": 87.5,
1829
+ "State": 98.58,
1830
+ "Relationship-Overall": 93.53,
1831
+ "Composition": 96.28,
1832
+ "Similarity": 87.78,
1833
+ "Inclusion": 96.2,
1834
+ "Comparison": 91.41,
 
 
 
1835
  "Compound-Overall": 91.37,
1836
  "Imagination": 92.86,
1837
  "Feature matching": 89.84,
1838
+ "Grammar-Overall": 87.83,
1839
+ "Pronoun Reference": 91.91,
1840
+ "Consistency": 90.28,
1841
+ "Negation": 81.54,
1842
+ "Layout-Overall": 92.91,
1843
+ "2D": 93.75,
1844
+ "3D": 92.05,
1845
+ "Logical Reasoning": 80.45,
1846
+ "Text": 89.37
 
 
 
 
1847
  },
1848
  {
1849
  "model": "Seedream-3.0",
 
1851
  "hf": "-",
1852
  "open_source": false,
1853
  "release_date": "2025-06",
1854
+ "Overall": 78.41,
1855
+ "Style": 98.19,
1856
+ "World Knowledge": 94.9,
1857
+ "Attribute-Overall": 84.62,
1858
+ "Quantity": 79.02,
1859
+ "Expression": 81.94,
1860
+ "Material": 89.62,
1861
+ "Size": 83.8,
1862
+ "Shape": 77.22,
1863
+ "Color": 96.67,
1864
+ "Action-Overall": 83.14,
1865
+ "Hand": 75.97,
1866
+ "Full body": 89.56,
1867
+ "Animal": 86.03,
1868
+ "Non Contact": 75.38,
1869
+ "Contact": 81.93,
1870
+ "State": 89.1,
1871
+ "Relationship-Overall": 80.18,
1872
+ "Composition": 81.57,
1873
+ "Similarity": 74.16,
1874
+ "Inclusion": 83.61,
1875
+ "Comparison": 80.47,
1876
+ "Compound-Overall": 72.32,
1877
+ "Imagination": 76.92,
1878
+ "Feature matching": 67.62,
1879
+ "Grammar-Overall": 60.3,
1880
+ "Pronoun Reference": 77.94,
1881
+ "Consistency": 68.4,
1882
+ "Negation": 35.14,
1883
+ "Layout-Overall": 88.74,
1884
+ "2D": 88.15,
1885
+ "3D": 89.35,
1886
+ "Logical Reasoning": 51.83,
1887
+ "Text": 69.86
 
 
 
 
 
 
 
 
 
 
 
1888
  },
1889
  {
1890
  "model": "Imagen-3.0-generate-002",
 
1892
  "hf": "-",
1893
  "open_source": false,
1894
  "release_date": "2025-02",
1895
+ "Overall": 71.34,
1896
+ "Style": 89.35,
1897
+ "World Knowledge": 93.95,
1898
+ "Attribute-Overall": 77.92,
1899
+ "Quantity": 71.09,
1900
+ "Expression": 64.0,
1901
+ "Material": 85.85,
1902
+ "Size": 89.78,
1903
+ "Shape": 64.38,
1904
+ "Color": 93.28,
1905
+ "Action-Overall": 78.8,
1906
+ "Hand": 75.0,
 
 
 
 
 
1907
  "Full body": 83.89,
1908
+ "Animal": 80.15,
1909
+ "Non Contact": 75.65,
1910
+ "Contact": 71.43,
1911
+ "State": 85.29,
1912
+ "Relationship-Overall": 82.75,
1913
+ "Composition": 83.22,
1914
+ "Similarity": 76.14,
1915
+ "Inclusion": 88.27,
1916
+ "Comparison": 83.06,
1917
+ "Compound-Overall": 72.81,
1918
+ "Imagination": 80.36,
1919
+ "Feature matching": 65.1,
1920
+ "Grammar-Overall": 69.97,
1921
+ "Pronoun Reference": 80.88,
1922
+ "Consistency": 70.28,
1923
+ "Negation": 57.94,
1924
+ "Layout-Overall": 80.04,
1925
+ "2D": 82.35,
1926
+ "3D": 77.65,
1927
+ "Logical Reasoning": 45.09,
1928
+ "Text": 22.7
 
 
 
 
 
 
1929
  },
1930
  {
1931
  "model": "DALL-E-3",
 
1933
  "hf": "-",
1934
  "open_source": false,
1935
  "release_date": "2023-09",
1936
+ "Overall": 68.85,
1937
+ "Style": 94.43,
1938
+ "World Knowledge": 92.64,
1939
+ "Attribute-Overall": 75.76,
1940
+ "Quantity": 60.14,
1941
+ "Expression": 63.16,
1942
+ "Material": 87.2,
1943
+ "Size": 84.72,
1944
+ "Shape": 66.25,
1945
+ "Color": 91.6,
1946
+ "Action-Overall": 70.78,
1947
+ "Hand": 60.78,
1948
+ "Full body": 76.67,
1949
+ "Animal": 77.94,
1950
+ "Non Contact": 68.72,
1951
+ "Contact": 63.19,
1952
+ "State": 76.19,
1953
+ "Relationship-Overall": 78.31,
1954
+ "Composition": 82.99,
1955
+ "Similarity": 71.51,
1956
+ "Inclusion": 85.47,
1957
+ "Comparison": 66.93,
1958
+ "Compound-Overall": 71.08,
1959
+ "Imagination": 78.01,
1960
+ "Feature matching": 63.95,
1961
+ "Grammar-Overall": 69.22,
1962
+ "Pronoun Reference": 76.34,
1963
+ "Consistency": 72.09,
1964
+ "Negation": 59.45,
1965
+ "Layout-Overall": 65.65,
1966
+ "2D": 54.78,
1967
+ "3D": 77.25,
1968
+ "Logical Reasoning": 46.22,
1969
+ "Text": 24.43
 
 
 
 
 
 
 
 
 
 
 
1970
  },
1971
  {
1972
  "model": "FLUX-pro-1.1-Ultra",
 
1974
  "hf": "-",
1975
  "open_source": false,
1976
  "release_date": "2024-11",
1977
+ "Overall": 70.46,
1978
+ "Style": 90.99,
1979
+ "World Knowledge": 91.3,
1980
+ "Attribute-Overall": 76.79,
1981
+ "Quantity": 72.92,
1982
+ "Expression": 60.65,
1983
+ "Material": 79.25,
1984
+ "Size": 75.0,
1985
+ "Shape": 78.12,
1986
+ "Color": 98.33,
1987
+ "Action-Overall": 71.39,
1988
+ "Hand": 58.97,
1989
+ "Full body": 69.02,
1990
+ "Animal": 76.47,
1991
+ "Non Contact": 78.06,
1992
+ "Contact": 65.48,
1993
+ "State": 77.83,
1994
+ "Relationship-Overall": 78.05,
1995
+ "Composition": 81.08,
1996
+ "Similarity": 74.44,
1997
+ "Inclusion": 80.98,
1998
+ "Comparison": 71.88,
1999
+ "Compound-Overall": 68.17,
2000
+ "Imagination": 77.3,
2001
+ "Feature matching": 58.85,
2002
+ "Grammar-Overall": 68.18,
2003
+ "Pronoun Reference": 83.46,
2004
+ "Consistency": 65.74,
2005
+ "Negation": 54.23,
2006
+ "Layout-Overall": 80.6,
2007
+ "2D": 81.25,
2008
+ "3D": 79.92,
2009
+ "Logical Reasoning": 41.46,
2010
+ "Text": 37.64
 
 
 
 
 
 
 
 
 
 
 
2011
  },
2012
  {
2013
  "model": "Keling-Ketu",
 
2015
  "hf": "-",
2016
  "open_source": false,
2017
  "release_date": "2025-04",
2018
+ "Overall": 65.23,
2019
+ "Style": 92.25,
2020
+ "World Knowledge": 87.08,
2021
+ "Attribute-Overall": 70.81,
2022
+ "Quantity": 74.29,
2023
+ "Expression": 56.77,
2024
+ "Material": 78.67,
2025
+ "Size": 74.83,
2026
+ "Shape": 53.75,
2027
+ "Color": 89.66,
2028
+ "Action-Overall": 67.81,
2029
+ "Hand": 53.85,
2030
+ "Full body": 72.28,
2031
+ "Animal": 71.32,
2032
+ "Non Contact": 70.77,
2033
+ "Contact": 59.28,
2034
+ "State": 75.94,
2035
+ "Relationship-Overall": 69.52,
2036
+ "Composition": 68.14,
2037
+ "Similarity": 69.27,
2038
+ "Inclusion": 72.13,
2039
+ "Comparison": 69.29,
2040
+ "Compound-Overall": 59.66,
2041
+ "Imagination": 66.15,
2042
+ "Feature matching": 53.03,
2043
+ "Grammar-Overall": 68.99,
2044
+ "Pronoun Reference": 74.91,
2045
+ "Consistency": 64.19,
2046
+ "Negation": 66.8,
2047
+ "Layout-Overall": 74.57,
2048
+ "2D": 77.61,
2049
+ "3D": 71.43,
2050
+ "Logical Reasoning": 45.6,
 
 
 
 
 
 
 
 
 
 
 
2051
  "Text": 16.03
2052
  },
2053
  {
 
2056
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
2057
  "open_source": true,
2058
  "release_date": "2025-08",
2059
+ "Overall": 78.36,
2060
+ "Style": 94.7,
2061
+ "World Knowledge": 94.15,
2062
+ "Attribute-Overall": 87.93,
2063
+ "Quantity": 84.03,
2064
+ "Expression": 85.26,
 
 
 
 
2065
  "Material": 91.98,
2066
+ "Size": 86.11,
2067
+ "Shape": 81.88,
2068
  "Color": 99.17,
2069
+ "Action-Overall": 82.6,
2070
+ "Hand": 78.21,
2071
+ "Full body": 86.96,
2072
+ "Animal": 86.76,
2073
+ "Non Contact": 77.55,
2074
+ "Contact": 76.79,
2075
+ "State": 88.68,
2076
+ "Relationship-Overall": 80.08,
2077
+ "Composition": 82.09,
2078
+ "Similarity": 71.11,
 
 
2079
  "Inclusion": 86.96,
2080
+ "Comparison": 78.12,
2081
+ "Compound-Overall": 72.94,
 
2082
  "Imagination": 73.21,
2083
+ "Feature matching": 72.66,
2084
+ "Grammar-Overall": 60.96,
2085
+ "Pronoun Reference": 84.93,
 
2086
  "Consistency": 70.37,
2087
+ "Negation": 28.08,
2088
+ "Layout-Overall": 86.57,
2089
+ "2D": 87.13,
2090
+ "3D": 85.98,
2091
+ "Logical Reasoning": 51.59,
2092
+ "Text": 72.13
 
 
 
2093
  }
2094
  ]
2095
+ }