CodeGoat24 commited on
Commit
f316e01
·
verified ·
1 Parent(s): 687c628

Update leaderboard_data.json

Browse files
Files changed (1) hide show
  1. leaderboard_data.json +209 -662
leaderboard_data.json CHANGED
@@ -6,11 +6,9 @@
6
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
7
  "open_source": true,
8
  "release_date": "2025-11",
9
-
10
  "Overall": 90.31,
11
  "Style": 99.17,
12
  "World Knowledge": 96.39,
13
-
14
  "Attribute-Overall": 94.57,
15
  "Quantity": 82.98,
16
  "Expression": 88.47,
@@ -18,36 +16,29 @@
18
  "Size": 92.42,
19
  "Shape": 91.43,
20
  "Color": 98.69,
21
-
22
  "Action-Overall": 86.17,
23
  "Hand": 84.94,
24
  "Full body": 85.94,
25
  "Animal": 85.51,
26
  "Non Contact": 86.16,
27
  "Contact": 82.47,
28
- "State": 88.10,
29
-
30
- "Relationship-Overall": 91.70,
31
  "Composition": 91.33,
32
  "Similarity": 89.42,
33
  "Inclusion": 95.69,
34
  "Comparison": 90.78,
35
-
36
  "Compound-Overall": 90.16,
37
  "Imagination": 89.94,
38
  "Feature matching": 90.65,
39
-
40
  "Grammar-Overall": 84.52,
41
  "Pronoun Reference": 94.84,
42
  "Consistency": 82.94,
43
  "Negation": 76.76,
44
-
45
  "Layout-Overall": 92.22,
46
  "2D": 92.94,
47
- "3D": 91.30,
48
-
49
- "Logical Reasoning": 79.90,
50
-
51
  "Text": 88.32
52
  },
53
  {
@@ -56,21 +47,16 @@
56
  "hf": "-",
57
  "open_source": false,
58
  "release_date": "2025-11",
59
-
60
- "Overall": 94.20,
61
-
62
  "Style": 99.58,
63
-
64
  "World Knowledge": 97.83,
65
-
66
  "Attribute-Overall": 95.94,
67
  "Quantity": 89.36,
68
  "Expression": 90.69,
69
  "Material": 97.52,
70
  "Size": 96.97,
71
  "Shape": 91.43,
72
- "Color": 95.53,
73
-
74
  "Action-Overall": 89.19,
75
  "Hand": 86.22,
76
  "Full body": 89.69,
@@ -78,28 +64,22 @@
78
  "Non Contact": 89.29,
79
  "Contact": 89.37,
80
  "State": 89.38,
81
-
82
  "Relationship-Overall": 94.29,
83
  "Composition": 94.39,
84
  "Similarity": 91.99,
85
  "Inclusion": 98.28,
86
  "Comparison": 92.48,
87
-
88
- "Compound-Overall": 94.10,
89
  "Imagination": 94.92,
90
  "Feature matching": 92.29,
91
-
92
  "Grammar-Overall": 93.15,
93
- "Pronoun Reference": 99.60,
94
  "Consistency": 90.08,
95
  "Negation": 90.14,
96
-
97
  "Layout-Overall": 93.73,
98
  "2D": 93.79,
99
  "3D": 93.66,
100
-
101
  "Logical Reasoning": 87.75,
102
-
103
  "Text": 96.47
104
  },
105
  {
@@ -108,48 +88,39 @@
108
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
109
  "open_source": true,
110
  "release_date": "2025-11",
111
-
112
- "Overall": 80.71,
113
  "Style": 93.19,
114
  "World Knowledge": 93.93,
115
-
116
  "Attribute-Overall": 89.34,
117
  "Quantity": 82.98,
118
  "Expression": 76.11,
119
  "Material": 91.72,
120
- "Size": 87.50,
121
  "Shape": 80.77,
122
  "Color": 96.38,
123
-
124
- "Action-Overall": 74.20,
125
  "Hand": 75.64,
126
  "Full body": 74.06,
127
  "Animal": 71.01,
128
  "Non Contact": 71.43,
129
  "Contact": 66.38,
130
  "State": 77.98,
131
-
132
  "Relationship-Overall": 80.44,
133
  "Composition": 78.32,
134
  "Similarity": 73.08,
135
  "Inclusion": 87.93,
136
  "Comparison": 83.74,
137
-
138
  "Compound-Overall": 76.46,
139
  "Imagination": 77.75,
140
- "Feature matching": 73.60,
141
-
142
  "Grammar-Overall": 76.65,
143
  "Pronoun Reference": 91.27,
144
  "Consistency": 69.84,
145
  "Negation": 69.72,
146
-
147
  "Layout-Overall": 86.67,
148
  "2D": 87.71,
149
  "3D": 85.33,
150
-
151
  "Logical Reasoning": 66.18,
152
-
153
  "Text": 70.11
154
  },
155
  {
@@ -158,63 +129,50 @@
158
  "hf": "-",
159
  "open_source": false,
160
  "release_date": "2025-09",
161
-
162
- "Overall": 84.34,
163
- "Style": 96.75,
164
- "World Knowledge": 95.52,
165
-
166
- "Attribute-Overall": 91.40,
167
  "Quantity": 85.64,
168
- "Expression": 81.01,
169
- "Material": 94.03,
170
- "Size": 88.17,
171
- "Shape": 87.50,
172
  "Color": 96.11,
173
-
174
- "Action-Overall": 77.55,
175
- "Hand": 73.08,
176
  "Full body": 82.91,
177
- "Animal": 77.21,
178
- "Non Contact": 71.76,
179
- "Contact": 69.83,
180
- "State": 81.27,
181
-
182
- "Relationship-Overall": 86.96,
183
- "Composition": 85.26,
184
- "Similarity": 81.41,
185
- "Inclusion": 94.48,
186
- "Comparison": 88.11,
187
-
188
- "Compound-Overall": 85.60,
189
- "Imagination": 87.55,
190
- "Feature matching": 81.31,
191
-
192
- "Grammar-Overall": 78.06,
193
- "Pronoun Reference": 92.86,
194
- "Consistency": 77.42,
195
  "Negation": 65.49,
196
-
197
- "Layout-Overall": 87.18,
198
- "2D": 88.28,
199
  "3D": 85.77,
200
-
201
- "Logical Reasoning": 71.32,
202
-
203
- "Text": 73.10
204
  },
205
- {
206
  "model": "Emu3",
207
  "link": "https://arxiv.org/pdf/2409.18869",
208
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
209
  "open_source": true,
210
  "release_date": "2024-09",
211
-
212
  "Overall": 50.95,
213
-
214
  "Style": 89.36,
215
-
216
  "World Knowledge": 76.16,
217
-
218
  "Attribute-Overall": 66.81,
219
  "Quantity": 44.68,
220
  "Expression": 48.47,
@@ -222,49 +180,40 @@
222
  "Size": 73.24,
223
  "Shape": 54.29,
224
  "Color": 76.61,
225
-
226
- "Action-Overall": 43.80,
227
  "Hand": 28.85,
228
  "Full body": 46.25,
229
  "Animal": 43.48,
230
  "Non Contact": 30.49,
231
  "Contact": 25.57,
232
  "State": 56.92,
233
-
234
- "Relationship-Overall": 51.70,
235
  "Composition": 53.77,
236
  "Similarity": 42.31,
237
  "Inclusion": 59.48,
238
- "Comparison": 53.77,
239
-
240
- "Compound-Overall": 46.00,
241
  "Imagination": 51.69,
242
  "Feature matching": 33.41,
243
-
244
  "Grammar-Overall": 50.25,
245
  "Pronoun Reference": 55.95,
246
  "Consistency": 42.46,
247
  "Negation": 52.11,
248
-
249
  "Layout-Overall": 56.67,
250
  "2D": 56.36,
251
  "3D": 57.07,
252
-
253
  "Logical Reasoning": 27.43,
254
-
255
  "Text": 1.36
256
  },
257
- {
258
  "model": "UniWorld-V1",
259
  "link": "https://arxiv.org/pdf/2506.03147",
260
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
261
  "open_source": true,
262
  "release_date": "2025-06",
263
-
264
- "Overall": 69.60,
265
  "Style": 93.19,
266
- "World Knowledge": 84.10,
267
-
268
  "Attribute-Overall": 79.94,
269
  "Quantity": 66.49,
270
  "Expression": 72.64,
@@ -272,7 +221,6 @@
272
  "Size": 81.06,
273
  "Shape": 72.38,
274
  "Color": 87.95,
275
-
276
  "Action-Overall": 65.81,
277
  "Hand": 63.78,
278
  "Full body": 64.38,
@@ -280,49 +228,40 @@
280
  "Non Contact": 62.95,
281
  "Contact": 55.17,
282
  "State": 70.85,
283
-
284
  "Relationship-Overall": 68.91,
285
  "Composition": 66.96,
286
  "Similarity": 67.31,
287
  "Inclusion": 72.99,
288
  "Comparison": 70.39,
289
-
290
  "Compound-Overall": 71.37,
291
  "Imagination": 74.16,
292
  "Feature matching": 65.19,
293
-
294
  "Grammar-Overall": 75.13,
295
  "Pronoun Reference": 84.13,
296
  "Consistency": 69.44,
297
  "Negation": 72.18,
298
-
299
- "Layout-Overall": 79.60,
300
  "2D": 83.33,
301
  "3D": 74.82,
302
-
303
  "Logical Reasoning": 57.04,
304
-
305
  "Text": 20.92
306
  },
307
- {
308
  "model": "Echo-4o",
309
  "link": "https://arxiv.org/pdf/2508.09987",
310
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
311
  "open_source": true,
312
  "release_date": "2025-8",
313
-
314
  "Overall": 76.41,
315
- "Style": 96.10,
316
  "World Knowledge": 90.17,
317
-
318
  "Attribute-Overall": 90.24,
319
- "Quantity": 73.40,
320
  "Expression": 82.08,
321
  "Material": 92.39,
322
- "Size": 89.20,
323
  "Shape": 84.44,
324
  "Color": 95.49,
325
-
326
  "Action-Overall": 73.56,
327
  "Hand": 72.12,
328
  "Full body": 76.56,
@@ -330,49 +269,40 @@
330
  "Non Contact": 66.96,
331
  "Contact": 65.23,
332
  "State": 77.47,
333
-
334
  "Relationship-Overall": 82.81,
335
- "Composition": 83.80,
336
  "Similarity": 78.21,
337
  "Inclusion": 84.77,
338
  "Comparison": 82.77,
339
-
340
  "Compound-Overall": 84.88,
341
  "Imagination": 85.44,
342
  "Feature matching": 83.64,
343
-
344
  "Grammar-Overall": 82.36,
345
  "Pronoun Reference": 86.11,
346
  "Consistency": 83.33,
347
  "Negation": 78.17,
348
-
349
  "Layout-Overall": 86.43,
350
- "2D": 88.70,
351
  "3D": 83.51,
352
-
353
  "Logical Reasoning": 69.42,
354
-
355
  "Text": 8.15
356
- },
357
- {
358
  "model": "SD-3.5-Medium",
359
  "link": "https://stability.ai/news/introducing-stable-diffusion-3-5",
360
  "hf": "stabilityai/stable-diffusion-3.5-medium",
361
  "open_source": true,
362
  "release_date": "2024-10",
363
-
364
  "Overall": 64.67,
365
  "Style": 92.19,
366
  "World Knowledge": 86.56,
367
-
368
  "Attribute-Overall": 80.24,
369
- "Quantity": 61.70,
370
  "Expression": 62.64,
371
  "Material": 83.73,
372
  "Size": 82.01,
373
- "Shape": 73.60,
374
  "Color": 87.79,
375
-
376
  "Action-Overall": 58.59,
377
  "Hand": 58.01,
378
  "Full body": 56.56,
@@ -380,28 +310,22 @@
380
  "Non Contact": 42.86,
381
  "Contact": 46.55,
382
  "State": 68.18,
383
-
384
  "Relationship-Overall": 69.88,
385
  "Composition": 70.15,
386
  "Similarity": 62.82,
387
  "Inclusion": 75.86,
388
  "Comparison": 69.66,
389
-
390
  "Compound-Overall": 62.86,
391
  "Imagination": 65.61,
392
  "Feature matching": 56.78,
393
-
394
  "Grammar-Overall": 65.86,
395
  "Pronoun Reference": 79.37,
396
  "Consistency": 61.11,
397
- "Negation": 58.10,
398
-
399
  "Layout-Overall": 73.25,
400
  "2D": 73.59,
401
  "3D": 72.83,
402
-
403
  "Logical Reasoning": 45.87,
404
-
405
  "Text": 11.41
406
  },
407
  {
@@ -410,19 +334,16 @@
410
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
411
  "open_source": true,
412
  "release_date": "2025-09",
413
-
414
  "Overall": 71.81,
415
  "Style": 86.88,
416
  "World Knowledge": 88.58,
417
-
418
  "Attribute-Overall": 83.71,
419
  "Quantity": 74.47,
420
  "Expression": 76.11,
421
- "Material": 80.80,
422
  "Size": 84.47,
423
  "Shape": 78.67,
424
  "Color": 90.83,
425
-
426
  "Action-Overall": 69.66,
427
  "Hand": 67.63,
428
  "Full body": 71.56,
@@ -430,28 +351,22 @@
430
  "Non Contact": 65.18,
431
  "Contact": 57.18,
432
  "State": 74.21,
433
-
434
  "Relationship-Overall": 73.33,
435
  "Composition": 69.77,
436
  "Similarity": 72.76,
437
  "Inclusion": 82.18,
438
  "Comparison": 73.06,
439
-
440
  "Compound-Overall": 74.93,
441
- "Imagination": 77.00,
442
  "Feature matching": 70.33,
443
-
444
  "Grammar-Overall": 74.49,
445
  "Pronoun Reference": 89.68,
446
  "Consistency": 66.67,
447
  "Negation": 67.96,
448
-
449
  "Layout-Overall": 84.84,
450
  "2D": 90.11,
451
  "3D": 78.08,
452
-
453
  "Logical Reasoning": 58.01,
454
-
455
  "Text": 23.64
456
  },
457
  {
@@ -460,19 +375,16 @@
460
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
461
  "open_source": true,
462
  "release_date": "2025-05",
463
-
464
- "Overall": 40.10,
465
  "Style": 75.83,
466
  "World Knowledge": 52.75,
467
-
468
- "Attribute-Overall": 49.90,
469
  "Quantity": 50.53,
470
  "Expression": 37.22,
471
  "Material": 47.52,
472
  "Size": 54.55,
473
  "Shape": 40.56,
474
  "Color": 57.81,
475
-
476
  "Action-Overall": 32.42,
477
  "Hand": 16.67,
478
  "Full body": 30.63,
@@ -480,28 +392,22 @@
480
  "Non Contact": 19.64,
481
  "Contact": 17.24,
482
  "State": 44.17,
483
-
484
  "Relationship-Overall": 39.06,
485
  "Composition": 39.16,
486
  "Similarity": 33.97,
487
  "Inclusion": 48.56,
488
  "Comparison": 34.71,
489
-
490
  "Compound-Overall": 38.37,
491
  "Imagination": 45.99,
492
- "Feature matching": 21.50,
493
-
494
- "Grammar-Overall": 50.00,
495
  "Pronoun Reference": 53.97,
496
  "Consistency": 39.29,
497
  "Negation": 55.99,
498
-
499
  "Layout-Overall": 43.02,
500
  "2D": 47.46,
501
  "3D": 37.32,
502
-
503
  "Logical Reasoning": 19.42,
504
-
505
  "Text": 0.27
506
  },
507
  {
@@ -510,48 +416,39 @@
510
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
511
  "open_source": true,
512
  "release_date": "2025-06",
513
-
514
  "Overall": 71.39,
515
  "Style": 94.35,
516
  "World Knowledge": 84.83,
517
-
518
  "Attribute-Overall": 83.03,
519
  "Quantity": 66.49,
520
  "Expression": 73.89,
521
  "Material": 81.78,
522
  "Size": 81.63,
523
- "Shape": 77.80,
524
  "Color": 90.93,
525
-
526
  "Action-Overall": 66.57,
527
  "Hand": 67.31,
528
  "Full body": 64.06,
529
  "Animal": 65.22,
530
  "Non Contact": 64.29,
531
- "Contact": 54.60,
532
  "State": 72.13,
533
-
534
  "Relationship-Overall": 73.06,
535
  "Composition": 67.73,
536
  "Similarity": 72.76,
537
- "Inclusion": 81.90,
538
  "Comparison": 75.97,
539
-
540
  "Compound-Overall": 70.49,
541
  "Imagination": 72.47,
542
  "Feature matching": 66.12,
543
-
544
- "Grammar-Overall": 76.40,
545
  "Pronoun Reference": 84.52,
546
  "Consistency": 75.79,
547
  "Negation": 69.72,
548
-
549
  "Layout-Overall": 80.63,
550
- "2D": 82.20,
551
  "3D": 78.62,
552
-
553
  "Logical Reasoning": 56.55,
554
-
555
  "Text": 27.99
556
  },
557
  {
@@ -560,11 +457,9 @@
560
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev",
561
  "open_source": true,
562
  "release_date": "2025-08",
563
-
564
  "Overall": 78.45,
565
- "Style": 94.10,
566
  "World Knowledge": 93.79,
567
-
568
  "Attribute-Overall": 89.55,
569
  "Quantity": 81.38,
570
  "Expression": 76.81,
@@ -572,36 +467,29 @@
572
  "Size": 88.64,
573
  "Shape": 85.31,
574
  "Color": 95.44,
575
-
576
  "Action-Overall": 76.28,
577
- "Hand": 75.00,
578
  "Full body": 76.25,
579
  "Animal": 72.46,
580
- "Non Contact": 69.20,
581
  "Contact": 72.99,
582
  "State": 80.43,
583
-
584
  "Relationship-Overall": 81.73,
585
  "Composition": 80.87,
586
  "Similarity": 73.08,
587
  "Inclusion": 88.22,
588
  "Comparison": 84.47,
589
-
590
  "Compound-Overall": 80.67,
591
  "Imagination": 80.59,
592
  "Feature matching": 80.84,
593
-
594
  "Grammar-Overall": 75.25,
595
  "Pronoun Reference": 91.27,
596
  "Consistency": 74.21,
597
  "Negation": 61.97,
598
-
599
  "Layout-Overall": 86.59,
600
  "2D": 85.45,
601
- "3D": 86.59,
602
-
603
  "Logical Reasoning": 65.53,
604
-
605
  "Text": 41.03
606
  },
607
  {
@@ -610,7 +498,6 @@
610
  "hf": "https://huggingface.co/FoundationVision/Infinity/tree/main/infinity_8b_512x512_weights",
611
  "open_source": true,
612
  "release_date": "2024-12",
613
-
614
  "Overall": 67.28,
615
  "Style": 92.77,
616
  "World Knowledge": 88.44,
@@ -621,36 +508,29 @@
621
  "Size": 82.95,
622
  "Shape": 71.15,
623
  "Color": 88.73,
624
-
625
  "Action-Overall": 63.28,
626
  "Hand": 58.65,
627
- "Full body": 60.13,
628
  "Animal": 67.75,
629
  "Non Contact": 58.48,
630
  "Contact": 52.87,
631
  "State": 69.07,
632
-
633
  "Relationship-Overall": 70.04,
634
- "Composition": 66.20,
635
  "Similarity": 67.63,
636
  "Inclusion": 78.45,
637
  "Comparison": 72.09,
638
-
639
  "Compound-Overall": 66.13,
640
  "Imagination": 68.57,
641
  "Feature matching": 60.75,
642
-
643
  "Grammar-Overall": 68.53,
644
  "Pronoun Reference": 76.59,
645
  "Consistency": 71.43,
646
- "Negation": 58.80,
647
-
648
  "Layout-Overall": 77.54,
649
  "2D": 80.93,
650
  "3D": 73.19,
651
-
652
  "Logical Reasoning": 51.46,
653
-
654
  "Text": 13.59
655
  },
656
  {
@@ -659,68 +539,57 @@
659
  "hf": "https://huggingface.co/showlab/show-o2-7B",
660
  "open_source": true,
661
  "release_date": "2025-06",
662
-
663
  "Overall": 70.33,
664
  "Style": 93.11,
665
  "World Knowledge": 88.44,
666
  "Attribute-Overall": 86.35,
667
  "Quantity": 59.04,
668
  "Expression": 71.53,
669
- "Material": 88.10,
670
  "Size": 87.31,
671
  "Shape": 81.12,
672
  "Color": 94.71,
673
-
674
  "Action-Overall": 69.02,
675
  "Hand": 53.85,
676
- "Full body": 80.00,
677
- "Animal": 69.20,
678
  "Non Contact": 60.27,
679
  "Contact": 55.75,
680
  "State": 76.68,
681
-
682
  "Relationship-Overall": 77.37,
683
  "Composition": 77.42,
684
  "Similarity": 68.59,
685
  "Inclusion": 80.17,
686
  "Comparison": 81.55,
687
-
688
  "Compound-Overall": 76.45,
689
  "Imagination": 77.64,
690
  "Feature matching": 73.83,
691
-
692
- "Grammar-Overall": 70.30,
693
- "Pronoun Reference": 87.30,
694
  "Consistency": 66.67,
695
  "Negation": 58.45,
696
-
697
  "Layout-Overall": 80.63,
698
  "2D": 80.08,
699
  "3D": 81.34,
700
-
701
  "Logical Reasoning": 59.71,
702
-
703
- "Text": 1.90
704
  },
705
- {
706
  "model": "OneCAT",
707
  "link": "https://arxiv.org/pdf/2509.03498",
708
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
709
  "open_source": true,
710
  "release_date": "2025-09",
711
-
712
  "Overall": 62.92,
713
  "Style": 94.93,
714
  "World Knowledge": 83.67,
715
-
716
- "Attribute-Overall": 74.90,
717
- "Quantity": 61.70,
718
  "Expression": 66.39,
719
  "Material": 78.09,
720
  "Size": 82.58,
721
  "Shape": 62.24,
722
  "Color": 78.88,
723
-
724
  "Action-Overall": 58.95,
725
  "Hand": 37.82,
726
  "Full body": 59.06,
@@ -728,29 +597,23 @@
728
  "Non Contact": 50.89,
729
  "Contact": 43.97,
730
  "State": 71.44,
731
-
732
  "Relationship-Overall": 65.36,
733
  "Composition": 67.47,
734
  "Similarity": 62.82,
735
  "Inclusion": 63.22,
736
  "Comparison": 65.05,
737
-
738
  "Compound-Overall": 63.59,
739
  "Imagination": 72.57,
740
  "Feature matching": 43.69,
741
-
742
  "Grammar-Overall": 63.58,
743
  "Pronoun Reference": 74.21,
744
  "Consistency": 67.46,
745
- "Negation": 50.70,
746
-
747
  "Layout-Overall": 74.29,
748
  "2D": 75.28,
749
  "3D": 73.01,
750
-
751
  "Logical Reasoning": 48.06,
752
-
753
- "Text": 1.90
754
  },
755
  {
756
  "model": "Bagel",
@@ -758,7 +621,6 @@
758
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
759
  "open_source": true,
760
  "release_date": "2025-05",
761
-
762
  "Overall": 71.26,
763
  "Style": 92.44,
764
  "World Knowledge": 89.31,
@@ -769,7 +631,6 @@
769
  "Size": 86.17,
770
  "Shape": 76.92,
771
  "Color": 91.88,
772
-
773
  "Action-Overall": 67.62,
774
  "Hand": 68.59,
775
  "Full body": 67.19,
@@ -777,28 +638,22 @@
777
  "Non Contact": 58.48,
778
  "Contact": 59.77,
779
  "State": 71.94,
780
-
781
- "Relationship-Overall": 75.70,
782
  "Composition": 72.19,
783
  "Similarity": 72.12,
784
  "Inclusion": 85.92,
785
  "Comparison": 76.46,
786
-
787
  "Compound-Overall": 74.71,
788
  "Imagination": 77.32,
789
  "Feature matching": 68.93,
790
-
791
  "Grammar-Overall": 74.75,
792
- "Pronoun Reference": 87.30,
793
  "Consistency": 70.63,
794
  "Negation": 67.25,
795
-
796
- "Layout-Overall": 81.90,
797
  "2D": 83.47,
798
  "3D": 79.89,
799
-
800
  "Logical Reasoning": 59.71,
801
-
802
  "Text": 12.23
803
  },
804
  {
@@ -807,11 +662,9 @@
807
  "hf": "https://huggingface.co/X-Omni/X-Omni-En",
808
  "open_source": true,
809
  "release_date": "2025-08",
810
-
811
- "Overall": 67.00,
812
  "Style": 80.15,
813
  "World Knowledge": 82.37,
814
-
815
  "Attribute-Overall": 79.82,
816
  "Quantity": 66.49,
817
  "Expression": 70.83,
@@ -819,7 +672,6 @@
819
  "Size": 81.44,
820
  "Shape": 69.93,
821
  "Color": 86.01,
822
-
823
  "Action-Overall": 61.96,
824
  "Hand": 58.97,
825
  "Full body": 63.44,
@@ -827,28 +679,22 @@
827
  "Non Contact": 56.25,
828
  "Contact": 48.56,
829
  "State": 68.08,
830
-
831
  "Relationship-Overall": 64.28,
832
  "Composition": 59.69,
833
  "Similarity": 58.97,
834
  "Inclusion": 67.53,
835
  "Comparison": 74.27,
836
-
837
  "Compound-Overall": 64.17,
838
  "Imagination": 65.51,
839
  "Feature matching": 61.21,
840
-
841
  "Grammar-Overall": 68.78,
842
- "Pronoun Reference":82.14,
843
- "Consistency": 61.90,
844
  "Negation": 63.03,
845
-
846
  "Layout-Overall": 73.33,
847
  "2D": 78.25,
848
  "3D": 67.03,
849
-
850
- "Logical Reasoning": 51.70,
851
-
852
  "Text": 43.48
853
  },
854
  {
@@ -857,48 +703,39 @@
857
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
858
  "open_source": true,
859
  "release_date": "2025-05",
860
-
861
  "Overall": 74.25,
862
  "Style": 93.11,
863
  "World Knowledge": 92.63,
864
-
865
  "Attribute-Overall": 83.49,
866
- "Quantity": 73.40,
867
  "Expression": 68.47,
868
  "Material": 83.51,
869
  "Size": 84.47,
870
- "Shape": 75.70,
871
  "Color": 92.19,
872
-
873
  "Action-Overall": 68.82,
874
  "Hand": 65.06,
875
  "Full body": 68.44,
876
  "Animal": 62.32,
877
  "Non Contact": 71.43,
878
  "Contact": 57.47,
879
- "State": 75.20,
880
-
881
- "Relationship-Overall": 74.30,
882
  "Composition": 72.07,
883
- "Similarity": 73.40,
884
  "Inclusion": 78.74,
885
  "Comparison": 75.49,
886
-
887
  "Compound-Overall": 69.77,
888
  "Imagination": 73.63,
889
  "Feature matching": 61.21,
890
-
891
  "Grammar-Overall": 72.59,
892
  "Pronoun Reference": 86.51,
893
  "Consistency": 69.84,
894
  "Negation": 62.68,
895
-
896
  "Layout-Overall": 79.92,
897
  "2D": 82.63,
898
  "3D": 76.45,
899
-
900
  "Logical Reasoning": 50.24,
901
-
902
  "Text": 57.61
903
  },
904
  {
@@ -907,7 +744,6 @@
907
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
908
  "open_source": true,
909
  "release_date": "2025-09",
910
-
911
  "Overall": 82.19,
912
  "Style": 94.52,
913
  "World Knowledge": 93.35,
@@ -917,46 +753,38 @@
917
  "Material": 93.75,
918
  "Size": 90.34,
919
  "Shape": 87.24,
920
- "Color": 97.90,
921
-
922
  "Action-Overall": 81.14,
923
  "Hand": 82.05,
924
  "Full body": 81.88,
925
  "Animal": 79.71,
926
  "Non Contact": 76.79,
927
- "Contact": 75.00,
928
  "State": 84.09,
929
-
930
  "Relationship-Overall": 85.13,
931
  "Composition": 83.93,
932
  "Similarity": 78.53,
933
  "Inclusion": 92.82,
934
  "Comparison": 85.92,
935
-
936
  "Compound-Overall": 82.49,
937
  "Imagination": 82.28,
938
  "Feature matching": 82.94,
939
-
940
  "Grammar-Overall": 77.41,
941
  "Pronoun Reference": 91.27,
942
  "Consistency": 75.79,
943
  "Negation": 66.55,
944
-
945
  "Layout-Overall": 88.65,
946
  "2D": 90.25,
947
  "3D": 86.59,
948
-
949
- "Logical Reasoning": 68.20,
950
-
951
  "Text": 58.15
952
- },
953
  {
954
- "model": "BLIP3-o",
955
  "link": "https://arxiv.org/pdf/2505.09568",
956
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
957
  "open_source": true,
958
  "release_date": "2025-08",
959
-
960
  "Overall": 61.01,
961
  "Style": 91.61,
962
  "World Knowledge": 74.42,
@@ -967,7 +795,6 @@
967
  "Size": 78.22,
968
  "Shape": 57.87,
969
  "Color": 78.88,
970
-
971
  "Action-Overall": 55.38,
972
  "Hand": 48.08,
973
  "Full body": 54.69,
@@ -975,100 +802,81 @@
975
  "Non Contact": 46.88,
976
  "Contact": 35.92,
977
  "State": 64.82,
978
-
979
  "Relationship-Overall": 62.61,
980
  "Composition": 60.97,
981
  "Similarity": 57.69,
982
  "Inclusion": 62.36,
983
  "Comparison": 69.66,
984
-
985
  "Compound-Overall": 65.55,
986
  "Imagination": 70.89,
987
  "Feature matching": 53.74,
988
-
989
  "Grammar-Overall": 65.36,
990
- "Pronoun Reference": 74.60,
991
- "Consistency": 62.30,
992
  "Negation": 59.86,
993
-
994
  "Layout-Overall": 74.21,
995
- "2D": 77.40,
996
  "3D": 70.11,
997
-
998
- "Logical Reasoning": 48.30,
999
-
1000
  "Text": 1.36
1001
  },
1002
- {
1003
  "model": "Janus-flow",
1004
  "link": "https://arxiv.org/pdf/2411.07975",
1005
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
1006
  "open_source": true,
1007
  "release_date": "2024-11",
1008
-
1009
- "Overall": 54.80,
1010
-
1011
- "Style": 88.70,
1012
-
1013
- "World Knowledge": 65.90,
1014
-
1015
- "Attribute-Overall": 63.60,
1016
  "Quantity": 42.55,
1017
  "Expression": 43.89,
1018
  "Material": 63.18,
1019
  "Size": 71.59,
1020
  "Shape": 45.98,
1021
  "Color": 76.47,
1022
-
1023
  "Action-Overall": 48.68,
1024
- "Hand": 26.60,
1025
  "Full body": 50.94,
1026
  "Animal": 53.26,
1027
  "Non Contact": 39.29,
1028
  "Contact": 35.92,
1029
  "State": 59.98,
1030
-
1031
  "Relationship-Overall": 58.24,
1032
  "Composition": 58.55,
1033
  "Similarity": 52.88,
1034
  "Inclusion": 60.34,
1035
  "Comparison": 59.95,
1036
-
1037
  "Compound-Overall": 55.16,
1038
  "Imagination": 62.34,
1039
  "Feature matching": 39.25,
1040
-
1041
  "Grammar-Overall": 63.83,
1042
  "Pronoun Reference": 71.03,
1043
- "Consistency": 50.00,
1044
  "Negation": 69.72,
1045
-
1046
  "Layout-Overall": 60.48,
1047
  "2D": 60.03,
1048
  "3D": 61.05,
1049
-
1050
  "Logical Reasoning": 41.75,
1051
-
1052
  "Text": 1.63
1053
- },
1054
- {
1055
  "model": "BLIP3-o-Next",
1056
  "link": "https://arxiv.org/pdf/2505.09568",
1057
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
1058
  "open_source": true,
1059
  "release_date": "2025-08",
1060
-
1061
  "Overall": 71.03,
1062
- "Style": 94.60,
1063
  "World Knowledge": 88.87,
1064
  "Attribute-Overall": 80.57,
1065
  "Quantity": 70.74,
1066
- "Expression": 80.00,
1067
  "Material": 81.93,
1068
  "Size": 86.36,
1069
  "Shape": 71.85,
1070
  "Color": 81.81,
1071
-
1072
  "Action-Overall": 70.18,
1073
  "Hand": 65.71,
1074
  "Full body": 68.44,
@@ -1076,28 +884,22 @@
1076
  "Non Contact": 60.71,
1077
  "Contact": 60.63,
1078
  "State": 76.58,
1079
-
1080
  "Relationship-Overall": 74.68,
1081
  "Composition": 72.32,
1082
  "Similarity": 70.19,
1083
  "Inclusion": 81.03,
1084
  "Comparison": 77.18,
1085
-
1086
  "Compound-Overall": 74.27,
1087
- "Imagination": 78.80,
1088
  "Feature matching": 64.25,
1089
-
1090
  "Grammar-Overall": 76.02,
1091
  "Pronoun Reference": 83.33,
1092
  "Consistency": 73.02,
1093
  "Negation": 72.18,
1094
-
1095
  "Layout-Overall": 80.71,
1096
- "2D": 82.20,
1097
- "3D": 78.80,
1098
-
1099
  "Logical Reasoning": 65.53,
1100
-
1101
  "Text": 4.89
1102
  },
1103
  {
@@ -1106,13 +908,9 @@
1106
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
1107
  "open_source": true,
1108
  "release_date": "2024-10",
1109
-
1110
  "Overall": 60.37,
1111
-
1112
  "Style": 92.03,
1113
-
1114
  "World Knowledge": 73.27,
1115
-
1116
  "Attribute-Overall": 70.67,
1117
  "Quantity": 42.55,
1118
  "Expression": 48.61,
@@ -1120,7 +918,6 @@
1120
  "Size": 79.17,
1121
  "Shape": 57.69,
1122
  "Color": 82.86,
1123
-
1124
  "Action-Overall": 55.78,
1125
  "Hand": 39.42,
1126
  "Full body": 57.19,
@@ -1128,28 +925,22 @@
1128
  "Non Contact": 51.34,
1129
  "Contact": 40.23,
1130
  "State": 64.23,
1131
-
1132
  "Relationship-Overall": 63.25,
1133
  "Composition": 62.76,
1134
  "Similarity": 60.26,
1135
  "Inclusion": 67.82,
1136
  "Comparison": 62.62,
1137
-
1138
  "Compound-Overall": 61.85,
1139
  "Imagination": 69.73,
1140
  "Feature matching": 44.39,
1141
-
1142
  "Grammar-Overall": 67.26,
1143
  "Pronoun Reference": 74.21,
1144
  "Consistency": 59.52,
1145
  "Negation": 67.96,
1146
-
1147
  "Layout-Overall": 64.13,
1148
  "2D": 62.85,
1149
  "3D": 65.76,
1150
-
1151
  "Logical Reasoning": 54.37,
1152
-
1153
  "Text": 1.09
1154
  },
1155
  {
@@ -1158,13 +949,9 @@
1158
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
1159
  "open_source": true,
1160
  "release_date": "2023-07",
1161
-
1162
  "Overall": 41.48,
1163
-
1164
  "Style": 81.81,
1165
-
1166
  "World Knowledge": 69.51,
1167
-
1168
  "Attribute-Overall": 54.31,
1169
  "Quantity": 39.36,
1170
  "Expression": 44.03,
@@ -1172,7 +959,6 @@
1172
  "Size": 58.14,
1173
  "Shape": 43.01,
1174
  "Color": 58.81,
1175
-
1176
  "Action-Overall": 31.18,
1177
  "Hand": 19.23,
1178
  "Full body": 29.69,
@@ -1180,28 +966,22 @@
1180
  "Non Contact": 17.41,
1181
  "Contact": 16.67,
1182
  "State": 43.87,
1183
-
1184
  "Relationship-Overall": 36.26,
1185
  "Composition": 41.07,
1186
  "Similarity": 27.88,
1187
  "Inclusion": 42.24,
1188
- "Comparison": 28.40,
1189
-
1190
- "Compound-Overall": 34.30,
1191
  "Imagination": 41.24,
1192
  "Feature matching": 18.93,
1193
-
1194
  "Grammar-Overall": 46.83,
1195
  "Pronoun Reference": 53.57,
1196
- "Consistency": 37.70,
1197
  "Negation": 48.94,
1198
-
1199
- "Layout-Overall": 40.40,
1200
  "2D": 39.12,
1201
  "3D": 42.03,
1202
-
1203
  "Logical Reasoning": 19.42,
1204
-
1205
  "Text": 0.82
1206
  },
1207
  {
@@ -1210,7 +990,6 @@
1210
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
1211
  "open_source": true,
1212
  "release_date": "2024-03",
1213
-
1214
  "Overall": 67.68,
1215
  "Style": 88.29,
1216
  "World Knowledge": 89.45,
@@ -1219,9 +998,8 @@
1219
  "Expression": 66.53,
1220
  "Material": 79.74,
1221
  "Size": 83.14,
1222
- "Shape": 74.30,
1223
  "Color": 88.21,
1224
-
1225
  "Action-Overall": 64.33,
1226
  "Hand": 68.91,
1227
  "Full body": 60.31,
@@ -1229,28 +1007,22 @@
1229
  "Non Contact": 53.12,
1230
  "Contact": 56.32,
1231
  "State": 68.97,
1232
-
1233
  "Relationship-Overall": 66.97,
1234
  "Composition": 61.86,
1235
- "Similarity": 64.10,
1236
  "Inclusion": 76.44,
1237
  "Comparison": 70.87,
1238
-
1239
  "Compound-Overall": 66.86,
1240
  "Imagination": 68.99,
1241
  "Feature matching": 62.15,
1242
-
1243
- "Grammar-Overall": 71.70,
1244
  "Pronoun Reference": 86.51,
1245
  "Consistency": 67.46,
1246
  "Negation": 62.32,
1247
-
1248
  "Layout-Overall": 79.84,
1249
  "2D": 83.62,
1250
- "3D": 75.00,
1251
-
1252
  "Logical Reasoning": 49.76,
1253
-
1254
  "Text": 19.02
1255
  },
1256
  {
@@ -1259,11 +1031,9 @@
1259
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
1260
  "open_source": true,
1261
  "release_date": "2024-7",
1262
-
1263
- "Overall": 53.60,
1264
  "Style": 86.54,
1265
  "World Knowledge": 76.01,
1266
-
1267
  "Attribute-Overall": 68.12,
1268
  "Quantity": 61.17,
1269
  "Expression": 50.42,
@@ -1271,36 +1041,29 @@
1271
  "Size": 71.97,
1272
  "Shape": 58.74,
1273
  "Color": 74.06,
1274
-
1275
  "Action-Overall": 49.96,
1276
  "Hand": 39.74,
1277
  "Full body": 38.44,
1278
  "Animal": 50.36,
1279
  "Non Contact": 44.64,
1280
- "Contact": 34.20,
1281
  "State": 63.24,
1282
-
1283
  "Relationship-Overall": 58.51,
1284
  "Composition": 58.04,
1285
  "Similarity": 58.01,
1286
  "Inclusion": 62.36,
1287
  "Comparison": 56.55,
1288
-
1289
  "Compound-Overall": 47.24,
1290
  "Imagination": 52.11,
1291
  "Feature matching": 36.45,
1292
-
1293
- "Grammar-Overall": 55.20,
1294
  "Pronoun Reference": 72.22,
1295
  "Consistency": 53.57,
1296
  "Negation": 41.55,
1297
-
1298
  "Layout-Overall": 60.95,
1299
  "2D": 61.02,
1300
  "3D": 60.87,
1301
-
1302
  "Logical Reasoning": 31.31,
1303
-
1304
  "Text": 2.17
1305
  },
1306
  {
@@ -1309,7 +1072,6 @@
1309
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
1310
  "open_source": true,
1311
  "release_date": "2025-01",
1312
-
1313
  "Overall": 71.11,
1314
  "Style": 94.02,
1315
  "World Knowledge": 88.15,
@@ -1319,8 +1081,7 @@
1319
  "Material": 83.43,
1320
  "Size": 85.42,
1321
  "Shape": 75.87,
1322
- "Color": 89.20,
1323
-
1324
  "Action-Overall": 69.14,
1325
  "Hand": 57.69,
1326
  "Full body": 73.44,
@@ -1328,28 +1089,22 @@
1328
  "Non Contact": 62.95,
1329
  "Contact": 61.21,
1330
  "State": 73.52,
1331
-
1332
  "Relationship-Overall": 77.96,
1333
  "Composition": 77.42,
1334
  "Similarity": 71.15,
1335
  "Inclusion": 82.18,
1336
  "Comparison": 80.58,
1337
-
1338
  "Compound-Overall": 76.53,
1339
  "Imagination": 80.59,
1340
  "Feature matching": 67.52,
1341
-
1342
  "Grammar-Overall": 74.62,
1343
- "Pronoun Reference": 87.30,
1344
  "Consistency": 73.81,
1345
  "Negation": 64.08,
1346
-
1347
  "Layout-Overall": 82.14,
1348
  "2D": 81.78,
1349
  "3D": 82.61,
1350
-
1351
  "Logical Reasoning": 62.62,
1352
-
1353
  "Text": 4.08
1354
  },
1355
  {
@@ -1358,11 +1113,9 @@
1358
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large",
1359
  "open_source": true,
1360
  "release_date": "2024-10",
1361
-
1362
  "Overall": 64.35,
1363
  "Style": 88.12,
1364
  "World Knowledge": 88.15,
1365
-
1366
  "Attribute-Overall": 78.78,
1367
  "Quantity": 68.62,
1368
  "Expression": 62.22,
@@ -1370,7 +1123,6 @@
1370
  "Size": 78.79,
1371
  "Shape": 70.63,
1372
  "Color": 86.32,
1373
-
1374
  "Action-Overall": 59.63,
1375
  "Hand": 57.69,
1376
  "Full body": 52.81,
@@ -1378,28 +1130,22 @@
1378
  "Non Contact": 50.89,
1379
  "Contact": 48.85,
1380
  "State": 68.68,
1381
-
1382
  "Relationship-Overall": 67.62,
1383
  "Composition": 70.15,
1384
  "Similarity": 62.18,
1385
  "Inclusion": 70.11,
1386
  "Comparison": 64.81,
1387
-
1388
  "Compound-Overall": 62.21,
1389
  "Imagination": 65.82,
1390
  "Feature matching": 54.21,
1391
-
1392
  "Grammar-Overall": 65.23,
1393
  "Pronoun Reference": 75.79,
1394
  "Consistency": 61.51,
1395
  "Negation": 59.15,
1396
-
1397
  "Layout-Overall": 71.19,
1398
  "2D": 73.45,
1399
- "3D": 68.30,
1400
-
1401
- "Logical Reasoning": 44.90,
1402
-
1403
  "Text": 17.66
1404
  },
1405
  {
@@ -1408,65 +1154,50 @@
1408
  "hf": "-",
1409
  "open_source": false,
1410
  "release_date": "2025-09",
1411
-
1412
  "Overall": 89.77,
1413
-
1414
  "Style": 98.42,
1415
-
1416
  "World Knowledge": 95.95,
1417
-
1418
  "Attribute-Overall": 95.06,
1419
  "Quantity": 92.02,
1420
  "Expression": 89.31,
1421
  "Material": 95.26,
1422
- "Size": 94.70,
1423
  "Shape": 92.48,
1424
  "Color": 98.27,
1425
-
1426
  "Action-Overall": 86.76,
1427
  "Hand": 83.01,
1428
- "Full body": 87.50,
1429
  "Animal": 81.52,
1430
  "Non Contact": 88.39,
1431
  "Contact": 83.62,
1432
  "State": 89.82,
1433
-
1434
  "Relationship-Overall": 88.69,
1435
  "Composition": 87.37,
1436
  "Similarity": 80.77,
1437
  "Inclusion": 93.97,
1438
  "Comparison": 92.72,
1439
-
1440
  "Compound-Overall": 87.79,
1441
  "Imagination": 88.19,
1442
  "Feature matching": 86.92,
1443
-
1444
  "Grammar-Overall": 82.74,
1445
  "Pronoun Reference": 95.63,
1446
  "Consistency": 83.33,
1447
  "Negation": 70.77,
1448
-
1449
  "Layout-Overall": 92.38,
1450
  "2D": 92.94,
1451
  "3D": 91.67,
1452
-
1453
  "Logical Reasoning": 79.13,
1454
-
1455
  "Text": 90.76
1456
- },
1457
  {
1458
  "model": "DALL-E-3",
1459
  "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/",
1460
  "hf": "-",
1461
  "open_source": false,
1462
  "release_date": "2023-09",
1463
-
1464
  "Overall": 70.82,
1465
-
1466
  "Style": 95.08,
1467
-
1468
  "World Knowledge": 92.71,
1469
-
1470
  "Attribute-Overall": 84.98,
1471
  "Quantity": 64.67,
1472
  "Expression": 72.59,
@@ -1474,7 +1205,6 @@
1474
  "Size": 89.48,
1475
  "Shape": 77.14,
1476
  "Color": 90.15,
1477
-
1478
  "Action-Overall": 68.36,
1479
  "Hand": 63.49,
1480
  "Full body": 63.96,
@@ -1482,43 +1212,33 @@
1482
  "Non Contact": 59.55,
1483
  "Contact": 60.17,
1484
  "State": 76.29,
1485
-
1486
- "Relationship-Overall": 77.90,
1487
  "Composition": 80.57,
1488
  "Similarity": 70.51,
1489
  "Inclusion": 83.53,
1490
  "Comparison": 73.76,
1491
-
1492
  "Compound-Overall": 73.88,
1493
  "Imagination": 77.67,
1494
- "Feature matching": 65.00,
1495
-
1496
  "Grammar-Overall": 68.19,
1497
  "Pronoun Reference": 82.92,
1498
  "Consistency": 66.27,
1499
  "Negation": 56.99,
1500
-
1501
  "Layout-Overall": 71.76,
1502
  "2D": 69.22,
1503
- "3D": 75.00,
1504
-
1505
  "Logical Reasoning": 57.11,
1506
-
1507
  "Text": 18.26
1508
  },
1509
- {
1510
  "model": "Runway-Gen4-Image",
1511
  "link": "https://docs.dev.runwayml.com/api/#tag/Start-generating/paths/~1v1~1text_to_image/post",
1512
  "hf": "-",
1513
  "open_source": false,
1514
  "release_date": "2024-11",
1515
-
1516
  "Overall": 68.29,
1517
-
1518
  "Style": 91.72,
1519
-
1520
  "World Knowledge": 88.82,
1521
-
1522
  "Attribute-Overall": 79.83,
1523
  "Quantity": 70.65,
1524
  "Expression": 65.43,
@@ -1526,59 +1246,47 @@
1526
  "Size": 81.01,
1527
  "Shape": 67.38,
1528
  "Color": 85.64,
1529
-
1530
- "Action-Overall": 64.30,
1531
  "Hand": 55.33,
1532
  "Full body": 63.92,
1533
  "Animal": 70.65,
1534
  "Non Contact": 56.82,
1535
- "Contact": 56.10,
1536
  "State": 69.76,
1537
-
1538
  "Relationship-Overall": 69.53,
1539
  "Composition": 70.05,
1540
  "Similarity": 59.09,
1541
  "Inclusion": 76.76,
1542
  "Comparison": 70.39,
1543
-
1544
  "Compound-Overall": 68.57,
1545
  "Imagination": 69.47,
1546
- "Feature matching": 66.50,
1547
-
1548
  "Grammar-Overall": 70.55,
1549
  "Pronoun Reference": 76.23,
1550
- "Consistency": 62.70,
1551
  "Negation": 72.76,
1552
-
1553
  "Layout-Overall": 73.79,
1554
  "2D": 72.56,
1555
  "3D": 75.37,
1556
-
1557
  "Logical Reasoning": 48.28,
1558
-
1559
  "Text": 27.47
1560
  },
1561
- {
1562
  "model": "Imagen-4.0-generate-preview-06-06",
1563
  "link": "https://deepmind.google/models/imagen/",
1564
  "hf": "-",
1565
  "open_source": false,
1566
  "release_date": "2025-01",
1567
-
1568
  "Overall": 85.34,
1569
-
1570
  "Style": 94.44,
1571
-
1572
  "World Knowledge": 97.11,
1573
-
1574
  "Attribute-Overall": 90.14,
1575
  "Quantity": 82.45,
1576
  "Expression": 77.64,
1577
  "Material": 90.96,
1578
  "Size": 92.23,
1579
  "Shape": 86.36,
1580
- "Color": 95.60,
1581
-
1582
  "Action-Overall": 82.62,
1583
  "Hand": 83.65,
1584
  "Full body": 82.81,
@@ -1586,43 +1294,33 @@
1586
  "Non Contact": 85.27,
1587
  "Contact": 78.74,
1588
  "State": 84.09,
1589
-
1590
  "Relationship-Overall": 86.42,
1591
  "Composition": 86.48,
1592
  "Similarity": 80.13,
1593
  "Inclusion": 91.38,
1594
  "Comparison": 86.89,
1595
-
1596
  "Compound-Overall": 86.56,
1597
  "Imagination": 86.81,
1598
  "Feature matching": 85.98,
1599
-
1600
  "Grammar-Overall": 81.35,
1601
  "Pronoun Reference": 94.05,
1602
  "Consistency": 80.56,
1603
  "Negation": 70.77,
1604
-
1605
  "Layout-Overall": 90.24,
1606
- "2D": 90.40,
1607
  "3D": 90.04,
1608
-
1609
  "Logical Reasoning": 72.82,
1610
-
1611
  "Text": 71.74
1612
  },
1613
- {
1614
  "model": "Imagen-3.0-generate-002",
1615
  "link": "https://arxiv.org/pdf/2408.07009",
1616
  "hf": "-",
1617
  "open_source": false,
1618
  "release_date": "2025-02",
1619
-
1620
  "Overall": 75.76,
1621
-
1622
  "Style": 92.41,
1623
-
1624
  "World Knowledge": 94.19,
1625
-
1626
  "Attribute-Overall": 86.32,
1627
  "Quantity": 75.58,
1628
  "Expression": 71.41,
@@ -1630,7 +1328,6 @@
1630
  "Size": 88.52,
1631
  "Shape": 78.27,
1632
  "Color": 93.13,
1633
-
1634
  "Action-Overall": 75.81,
1635
  "Hand": 73.63,
1636
  "Full body": 77.12,
@@ -1638,28 +1335,22 @@
1638
  "Non Contact": 69.44,
1639
  "Contact": 65.48,
1640
  "State": 80.62,
1641
-
1642
  "Relationship-Overall": 80.76,
1643
  "Composition": 80.15,
1644
  "Similarity": 74.17,
1645
  "Inclusion": 90.59,
1646
  "Comparison": 78.54,
1647
-
1648
- "Compound-Overall": 78.70,
1649
  "Imagination": 81.14,
1650
  "Feature matching": 73.22,
1651
-
1652
  "Grammar-Overall": 77.96,
1653
  "Pronoun Reference": 91.67,
1654
  "Consistency": 76.61,
1655
  "Negation": 66.67,
1656
-
1657
  "Layout-Overall": 86.06,
1658
  "2D": 83.97,
1659
  "3D": 88.69,
1660
-
1661
  "Logical Reasoning": 61.25,
1662
-
1663
  "Text": 24.18
1664
  },
1665
  {
@@ -1668,13 +1359,9 @@
1668
  "hf": "-",
1669
  "open_source": false,
1670
  "release_date": "2024-06",
1671
-
1672
  "Overall": 62.01,
1673
-
1674
  "Style": 85.63,
1675
-
1676
  "World Knowledge": 86.71,
1677
-
1678
  "Attribute-Overall": 74.73,
1679
  "Quantity": 66.49,
1680
  "Expression": 55.69,
@@ -1682,51 +1369,40 @@
1682
  "Size": 77.27,
1683
  "Shape": 67.48,
1684
  "Color": 83.02,
1685
-
1686
  "Action-Overall": 58.27,
1687
  "Hand": 58.33,
1688
  "Full body": 49.38,
1689
  "Animal": 59.42,
1690
  "Non Contact": 52.23,
1691
  "Contact": 45.98,
1692
- "State": 66.30,
1693
-
1694
  "Relationship-Overall": 63.63,
1695
  "Composition": 64.92,
1696
  "Similarity": 56.73,
1697
  "Inclusion": 67.53,
1698
  "Comparison": 63.11,
1699
-
1700
  "Compound-Overall": 58.28,
1701
  "Imagination": 62.66,
1702
- "Feature matching": 48.60,
1703
-
1704
- "Grammar-Overall": 65.10,
1705
  "Pronoun Reference": 76.19,
1706
  "Consistency": 61.11,
1707
- "Negation": 58.80,
1708
-
1709
  "Layout-Overall": 71.67,
1710
  "2D": 74.86,
1711
  "3D": 67.57,
1712
-
1713
  "Logical Reasoning": 40.29,
1714
-
1715
  "Text": 15.76
1716
  },
1717
- {
1718
  "model": "FLUX-pro-1.1-Ultra",
1719
  "link": "https://bfl.ai/",
1720
  "hf": "-",
1721
  "open_source": false,
1722
  "release_date": "2024-11",
1723
-
1724
- "Overall": 75.40,
1725
-
1726
  "Style": 91.36,
1727
-
1728
  "World Knowledge": 91.76,
1729
-
1730
  "Attribute-Overall": 84.97,
1731
  "Quantity": 79.26,
1732
  "Expression": 68.58,
@@ -1734,7 +1410,6 @@
1734
  "Size": 89.96,
1735
  "Shape": 80.59,
1736
  "Color": 93.01,
1737
-
1738
  "Action-Overall": 72.43,
1739
  "Hand": 67.31,
1740
  "Full body": 66.25,
@@ -1742,28 +1417,22 @@
1742
  "Non Contact": 66.96,
1743
  "Contact": 62.07,
1744
  "State": 80.53,
1745
-
1746
- "Relationship-Overall": 81.90,
1747
  "Composition": 81.89,
1748
  "Similarity": 74.04,
1749
  "Inclusion": 90.52,
1750
  "Comparison": 80.58,
1751
-
1752
  "Compound-Overall": 78.07,
1753
- "Imagination": 80.40,
1754
  "Feature matching": 72.88,
1755
-
1756
  "Grammar-Overall": 71.94,
1757
  "Pronoun Reference": 84.52,
1758
  "Consistency": 68.55,
1759
  "Negation": 63.73,
1760
-
1761
  "Layout-Overall": 82.62,
1762
  "2D": 81.78,
1763
- "3D": 83.70,
1764
-
1765
  "Logical Reasoning": 60.92,
1766
-
1767
  "Text": 38.04
1768
  },
1769
  {
@@ -1772,51 +1441,40 @@
1772
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
1773
  "open_source": true,
1774
  "release_date": "2025-08",
1775
-
1776
  "Overall": 83.94,
1777
-
1778
  "Style": 96.93,
1779
-
1780
  "World Knowledge": 95.09,
1781
-
1782
  "Attribute-Overall": 93.65,
1783
  "Quantity": 92.02,
1784
  "Expression": 89.86,
1785
- "Material": 94.50,
1786
  "Size": 89.58,
1787
  "Shape": 86.71,
1788
  "Color": 97.85,
1789
-
1790
  "Action-Overall": 81.86,
1791
  "Hand": 78.53,
1792
  "Full body": 81.88,
1793
- "Animal": 83.70,
1794
  "Non Contact": 83.04,
1795
  "Contact": 71.84,
1796
  "State": 85.57,
1797
-
1798
  "Relationship-Overall": 83.41,
1799
  "Composition": 81.76,
1800
  "Similarity": 79.17,
1801
  "Inclusion": 88.79,
1802
  "Comparison": 85.19,
1803
-
1804
  "Compound-Overall": 81.98,
1805
  "Imagination": 82.38,
1806
  "Feature matching": 81.07,
1807
-
1808
  "Grammar-Overall": 73.86,
1809
  "Pronoun Reference": 90.48,
1810
  "Consistency": 78.57,
1811
  "Negation": 54.93,
1812
-
1813
  "Layout-Overall": 88.97,
1814
  "2D": 91.24,
1815
  "3D": 86.05,
1816
-
1817
  "Logical Reasoning": 66.75,
1818
-
1819
- "Text": 76.90
1820
  },
1821
  {
1822
  "model": "FLUX-kontext-pro",
@@ -1824,63 +1482,50 @@
1824
  "hf": "-",
1825
  "open_source": false,
1826
  "release_date": "2025-05",
1827
-
1828
  "Overall": 78.58,
1829
  "Style": 94.83,
1830
- "World Knowledge": 93.60,
1831
-
1832
  "Attribute-Overall": 86.24,
1833
  "Quantity": 74.47,
1834
- "Expression": 75.00,
1835
  "Material": 85.47,
1836
  "Size": 89.58,
1837
  "Shape": 80.63,
1838
  "Color": 92.89,
1839
-
1840
  "Action-Overall": 74.44,
1841
  "Hand": 73.05,
1842
  "Full body": 73.12,
1843
- "Animal": 75.00,
1844
  "Non Contact": 67.73,
1845
- "Contact": 70.40,
1846
  "State": 77.98,
1847
-
1848
- "Relationship-Overall": 78.40,
1849
  "Composition": 73.85,
1850
  "Similarity": 72.08,
1851
  "Inclusion": 89.08,
1852
  "Comparison": 82.77,
1853
-
1854
  "Compound-Overall": 79.75,
1855
  "Imagination": 83.58,
1856
  "Feature matching": 71.23,
1857
-
1858
  "Grammar-Overall": 77.05,
1859
  "Pronoun Reference": 90.32,
1860
- "Consistency": 75.40,
1861
- "Negation": 66.90,
1862
-
1863
  "Layout-Overall": 85.46,
1864
  "2D": 84.09,
1865
  "3D": 87.23,
1866
-
1867
  "Logical Reasoning": 66.26,
1868
-
1869
  "Text": 49.73
1870
- },
1871
  {
1872
  "model": "Hunyuan-DiT",
1873
  "link": "https://arxiv.org/pdf/2405.08748",
1874
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1875
  "open_source": true,
1876
  "release_date": "2024-05",
1877
-
1878
  "Overall": 54.88,
1879
-
1880
  "Style": 92.94,
1881
-
1882
  "World Knowledge": 80.06,
1883
-
1884
  "Attribute-Overall": 69.47,
1885
  "Quantity": 65.43,
1886
  "Expression": 52.22,
@@ -1888,36 +1533,29 @@
1888
  "Size": 75.19,
1889
  "Shape": 58.22,
1890
  "Color": 76.31,
1891
-
1892
- "Action-Overall": 48.80,
1893
- "Hand": 39.10,
1894
  "Full body": 46.25,
1895
  "Animal": 47.46,
1896
  "Non Contact": 41.07,
1897
  "Contact": 34.48,
1898
  "State": 59.58,
1899
-
1900
  "Relationship-Overall": 55.66,
1901
  "Composition": 56.89,
1902
  "Similarity": 55.45,
1903
  "Inclusion": 57.18,
1904
  "Comparison": 52.18,
1905
-
1906
  "Compound-Overall": 50.22,
1907
  "Imagination": 55.49,
1908
  "Feature matching": 38.55,
1909
-
1910
  "Grammar-Overall": 58.76,
1911
  "Pronoun Reference": 64.68,
1912
  "Consistency": 59.52,
1913
  "Negation": 52.82,
1914
-
1915
  "Layout-Overall": 61.43,
1916
  "2D": 60.45,
1917
  "3D": 62.68,
1918
-
1919
  "Logical Reasoning": 29.85,
1920
-
1921
  "Text": 1.63
1922
  },
1923
  {
@@ -1926,48 +1564,39 @@
1926
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1927
  "open_source": true,
1928
  "release_date": "2024-08",
1929
-
1930
  "Overall": 69.42,
1931
  "Style": 89.29,
1932
  "World Knowledge": 89.45,
1933
-
1934
- "Attribute-Overall": 79.90,
1935
  "Quantity": 73.94,
1936
  "Expression": 64.44,
1937
  "Material": 80.05,
1938
  "Size": 84.47,
1939
- "Shape": 71.50,
1940
  "Color": 87.47,
1941
-
1942
- "Action-Overall": 64.54,
1943
  "Hand": 63.78,
1944
- "Full body": 62.50,
1945
  "Animal": 65.94,
1946
- "Non Contact": 56.70,
1947
  "Contact": 56.32,
1948
  "State": 69.57,
1949
-
1950
- "Relationship-Overall": 69.40,
1951
  "Composition": 65.05,
1952
  "Similarity": 66.03,
1953
- "Inclusion": 79.60,
1954
- "Comparison": 71.60,
1955
-
1956
  "Compound-Overall": 68.46,
1957
- "Imagination": 71.10,
1958
  "Feature matching": 62.62,
1959
-
1960
  "Grammar-Overall": 70.56,
1961
  "Pronoun Reference": 83.33,
1962
  "Consistency": 67.46,
1963
  "Negation": 61.97,
1964
-
1965
  "Layout-Overall": 77.54,
1966
  "2D": 81.21,
1967
  "3D": 72.83,
1968
-
1969
  "Logical Reasoning": 54.37,
1970
-
1971
  "Text": 30.71
1972
  },
1973
  {
@@ -1976,11 +1605,9 @@
1976
  "hf": "-",
1977
  "open_source": false,
1978
  "release_date": "2025-05",
1979
-
1980
  "Overall": 80.88,
1981
  "Style": 96.51,
1982
  "World Knowledge": 93.35,
1983
-
1984
  "Attribute-Overall": 87.45,
1985
  "Quantity": 79.79,
1986
  "Expression": 76.68,
@@ -1988,7 +1615,6 @@
1988
  "Size": 88.83,
1989
  "Shape": 81.51,
1990
  "Color": 93.74,
1991
-
1992
  "Action-Overall": 75.52,
1993
  "Hand": 73.08,
1994
  "Full body": 75.94,
@@ -1996,41 +1622,33 @@
1996
  "Non Contact": 66.82,
1997
  "Contact": 71.55,
1998
  "State": 79.76,
1999
-
2000
  "Relationship-Overall": 80.78,
2001
- "Composition": 77.30,
2002
  "Similarity": 73.05,
2003
  "Inclusion": 89.94,
2004
  "Comparison": 85.44,
2005
-
2006
  "Compound-Overall": 82.24,
2007
  "Imagination": 84.75,
2008
  "Feature matching": 76.65,
2009
-
2010
  "Grammar-Overall": 79.34,
2011
  "Pronoun Reference": 90.08,
2012
  "Consistency": 76.61,
2013
  "Negation": 72.18,
2014
-
2015
  "Layout-Overall": 87.58,
2016
  "2D": 85.73,
2017
  "3D": 89.96,
2018
-
2019
  "Logical Reasoning": 71.12,
2020
-
2021
  "Text": 54.89
2022
- },
2023
  {
2024
  "model": "Recraft",
2025
  "link": "https://www.recraft.ai/docs#generate-image",
2026
  "hf": "-",
2027
  "open_source": false,
2028
  "release_date": "2024-12",
2029
-
2030
  "Overall": 60.93,
2031
  "Style": 87.13,
2032
  "World Knowledge": 86.99,
2033
-
2034
  "Attribute-Overall": 73.23,
2035
  "Quantity": 56.38,
2036
  "Expression": 57.22,
@@ -2038,7 +1656,6 @@
2038
  "Size": 76.89,
2039
  "Shape": 63.64,
2040
  "Color": 83.07,
2041
-
2042
  "Action-Overall": 51.77,
2043
  "Hand": 40.06,
2044
  "Full body": 54.37,
@@ -2046,78 +1663,63 @@
2046
  "Non Contact": 45.09,
2047
  "Contact": 37.36,
2048
  "State": 60.08,
2049
-
2050
  "Relationship-Overall": 55.82,
2051
  "Composition": 51.79,
2052
  "Similarity": 46.47,
2053
  "Inclusion": 66.09,
2054
  "Comparison": 61.89,
2055
-
2056
  "Compound-Overall": 49.56,
2057
  "Imagination": 50.21,
2058
  "Feature matching": 48.13,
2059
-
2060
  "Grammar-Overall": 60.28,
2061
  "Pronoun Reference": 73.41,
2062
  "Consistency": 55.56,
2063
  "Negation": 52.82,
2064
-
2065
  "Layout-Overall": 63.81,
2066
  "2D": 65.96,
2067
  "3D": 61.05,
2068
-
2069
  "Logical Reasoning": 34.22,
2070
-
2071
  "Text": 46.47
2072
- },
2073
  {
2074
  "model": "wan2.2-t2i-plus",
2075
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
2076
  "hf": "-",
2077
  "open_source": false,
2078
  "release_date": "2025-07",
2079
-
2080
  "Overall": 68.76,
2081
  "Style": 90.28,
2082
  "World Knowledge": 87.57,
2083
-
2084
  "Attribute-Overall": 81.08,
2085
  "Quantity": 78.19,
2086
  "Expression": 69.17,
2087
  "Material": 80.42,
2088
  "Size": 82.77,
2089
- "Shape": 73.60,
2090
- "Color": 88.10,
2091
-
2092
  "Action-Overall": 66.49,
2093
- "Hand": 64.10,
2094
  "Full body": 60.94,
2095
  "Animal": 70.29,
2096
  "Non Contact": 59.38,
2097
  "Contact": 55.46,
2098
  "State": 73.32,
2099
-
2100
  "Relationship-Overall": 72.79,
2101
  "Composition": 69.13,
2102
  "Similarity": 66.67,
2103
  "Inclusion": 81.03,
2104
  "Comparison": 77.43,
2105
-
2106
  "Compound-Overall": 71.73,
2107
  "Imagination": 74.16,
2108
  "Feature matching": 66.36,
2109
-
2110
  "Grammar-Overall": 70.18,
2111
- "Pronoun Reference": 86.90,
2112
  "Consistency": 61.11,
2113
  "Negation": 63.38,
2114
-
2115
  "Layout-Overall": 79.13,
2116
  "2D": 82.34,
2117
- "3D": 75.00,
2118
-
2119
  "Logical Reasoning": 55.58,
2120
-
2121
  "Text": 12.77
2122
  },
2123
  {
@@ -2126,21 +1728,16 @@
2126
  "hf": "-",
2127
  "open_source": false,
2128
  "release_date": "2025-08",
2129
-
2130
  "Overall": 88.82,
2131
-
2132
  "Style": 98.83,
2133
-
2134
  "World Knowledge": 95.78,
2135
-
2136
  "Attribute-Overall": 93.06,
2137
  "Quantity": 88.24,
2138
  "Expression": 86.09,
2139
  "Material": 93.05,
2140
- "Size": 93.70,
2141
  "Shape": 88.73,
2142
  "Color": 97.31,
2143
-
2144
  "Action-Overall": 83.93,
2145
  "Hand": 84.57,
2146
  "Full body": 84.95,
@@ -2148,28 +1745,22 @@
2148
  "Non Contact": 83.41,
2149
  "Contact": 78.16,
2150
  "State": 86.28,
2151
-
2152
  "Relationship-Overall": 91.59,
2153
  "Composition": 90.98,
2154
  "Similarity": 91.32,
2155
- "Inclusion": 92.80,
2156
  "Comparison": 91.91,
2157
-
2158
  "Compound-Overall": 90.63,
2159
  "Imagination": 92.15,
2160
  "Feature matching": 87.23,
2161
-
2162
  "Grammar-Overall": 89.33,
2163
  "Pronoun Reference": 94.84,
2164
  "Consistency": 89.24,
2165
  "Negation": 84.51,
2166
-
2167
  "Layout-Overall": 94.04,
2168
  "2D": 94.77,
2169
  "3D": 93.12,
2170
-
2171
  "Logical Reasoning": 81.27,
2172
-
2173
  "Text": 69.75
2174
  },
2175
  {
@@ -2178,21 +1769,16 @@
2178
  "hf": "-",
2179
  "open_source": false,
2180
  "release_date": "2025-03",
2181
-
2182
  "Overall": 92.63,
2183
-
2184
  "Style": 99.08,
2185
-
2186
  "World Knowledge": 97.95,
2187
-
2188
  "Attribute-Overall": 93.53,
2189
- "Quantity": 86.70,
2190
  "Expression": 93.44,
2191
  "Material": 92.45,
2192
  "Size": 94.89,
2193
  "Shape": 92.48,
2194
  "Color": 94.95,
2195
-
2196
  "Action-Overall": 87.78,
2197
  "Hand": 89.94,
2198
  "Full body": 87.19,
@@ -2200,28 +1786,22 @@
2200
  "Non Contact": 89.29,
2201
  "Contact": 83.05,
2202
  "State": 87.75,
2203
-
2204
  "Relationship-Overall": 91.13,
2205
  "Composition": 89.18,
2206
  "Similarity": 90.71,
2207
  "Inclusion": 96.84,
2208
  "Comparison": 90.29,
2209
-
2210
  "Compound-Overall": 93.99,
2211
  "Imagination": 94.39,
2212
- "Feature matching": 93.10,
2213
-
2214
  "Grammar-Overall": 94.46,
2215
  "Pronoun Reference": 95.97,
2216
  "Consistency": 91.67,
2217
  "Negation": 95.65,
2218
-
2219
  "Layout-Overall": 93.59,
2220
  "2D": 94.29,
2221
- "3D": 92.70,
2222
-
2223
  "Logical Reasoning": 91.02,
2224
-
2225
  "Text": 83.79
2226
  },
2227
  {
@@ -2230,65 +1810,50 @@
2230
  "hf": "-",
2231
  "open_source": false,
2232
  "release_date": "2025-06",
2233
-
2234
  "Overall": 80.99,
2235
-
2236
  "Style": 97.18,
2237
-
2238
  "World Knowledge": 93.79,
2239
-
2240
- "Attribute-Overall": 91.90,
2241
  "Quantity": 83.51,
2242
  "Expression": 81.25,
2243
  "Material": 93.07,
2244
  "Size": 88.26,
2245
  "Shape": 90.03,
2246
  "Color": 97.48,
2247
-
2248
  "Action-Overall": 79.94,
2249
  "Hand": 77.88,
2250
  "Full body": 84.69,
2251
  "Animal": 78.26,
2252
  "Non Contact": 74.11,
2253
  "Contact": 71.84,
2254
- "State": 83.60,
2255
-
2256
  "Relationship-Overall": 83.41,
2257
  "Composition": 81.63,
2258
  "Similarity": 79.17,
2259
  "Inclusion": 87.64,
2260
  "Comparison": 86.41,
2261
-
2262
  "Compound-Overall": 81.03,
2263
  "Imagination": 80.49,
2264
  "Feature matching": 82.24,
2265
-
2266
  "Grammar-Overall": 75.13,
2267
  "Pronoun Reference": 90.48,
2268
  "Consistency": 80.56,
2269
  "Negation": 56.69,
2270
-
2271
  "Layout-Overall": 88.41,
2272
  "2D": 87.85,
2273
  "3D": 89.13,
2274
-
2275
  "Logical Reasoning": 62.62,
2276
-
2277
  "Text": 56.52
2278
- },
2279
  {
2280
  "model": "Imagen-4.0-Fast-preview-06-06",
2281
  "link": "https://deepmind.google/models/imagen/",
2282
  "hf": "-",
2283
  "open_source": false,
2284
  "release_date": "2025-06",
2285
-
2286
  "Overall": 81.54,
2287
-
2288
  "Style": 93.77,
2289
-
2290
  "World Knowledge": 93.64,
2291
-
2292
  "Attribute-Overall": 90.33,
2293
  "Quantity": 78.72,
2294
  "Expression": 78.89,
@@ -2296,36 +1861,29 @@
2296
  "Size": 90.15,
2297
  "Shape": 86.89,
2298
  "Color": 96.33,
2299
-
2300
  "Action-Overall": 80.18,
2301
  "Hand": 82.05,
2302
  "Full body": 84.06,
2303
  "Animal": 81.88,
2304
- "Non Contact": 75.00,
2305
  "Contact": 74.71,
2306
  "State": 80.93,
2307
-
2308
  "Relationship-Overall": 84.05,
2309
  "Composition": 82.53,
2310
  "Similarity": 80.13,
2311
  "Inclusion": 92.82,
2312
  "Comparison": 82.52,
2313
-
2314
  "Compound-Overall": 84.01,
2315
  "Imagination": 86.18,
2316
  "Feature matching": 79.21,
2317
-
2318
  "Grammar-Overall": 79.57,
2319
  "Pronoun Reference": 91.27,
2320
  "Consistency": 81.35,
2321
  "Negation": 67.61,
2322
-
2323
  "Layout-Overall": 90.48,
2324
  "2D": 90.11,
2325
  "3D": 90.94,
2326
-
2327
  "Logical Reasoning": 67.72,
2328
-
2329
  "Text": 51.63
2330
  },
2331
  {
@@ -2334,51 +1892,40 @@
2334
  "hf": "-",
2335
  "open_source": false,
2336
  "release_date": "2025-06",
2337
-
2338
  "Overall": 90.95,
2339
-
2340
  "Style": 97.67,
2341
-
2342
  "World Knowledge": 98.26,
2343
-
2344
  "Attribute-Overall": 93.21,
2345
  "Quantity": 89.84,
2346
  "Expression": 83.17,
2347
- "Material": 94.20,
2348
  "Size": 94.69,
2349
  "Shape": 89.86,
2350
  "Color": 97.22,
2351
-
2352
  "Action-Overall": 86.91,
2353
- "Hand": 89.10,
2354
  "Full body": 86.56,
2355
  "Animal": 85.14,
2356
  "Non Contact": 86.61,
2357
  "Contact": 81.84,
2358
  "State": 88.63,
2359
-
2360
  "Relationship-Overall": 90.57,
2361
  "Composition": 90.05,
2362
  "Similarity": 84.62,
2363
  "Inclusion": 94.52,
2364
  "Comparison": 92.72,
2365
-
2366
  "Compound-Overall": 91.42,
2367
  "Imagination": 92.82,
2368
  "Feature matching": 88.32,
2369
-
2370
  "Grammar-Overall": 88.07,
2371
  "Pronoun Reference": 96.83,
2372
- "Consistency": 87.70,
2373
  "Negation": 80.63,
2374
-
2375
  "Layout-Overall": 93.49,
2376
  "2D": 92.64,
2377
  "3D": 94.57,
2378
-
2379
- "Logical Reasoning": 83.50,
2380
-
2381
  "Text": 86.41
2382
  }
2383
  ]
2384
- }
 
6
  "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev",
7
  "open_source": true,
8
  "release_date": "2025-11",
 
9
  "Overall": 90.31,
10
  "Style": 99.17,
11
  "World Knowledge": 96.39,
 
12
  "Attribute-Overall": 94.57,
13
  "Quantity": 82.98,
14
  "Expression": 88.47,
 
16
  "Size": 92.42,
17
  "Shape": 91.43,
18
  "Color": 98.69,
 
19
  "Action-Overall": 86.17,
20
  "Hand": 84.94,
21
  "Full body": 85.94,
22
  "Animal": 85.51,
23
  "Non Contact": 86.16,
24
  "Contact": 82.47,
25
+ "State": 88.1,
26
+ "Relationship-Overall": 91.7,
 
27
  "Composition": 91.33,
28
  "Similarity": 89.42,
29
  "Inclusion": 95.69,
30
  "Comparison": 90.78,
 
31
  "Compound-Overall": 90.16,
32
  "Imagination": 89.94,
33
  "Feature matching": 90.65,
 
34
  "Grammar-Overall": 84.52,
35
  "Pronoun Reference": 94.84,
36
  "Consistency": 82.94,
37
  "Negation": 76.76,
 
38
  "Layout-Overall": 92.22,
39
  "2D": 92.94,
40
+ "3D": 91.3,
41
+ "Logical Reasoning": 79.9,
 
 
42
  "Text": 88.32
43
  },
44
  {
 
47
  "hf": "-",
48
  "open_source": false,
49
  "release_date": "2025-11",
50
+ "Overall": 94.2,
 
 
51
  "Style": 99.58,
 
52
  "World Knowledge": 97.83,
 
53
  "Attribute-Overall": 95.94,
54
  "Quantity": 89.36,
55
  "Expression": 90.69,
56
  "Material": 97.52,
57
  "Size": 96.97,
58
  "Shape": 91.43,
59
+ "Color": 98.53,
 
60
  "Action-Overall": 89.19,
61
  "Hand": 86.22,
62
  "Full body": 89.69,
 
64
  "Non Contact": 89.29,
65
  "Contact": 89.37,
66
  "State": 89.38,
 
67
  "Relationship-Overall": 94.29,
68
  "Composition": 94.39,
69
  "Similarity": 91.99,
70
  "Inclusion": 98.28,
71
  "Comparison": 92.48,
72
+ "Compound-Overall": 94.1,
 
73
  "Imagination": 94.92,
74
  "Feature matching": 92.29,
 
75
  "Grammar-Overall": 93.15,
76
+ "Pronoun Reference": 99.6,
77
  "Consistency": 90.08,
78
  "Negation": 90.14,
 
79
  "Layout-Overall": 93.73,
80
  "2D": 93.79,
81
  "3D": 93.66,
 
82
  "Logical Reasoning": 87.75,
 
83
  "Text": 96.47
84
  },
85
  {
 
88
  "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo",
89
  "open_source": true,
90
  "release_date": "2025-11",
91
+ "Overall": 80.72,
 
92
  "Style": 93.19,
93
  "World Knowledge": 93.93,
 
94
  "Attribute-Overall": 89.34,
95
  "Quantity": 82.98,
96
  "Expression": 76.11,
97
  "Material": 91.72,
98
+ "Size": 87.5,
99
  "Shape": 80.77,
100
  "Color": 96.38,
101
+ "Action-Overall": 74.2,
 
102
  "Hand": 75.64,
103
  "Full body": 74.06,
104
  "Animal": 71.01,
105
  "Non Contact": 71.43,
106
  "Contact": 66.38,
107
  "State": 77.98,
 
108
  "Relationship-Overall": 80.44,
109
  "Composition": 78.32,
110
  "Similarity": 73.08,
111
  "Inclusion": 87.93,
112
  "Comparison": 83.74,
 
113
  "Compound-Overall": 76.46,
114
  "Imagination": 77.75,
115
+ "Feature matching": 73.6,
 
116
  "Grammar-Overall": 76.65,
117
  "Pronoun Reference": 91.27,
118
  "Consistency": 69.84,
119
  "Negation": 69.72,
 
120
  "Layout-Overall": 86.67,
121
  "2D": 87.71,
122
  "3D": 85.33,
 
123
  "Logical Reasoning": 66.18,
 
124
  "Text": 70.11
125
  },
126
  {
 
129
  "hf": "-",
130
  "open_source": false,
131
  "release_date": "2025-09",
132
+ "Overall": 84.56,
133
+ "Style": 96.5,
134
+ "World Knowledge": 96.24,
135
+ "Attribute-Overall": 91.17,
 
 
136
  "Quantity": 85.64,
137
+ "Expression": 79.61,
138
+ "Material": 93.73,
139
+ "Size": 88.36,
140
+ "Shape": 87.68,
141
  "Color": 96.11,
142
+ "Action-Overall": 78.98,
143
+ "Hand": 78.21,
 
144
  "Full body": 82.91,
145
+ "Animal": 78.68,
146
+ "Non Contact": 74.07,
147
+ "Contact": 72.13,
148
+ "State": 81.5,
149
+ "Relationship-Overall": 87.01,
150
+ "Composition": 86.03,
151
+ "Similarity": 79.17,
152
+ "Inclusion": 94.77,
153
+ "Comparison": 88.35,
154
+ "Compound-Overall": 86.22,
155
+ "Imagination": 87.61,
156
+ "Feature matching": 83.18,
157
+ "Grammar-Overall": 77.68,
158
+ "Pronoun Reference": 93.25,
159
+ "Consistency": 75.81,
 
 
 
160
  "Negation": 65.49,
161
+ "Layout-Overall": 87.26,
162
+ "2D": 88.42,
 
163
  "3D": 85.77,
164
+ "Logical Reasoning": 72.28,
165
+ "Text": 72.28
 
 
166
  },
167
+ {
168
  "model": "Emu3",
169
  "link": "https://arxiv.org/pdf/2409.18869",
170
  "hf": "https://huggingface.co/BAAI/Emu3-Gen",
171
  "open_source": true,
172
  "release_date": "2024-09",
 
173
  "Overall": 50.95,
 
174
  "Style": 89.36,
 
175
  "World Knowledge": 76.16,
 
176
  "Attribute-Overall": 66.81,
177
  "Quantity": 44.68,
178
  "Expression": 48.47,
 
180
  "Size": 73.24,
181
  "Shape": 54.29,
182
  "Color": 76.61,
183
+ "Action-Overall": 43.8,
 
184
  "Hand": 28.85,
185
  "Full body": 46.25,
186
  "Animal": 43.48,
187
  "Non Contact": 30.49,
188
  "Contact": 25.57,
189
  "State": 56.92,
190
+ "Relationship-Overall": 51.7,
 
191
  "Composition": 53.77,
192
  "Similarity": 42.31,
193
  "Inclusion": 59.48,
194
+ "Comparison": 48.3,
195
+ "Compound-Overall": 46.0,
 
196
  "Imagination": 51.69,
197
  "Feature matching": 33.41,
 
198
  "Grammar-Overall": 50.25,
199
  "Pronoun Reference": 55.95,
200
  "Consistency": 42.46,
201
  "Negation": 52.11,
 
202
  "Layout-Overall": 56.67,
203
  "2D": 56.36,
204
  "3D": 57.07,
 
205
  "Logical Reasoning": 27.43,
 
206
  "Text": 1.36
207
  },
208
+ {
209
  "model": "UniWorld-V1",
210
  "link": "https://arxiv.org/pdf/2506.03147",
211
  "hf": "https://huggingface.co/LanguageBind/UniWorld-V1",
212
  "open_source": true,
213
  "release_date": "2025-06",
214
+ "Overall": 69.6,
 
215
  "Style": 93.19,
216
+ "World Knowledge": 84.1,
 
217
  "Attribute-Overall": 79.94,
218
  "Quantity": 66.49,
219
  "Expression": 72.64,
 
221
  "Size": 81.06,
222
  "Shape": 72.38,
223
  "Color": 87.95,
 
224
  "Action-Overall": 65.81,
225
  "Hand": 63.78,
226
  "Full body": 64.38,
 
228
  "Non Contact": 62.95,
229
  "Contact": 55.17,
230
  "State": 70.85,
 
231
  "Relationship-Overall": 68.91,
232
  "Composition": 66.96,
233
  "Similarity": 67.31,
234
  "Inclusion": 72.99,
235
  "Comparison": 70.39,
 
236
  "Compound-Overall": 71.37,
237
  "Imagination": 74.16,
238
  "Feature matching": 65.19,
 
239
  "Grammar-Overall": 75.13,
240
  "Pronoun Reference": 84.13,
241
  "Consistency": 69.44,
242
  "Negation": 72.18,
243
+ "Layout-Overall": 79.6,
 
244
  "2D": 83.33,
245
  "3D": 74.82,
 
246
  "Logical Reasoning": 57.04,
 
247
  "Text": 20.92
248
  },
249
+ {
250
  "model": "Echo-4o",
251
  "link": "https://arxiv.org/pdf/2508.09987",
252
  "hf": "https://huggingface.co/Yejy53/Echo-4o",
253
  "open_source": true,
254
  "release_date": "2025-8",
 
255
  "Overall": 76.41,
256
+ "Style": 96.1,
257
  "World Knowledge": 90.17,
 
258
  "Attribute-Overall": 90.24,
259
+ "Quantity": 73.4,
260
  "Expression": 82.08,
261
  "Material": 92.39,
262
+ "Size": 89.2,
263
  "Shape": 84.44,
264
  "Color": 95.49,
 
265
  "Action-Overall": 73.56,
266
  "Hand": 72.12,
267
  "Full body": 76.56,
 
269
  "Non Contact": 66.96,
270
  "Contact": 65.23,
271
  "State": 77.47,
 
272
  "Relationship-Overall": 82.81,
273
+ "Composition": 83.8,
274
  "Similarity": 78.21,
275
  "Inclusion": 84.77,
276
  "Comparison": 82.77,
 
277
  "Compound-Overall": 84.88,
278
  "Imagination": 85.44,
279
  "Feature matching": 83.64,
 
280
  "Grammar-Overall": 82.36,
281
  "Pronoun Reference": 86.11,
282
  "Consistency": 83.33,
283
  "Negation": 78.17,
 
284
  "Layout-Overall": 86.43,
285
+ "2D": 88.7,
286
  "3D": 83.51,
 
287
  "Logical Reasoning": 69.42,
 
288
  "Text": 8.15
289
+ },
290
+ {
291
  "model": "SD-3.5-Medium",
292
  "link": "https://stability.ai/news/introducing-stable-diffusion-3-5",
293
  "hf": "stabilityai/stable-diffusion-3.5-medium",
294
  "open_source": true,
295
  "release_date": "2024-10",
 
296
  "Overall": 64.67,
297
  "Style": 92.19,
298
  "World Knowledge": 86.56,
 
299
  "Attribute-Overall": 80.24,
300
+ "Quantity": 61.7,
301
  "Expression": 62.64,
302
  "Material": 83.73,
303
  "Size": 82.01,
304
+ "Shape": 73.6,
305
  "Color": 87.79,
 
306
  "Action-Overall": 58.59,
307
  "Hand": 58.01,
308
  "Full body": 56.56,
 
310
  "Non Contact": 42.86,
311
  "Contact": 46.55,
312
  "State": 68.18,
 
313
  "Relationship-Overall": 69.88,
314
  "Composition": 70.15,
315
  "Similarity": 62.82,
316
  "Inclusion": 75.86,
317
  "Comparison": 69.66,
 
318
  "Compound-Overall": 62.86,
319
  "Imagination": 65.61,
320
  "Feature matching": 56.78,
 
321
  "Grammar-Overall": 65.86,
322
  "Pronoun Reference": 79.37,
323
  "Consistency": 61.11,
324
+ "Negation": 58.1,
 
325
  "Layout-Overall": 73.25,
326
  "2D": 73.59,
327
  "3D": 72.83,
 
328
  "Logical Reasoning": 45.87,
 
329
  "Text": 11.41
330
  },
331
  {
 
334
  "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO",
335
  "open_source": true,
336
  "release_date": "2025-09",
 
337
  "Overall": 71.81,
338
  "Style": 86.88,
339
  "World Knowledge": 88.58,
 
340
  "Attribute-Overall": 83.71,
341
  "Quantity": 74.47,
342
  "Expression": 76.11,
343
+ "Material": 80.8,
344
  "Size": 84.47,
345
  "Shape": 78.67,
346
  "Color": 90.83,
 
347
  "Action-Overall": 69.66,
348
  "Hand": 67.63,
349
  "Full body": 71.56,
 
351
  "Non Contact": 65.18,
352
  "Contact": 57.18,
353
  "State": 74.21,
 
354
  "Relationship-Overall": 73.33,
355
  "Composition": 69.77,
356
  "Similarity": 72.76,
357
  "Inclusion": 82.18,
358
  "Comparison": 73.06,
 
359
  "Compound-Overall": 74.93,
360
+ "Imagination": 77.0,
361
  "Feature matching": 70.33,
 
362
  "Grammar-Overall": 74.49,
363
  "Pronoun Reference": 89.68,
364
  "Consistency": 66.67,
365
  "Negation": 67.96,
 
366
  "Layout-Overall": 84.84,
367
  "2D": 90.11,
368
  "3D": 78.08,
 
369
  "Logical Reasoning": 58.01,
 
370
  "Text": 23.64
371
  },
372
  {
 
375
  "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT",
376
  "open_source": true,
377
  "release_date": "2025-05",
378
+ "Overall": 40.1,
 
379
  "Style": 75.83,
380
  "World Knowledge": 52.75,
381
+ "Attribute-Overall": 49.9,
 
382
  "Quantity": 50.53,
383
  "Expression": 37.22,
384
  "Material": 47.52,
385
  "Size": 54.55,
386
  "Shape": 40.56,
387
  "Color": 57.81,
 
388
  "Action-Overall": 32.42,
389
  "Hand": 16.67,
390
  "Full body": 30.63,
 
392
  "Non Contact": 19.64,
393
  "Contact": 17.24,
394
  "State": 44.17,
 
395
  "Relationship-Overall": 39.06,
396
  "Composition": 39.16,
397
  "Similarity": 33.97,
398
  "Inclusion": 48.56,
399
  "Comparison": 34.71,
 
400
  "Compound-Overall": 38.37,
401
  "Imagination": 45.99,
402
+ "Feature matching": 21.5,
403
+ "Grammar-Overall": 50.0,
 
404
  "Pronoun Reference": 53.97,
405
  "Consistency": 39.29,
406
  "Negation": 55.99,
 
407
  "Layout-Overall": 43.02,
408
  "2D": 47.46,
409
  "3D": 37.32,
 
410
  "Logical Reasoning": 19.42,
 
411
  "Text": 0.27
412
  },
413
  {
 
416
  "hf": "https://huggingface.co/OmniGen2/OmniGen2",
417
  "open_source": true,
418
  "release_date": "2025-06",
 
419
  "Overall": 71.39,
420
  "Style": 94.35,
421
  "World Knowledge": 84.83,
 
422
  "Attribute-Overall": 83.03,
423
  "Quantity": 66.49,
424
  "Expression": 73.89,
425
  "Material": 81.78,
426
  "Size": 81.63,
427
+ "Shape": 77.8,
428
  "Color": 90.93,
 
429
  "Action-Overall": 66.57,
430
  "Hand": 67.31,
431
  "Full body": 64.06,
432
  "Animal": 65.22,
433
  "Non Contact": 64.29,
434
+ "Contact": 54.6,
435
  "State": 72.13,
 
436
  "Relationship-Overall": 73.06,
437
  "Composition": 67.73,
438
  "Similarity": 72.76,
439
+ "Inclusion": 81.9,
440
  "Comparison": 75.97,
 
441
  "Compound-Overall": 70.49,
442
  "Imagination": 72.47,
443
  "Feature matching": 66.12,
444
+ "Grammar-Overall": 76.4,
 
445
  "Pronoun Reference": 84.52,
446
  "Consistency": 75.79,
447
  "Negation": 69.72,
 
448
  "Layout-Overall": 80.63,
449
+ "2D": 82.2,
450
  "3D": 78.62,
 
451
  "Logical Reasoning": 56.55,
 
452
  "Text": 27.99
453
  },
454
  {
 
457
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev",
458
  "open_source": true,
459
  "release_date": "2025-08",
 
460
  "Overall": 78.45,
461
+ "Style": 94.1,
462
  "World Knowledge": 93.79,
 
463
  "Attribute-Overall": 89.55,
464
  "Quantity": 81.38,
465
  "Expression": 76.81,
 
467
  "Size": 88.64,
468
  "Shape": 85.31,
469
  "Color": 95.44,
 
470
  "Action-Overall": 76.28,
471
+ "Hand": 75.0,
472
  "Full body": 76.25,
473
  "Animal": 72.46,
474
+ "Non Contact": 69.2,
475
  "Contact": 72.99,
476
  "State": 80.43,
 
477
  "Relationship-Overall": 81.73,
478
  "Composition": 80.87,
479
  "Similarity": 73.08,
480
  "Inclusion": 88.22,
481
  "Comparison": 84.47,
 
482
  "Compound-Overall": 80.67,
483
  "Imagination": 80.59,
484
  "Feature matching": 80.84,
 
485
  "Grammar-Overall": 75.25,
486
  "Pronoun Reference": 91.27,
487
  "Consistency": 74.21,
488
  "Negation": 61.97,
 
489
  "Layout-Overall": 86.59,
490
  "2D": 85.45,
491
+ "3D": 88.04,
 
492
  "Logical Reasoning": 65.53,
 
493
  "Text": 41.03
494
  },
495
  {
 
498
  "hf": "https://huggingface.co/FoundationVision/Infinity/tree/main/infinity_8b_512x512_weights",
499
  "open_source": true,
500
  "release_date": "2024-12",
 
501
  "Overall": 67.28,
502
  "Style": 92.77,
503
  "World Knowledge": 88.44,
 
508
  "Size": 82.95,
509
  "Shape": 71.15,
510
  "Color": 88.73,
 
511
  "Action-Overall": 63.28,
512
  "Hand": 58.65,
513
+ "Full body": 60.31,
514
  "Animal": 67.75,
515
  "Non Contact": 58.48,
516
  "Contact": 52.87,
517
  "State": 69.07,
 
518
  "Relationship-Overall": 70.04,
519
+ "Composition": 66.2,
520
  "Similarity": 67.63,
521
  "Inclusion": 78.45,
522
  "Comparison": 72.09,
 
523
  "Compound-Overall": 66.13,
524
  "Imagination": 68.57,
525
  "Feature matching": 60.75,
 
526
  "Grammar-Overall": 68.53,
527
  "Pronoun Reference": 76.59,
528
  "Consistency": 71.43,
529
+ "Negation": 58.8,
 
530
  "Layout-Overall": 77.54,
531
  "2D": 80.93,
532
  "3D": 73.19,
 
533
  "Logical Reasoning": 51.46,
 
534
  "Text": 13.59
535
  },
536
  {
 
539
  "hf": "https://huggingface.co/showlab/show-o2-7B",
540
  "open_source": true,
541
  "release_date": "2025-06",
 
542
  "Overall": 70.33,
543
  "Style": 93.11,
544
  "World Knowledge": 88.44,
545
  "Attribute-Overall": 86.35,
546
  "Quantity": 59.04,
547
  "Expression": 71.53,
548
+ "Material": 88.1,
549
  "Size": 87.31,
550
  "Shape": 81.12,
551
  "Color": 94.71,
 
552
  "Action-Overall": 69.02,
553
  "Hand": 53.85,
554
+ "Full body": 80.0,
555
+ "Animal": 69.2,
556
  "Non Contact": 60.27,
557
  "Contact": 55.75,
558
  "State": 76.68,
 
559
  "Relationship-Overall": 77.37,
560
  "Composition": 77.42,
561
  "Similarity": 68.59,
562
  "Inclusion": 80.17,
563
  "Comparison": 81.55,
 
564
  "Compound-Overall": 76.45,
565
  "Imagination": 77.64,
566
  "Feature matching": 73.83,
567
+ "Grammar-Overall": 70.3,
568
+ "Pronoun Reference": 87.3,
 
569
  "Consistency": 66.67,
570
  "Negation": 58.45,
 
571
  "Layout-Overall": 80.63,
572
  "2D": 80.08,
573
  "3D": 81.34,
 
574
  "Logical Reasoning": 59.71,
575
+ "Text": 1.9
 
576
  },
577
+ {
578
  "model": "OneCAT",
579
  "link": "https://arxiv.org/pdf/2509.03498",
580
  "hf": "https://huggingface.co/onecat-ai/OneCAT-3B",
581
  "open_source": true,
582
  "release_date": "2025-09",
 
583
  "Overall": 62.92,
584
  "Style": 94.93,
585
  "World Knowledge": 83.67,
586
+ "Attribute-Overall": 74.9,
587
+ "Quantity": 61.7,
 
588
  "Expression": 66.39,
589
  "Material": 78.09,
590
  "Size": 82.58,
591
  "Shape": 62.24,
592
  "Color": 78.88,
 
593
  "Action-Overall": 58.95,
594
  "Hand": 37.82,
595
  "Full body": 59.06,
 
597
  "Non Contact": 50.89,
598
  "Contact": 43.97,
599
  "State": 71.44,
 
600
  "Relationship-Overall": 65.36,
601
  "Composition": 67.47,
602
  "Similarity": 62.82,
603
  "Inclusion": 63.22,
604
  "Comparison": 65.05,
 
605
  "Compound-Overall": 63.59,
606
  "Imagination": 72.57,
607
  "Feature matching": 43.69,
 
608
  "Grammar-Overall": 63.58,
609
  "Pronoun Reference": 74.21,
610
  "Consistency": 67.46,
611
+ "Negation": 50.7,
 
612
  "Layout-Overall": 74.29,
613
  "2D": 75.28,
614
  "3D": 73.01,
 
615
  "Logical Reasoning": 48.06,
616
+ "Text": 1.9
 
617
  },
618
  {
619
  "model": "Bagel",
 
621
  "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT",
622
  "open_source": true,
623
  "release_date": "2025-05",
 
624
  "Overall": 71.26,
625
  "Style": 92.44,
626
  "World Knowledge": 89.31,
 
631
  "Size": 86.17,
632
  "Shape": 76.92,
633
  "Color": 91.88,
 
634
  "Action-Overall": 67.62,
635
  "Hand": 68.59,
636
  "Full body": 67.19,
 
638
  "Non Contact": 58.48,
639
  "Contact": 59.77,
640
  "State": 71.94,
641
+ "Relationship-Overall": 75.7,
 
642
  "Composition": 72.19,
643
  "Similarity": 72.12,
644
  "Inclusion": 85.92,
645
  "Comparison": 76.46,
 
646
  "Compound-Overall": 74.71,
647
  "Imagination": 77.32,
648
  "Feature matching": 68.93,
 
649
  "Grammar-Overall": 74.75,
650
+ "Pronoun Reference": 87.3,
651
  "Consistency": 70.63,
652
  "Negation": 67.25,
653
+ "Layout-Overall": 81.9,
 
654
  "2D": 83.47,
655
  "3D": 79.89,
 
656
  "Logical Reasoning": 59.71,
 
657
  "Text": 12.23
658
  },
659
  {
 
662
  "hf": "https://huggingface.co/X-Omni/X-Omni-En",
663
  "open_source": true,
664
  "release_date": "2025-08",
665
+ "Overall": 67.0,
 
666
  "Style": 80.15,
667
  "World Knowledge": 82.37,
 
668
  "Attribute-Overall": 79.82,
669
  "Quantity": 66.49,
670
  "Expression": 70.83,
 
672
  "Size": 81.44,
673
  "Shape": 69.93,
674
  "Color": 86.01,
 
675
  "Action-Overall": 61.96,
676
  "Hand": 58.97,
677
  "Full body": 63.44,
 
679
  "Non Contact": 56.25,
680
  "Contact": 48.56,
681
  "State": 68.08,
 
682
  "Relationship-Overall": 64.28,
683
  "Composition": 59.69,
684
  "Similarity": 58.97,
685
  "Inclusion": 67.53,
686
  "Comparison": 74.27,
 
687
  "Compound-Overall": 64.17,
688
  "Imagination": 65.51,
689
  "Feature matching": 61.21,
 
690
  "Grammar-Overall": 68.78,
691
+ "Pronoun Reference": 82.14,
692
+ "Consistency": 61.9,
693
  "Negation": 63.03,
 
694
  "Layout-Overall": 73.33,
695
  "2D": 78.25,
696
  "3D": 67.03,
697
+ "Logical Reasoning": 51.7,
 
 
698
  "Text": 43.48
699
  },
700
  {
 
703
  "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full",
704
  "open_source": true,
705
  "release_date": "2025-05",
 
706
  "Overall": 74.25,
707
  "Style": 93.11,
708
  "World Knowledge": 92.63,
 
709
  "Attribute-Overall": 83.49,
710
+ "Quantity": 73.4,
711
  "Expression": 68.47,
712
  "Material": 83.51,
713
  "Size": 84.47,
714
+ "Shape": 75.7,
715
  "Color": 92.19,
 
716
  "Action-Overall": 68.82,
717
  "Hand": 65.06,
718
  "Full body": 68.44,
719
  "Animal": 62.32,
720
  "Non Contact": 71.43,
721
  "Contact": 57.47,
722
+ "State": 75.2,
723
+ "Relationship-Overall": 74.3,
 
724
  "Composition": 72.07,
725
+ "Similarity": 73.4,
726
  "Inclusion": 78.74,
727
  "Comparison": 75.49,
 
728
  "Compound-Overall": 69.77,
729
  "Imagination": 73.63,
730
  "Feature matching": 61.21,
 
731
  "Grammar-Overall": 72.59,
732
  "Pronoun Reference": 86.51,
733
  "Consistency": 69.84,
734
  "Negation": 62.68,
 
735
  "Layout-Overall": 79.92,
736
  "2D": 82.63,
737
  "3D": 76.45,
 
738
  "Logical Reasoning": 50.24,
 
739
  "Text": 57.61
740
  },
741
  {
 
744
  "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1",
745
  "open_source": true,
746
  "release_date": "2025-09",
 
747
  "Overall": 82.19,
748
  "Style": 94.52,
749
  "World Knowledge": 93.35,
 
753
  "Material": 93.75,
754
  "Size": 90.34,
755
  "Shape": 87.24,
756
+ "Color": 97.9,
 
757
  "Action-Overall": 81.14,
758
  "Hand": 82.05,
759
  "Full body": 81.88,
760
  "Animal": 79.71,
761
  "Non Contact": 76.79,
762
+ "Contact": 75.0,
763
  "State": 84.09,
 
764
  "Relationship-Overall": 85.13,
765
  "Composition": 83.93,
766
  "Similarity": 78.53,
767
  "Inclusion": 92.82,
768
  "Comparison": 85.92,
 
769
  "Compound-Overall": 82.49,
770
  "Imagination": 82.28,
771
  "Feature matching": 82.94,
 
772
  "Grammar-Overall": 77.41,
773
  "Pronoun Reference": 91.27,
774
  "Consistency": 75.79,
775
  "Negation": 66.55,
 
776
  "Layout-Overall": 88.65,
777
  "2D": 90.25,
778
  "3D": 86.59,
779
+ "Logical Reasoning": 68.2,
 
 
780
  "Text": 58.15
781
+ },
782
  {
783
+ "model": "BLIP3-o",
784
  "link": "https://arxiv.org/pdf/2505.09568",
785
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
786
  "open_source": true,
787
  "release_date": "2025-08",
 
788
  "Overall": 61.01,
789
  "Style": 91.61,
790
  "World Knowledge": 74.42,
 
795
  "Size": 78.22,
796
  "Shape": 57.87,
797
  "Color": 78.88,
 
798
  "Action-Overall": 55.38,
799
  "Hand": 48.08,
800
  "Full body": 54.69,
 
802
  "Non Contact": 46.88,
803
  "Contact": 35.92,
804
  "State": 64.82,
 
805
  "Relationship-Overall": 62.61,
806
  "Composition": 60.97,
807
  "Similarity": 57.69,
808
  "Inclusion": 62.36,
809
  "Comparison": 69.66,
 
810
  "Compound-Overall": 65.55,
811
  "Imagination": 70.89,
812
  "Feature matching": 53.74,
 
813
  "Grammar-Overall": 65.36,
814
+ "Pronoun Reference": 74.6,
815
+ "Consistency": 62.3,
816
  "Negation": 59.86,
 
817
  "Layout-Overall": 74.21,
818
+ "2D": 77.4,
819
  "3D": 70.11,
820
+ "Logical Reasoning": 48.3,
 
 
821
  "Text": 1.36
822
  },
823
+ {
824
  "model": "Janus-flow",
825
  "link": "https://arxiv.org/pdf/2411.07975",
826
  "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
827
  "open_source": true,
828
  "release_date": "2024-11",
829
+ "Overall": 54.8,
830
+ "Style": 88.7,
831
+ "World Knowledge": 65.9,
832
+ "Attribute-Overall": 63.6,
 
 
 
 
833
  "Quantity": 42.55,
834
  "Expression": 43.89,
835
  "Material": 63.18,
836
  "Size": 71.59,
837
  "Shape": 45.98,
838
  "Color": 76.47,
 
839
  "Action-Overall": 48.68,
840
+ "Hand": 26.6,
841
  "Full body": 50.94,
842
  "Animal": 53.26,
843
  "Non Contact": 39.29,
844
  "Contact": 35.92,
845
  "State": 59.98,
 
846
  "Relationship-Overall": 58.24,
847
  "Composition": 58.55,
848
  "Similarity": 52.88,
849
  "Inclusion": 60.34,
850
  "Comparison": 59.95,
 
851
  "Compound-Overall": 55.16,
852
  "Imagination": 62.34,
853
  "Feature matching": 39.25,
 
854
  "Grammar-Overall": 63.83,
855
  "Pronoun Reference": 71.03,
856
+ "Consistency": 50.0,
857
  "Negation": 69.72,
 
858
  "Layout-Overall": 60.48,
859
  "2D": 60.03,
860
  "3D": 61.05,
 
861
  "Logical Reasoning": 41.75,
 
862
  "Text": 1.63
863
+ },
864
+ {
865
  "model": "BLIP3-o-Next",
866
  "link": "https://arxiv.org/pdf/2505.09568",
867
  "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B",
868
  "open_source": true,
869
  "release_date": "2025-08",
 
870
  "Overall": 71.03,
871
+ "Style": 94.6,
872
  "World Knowledge": 88.87,
873
  "Attribute-Overall": 80.57,
874
  "Quantity": 70.74,
875
+ "Expression": 80.0,
876
  "Material": 81.93,
877
  "Size": 86.36,
878
  "Shape": 71.85,
879
  "Color": 81.81,
 
880
  "Action-Overall": 70.18,
881
  "Hand": 65.71,
882
  "Full body": 68.44,
 
884
  "Non Contact": 60.71,
885
  "Contact": 60.63,
886
  "State": 76.58,
 
887
  "Relationship-Overall": 74.68,
888
  "Composition": 72.32,
889
  "Similarity": 70.19,
890
  "Inclusion": 81.03,
891
  "Comparison": 77.18,
 
892
  "Compound-Overall": 74.27,
893
+ "Imagination": 78.8,
894
  "Feature matching": 64.25,
 
895
  "Grammar-Overall": 76.02,
896
  "Pronoun Reference": 83.33,
897
  "Consistency": 73.02,
898
  "Negation": 72.18,
 
899
  "Layout-Overall": 80.71,
900
+ "2D": 82.2,
901
+ "3D": 78.8,
 
902
  "Logical Reasoning": 65.53,
 
903
  "Text": 4.89
904
  },
905
  {
 
908
  "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
909
  "open_source": true,
910
  "release_date": "2024-10",
 
911
  "Overall": 60.37,
 
912
  "Style": 92.03,
 
913
  "World Knowledge": 73.27,
 
914
  "Attribute-Overall": 70.67,
915
  "Quantity": 42.55,
916
  "Expression": 48.61,
 
918
  "Size": 79.17,
919
  "Shape": 57.69,
920
  "Color": 82.86,
 
921
  "Action-Overall": 55.78,
922
  "Hand": 39.42,
923
  "Full body": 57.19,
 
925
  "Non Contact": 51.34,
926
  "Contact": 40.23,
927
  "State": 64.23,
 
928
  "Relationship-Overall": 63.25,
929
  "Composition": 62.76,
930
  "Similarity": 60.26,
931
  "Inclusion": 67.82,
932
  "Comparison": 62.62,
 
933
  "Compound-Overall": 61.85,
934
  "Imagination": 69.73,
935
  "Feature matching": 44.39,
 
936
  "Grammar-Overall": 67.26,
937
  "Pronoun Reference": 74.21,
938
  "Consistency": 59.52,
939
  "Negation": 67.96,
 
940
  "Layout-Overall": 64.13,
941
  "2D": 62.85,
942
  "3D": 65.76,
 
943
  "Logical Reasoning": 54.37,
 
944
  "Text": 1.09
945
  },
946
  {
 
949
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
950
  "open_source": true,
951
  "release_date": "2023-07",
 
952
  "Overall": 41.48,
 
953
  "Style": 81.81,
 
954
  "World Knowledge": 69.51,
 
955
  "Attribute-Overall": 54.31,
956
  "Quantity": 39.36,
957
  "Expression": 44.03,
 
959
  "Size": 58.14,
960
  "Shape": 43.01,
961
  "Color": 58.81,
 
962
  "Action-Overall": 31.18,
963
  "Hand": 19.23,
964
  "Full body": 29.69,
 
966
  "Non Contact": 17.41,
967
  "Contact": 16.67,
968
  "State": 43.87,
 
969
  "Relationship-Overall": 36.26,
970
  "Composition": 41.07,
971
  "Similarity": 27.88,
972
  "Inclusion": 42.24,
973
+ "Comparison": 28.4,
974
+ "Compound-Overall": 34.3,
 
975
  "Imagination": 41.24,
976
  "Feature matching": 18.93,
 
977
  "Grammar-Overall": 46.83,
978
  "Pronoun Reference": 53.57,
979
+ "Consistency": 37.7,
980
  "Negation": 48.94,
981
+ "Layout-Overall": 40.4,
 
982
  "2D": 39.12,
983
  "3D": 42.03,
 
984
  "Logical Reasoning": 19.42,
 
985
  "Text": 0.82
986
  },
987
  {
 
990
  "hf": "https://huggingface.co/zai-org/CogView4-6B",
991
  "open_source": true,
992
  "release_date": "2024-03",
 
993
  "Overall": 67.68,
994
  "Style": 88.29,
995
  "World Knowledge": 89.45,
 
998
  "Expression": 66.53,
999
  "Material": 79.74,
1000
  "Size": 83.14,
1001
+ "Shape": 74.3,
1002
  "Color": 88.21,
 
1003
  "Action-Overall": 64.33,
1004
  "Hand": 68.91,
1005
  "Full body": 60.31,
 
1007
  "Non Contact": 53.12,
1008
  "Contact": 56.32,
1009
  "State": 68.97,
 
1010
  "Relationship-Overall": 66.97,
1011
  "Composition": 61.86,
1012
+ "Similarity": 64.1,
1013
  "Inclusion": 76.44,
1014
  "Comparison": 70.87,
 
1015
  "Compound-Overall": 66.86,
1016
  "Imagination": 68.99,
1017
  "Feature matching": 62.15,
1018
+ "Grammar-Overall": 71.7,
 
1019
  "Pronoun Reference": 86.51,
1020
  "Consistency": 67.46,
1021
  "Negation": 62.32,
 
1022
  "Layout-Overall": 79.84,
1023
  "2D": 83.62,
1024
+ "3D": 75.0,
 
1025
  "Logical Reasoning": 49.76,
 
1026
  "Text": 19.02
1027
  },
1028
  {
 
1031
  "hf": "https://huggingface.co/Kwai-Kolors/Kolors",
1032
  "open_source": true,
1033
  "release_date": "2024-7",
1034
+ "Overall": 53.6,
 
1035
  "Style": 86.54,
1036
  "World Knowledge": 76.01,
 
1037
  "Attribute-Overall": 68.12,
1038
  "Quantity": 61.17,
1039
  "Expression": 50.42,
 
1041
  "Size": 71.97,
1042
  "Shape": 58.74,
1043
  "Color": 74.06,
 
1044
  "Action-Overall": 49.96,
1045
  "Hand": 39.74,
1046
  "Full body": 38.44,
1047
  "Animal": 50.36,
1048
  "Non Contact": 44.64,
1049
+ "Contact": 34.2,
1050
  "State": 63.24,
 
1051
  "Relationship-Overall": 58.51,
1052
  "Composition": 58.04,
1053
  "Similarity": 58.01,
1054
  "Inclusion": 62.36,
1055
  "Comparison": 56.55,
 
1056
  "Compound-Overall": 47.24,
1057
  "Imagination": 52.11,
1058
  "Feature matching": 36.45,
1059
+ "Grammar-Overall": 55.2,
 
1060
  "Pronoun Reference": 72.22,
1061
  "Consistency": 53.57,
1062
  "Negation": 41.55,
 
1063
  "Layout-Overall": 60.95,
1064
  "2D": 61.02,
1065
  "3D": 60.87,
 
1066
  "Logical Reasoning": 31.31,
 
1067
  "Text": 2.17
1068
  },
1069
  {
 
1072
  "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B",
1073
  "open_source": true,
1074
  "release_date": "2025-01",
 
1075
  "Overall": 71.11,
1076
  "Style": 94.02,
1077
  "World Knowledge": 88.15,
 
1081
  "Material": 83.43,
1082
  "Size": 85.42,
1083
  "Shape": 75.87,
1084
+ "Color": 89.2,
 
1085
  "Action-Overall": 69.14,
1086
  "Hand": 57.69,
1087
  "Full body": 73.44,
 
1089
  "Non Contact": 62.95,
1090
  "Contact": 61.21,
1091
  "State": 73.52,
 
1092
  "Relationship-Overall": 77.96,
1093
  "Composition": 77.42,
1094
  "Similarity": 71.15,
1095
  "Inclusion": 82.18,
1096
  "Comparison": 80.58,
 
1097
  "Compound-Overall": 76.53,
1098
  "Imagination": 80.59,
1099
  "Feature matching": 67.52,
 
1100
  "Grammar-Overall": 74.62,
1101
+ "Pronoun Reference": 87.3,
1102
  "Consistency": 73.81,
1103
  "Negation": 64.08,
 
1104
  "Layout-Overall": 82.14,
1105
  "2D": 81.78,
1106
  "3D": 82.61,
 
1107
  "Logical Reasoning": 62.62,
 
1108
  "Text": 4.08
1109
  },
1110
  {
 
1113
  "hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large",
1114
  "open_source": true,
1115
  "release_date": "2024-10",
 
1116
  "Overall": 64.35,
1117
  "Style": 88.12,
1118
  "World Knowledge": 88.15,
 
1119
  "Attribute-Overall": 78.78,
1120
  "Quantity": 68.62,
1121
  "Expression": 62.22,
 
1123
  "Size": 78.79,
1124
  "Shape": 70.63,
1125
  "Color": 86.32,
 
1126
  "Action-Overall": 59.63,
1127
  "Hand": 57.69,
1128
  "Full body": 52.81,
 
1130
  "Non Contact": 50.89,
1131
  "Contact": 48.85,
1132
  "State": 68.68,
 
1133
  "Relationship-Overall": 67.62,
1134
  "Composition": 70.15,
1135
  "Similarity": 62.18,
1136
  "Inclusion": 70.11,
1137
  "Comparison": 64.81,
 
1138
  "Compound-Overall": 62.21,
1139
  "Imagination": 65.82,
1140
  "Feature matching": 54.21,
 
1141
  "Grammar-Overall": 65.23,
1142
  "Pronoun Reference": 75.79,
1143
  "Consistency": 61.51,
1144
  "Negation": 59.15,
 
1145
  "Layout-Overall": 71.19,
1146
  "2D": 73.45,
1147
+ "3D": 68.3,
1148
+ "Logical Reasoning": 44.9,
 
 
1149
  "Text": 17.66
1150
  },
1151
  {
 
1154
  "hf": "-",
1155
  "open_source": false,
1156
  "release_date": "2025-09",
 
1157
  "Overall": 89.77,
 
1158
  "Style": 98.42,
 
1159
  "World Knowledge": 95.95,
 
1160
  "Attribute-Overall": 95.06,
1161
  "Quantity": 92.02,
1162
  "Expression": 89.31,
1163
  "Material": 95.26,
1164
+ "Size": 94.7,
1165
  "Shape": 92.48,
1166
  "Color": 98.27,
 
1167
  "Action-Overall": 86.76,
1168
  "Hand": 83.01,
1169
+ "Full body": 87.5,
1170
  "Animal": 81.52,
1171
  "Non Contact": 88.39,
1172
  "Contact": 83.62,
1173
  "State": 89.82,
 
1174
  "Relationship-Overall": 88.69,
1175
  "Composition": 87.37,
1176
  "Similarity": 80.77,
1177
  "Inclusion": 93.97,
1178
  "Comparison": 92.72,
 
1179
  "Compound-Overall": 87.79,
1180
  "Imagination": 88.19,
1181
  "Feature matching": 86.92,
 
1182
  "Grammar-Overall": 82.74,
1183
  "Pronoun Reference": 95.63,
1184
  "Consistency": 83.33,
1185
  "Negation": 70.77,
 
1186
  "Layout-Overall": 92.38,
1187
  "2D": 92.94,
1188
  "3D": 91.67,
 
1189
  "Logical Reasoning": 79.13,
 
1190
  "Text": 90.76
1191
+ },
1192
  {
1193
  "model": "DALL-E-3",
1194
  "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/",
1195
  "hf": "-",
1196
  "open_source": false,
1197
  "release_date": "2023-09",
 
1198
  "Overall": 70.82,
 
1199
  "Style": 95.08,
 
1200
  "World Knowledge": 92.71,
 
1201
  "Attribute-Overall": 84.98,
1202
  "Quantity": 64.67,
1203
  "Expression": 72.59,
 
1205
  "Size": 89.48,
1206
  "Shape": 77.14,
1207
  "Color": 90.15,
 
1208
  "Action-Overall": 68.36,
1209
  "Hand": 63.49,
1210
  "Full body": 63.96,
 
1212
  "Non Contact": 59.55,
1213
  "Contact": 60.17,
1214
  "State": 76.29,
1215
+ "Relationship-Overall": 77.9,
 
1216
  "Composition": 80.57,
1217
  "Similarity": 70.51,
1218
  "Inclusion": 83.53,
1219
  "Comparison": 73.76,
 
1220
  "Compound-Overall": 73.88,
1221
  "Imagination": 77.67,
1222
+ "Feature matching": 65.0,
 
1223
  "Grammar-Overall": 68.19,
1224
  "Pronoun Reference": 82.92,
1225
  "Consistency": 66.27,
1226
  "Negation": 56.99,
 
1227
  "Layout-Overall": 71.76,
1228
  "2D": 69.22,
1229
+ "3D": 75.0,
 
1230
  "Logical Reasoning": 57.11,
 
1231
  "Text": 18.26
1232
  },
1233
+ {
1234
  "model": "Runway-Gen4-Image",
1235
  "link": "https://docs.dev.runwayml.com/api/#tag/Start-generating/paths/~1v1~1text_to_image/post",
1236
  "hf": "-",
1237
  "open_source": false,
1238
  "release_date": "2024-11",
 
1239
  "Overall": 68.29,
 
1240
  "Style": 91.72,
 
1241
  "World Knowledge": 88.82,
 
1242
  "Attribute-Overall": 79.83,
1243
  "Quantity": 70.65,
1244
  "Expression": 65.43,
 
1246
  "Size": 81.01,
1247
  "Shape": 67.38,
1248
  "Color": 85.64,
1249
+ "Action-Overall": 64.3,
 
1250
  "Hand": 55.33,
1251
  "Full body": 63.92,
1252
  "Animal": 70.65,
1253
  "Non Contact": 56.82,
1254
+ "Contact": 56.1,
1255
  "State": 69.76,
 
1256
  "Relationship-Overall": 69.53,
1257
  "Composition": 70.05,
1258
  "Similarity": 59.09,
1259
  "Inclusion": 76.76,
1260
  "Comparison": 70.39,
 
1261
  "Compound-Overall": 68.57,
1262
  "Imagination": 69.47,
1263
+ "Feature matching": 66.5,
 
1264
  "Grammar-Overall": 70.55,
1265
  "Pronoun Reference": 76.23,
1266
+ "Consistency": 62.7,
1267
  "Negation": 72.76,
 
1268
  "Layout-Overall": 73.79,
1269
  "2D": 72.56,
1270
  "3D": 75.37,
 
1271
  "Logical Reasoning": 48.28,
 
1272
  "Text": 27.47
1273
  },
1274
+ {
1275
  "model": "Imagen-4.0-generate-preview-06-06",
1276
  "link": "https://deepmind.google/models/imagen/",
1277
  "hf": "-",
1278
  "open_source": false,
1279
  "release_date": "2025-01",
 
1280
  "Overall": 85.34,
 
1281
  "Style": 94.44,
 
1282
  "World Knowledge": 97.11,
 
1283
  "Attribute-Overall": 90.14,
1284
  "Quantity": 82.45,
1285
  "Expression": 77.64,
1286
  "Material": 90.96,
1287
  "Size": 92.23,
1288
  "Shape": 86.36,
1289
+ "Color": 95.6,
 
1290
  "Action-Overall": 82.62,
1291
  "Hand": 83.65,
1292
  "Full body": 82.81,
 
1294
  "Non Contact": 85.27,
1295
  "Contact": 78.74,
1296
  "State": 84.09,
 
1297
  "Relationship-Overall": 86.42,
1298
  "Composition": 86.48,
1299
  "Similarity": 80.13,
1300
  "Inclusion": 91.38,
1301
  "Comparison": 86.89,
 
1302
  "Compound-Overall": 86.56,
1303
  "Imagination": 86.81,
1304
  "Feature matching": 85.98,
 
1305
  "Grammar-Overall": 81.35,
1306
  "Pronoun Reference": 94.05,
1307
  "Consistency": 80.56,
1308
  "Negation": 70.77,
 
1309
  "Layout-Overall": 90.24,
1310
+ "2D": 90.4,
1311
  "3D": 90.04,
 
1312
  "Logical Reasoning": 72.82,
 
1313
  "Text": 71.74
1314
  },
1315
+ {
1316
  "model": "Imagen-3.0-generate-002",
1317
  "link": "https://arxiv.org/pdf/2408.07009",
1318
  "hf": "-",
1319
  "open_source": false,
1320
  "release_date": "2025-02",
 
1321
  "Overall": 75.76,
 
1322
  "Style": 92.41,
 
1323
  "World Knowledge": 94.19,
 
1324
  "Attribute-Overall": 86.32,
1325
  "Quantity": 75.58,
1326
  "Expression": 71.41,
 
1328
  "Size": 88.52,
1329
  "Shape": 78.27,
1330
  "Color": 93.13,
 
1331
  "Action-Overall": 75.81,
1332
  "Hand": 73.63,
1333
  "Full body": 77.12,
 
1335
  "Non Contact": 69.44,
1336
  "Contact": 65.48,
1337
  "State": 80.62,
 
1338
  "Relationship-Overall": 80.76,
1339
  "Composition": 80.15,
1340
  "Similarity": 74.17,
1341
  "Inclusion": 90.59,
1342
  "Comparison": 78.54,
1343
+ "Compound-Overall": 78.7,
 
1344
  "Imagination": 81.14,
1345
  "Feature matching": 73.22,
 
1346
  "Grammar-Overall": 77.96,
1347
  "Pronoun Reference": 91.67,
1348
  "Consistency": 76.61,
1349
  "Negation": 66.67,
 
1350
  "Layout-Overall": 86.06,
1351
  "2D": 83.97,
1352
  "3D": 88.69,
 
1353
  "Logical Reasoning": 61.25,
 
1354
  "Text": 24.18
1355
  },
1356
  {
 
1359
  "hf": "-",
1360
  "open_source": false,
1361
  "release_date": "2024-06",
 
1362
  "Overall": 62.01,
 
1363
  "Style": 85.63,
 
1364
  "World Knowledge": 86.71,
 
1365
  "Attribute-Overall": 74.73,
1366
  "Quantity": 66.49,
1367
  "Expression": 55.69,
 
1369
  "Size": 77.27,
1370
  "Shape": 67.48,
1371
  "Color": 83.02,
 
1372
  "Action-Overall": 58.27,
1373
  "Hand": 58.33,
1374
  "Full body": 49.38,
1375
  "Animal": 59.42,
1376
  "Non Contact": 52.23,
1377
  "Contact": 45.98,
1378
+ "State": 66.3,
 
1379
  "Relationship-Overall": 63.63,
1380
  "Composition": 64.92,
1381
  "Similarity": 56.73,
1382
  "Inclusion": 67.53,
1383
  "Comparison": 63.11,
 
1384
  "Compound-Overall": 58.28,
1385
  "Imagination": 62.66,
1386
+ "Feature matching": 48.6,
1387
+ "Grammar-Overall": 65.1,
 
1388
  "Pronoun Reference": 76.19,
1389
  "Consistency": 61.11,
1390
+ "Negation": 58.8,
 
1391
  "Layout-Overall": 71.67,
1392
  "2D": 74.86,
1393
  "3D": 67.57,
 
1394
  "Logical Reasoning": 40.29,
 
1395
  "Text": 15.76
1396
  },
1397
+ {
1398
  "model": "FLUX-pro-1.1-Ultra",
1399
  "link": "https://bfl.ai/",
1400
  "hf": "-",
1401
  "open_source": false,
1402
  "release_date": "2024-11",
1403
+ "Overall": 75.4,
 
 
1404
  "Style": 91.36,
 
1405
  "World Knowledge": 91.76,
 
1406
  "Attribute-Overall": 84.97,
1407
  "Quantity": 79.26,
1408
  "Expression": 68.58,
 
1410
  "Size": 89.96,
1411
  "Shape": 80.59,
1412
  "Color": 93.01,
 
1413
  "Action-Overall": 72.43,
1414
  "Hand": 67.31,
1415
  "Full body": 66.25,
 
1417
  "Non Contact": 66.96,
1418
  "Contact": 62.07,
1419
  "State": 80.53,
1420
+ "Relationship-Overall": 81.9,
 
1421
  "Composition": 81.89,
1422
  "Similarity": 74.04,
1423
  "Inclusion": 90.52,
1424
  "Comparison": 80.58,
 
1425
  "Compound-Overall": 78.07,
1426
+ "Imagination": 80.4,
1427
  "Feature matching": 72.88,
 
1428
  "Grammar-Overall": 71.94,
1429
  "Pronoun Reference": 84.52,
1430
  "Consistency": 68.55,
1431
  "Negation": 63.73,
 
1432
  "Layout-Overall": 82.62,
1433
  "2D": 81.78,
1434
+ "3D": 83.7,
 
1435
  "Logical Reasoning": 60.92,
 
1436
  "Text": 38.04
1437
  },
1438
  {
 
1441
  "hf": "https://huggingface.co/Qwen/Qwen-Image",
1442
  "open_source": true,
1443
  "release_date": "2025-08",
 
1444
  "Overall": 83.94,
 
1445
  "Style": 96.93,
 
1446
  "World Knowledge": 95.09,
 
1447
  "Attribute-Overall": 93.65,
1448
  "Quantity": 92.02,
1449
  "Expression": 89.86,
1450
+ "Material": 94.5,
1451
  "Size": 89.58,
1452
  "Shape": 86.71,
1453
  "Color": 97.85,
 
1454
  "Action-Overall": 81.86,
1455
  "Hand": 78.53,
1456
  "Full body": 81.88,
1457
+ "Animal": 83.7,
1458
  "Non Contact": 83.04,
1459
  "Contact": 71.84,
1460
  "State": 85.57,
 
1461
  "Relationship-Overall": 83.41,
1462
  "Composition": 81.76,
1463
  "Similarity": 79.17,
1464
  "Inclusion": 88.79,
1465
  "Comparison": 85.19,
 
1466
  "Compound-Overall": 81.98,
1467
  "Imagination": 82.38,
1468
  "Feature matching": 81.07,
 
1469
  "Grammar-Overall": 73.86,
1470
  "Pronoun Reference": 90.48,
1471
  "Consistency": 78.57,
1472
  "Negation": 54.93,
 
1473
  "Layout-Overall": 88.97,
1474
  "2D": 91.24,
1475
  "3D": 86.05,
 
1476
  "Logical Reasoning": 66.75,
1477
+ "Text": 76.9
 
1478
  },
1479
  {
1480
  "model": "FLUX-kontext-pro",
 
1482
  "hf": "-",
1483
  "open_source": false,
1484
  "release_date": "2025-05",
 
1485
  "Overall": 78.58,
1486
  "Style": 94.83,
1487
+ "World Knowledge": 93.6,
 
1488
  "Attribute-Overall": 86.24,
1489
  "Quantity": 74.47,
1490
+ "Expression": 75.0,
1491
  "Material": 85.47,
1492
  "Size": 89.58,
1493
  "Shape": 80.63,
1494
  "Color": 92.89,
 
1495
  "Action-Overall": 74.44,
1496
  "Hand": 73.05,
1497
  "Full body": 73.12,
1498
+ "Animal": 75.0,
1499
  "Non Contact": 67.73,
1500
+ "Contact": 70.4,
1501
  "State": 77.98,
1502
+ "Relationship-Overall": 78.4,
 
1503
  "Composition": 73.85,
1504
  "Similarity": 72.08,
1505
  "Inclusion": 89.08,
1506
  "Comparison": 82.77,
 
1507
  "Compound-Overall": 79.75,
1508
  "Imagination": 83.58,
1509
  "Feature matching": 71.23,
 
1510
  "Grammar-Overall": 77.05,
1511
  "Pronoun Reference": 90.32,
1512
+ "Consistency": 75.4,
1513
+ "Negation": 66.9,
 
1514
  "Layout-Overall": 85.46,
1515
  "2D": 84.09,
1516
  "3D": 87.23,
 
1517
  "Logical Reasoning": 66.26,
 
1518
  "Text": 49.73
1519
+ },
1520
  {
1521
  "model": "Hunyuan-DiT",
1522
  "link": "https://arxiv.org/pdf/2405.08748",
1523
  "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
1524
  "open_source": true,
1525
  "release_date": "2024-05",
 
1526
  "Overall": 54.88,
 
1527
  "Style": 92.94,
 
1528
  "World Knowledge": 80.06,
 
1529
  "Attribute-Overall": 69.47,
1530
  "Quantity": 65.43,
1531
  "Expression": 52.22,
 
1533
  "Size": 75.19,
1534
  "Shape": 58.22,
1535
  "Color": 76.31,
1536
+ "Action-Overall": 48.8,
1537
+ "Hand": 39.1,
 
1538
  "Full body": 46.25,
1539
  "Animal": 47.46,
1540
  "Non Contact": 41.07,
1541
  "Contact": 34.48,
1542
  "State": 59.58,
 
1543
  "Relationship-Overall": 55.66,
1544
  "Composition": 56.89,
1545
  "Similarity": 55.45,
1546
  "Inclusion": 57.18,
1547
  "Comparison": 52.18,
 
1548
  "Compound-Overall": 50.22,
1549
  "Imagination": 55.49,
1550
  "Feature matching": 38.55,
 
1551
  "Grammar-Overall": 58.76,
1552
  "Pronoun Reference": 64.68,
1553
  "Consistency": 59.52,
1554
  "Negation": 52.82,
 
1555
  "Layout-Overall": 61.43,
1556
  "2D": 60.45,
1557
  "3D": 62.68,
 
1558
  "Logical Reasoning": 29.85,
 
1559
  "Text": 1.63
1560
  },
1561
  {
 
1564
  "hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev",
1565
  "open_source": true,
1566
  "release_date": "2024-08",
 
1567
  "Overall": 69.42,
1568
  "Style": 89.29,
1569
  "World Knowledge": 89.45,
1570
+ "Attribute-Overall": 79.9,
 
1571
  "Quantity": 73.94,
1572
  "Expression": 64.44,
1573
  "Material": 80.05,
1574
  "Size": 84.47,
1575
+ "Shape": 71.5,
1576
  "Color": 87.47,
1577
+ "Action-Overall": 64.53,
 
1578
  "Hand": 63.78,
1579
+ "Full body": 62.5,
1580
  "Animal": 65.94,
1581
+ "Non Contact": 56.7,
1582
  "Contact": 56.32,
1583
  "State": 69.57,
1584
+ "Relationship-Overall": 69.4,
 
1585
  "Composition": 65.05,
1586
  "Similarity": 66.03,
1587
+ "Inclusion": 79.6,
1588
+ "Comparison": 71.6,
 
1589
  "Compound-Overall": 68.46,
1590
+ "Imagination": 71.1,
1591
  "Feature matching": 62.62,
 
1592
  "Grammar-Overall": 70.56,
1593
  "Pronoun Reference": 83.33,
1594
  "Consistency": 67.46,
1595
  "Negation": 61.97,
 
1596
  "Layout-Overall": 77.54,
1597
  "2D": 81.21,
1598
  "3D": 72.83,
 
1599
  "Logical Reasoning": 54.37,
 
1600
  "Text": 30.71
1601
  },
1602
  {
 
1605
  "hf": "-",
1606
  "open_source": false,
1607
  "release_date": "2025-05",
 
1608
  "Overall": 80.88,
1609
  "Style": 96.51,
1610
  "World Knowledge": 93.35,
 
1611
  "Attribute-Overall": 87.45,
1612
  "Quantity": 79.79,
1613
  "Expression": 76.68,
 
1615
  "Size": 88.83,
1616
  "Shape": 81.51,
1617
  "Color": 93.74,
 
1618
  "Action-Overall": 75.52,
1619
  "Hand": 73.08,
1620
  "Full body": 75.94,
 
1622
  "Non Contact": 66.82,
1623
  "Contact": 71.55,
1624
  "State": 79.76,
 
1625
  "Relationship-Overall": 80.78,
1626
+ "Composition": 77.3,
1627
  "Similarity": 73.05,
1628
  "Inclusion": 89.94,
1629
  "Comparison": 85.44,
 
1630
  "Compound-Overall": 82.24,
1631
  "Imagination": 84.75,
1632
  "Feature matching": 76.65,
 
1633
  "Grammar-Overall": 79.34,
1634
  "Pronoun Reference": 90.08,
1635
  "Consistency": 76.61,
1636
  "Negation": 72.18,
 
1637
  "Layout-Overall": 87.58,
1638
  "2D": 85.73,
1639
  "3D": 89.96,
 
1640
  "Logical Reasoning": 71.12,
 
1641
  "Text": 54.89
1642
+ },
1643
  {
1644
  "model": "Recraft",
1645
  "link": "https://www.recraft.ai/docs#generate-image",
1646
  "hf": "-",
1647
  "open_source": false,
1648
  "release_date": "2024-12",
 
1649
  "Overall": 60.93,
1650
  "Style": 87.13,
1651
  "World Knowledge": 86.99,
 
1652
  "Attribute-Overall": 73.23,
1653
  "Quantity": 56.38,
1654
  "Expression": 57.22,
 
1656
  "Size": 76.89,
1657
  "Shape": 63.64,
1658
  "Color": 83.07,
 
1659
  "Action-Overall": 51.77,
1660
  "Hand": 40.06,
1661
  "Full body": 54.37,
 
1663
  "Non Contact": 45.09,
1664
  "Contact": 37.36,
1665
  "State": 60.08,
 
1666
  "Relationship-Overall": 55.82,
1667
  "Composition": 51.79,
1668
  "Similarity": 46.47,
1669
  "Inclusion": 66.09,
1670
  "Comparison": 61.89,
 
1671
  "Compound-Overall": 49.56,
1672
  "Imagination": 50.21,
1673
  "Feature matching": 48.13,
 
1674
  "Grammar-Overall": 60.28,
1675
  "Pronoun Reference": 73.41,
1676
  "Consistency": 55.56,
1677
  "Negation": 52.82,
 
1678
  "Layout-Overall": 63.81,
1679
  "2D": 65.96,
1680
  "3D": 61.05,
 
1681
  "Logical Reasoning": 34.22,
 
1682
  "Text": 46.47
1683
+ },
1684
  {
1685
  "model": "wan2.2-t2i-plus",
1686
  "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference",
1687
  "hf": "-",
1688
  "open_source": false,
1689
  "release_date": "2025-07",
 
1690
  "Overall": 68.76,
1691
  "Style": 90.28,
1692
  "World Knowledge": 87.57,
 
1693
  "Attribute-Overall": 81.08,
1694
  "Quantity": 78.19,
1695
  "Expression": 69.17,
1696
  "Material": 80.42,
1697
  "Size": 82.77,
1698
+ "Shape": 73.6,
1699
+ "Color": 88.1,
 
1700
  "Action-Overall": 66.49,
1701
+ "Hand": 64.1,
1702
  "Full body": 60.94,
1703
  "Animal": 70.29,
1704
  "Non Contact": 59.38,
1705
  "Contact": 55.46,
1706
  "State": 73.32,
 
1707
  "Relationship-Overall": 72.79,
1708
  "Composition": 69.13,
1709
  "Similarity": 66.67,
1710
  "Inclusion": 81.03,
1711
  "Comparison": 77.43,
 
1712
  "Compound-Overall": 71.73,
1713
  "Imagination": 74.16,
1714
  "Feature matching": 66.36,
 
1715
  "Grammar-Overall": 70.18,
1716
+ "Pronoun Reference": 86.9,
1717
  "Consistency": 61.11,
1718
  "Negation": 63.38,
 
1719
  "Layout-Overall": 79.13,
1720
  "2D": 82.34,
1721
+ "3D": 75.0,
 
1722
  "Logical Reasoning": 55.58,
 
1723
  "Text": 12.77
1724
  },
1725
  {
 
1728
  "hf": "-",
1729
  "open_source": false,
1730
  "release_date": "2025-08",
 
1731
  "Overall": 88.82,
 
1732
  "Style": 98.83,
 
1733
  "World Knowledge": 95.78,
 
1734
  "Attribute-Overall": 93.06,
1735
  "Quantity": 88.24,
1736
  "Expression": 86.09,
1737
  "Material": 93.05,
1738
+ "Size": 93.7,
1739
  "Shape": 88.73,
1740
  "Color": 97.31,
 
1741
  "Action-Overall": 83.93,
1742
  "Hand": 84.57,
1743
  "Full body": 84.95,
 
1745
  "Non Contact": 83.41,
1746
  "Contact": 78.16,
1747
  "State": 86.28,
 
1748
  "Relationship-Overall": 91.59,
1749
  "Composition": 90.98,
1750
  "Similarity": 91.32,
1751
+ "Inclusion": 92.8,
1752
  "Comparison": 91.91,
 
1753
  "Compound-Overall": 90.63,
1754
  "Imagination": 92.15,
1755
  "Feature matching": 87.23,
 
1756
  "Grammar-Overall": 89.33,
1757
  "Pronoun Reference": 94.84,
1758
  "Consistency": 89.24,
1759
  "Negation": 84.51,
 
1760
  "Layout-Overall": 94.04,
1761
  "2D": 94.77,
1762
  "3D": 93.12,
 
1763
  "Logical Reasoning": 81.27,
 
1764
  "Text": 69.75
1765
  },
1766
  {
 
1769
  "hf": "-",
1770
  "open_source": false,
1771
  "release_date": "2025-03",
 
1772
  "Overall": 92.63,
 
1773
  "Style": 99.08,
 
1774
  "World Knowledge": 97.95,
 
1775
  "Attribute-Overall": 93.53,
1776
+ "Quantity": 86.7,
1777
  "Expression": 93.44,
1778
  "Material": 92.45,
1779
  "Size": 94.89,
1780
  "Shape": 92.48,
1781
  "Color": 94.95,
 
1782
  "Action-Overall": 87.78,
1783
  "Hand": 89.94,
1784
  "Full body": 87.19,
 
1786
  "Non Contact": 89.29,
1787
  "Contact": 83.05,
1788
  "State": 87.75,
 
1789
  "Relationship-Overall": 91.13,
1790
  "Composition": 89.18,
1791
  "Similarity": 90.71,
1792
  "Inclusion": 96.84,
1793
  "Comparison": 90.29,
 
1794
  "Compound-Overall": 93.99,
1795
  "Imagination": 94.39,
1796
+ "Feature matching": 93.1,
 
1797
  "Grammar-Overall": 94.46,
1798
  "Pronoun Reference": 95.97,
1799
  "Consistency": 91.67,
1800
  "Negation": 95.65,
 
1801
  "Layout-Overall": 93.59,
1802
  "2D": 94.29,
1803
+ "3D": 92.7,
 
1804
  "Logical Reasoning": 91.02,
 
1805
  "Text": 83.79
1806
  },
1807
  {
 
1810
  "hf": "-",
1811
  "open_source": false,
1812
  "release_date": "2025-06",
 
1813
  "Overall": 80.99,
 
1814
  "Style": 97.18,
 
1815
  "World Knowledge": 93.79,
1816
+ "Attribute-Overall": 91.9,
 
1817
  "Quantity": 83.51,
1818
  "Expression": 81.25,
1819
  "Material": 93.07,
1820
  "Size": 88.26,
1821
  "Shape": 90.03,
1822
  "Color": 97.48,
 
1823
  "Action-Overall": 79.94,
1824
  "Hand": 77.88,
1825
  "Full body": 84.69,
1826
  "Animal": 78.26,
1827
  "Non Contact": 74.11,
1828
  "Contact": 71.84,
1829
+ "State": 83.6,
 
1830
  "Relationship-Overall": 83.41,
1831
  "Composition": 81.63,
1832
  "Similarity": 79.17,
1833
  "Inclusion": 87.64,
1834
  "Comparison": 86.41,
 
1835
  "Compound-Overall": 81.03,
1836
  "Imagination": 80.49,
1837
  "Feature matching": 82.24,
 
1838
  "Grammar-Overall": 75.13,
1839
  "Pronoun Reference": 90.48,
1840
  "Consistency": 80.56,
1841
  "Negation": 56.69,
 
1842
  "Layout-Overall": 88.41,
1843
  "2D": 87.85,
1844
  "3D": 89.13,
 
1845
  "Logical Reasoning": 62.62,
 
1846
  "Text": 56.52
1847
+ },
1848
  {
1849
  "model": "Imagen-4.0-Fast-preview-06-06",
1850
  "link": "https://deepmind.google/models/imagen/",
1851
  "hf": "-",
1852
  "open_source": false,
1853
  "release_date": "2025-06",
 
1854
  "Overall": 81.54,
 
1855
  "Style": 93.77,
 
1856
  "World Knowledge": 93.64,
 
1857
  "Attribute-Overall": 90.33,
1858
  "Quantity": 78.72,
1859
  "Expression": 78.89,
 
1861
  "Size": 90.15,
1862
  "Shape": 86.89,
1863
  "Color": 96.33,
 
1864
  "Action-Overall": 80.18,
1865
  "Hand": 82.05,
1866
  "Full body": 84.06,
1867
  "Animal": 81.88,
1868
+ "Non Contact": 75.0,
1869
  "Contact": 74.71,
1870
  "State": 80.93,
 
1871
  "Relationship-Overall": 84.05,
1872
  "Composition": 82.53,
1873
  "Similarity": 80.13,
1874
  "Inclusion": 92.82,
1875
  "Comparison": 82.52,
 
1876
  "Compound-Overall": 84.01,
1877
  "Imagination": 86.18,
1878
  "Feature matching": 79.21,
 
1879
  "Grammar-Overall": 79.57,
1880
  "Pronoun Reference": 91.27,
1881
  "Consistency": 81.35,
1882
  "Negation": 67.61,
 
1883
  "Layout-Overall": 90.48,
1884
  "2D": 90.11,
1885
  "3D": 90.94,
 
1886
  "Logical Reasoning": 67.72,
 
1887
  "Text": 51.63
1888
  },
1889
  {
 
1892
  "hf": "-",
1893
  "open_source": false,
1894
  "release_date": "2025-06",
 
1895
  "Overall": 90.95,
 
1896
  "Style": 97.67,
 
1897
  "World Knowledge": 98.26,
 
1898
  "Attribute-Overall": 93.21,
1899
  "Quantity": 89.84,
1900
  "Expression": 83.17,
1901
+ "Material": 94.2,
1902
  "Size": 94.69,
1903
  "Shape": 89.86,
1904
  "Color": 97.22,
 
1905
  "Action-Overall": 86.91,
1906
+ "Hand": 89.1,
1907
  "Full body": 86.56,
1908
  "Animal": 85.14,
1909
  "Non Contact": 86.61,
1910
  "Contact": 81.84,
1911
  "State": 88.63,
 
1912
  "Relationship-Overall": 90.57,
1913
  "Composition": 90.05,
1914
  "Similarity": 84.62,
1915
  "Inclusion": 94.52,
1916
  "Comparison": 92.72,
 
1917
  "Compound-Overall": 91.42,
1918
  "Imagination": 92.82,
1919
  "Feature matching": 88.32,
 
1920
  "Grammar-Overall": 88.07,
1921
  "Pronoun Reference": 96.83,
1922
+ "Consistency": 87.7,
1923
  "Negation": 80.63,
 
1924
  "Layout-Overall": 93.49,
1925
  "2D": 92.64,
1926
  "3D": 94.57,
1927
+ "Logical Reasoning": 83.5,
 
 
1928
  "Text": 86.41
1929
  }
1930
  ]
1931
+ }