<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Carbon · an open-source autoregressive genomic foundation model</title>
<!-- ============================================================ -->
<!-- Hash router for sibling pages that aren't reachable from the -->
<!-- HF Spaces parent URL. -->
<!-- -->
<!-- On huggingface.co the demo is loaded as an iframe whose src is -->
<!-- pinned to the Space's root path; the parent URL accepts a hash -->
<!-- and forwards it into the iframe but you can't deep-link into -->
<!-- /social-banner directly (the parent would treat the slug as a -->
<!-- Space subpath and 404). The workaround: ship known hashes from -->
<!-- the parent and bounce them inside the iframe before the demo -->
<!-- starts hydrating. Shareable as e.g. -->
<!-- https://huggingface.co/spaces/<org>/<space>#banner -->
<!-- -->
<!-- This runs synchronously before any <link>/<script> below so -->
<!-- there's no flash of the wrong page; the root element's -->
<!-- style.visibility is set to "hidden" as a safety net for slow -->
<!-- CPUs where the redirect might still race a first paint. -->
<!-- ============================================================ -->
<script>
  // Hash router: when the page is opened with one of the known "social"
  // hashes (e.g. #banner or #/press), hide the document and redirect to
  // /social-banner, carrying the current query string along.
  (function () {
    // Normalise the hash: drop the leading "#" (plus an optional "/"),
    // then lowercase it so the lookup below is case-insensitive.
    var route = (location.hash || "").replace(/^#\/?/, "").toLowerCase();
    var social = { banner: 1, "social-banner": 1, press: 1, share: 1 };
    // Own-property check instead of `in`: `in` also walks the prototype
    // chain, so hashes such as #constructor or #__proto__ would match
    // inherited Object.prototype members and trigger a bogus redirect.
    if (Object.prototype.hasOwnProperty.call(social, route)) {
      // Hide the root element so nothing paints before the redirect lands.
      document.documentElement.style.visibility = "hidden";
      location.replace("/social-banner" + location.search);
    }
  })();
</script>
<!-- ============================================================ -->
<!-- Discoverability: SEO + social previews + AI-agent metadata. -->
<!-- {{SITE_URL}} is substituted at request time by app.py with -->
<!-- the absolute base URL (scheme + host) the page was served -->
<!-- under, so og:image / og:url stay correct whether we're on -->
<!-- the HF Space, a preview deploy, or localhost. -->
<!-- ============================================================ -->
<meta name="description" content="Carbon is Hugging Face's open-source family of autoregressive genomic foundation models for DNA. Explore an interactive demo of what the 3B checkpoint learned: streaming continuation, variant effect prediction, ESMFold structure prediction, a UMAP of half a million gene embeddings, and the full training recipe.">
<meta name="keywords" content="Carbon, DNA, genomics, foundation model, Hugging Face, autoregressive, language model, bioinformatics, variant effect prediction, ESMFold, UMAP, gene embeddings, open source">
<meta name="author" content="Hugging Face Bio">
<meta name="theme-color" content="#f7f5ee">
<meta name="color-scheme" content="light">
<link rel="canonical" href="{{SITE_URL}}/">
<!-- Favicon. The SVG covers every modern browser engine (Chrome,
Safari ≥ 14, Firefox, Edge). We dropped the PNG raster fallback
when img/logo.png was retired in favour of img/thumb.png (the
dedicated social-card asset), since none of the browsers that
still need a raster favicon are in the demo's target audience. -->
<link rel="icon" type="image/svg+xml" href="/img/logo.svg">
<!-- Open Graph (Facebook, LinkedIn, Slack, Discord, iMessage…). -->
<!-- og:image points at /img/thumb.png, the 2x export of the OG -->
<!-- preview tile rendered by /social-banner (2392×1258, drop-in -->
<!-- 1200×630 ratio at retina resolution). -->
<meta property="og:type" content="website">
<meta property="og:site_name" content="Carbon">
<meta property="og:title" content="Carbon · an open-source autoregressive genomic foundation model">
<meta property="og:description" content="An interactive editorial demo of Carbon, Hugging Face's open-source DNA foundation model. Streaming continuation, variant scoring, protein folding, gene-embedding UMAP, and the full training recipe.">
<meta property="og:url" content="{{SITE_URL}}/">
<meta property="og:image" content="{{SITE_URL}}/img/thumb.png">
<meta property="og:image:width" content="2392">
<meta property="og:image:height" content="1258">
<meta property="og:image:alt" content="Carbon — wordmark and four-strand DNA helix on cream paper.">
<meta property="og:locale" content="en_US">
<!-- Twitter / X card. summary_large_image renders the OG image -->
<!-- as a full-bleed preview tile. -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@huggingface">
<meta name="twitter:creator" content="@huggingface">
<meta name="twitter:title" content="Carbon · an open-source autoregressive genomic foundation model">
<meta name="twitter:description" content="An interactive editorial demo of Carbon, Hugging Face's open-source DNA foundation model. Streaming continuation, variant scoring, protein folding, gene-embedding UMAP, and the full training recipe.">
<meta name="twitter:image" content="{{SITE_URL}}/img/thumb.png">
<meta name="twitter:image:alt" content="Carbon — wordmark and four-strand DNA helix on cream paper.">
<!-- JSON-LD structured data. Helps search engines and LLM- -->
<!-- powered answer engines (Perplexity, ChatGPT browsing, etc.) -->
<!-- understand what this page is: a tech article about an open- -->
<!-- source software model, with links back to its model card, -->
<!-- code, and dataset. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@graph": [
    {
      "@type": "TechArticle",
      "@id": "{{SITE_URL}}/#article",
      "headline": "Carbon · an open-source autoregressive genomic foundation model",
      "description": "An interactive editorial walkthrough of Carbon, Hugging Face's open-source DNA foundation model: streaming continuation, variant effect prediction, ESMFold-based protein structure prediction, a UMAP of ~500k gene embeddings, and the full training recipe (tokenizer, loss, dataset, results).",
      "url": "{{SITE_URL}}/",
      "image": "{{SITE_URL}}/img/thumb.png",
      "inLanguage": "en",
      "author": {
        "@type": "Organization",
        "name": "Hugging Face Bio",
        "url": "https://huggingface.co/HuggingFaceBio"
      },
      "publisher": {
        "@type": "Organization",
        "name": "Hugging Face",
        "url": "https://huggingface.co"
      },
      "about": {
        "@type": "SoftwareApplication",
        "name": "Carbon-3B",
        "applicationCategory": "ScienceApplication",
        "operatingSystem": "Any",
        "url": "https://huggingface.co/HuggingFaceBio/Carbon-3B",
        "description": "Autoregressive genomic foundation model. 3B parameters, 393,216 bp context, 6-mer tokenizer, trained on 1T tokens of DNA across the tree of life.",
        "license": "https://huggingface.co/HuggingFaceBio/Carbon-3B",
        "isAccessibleForFree": true
      },
      "isPartOf": {
        "@type": "WebSite",
        "name": "Carbon",
        "url": "{{SITE_URL}}/"
      }
    }
  ]
}
</script>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700;800&family=Inter:wght@300;400;500;600&display=swap">
<!-- 3Dmol.js: lightweight WebGL molecular viewer, used by §5 (folding) to
render ESMFold-predicted protein cartoons. Pinned for reproducibility. -->
<script defer src="https://cdn.jsdelivr.net/npm/3dmol@2.5.1/build/3Dmol-min.js"></script>
<!-- highlight.js: syntax-highlights the Python snippets inside every
<details class="code-snippet"> "Run this from code" block. We load
the official browser distribution from the `cdn-release` repo (the
/npm/ path serves CommonJS modules that throw `require is not
defined` in the browser). Bundle ships Python pre-registered. We
intentionally do NOT load a hljs theme stylesheet, code-snippet.css
defines our own token colours so the snippets stay on-brand with
the editorial palette. -->
<script defer src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"></script>
<!-- Modular CSS, served from /assets/styles/. Order matters because
several keyframes (pulse) and shared atoms (.seq-block, .seq-label,
.demo-toolbar) are defined once and consumed by multiple sections;
load globals first, then per-section overrides. -->
<link rel="stylesheet" href="/assets/styles/base.css">
<link rel="stylesheet" href="/assets/styles/header.css">
<link rel="stylesheet" href="/assets/styles/banner.css">
<link rel="stylesheet" href="/assets/styles/layout.css">
<link rel="stylesheet" href="/assets/styles/controls.css">
<link rel="stylesheet" href="/assets/styles/sequence.css">
<link rel="stylesheet" href="/assets/styles/section-intro.css">
<link rel="stylesheet" href="/assets/styles/section-folding.css">
<link rel="stylesheet" href="/assets/styles/section-umap.css">
<link rel="stylesheet" href="/assets/styles/section-tree.css">
<link rel="stylesheet" href="/assets/styles/section-vep.css">
<link rel="stylesheet" href="/assets/styles/section-species.css">
<link rel="stylesheet" href="/assets/styles/section-data.css">
<link rel="stylesheet" href="/assets/styles/code-snippet.css">
<link rel="stylesheet" href="/assets/styles/recipe.css">
<link rel="stylesheet" href="/assets/styles/sandbox.css">
<link rel="stylesheet" href="/assets/styles/footer.css">
</head>
<body>
<!-- Carbon banner. Combines the model-card identity (logo + path + wordmark +
subtitle) with the section navigation (Intro / DNA Lab / Carbon Recipe / Sandbox tabs) into a
single editorial hero. The DNA helix is rendered on a <canvas> positioned
to the right, rotated for a slight technical tilt; see banner.js. -->
<header class="carbon-banner" aria-label="Carbon DNA model banner">
  <div class="banner-inner">
    <div class="banner-left">
      <!-- Top row: HF-style model-card identity. The square logo card mirrors
           the thumbnail you'd find on a Hugging Face model page; the title +
           path beside it functions as a breadcrumb / model identifier. -->
      <div class="banner-identity">
        <a class="logo-card" href="#" aria-label="Carbon, go to top">
          <img class="logo-img" src="/img/logo.svg" alt="" width="44" height="44">
        </a>
        <div class="banner-breadcrumb">
          <div class="banner-title">CARBON</div>
          <div class="banner-path" id="meta">huggingfacebio/carbon-3b</div>
        </div>
      </div>
      <!-- Headline: oversized wordmark + tagline. The blinking caret after the
           "N" is the visual echo of the §1 demo (model streaming a continuation
           token by token). -->
      <div class="banner-headline">
        <h1 class="banner-wordmark"><span>CARBON</span><span class="banner-cursor" aria-hidden="true"></span></h1>
        <p class="banner-subtitle">Autoregressive Genomic Foundation Model</p>
        <ul class="banner-specs" aria-label="Model specs">
          <li class="banner-spec"><strong>393,216</strong> bp context</li>
          <li class="banner-spec"><strong>6-mer</strong> tokenizer</li>
          <li class="banner-spec"><strong>1T</strong> train tokens</li>
        </ul>
        <ul class="banner-links" aria-label="Resources">
          <li>
            <a href="https://huggingface.co/collections/HuggingFaceBio/carbon" target="_blank" rel="noopener">
              Models<span class="arrow" aria-hidden="true">↗</span>
            </a>
          </li>
          <li>
            <a href="https://huggingface.co/datasets/HuggingFaceBio/carbon-pretraining-corpus" target="_blank" rel="noopener">
              Dataset<span class="arrow" aria-hidden="true">↗</span>
            </a>
          </li>
          <li>
            <a href="https://paperswithcode.co/paper/83340" target="_blank" rel="noopener">
              Tech report<span class="arrow" aria-hidden="true">↗</span>
            </a>
          </li>
          <li>
            <a href="https://github.com/huggingface/carbon" target="_blank" rel="noopener">
              Code<span class="arrow" aria-hidden="true">↗</span>
            </a>
          </li>
        </ul>
      </div>
      <!-- Tabs anchored to the bottom of the banner; they sit on the hairline
           that separates the banner from the page content (margin-bottom: -1px).
           The nav carries its own aria-label so assistive tech can tell this
           landmark apart from the other <nav> on the page, and every tab
           declares type="button" because it performs an in-page action rather
           than a form submit. -->
      <nav id="tab-nav" class="banner-tabs" aria-label="Section navigation">
        <button class="tab active" type="button" data-tab="intro">Intro</button>
        <button class="tab" type="button" data-tab="dna-lab">DNA Lab</button>
        <button class="tab" type="button" data-tab="recipe">Carbon Recipe</button>
        <button class="tab" type="button" data-tab="sandbox">Sandbox</button>
      </nav>
    </div>
    <!-- Big vertical DNA helix on the right. The canvas paints upright; CSS
         applies a small clockwise tilt for a "blueprint-on-the-bench" feel. -->
    <div class="banner-helix" aria-hidden="true">
      <canvas class="cb-helix-canvas"></canvas>
    </div>
  </div>
</header>
<!-- Sticky tab strip: a duplicate of the in-banner nav that slides down from
the top once the user has scrolled past the original tabs. Kept in sync
with the in-banner set via tabs.js (both NodeLists are wired to the same
setTab() handler). The body gets .is-tabs-stuck toggled by an
IntersectionObserver watching the original #tab-nav. -->
<nav id="tab-nav-sticky" class="sticky-nav" aria-label="Section navigation (sticky)">
  <div class="sticky-nav__inner">
    <!-- Mini breadcrumb on the left: same identity as the in-banner
         .banner-breadcrumb (title + model path stacked) so the sticky
         strip carries the "you're on the Carbon model card" cue even
         after the hero has scrolled out of view. -->
    <a class="sticky-nav__brand" href="#" aria-label="Carbon, go to top">
      <span class="banner-title">CARBON</span>
      <span class="banner-path">huggingfacebio/carbon-3b</span>
    </a>
    <!-- Tab buttons: same data-tab values as the in-banner set. Each one
         declares type="button" explicitly because it performs an in-page
         action, so it can never act as a submit control. -->
    <div class="sticky-nav__tabs">
      <button class="tab active" type="button" data-tab="intro">Intro</button>
      <button class="tab" type="button" data-tab="dna-lab">DNA Lab</button>
      <button class="tab" type="button" data-tab="recipe">Carbon Recipe</button>
      <button class="tab" type="button" data-tab="sandbox">Sandbox</button>
    </div>
  </div>
</nav>
<!-- ============================================================ -->
<!-- INTRO TAB · release announcement + tab-navigation guide + -->
<!-- optional bio primer ("the central dogma"). -->
<!-- ============================================================ -->
<!-- Default landing tab. The release hero uses .tab-lede so it
reads consistently with the existing per-tab intros. The
three guide cards under it are buttons (NOT links) wired by
sections/intro.js to window.setTab so deep-linking and tab
state stay in sync. The bio primer below reuses
section--two-col / .demo for visual parity with §1-§7. -->
<div class="tab-panel active section--intro" id="panel-intro" data-tab="intro">
<!-- Hero: two-column split. Left rail (eyebrow + announcement) is sticky
so the message stays in view while the visitor scrolls past the
Pareto figure on the right. The figure was previously stacked
beneath the text inside .tab-lede__rail; the split layout pulls
it out as a sibling so the two read as anchor + evidence. -->
<div class="tab-lede tab-lede--split">
<div class="tab-lede__rail">
<h2 class="tab-lede__title">The fastest open-source foundation model for DNA.</h2>
<p>
Today we're releasing <strong>Carbon</strong> — three model sizes
(<em>500M</em>, <em>3B</em>, and <em>8B</em> parameters), shipping with the full
training code, the data pipeline, and the model weights.
All open-source on the Hugging Face Hub.
</p>
<!-- Figure caption pulled out of the right-column .tab-lede__figure
so the descriptive sentence sits under the announcement prose
instead of dangling below the chart. The visual flow is
lede → context paragraph → figure caption, all in the same
column; the chart on the right reads as the visual evidence
the prose is referring to. -->
<p class="tab-lede__figcaption">
<span class="pareto-figcaption-tag">Fig · Benchmark</span>
Throughput (base pairs per second, log scale) vs win rate across open DNA foundation models. Carbon 3B matches Evo2 7B's win rate at roughly 275× the throughput.
</p>
</div>
<!-- Pareto chart, drawn natively as inline SVG so the figure scales
sharply, picks up the page's typography, and can be tuned in
CSS without a matplotlib re-export. Source data lives in
pareto/pareto_data.csv; geometry mirrors the matplotlib
reference (scratch/plot_pareto_winrate_throughput_8b_32k_hf.py):
log-scale throughput on x, linear win-rate % on y, family
badges sitting on each data point with a plain text label
below. Chrome is pulled back to match the editorial blog
tone — hairline frame + tick lines, mono tabular tick
labels, mono-uppercase "better/faster" eyebrow indicator —
and the data labels use a paint-order halo (see
.pareto-label in section-intro.css) instead of pill boxes.
Carbon points scale up + use a heavier label per the source
script's HIGHLIGHT_LOGO_SCALE so the eye lands on them. -->
<figure class="tab-lede__figure tab-lede__figure--pareto">
  <!-- viewBox tightly cropped around the actual visible content
       (rotated "Win rate (%)" Y title, "100" Y tick label, rightmost
       data label "GENERator-v2 1.2B", and "Throughput" X title
       descender). No internal margin is left inside the SVG itself:
       the visual breathing around the chart is provided entirely by
       the parent .tab-lede__figure--pareto's 24px card padding (see
       section-intro.css), otherwise we'd be stacking SVG margins
       onto CSS padding and the chart would read as floating inside
       an oversized frame. The data coordinates further down still
       use the original 1000×600 reference grid; only the visible
       window is shifted/shrunk. -->
  <svg
    class="pareto-chart"
    viewBox="20 50 910 530"
    xmlns="http://www.w3.org/2000/svg"
    role="img"
    aria-labelledby="pareto-title pareto-desc"
  >
    <title id="pareto-title">Throughput vs win rate across open DNA foundation models</title>
    <!-- Accessible description. The 275× figure is specifically the
         Carbon 3B / Evo2 7B throughput ratio (125130.8 / 453.8 bp/s).
         Carbon 8B (76582.7 bp/s) works out to roughly 170×, so the
         two models are described separately rather than sharing the
         275× claim. Values come from the per-point comments below. -->
    <desc id="pareto-desc">Log-scale throughput in base pairs per second on the x-axis and win-rate percentage on the y-axis. Carbon 3B runs at roughly 275 times the throughput of Arc Evo2 7B at a comparable win rate; Carbon 8B runs at roughly 170 times the throughput of Arc Evo2 7B at a higher win rate.</desc>
    <!-- Plot interior. -->
    <rect class="pareto-bg" x="100" y="30" width="870" height="470"/>
    <!-- Axis lines · L-shape (left + bottom) bordering the data
         area. The full rectangular frame is dropped so the chart
         sits transparent on the page; just the two lines that
         anchor the ticks remain, the editorial chart minimum. -->
    <g class="pareto-axis-lines">
      <line x1="100" y1="30" x2="100" y2="500"/>
      <line x1="100" y1="500" x2="970" y2="500"/>
    </g>
    <!-- Y axis: linear win-rate %, ticks at 0/20/40/60/80/100. The
         plot range runs −12..108 (matches matplotlib padding) so
         the data points have headroom above 100 and below 0 for
         labels; only the canonical 0..100 ticks are drawn. -->
    <g class="pareto-axis pareto-axis--y">
      <line x1="94" y1="61.3" x2="100" y2="61.3"/>
      <line x1="94" y1="139.7" x2="100" y2="139.7"/>
      <line x1="94" y1="218.0" x2="100" y2="218.0"/>
      <line x1="94" y1="296.3" x2="100" y2="296.3"/>
      <line x1="94" y1="374.7" x2="100" y2="374.7"/>
      <line x1="94" y1="453.0" x2="100" y2="453.0"/>
      <text x="86" y="61.3">100</text>
      <text x="86" y="139.7">80</text>
      <text x="86" y="218.0">60</text>
      <text x="86" y="296.3">40</text>
      <text x="86" y="374.7">20</text>
      <text x="86" y="453.0">0</text>
    </g>
    <!-- X axis: log10 base pairs/s. x-range chosen to mirror the
         matplotlib auto-padding (left_pad/right_pad in the source);
         ticks drop at decade + half-decade boundaries that fall
         inside the range. -->
    <g class="pareto-axis pareto-axis--x">
      <line x1="163.4" y1="500" x2="163.4" y2="506"/>
      <line x1="263.9" y1="500" x2="263.9" y2="506"/>
      <line x1="339.9" y1="500" x2="339.9" y2="506"/>
      <line x1="415.9" y1="500" x2="415.9" y2="506"/>
      <line x1="516.4" y1="500" x2="516.4" y2="506"/>
      <line x1="592.4" y1="500" x2="592.4" y2="506"/>
      <line x1="668.5" y1="500" x2="668.5" y2="506"/>
      <line x1="768.9" y1="500" x2="768.9" y2="506"/>
      <line x1="844.9" y1="500" x2="844.9" y2="506"/>
      <line x1="920.9" y1="500" x2="920.9" y2="506"/>
      <text x="163.4" y="520">200</text>
      <text x="263.9" y="520">500</text>
      <text x="339.9" y="520">1k</text>
      <text x="415.9" y="520">2k</text>
      <text x="516.4" y="520">5k</text>
      <text x="592.4" y="520">10k</text>
      <text x="668.5" y="520">20k</text>
      <text x="768.9" y="520">50k</text>
      <text x="844.9" y="520">100k</text>
      <text x="920.9" y="520">200k</text>
    </g>
    <!-- Plot frame drawn after the axis grid so the border sits
         cleanly on top of the tick lines.
         NOTE(review): the axis-lines comment above says the full
         rectangular frame was dropped, yet this rect is still
         emitted. Presumably .pareto-frame is neutralised in
         section-intro.css; confirm, then either delete this rect
         or update that comment so the two agree. -->
    <rect class="pareto-frame" x="100" y="30" width="870" height="470"/>
    <!-- Axes-of-improvement indicator: a small ⌐ of grey arrows in
         the lower-left labelled "better"/"faster", same as the
         matplotlib reference. Placed at the 0-winrate gridline,
         just inside the y-axis. -->
    <g class="pareto-indicator" transform="translate(170 450)">
      <line x1="0" y1="0" x2="0" y2="-70"/>
      <polygon points="0,-78 -7,-66 7,-66"/>
      <text class="pareto-indicator-text" transform="translate(-14 -35) rotate(-90)">better</text>
      <line x1="0" y1="0" x2="70" y2="0"/>
      <polygon points="78,0 66,-7 66,7"/>
      <text class="pareto-indicator-text" x="35" y="20">faster</text>
    </g>
    <!-- 275× speedup callout: a single horizontal arrow from
         just-right-of Evo2 7B to just-left-of Carbon 3B, split in
         two segments around a centred "275×" label that sits
         on-axis. The label cuts the shaft instead of floating
         above it, so the number reads as part of the arrow
         itself. y=215 lands between Evo2 7B (64.3%) and Carbon
         3B (59.5%) so the arrow reads level with both endpoints. -->
    <g class="pareto-speedup">
      <line x1="290" y1="215" x2="508" y2="215"/>
      <line x1="618" y1="215" x2="822" y2="215"/>
      <polygon points="836,215 820,206 820,224"/>
      <text class="pareto-speedup-label" x="563" y="218">275×</text>
    </g>
    <!-- Data points. Coordinates baked in from pareto_data.csv:
           x = 100 + (log10(T) − 2.0499) / 3.4452 × 870
           y = 500 − (win_rate + 12) × 3.9167
         Logos sit centered on each point (32×32 for non-highlight,
         43×43 for Carbon). Labels are pinned below the logo. -->
    <!-- Evo2 20B · 177.5 bp/s, 95.24% -->
    <g class="pareto-point">
      <image href="/img/arc.webp" x="134.3" y="64.0" width="32" height="32"/>
      <text class="pareto-label" x="150.3" y="110">Evo2 20B</text>
    </g>
    <!-- Evo2 7B · 453.8 bp/s, 64.29% -->
    <g class="pareto-point">
      <image href="/img/arc.webp" x="237.3" y="185.2" width="32" height="32"/>
      <text class="pareto-label" x="253.3" y="231">Evo2 7B</text>
    </g>
    <!-- Evo2 1B · 1342.5 bp/s, 2.38% -->
    <g class="pareto-point">
      <image href="/img/arc.webp" x="356.2" y="427.7" width="32" height="32"/>
      <text class="pareto-label" x="372.2" y="473">Evo2 1B</text>
    </g>
    <!-- GENERator-v2 3B · 98494.4 bp/s, 35.71% -->
    <g class="pareto-point">
      <image href="/img/generator.webp" x="828.7" y="297.1" width="32" height="32"/>
      <text class="pareto-label" x="844.7" y="343">GENERator-v2 3B</text>
    </g>
    <!-- GENERator-v2 1.2B · 123219.2 bp/s, 14.29% -->
    <g class="pareto-point">
      <image href="/img/generator.webp" x="853.3" y="381.0" width="32" height="32"/>
      <text class="pareto-label" x="869.3" y="427">GENERator-v2 1.2B</text>
    </g>
    <!-- Carbon 8B · 76582.7 bp/s, 78.57% (highlighted) -->
    <g class="pareto-point pareto-point--highlight">
      <image href="/img/logo.svg" x="795.6" y="123.7" width="43" height="43"/>
      <text class="pareto-label" x="817.1" y="180">Carbon 8B</text>
    </g>
    <!-- Carbon 3B · 125130.8 bp/s, 59.52% (highlighted) -->
    <g class="pareto-point pareto-point--highlight">
      <image href="/img/logo.svg" x="849.5" y="198.3" width="43" height="43"/>
      <text class="pareto-label" x="871.0" y="255">Carbon 3B</text>
    </g>
    <!-- Axis titles. Y title rotated -90 along the left margin,
         X title centred under the X axis. The italic "Base pairs
         per second" subtitle that used to sit under "Throughput"
         was removed: the units carry less weight than the
         headline measure, and the chart reads cleaner without it. -->
    <text class="pareto-axis-title" transform="translate(34 265) rotate(-90)">Win rate (%)</text>
    <text class="pareto-axis-title" x="535" y="572">Throughput</text>
  </svg>
</figure>
</div>
<!-- Site map · full-width independent band that signposts the four
destinations of the page (Intro primer / DNA Lab / Carbon Recipe /
Sandbox). Pulled out of .container.wide so the band can extend
edge-to-edge with its own paper tone, reading as the deliberate
hand-off between the release lede above and the bio primer below.
Each step is a numbered card with a mono uppercase label and a
short gloss; the anchors still feed tabs.js's hashchange listener
(#primer scroll-anchors here, #dna-lab/#recipe/#sandbox switch tab). -->
<nav class="intro-sitemap" aria-label="Site map">
  <div class="intro-sitemap__inner">
    <!-- Band heading: eyebrow, title, one-line subtitle. -->
    <header class="intro-sitemap__heading">
      <span class="intro-sitemap__eyebrow">Site map</span>
      <h2 class="intro-sitemap__title">What's inside</h2>
      <p class="intro-sitemap__subtitle">Four ways to explore Carbon, from background to hands-on.</p>
    </header>
    <!-- One card per destination, in reading order. Every card has
         the same anatomy: a 24×24 stroke icon, a label row (title
         plus arrow glyph) and a short description. Icon and arrow
         wrappers are aria-hidden, which leaves the title and
         description text as the link's readable content. -->
    <ol class="intro-sitemap__steps">
      <!-- Card 1 · #primer (icon reads as an open book). -->
      <li class="intro-sitemap__step">
        <a class="intro-sitemap__link" href="#primer">
          <span class="intro-sitemap__icon" aria-hidden="true">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.2" stroke-linecap="round" stroke-linejoin="round">
              <path d="M3 5.5h6.5a2.5 2.5 0 0 1 2.5 2.5V20"/>
              <path d="M21 5.5h-6.5A2.5 2.5 0 0 0 12 8"/>
              <path d="M3 5.5V18a1 1 0 0 0 1 1h6"/>
              <path d="M21 5.5V18a1 1 0 0 1-1 1h-6"/>
              <path d="M6 9h3.5"/>
              <path d="M6 12h3.5"/>
              <path d="M14.5 9H18"/>
              <path d="M14.5 12H18"/>
            </svg>
          </span>
          <span class="intro-sitemap__label">
            <span class="intro-sitemap__title">Intro</span>
            <span class="intro-sitemap__arrow" aria-hidden="true">
              <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
                <path d="M5 12h14"/>
                <path d="M13 6l6 6-6 6"/>
              </svg>
            </span>
          </span>
          <span class="intro-sitemap__desc">A short primer on the basics of genetics — the alphabet Carbon reads.</span>
        </a>
      </li>
      <!-- Card 2 · #dna-lab (icon reads as a double helix with rungs). -->
      <li class="intro-sitemap__step">
        <a class="intro-sitemap__link" href="#dna-lab">
          <span class="intro-sitemap__icon" aria-hidden="true">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.2" stroke-linecap="round" stroke-linejoin="round">
              <path d="M8 3c0 4 8 4.5 8 9s-8 5-8 9"/>
              <path d="M16 3c0 4-8 4.5-8 9s8 5 8 9"/>
              <path d="M9 5h6"/>
              <path d="M10 7.5h4"/>
              <path d="M8.5 10.5h7"/>
              <path d="M8.5 13.5h7"/>
              <path d="M10 16.5h4"/>
              <path d="M9 19h6"/>
            </svg>
          </span>
          <span class="intro-sitemap__label">
            <span class="intro-sitemap__title">DNA Lab</span>
            <span class="intro-sitemap__arrow" aria-hidden="true">
              <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
                <path d="M5 12h14"/>
                <path d="M13 6l6 6-6 6"/>
              </svg>
            </span>
          </span>
          <span class="intro-sitemap__desc">Live interactions with the 3B checkpoint: explore what the model can do.</span>
        </a>
      </li>
      <!-- Card 3 · #recipe (icon reads as a lab flask). -->
      <li class="intro-sitemap__step">
        <a class="intro-sitemap__link" href="#recipe">
          <span class="intro-sitemap__icon" aria-hidden="true">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.2" stroke-linecap="round" stroke-linejoin="round">
              <path d="M9 3h6"/>
              <path d="M10 3v6.4L4.7 18.5a1.5 1.5 0 0 0 1.3 2.3h12a1.5 1.5 0 0 0 1.3-2.3L14 9.4V3"/>
              <path d="M7.3 14h9.4"/>
              <circle cx="10" cy="17" r="0.9" fill="currentColor" stroke="none"/>
              <circle cx="13.6" cy="17.8" r="0.9" fill="currentColor" stroke="none"/>
            </svg>
          </span>
          <span class="intro-sitemap__label">
            <span class="intro-sitemap__title">Carbon Recipe</span>
            <span class="intro-sitemap__arrow" aria-hidden="true">
              <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
                <path d="M5 12h14"/>
                <path d="M13 6l6 6-6 6"/>
              </svg>
            </span>
          </span>
          <span class="intro-sitemap__desc">How Carbon was trained: tokenizer, loss, dataset, and results.</span>
        </a>
      </li>
      <!-- Card 4 · #sandbox (icon reads as a terminal window). -->
      <li class="intro-sitemap__step">
        <a class="intro-sitemap__link" href="#sandbox">
          <span class="intro-sitemap__icon" aria-hidden="true">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.2" stroke-linecap="round" stroke-linejoin="round">
              <rect x="3" y="4.5" width="18" height="15" rx="2"/>
              <path d="M7 10l3 2-3 2"/>
              <path d="M13 14h4"/>
            </svg>
          </span>
          <span class="intro-sitemap__label">
            <span class="intro-sitemap__title">Sandbox</span>
            <span class="intro-sitemap__arrow" aria-hidden="true">
              <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
                <path d="M5 12h14"/>
                <path d="M13 6l6 6-6 6"/>
              </svg>
            </span>
          </span>
          <span class="intro-sitemap__desc">Run Carbon on your own DNA sequences, end-to-end.</span>
        </a>
      </li>
    </ol>
  </div>
</nav>
<div class="container wide">
<!-- Optional bio primer below. Subsections are §1–§6 within this tab
(Bases, DNA, Gene, RNA, Protein, Applications).
id="primer" so the "Intro" card in the site map above
(href="#primer") can scroll-anchor here via tabs.js's SECTION_TO_TAB
routing. -->
<div class="intro-primer-heading" id="primer">
  <div class="section-num">Background</div>
  <h2>What Carbon reads</h2>
  <!-- Standfirst. An editorial lede set off by a green left-rule,
       the same motif .takeaway uses in layout.css. The four bases
       appear as mono tokens in the sequence-viewer palette shared
       with tokenizer.js (A green / C blue / G amber / T pink), so
       the primer opens in the visual vocabulary the rest of the
       demo uses. The closing sentence gets its own kicker paragraph
       because it states the thesis of the tab ("what they mean is
       what it has to learn"). -->
  <div class="intro-primer-lede">
    <p>
      <!-- These spans use data-letter, not data-base, on purpose:
           intro.js injects the skeletal-formula molecule SVGs into
           every [data-base] element inside the intro root. Here we
           only want coloured mono glyphs; the full molecule
           diagrams belong to the §1 demo card below. -->
      The model is fed long strings of four letters:
      <span class="intro-base" data-letter="A">A</span>,
      <span class="intro-base" data-letter="C">C</span>,
      <span class="intro-base" data-letter="G">G</span>,
      <span class="intro-base" data-letter="T">T</span>.
      Those letters are the bases of <em>DNA</em>. Stretches of it are <em>genes</em>,
      which cells copy into <em>RNA</em> and translate into <em>proteins</em>.
      A century of molecular biology has been spent working out how.
      Carbon is given only the letters.
    </p>
    <p class="intro-primer-lede__kicker">
      What they mean is what it has to learn.
    </p>
  </div>
</div>
<!-- §0.1 · BASES -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§1 · Bases</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">A four-letter alphabet</div>
    <p class="lede">
      DNA is written in <em>four small molecules</em>: adenine, cytosine, guanine, thymine.
      Two are purines (A and G, twin-ring), two are pyrimidines (C and T, single-ring).
      Everything that follows is built from these four.
    </p>
  </div>
  <div class="section-body">
    <div class="demo">
      <!-- One tile per base: an empty [data-base] mount plus a text
           label. The mounts are left empty in the markup; per the
           lede comment in the primer heading, intro.js fills every
           [data-base] element with its molecule SVG. -->
      <div class="cd-mols">
        <div class="cd-mol-wrap"><div class="cd-mol-svg" data-base="A"></div><div class="cd-mol-label"><b>A</b> adenine</div></div>
        <div class="cd-mol-wrap"><div class="cd-mol-svg" data-base="C"></div><div class="cd-mol-label"><b>C</b> cytosine</div></div>
        <div class="cd-mol-wrap"><div class="cd-mol-svg" data-base="G"></div><div class="cd-mol-label"><b>G</b> guanine</div></div>
        <div class="cd-mol-wrap"><div class="cd-mol-svg" data-base="T"></div><div class="cd-mol-label"><b>T</b> thymine</div></div>
      </div>
    </div>
  </div>
</div>
<!-- §0.2 · DNA -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§2 · DNA</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">The double helix</div>
    <p class="lede">
      Each base hangs off a sugar-phosphate backbone. Two backbones run anti-parallel and
      <em>twist</em> into a double helix. The bases on opposite strands pair by chemistry:
      <em>A always with T, G always with C</em>, so one strand fully determines the other.
      A human genome is about <em>3 billion</em> base pairs of this.
    </p>
  </div>
  <div class="section-body">
    <div class="demo">
      <!-- Empty mount for the helix visual; nothing in this markup
           draws it, so it is presumably populated by script via the
           data-helix hook (confirm in intro.js). -->
      <div class="cd-helix-wrap" data-helix></div>
      <!-- Pairing legend: two big A=T / G≡C tiles with an H-bond
           sub-label that turns the visual difference between =
           and ≡ into the actual chemistry (2 vs 3 hydrogen bonds).
           Caption sits below the pair row, centred. -->
      <div class="cd-helix-rules">
        <div class="cd-helix-rules-pairs">
          <div class="cd-pair">
            <div class="cd-pair-formula"><span class="cd-pair-letter">A</span><span class="cd-pair-bond">═</span><span class="cd-pair-letter">T</span></div>
            <div class="cd-pair-meta">2 H bonds</div>
          </div>
          <div class="cd-pair">
            <div class="cd-pair-formula"><span class="cd-pair-letter">G</span><span class="cd-pair-bond">≡</span><span class="cd-pair-letter">C</span></div>
            <div class="cd-pair-meta">3 H bonds</div>
          </div>
        </div>
        <div class="cd-pair-caption">complementary base pairing</div>
      </div>
    </div>
  </div>
</div>
<!-- §0.3 · GENE -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§3 · Gene</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">Promoter, exons, introns</div>
    <p class="lede">
      A gene is a stretch of DNA that the cell turns into protein. Most of the genome is
      not. Each gene begins with a <em>promoter</em>, where the cell starts reading. What
      follows is broken into two kinds of segment: <em>exons</em>, which the cell keeps,
      and <em>introns</em>, which it splices out and which often serve regulatory purposes.
    </p>
  </div>
  <div class="section-body">
    <div class="demo">
      <!-- Toy gene: promoter, then exon / intron / exon / intron /
           exon. The strip stays on a single line with no whitespace
           between the segment spans; do not reformat it, since any
           whitespace would become text nodes between the segments. -->
      <div class="cd-gene-strip"><span class="cd-genex cd-genex--promoter"><span class="cd-genex-bar"></span><span class="cd-genex-text">TATAAA</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">ATGGCCGAACTG</span></span><span class="cd-genex cd-genex--intron"><span class="cd-genex-bar"></span><span class="cd-genex-text">GTAAGCATATAG</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">CCCGGGTGGTTC</span></span><span class="cd-genex cd-genex--intron"><span class="cd-genex-bar"></span><span class="cd-genex-text">GTACGCCATTAG</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">AGCCGT</span></span></div>
      <!-- Colour key for the strip above. The intron swatch is a
           1px rule rather than a filled block. -->
      <div class="cd-track-labels">
        <span class="cd-track-labels__title">Legend</span>
        <span><span class="sw" style="background: var(--promoter)"></span>promoter</span>
        <span><span class="sw" style="background: var(--green)"></span>exon</span>
        <span><span class="sw" style="background: transparent; border-top: 1px solid var(--intron); height: 1px; margin-top: 4px;"></span>intron</span>
      </div>
    </div>
  </div>
</div>
<!-- §0.4 · RNA / splicing -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§4 · RNA</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">Splicing into the working copy</div>
    <p class="lede">
      The cell copies the gene into RNA. Then it <em>splices out the introns</em> and
      <em>joins the exons together</em>. What's left is the working mRNA: just the exons,
      in order. (T is rewritten as U along the way: a small alphabet quirk between DNA
      and RNA.)
    </p>
  </div>
  <div class="section-body">
    <div class="demo">
      <div class="cd-splice">
        <!-- Top row: the same toy gene as §3 (promoter + three exons
             separated by two introns). Kept on one line with no
             whitespace between the segment spans. -->
        <div class="cd-gene-strip"><span class="cd-genex cd-genex--promoter"><span class="cd-genex-bar"></span><span class="cd-genex-text">TATAAA</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">ATGGCCGAACTG</span></span><span class="cd-genex cd-genex--intron"><span class="cd-genex-bar"></span><span class="cd-genex-text">GTAAGCATATAG</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">CCCGGGTGGTTC</span></span><span class="cd-genex cd-genex--intron"><span class="cd-genex-bar"></span><span class="cd-genex-text">GTACGCCATTAG</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">AGCCGT</span></span></div>
        <!-- Connector between the two rows: three curved arrows,
             one per exon, each ending in a small arrowhead. The
             whole SVG is aria-hidden, so its "transcribe" label is
             visual only. -->
        <svg class="cd-splice-arrows" viewBox="0 0 60 6" aria-hidden="true">
          <text x="0.5" y="2.2" font-family='"JetBrains Mono", monospace' font-size="1.4" font-weight="500" fill="#5b5b56">transcribe</text>
          <g fill="none" stroke="#317f3f" stroke-width="0.2" stroke-linecap="round">
            <path d="M 12 0 C 12 3, 21 3, 21 5"/>
            <path d="M 36 0 C 36 3, 33 3, 33 5"/>
            <path d="M 57 0 C 57 3, 42 3, 42 5"/>
          </g>
          <g fill="#317f3f">
            <polygon points="20.3,5 21.7,5 21,6"/>
            <polygon points="32.3,5 33.7,5 33,6"/>
            <polygon points="41.3,5 42.7,5 42,6"/>
          </g>
        </svg>
        <!-- Bottom row: the spliced mRNA. Same three exons in order,
             with every T written as U. -->
        <div class="cd-gene-strip cd-mrna-strip"><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">AUGGCCGAACUG</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">CCCGGGUGGUUC</span></span><span class="cd-genex cd-genex--exon"><span class="cd-genex-bar"></span><span class="cd-genex-text">AGCCGU</span></span></div>
      </div>
      <div class="cd-track-labels">
        <span class="cd-track-labels__title">Legend</span>
        <span><span class="sw" style="background: var(--promoter)"></span>promoter</span>
        <span><span class="sw" style="background: var(--green)"></span>exon</span>
        <span><span class="sw" style="background: transparent; border-top: 1px solid var(--intron); height: 1px; margin-top: 4px;"></span>intron</span>
      </div>
    </div>
  </div>
</div>
<!-- §0.5 · PROTEIN -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§5 · Protein</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">From chain to function</div>
    <p class="lede">
      Every three RNA letters (a <em>codon</em>) encode one <em>amino acid</em>. There are only
      <em>20</em> amino acids in the standard alphabet; every protein in nature is built from
      this same set. The chain then folds into a 3D shape, and that shape <em>is</em> the
      function: hemoglobin · insulin · collagen · antibodies · enzymes.
    </p>
  </div>
  <div class="section-body">
    <div class="demo">
      <!-- Translation table. Four rows of ten cells each (codon,
           arrow, one-letter amino acid, three-letter name), read
           column by column. The first cell of a row is either a row
           label or an empty span standing in for one. The codons are
           the §4 mRNA split into triplets. -->
      <div class="cd-translate">
        <span class="cd-trow-label">mRNA</span>
        <span class="cd-tcodon">AUG</span><span class="cd-tcodon">GCC</span><span class="cd-tcodon">GAA</span><span class="cd-tcodon">CUG</span><span class="cd-tcodon">CCC</span><span class="cd-tcodon">GGG</span><span class="cd-tcodon">UGG</span><span class="cd-tcodon">UUC</span><span class="cd-tcodon">AGC</span><span class="cd-tcodon">CGU</span>
        <span></span>
        <span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span><span class="cd-tarrow">↓</span>
        <span class="cd-trow-label">amino acids</span>
        <span class="cd-taa">M</span><span class="cd-taa">A</span><span class="cd-taa">E</span><span class="cd-taa">L</span><span class="cd-taa">P</span><span class="cd-taa">G</span><span class="cd-taa">W</span><span class="cd-taa">F</span><span class="cd-taa">S</span><span class="cd-taa">R</span>
        <span></span>
        <span class="cd-tname">Met</span><span class="cd-tname">Ala</span><span class="cd-tname">Glu</span><span class="cd-tname">Leu</span><span class="cd-tname">Pro</span><span class="cd-tname">Gly</span><span class="cd-tname">Trp</span><span class="cd-tname">Phe</span><span class="cd-tname">Ser</span><span class="cd-tname">Arg</span>
      </div>
      <div class="cd-fold-arrow">
        <div class="cd-fold-arrow-icon">↓</div>
        <div class="cd-fold-arrow-label">fold</div>
      </div>
      <!-- Mount for the 3D structure. Only the loading placeholder
           is in the markup; presumably a script replaces it with the
           viewer (confirm which script owns #cd-protein-3d). -->
      <div class="cd-protein-3d" id="cd-protein-3d">
        <div class="cd-protein-3d-loading">loading hemoglobin…</div>
      </div>
      <div class="cd-protein-caption">
        <div class="cd-protein-caption__title">Human hemoglobin</div>
        <div class="cd-protein-caption__desc">the molecule that carries oxygen in your blood</div>
        <div class="cd-protein-caption__meta">4 chains · PDB <a href="https://www.rcsb.org/structure/1A3N" target="_blank" rel="noopener">1A3N</a></div>
      </div>
    </div>
  </div>
</div>
<!-- §0.6 · APPLICATIONS -->
<div class="section--two-col intro-subsection">
  <div class="section-narrative">
    <div class="section-num">§6 · Applications</div>
    <!-- Exposed to assistive tech as a level-3 heading (one level
         below the primer's h2). role/aria-level are used instead of
         swapping in an h3 element so the .section-title styling is
         left exactly as it is. -->
    <div class="section-title" role="heading" aria-level="3">What can the model do in the real world?</div>
    <p class="lede">
      A model that understands and writes DNA is useful wherever DNA is the
      input or the output. This can be used for a variety of tasks, such as
      tuning the genetics of the food we grow, designing the regulatory and
      coding sequences that drive biomanufacturing, and helping interpret
      the variants that show up in clinical sequencing.
    </p>
  </div>
  <div class="section-body">
    <!-- Three application cards stacked in a grid. Each card is an
         uppercase mono eyebrow, a bold title and one paragraph.
         NOTE(review): all three cards repeat the same inline style
         strings; moving them to classes in the stylesheet would
         remove the duplication, but that file is outside this view. -->
    <div class="demo" style="display:grid;gap:14px;padding:18px">
      <div style="padding:14px 16px;background:#fafaf6;border:1px solid #eee;border-radius:3px">
        <div style="font-family:'JetBrains Mono',monospace;font-size:10px;letter-spacing:1.4px;text-transform:uppercase;color:#6b7a6e;margin-bottom:6px">Biotechnology · precision breeding</div>
        <div style="font-weight:600;font-size:14px;margin-bottom:8px;color:#1f1f1d">Crops and livestock</div>
        <p style="margin:0;font-size:13px;line-height:1.6;color:#3a3a3a">
          Map genotype to phenotype across crops and livestock: surface the
          variants that drive yield, quality, disease and pest resistance,
          and tolerance to drought, heat, cold, or salinity, so breeders
          can select for them directly.
        </p>
      </div>
      <div style="padding:14px 16px;background:#fafaf6;border:1px solid #eee;border-radius:3px">
        <div style="font-family:'JetBrains Mono',monospace;font-size:10px;letter-spacing:1.4px;text-transform:uppercase;color:#6b7a6e;margin-bottom:6px">Synthetic biology · biomanufacturing</div>
        <div style="font-weight:600;font-size:14px;margin-bottom:8px;color:#1f1f1d">Designing what cells express, and how</div>
        <p style="margin:0;font-size:13px;line-height:1.6;color:#3a3a3a">
          Design and tune promoters, enhancers, UTRs, and terminators to
          control expression strength, tissue specificity, timing, and
          inducibility. The same machinery powers codon optimization and
          host-specific engineering, letting microbial strains turn out
          enzymes, chemicals, fuels, antibiotics, and natural products
          more efficiently.
        </p>
      </div>
      <div style="padding:14px 16px;background:#fafaf6;border:1px solid #eee;border-radius:3px">
        <div style="font-family:'JetBrains Mono',monospace;font-size:10px;letter-spacing:1.4px;text-transform:uppercase;color:#6b7a6e;margin-bottom:6px">Biomedicine · diagnosis and personalized medicine</div>
        <div style="font-weight:600;font-size:14px;margin-bottom:8px;color:#1f1f1d">Triaging variants, designing therapies</div>
        <p style="margin:0;font-size:13px;line-height:1.6;color:#3a3a3a">
          Help prioritize the variants of uncertain significance that crowd
          clinical sequencing in rare disease and cancer, where it's often
          unclear whether a DNA change is actually driving the phenotype.
          Further out, support patient-tailored therapeutic design: mRNA
          vaccines, therapeutic proteins, enzymes, and antimicrobial
          peptides, with expression efficiency, stability, and
          manufacturability in the loop.
        </p>
      </div>
    </div>
  </div>
</div>
</div>
</div>
<div class="tab-panel" id="panel-dna-lab" data-tab="dna-lab">
<div class="tab-lede">
  <div class="tab-lede__rail">
    <span class="tab-lede__eyebrow">Intro</span>
    <!-- Opening paragraph: what Carbon-3B is and how it was trained. -->
    <p>
      <strong>Carbon-3B</strong> is a 3-billion-parameter language model for DNA. It is trained on
      roughly 1 trillion tokens (6 trillion base pairs) of genomic sequence with a simple
      objective: given some DNA, predict what comes next (six bases at a time, autoregressively).
      Even though the objective is simple the resulting model is versatile. In the DNA lab you can
      explore all the cool things we can do with a DNA model.
    </p>
    <!-- Tour of the tab. Each .lede-chip is an in-page anchor to one
         demo section; only #completion is visible in this part of
         the file, the other targets (#track, #vep, #species,
         #folding, #umap, #speciesTree) are expected further down. -->
    <p class="tab-lede__note">
      Carbon-3B was trained unsupervised besides some simple tags for species and gene biotypes.
      It wasn't trained to tell which mutations are pathogenic or how genes differ between species.
      The sections below highlight what it picked up
      anyway: autocomplete a gene <a class="lede-chip" href="#completion">§1</a>, see
      structure emerge in its confidence <a class="lede-chip" href="#track">§2</a>, score
      a disease variant against a healthy one <a class="lede-chip" href="#vep">§3</a>,
      recognise a gene's species of origin <a class="lede-chip" href="#species">§4</a>,
      and then push further into folded protein structure
      <a class="lede-chip" href="#folding">§5</a>, the embedding manifold
      <a class="lede-chip" href="#umap">§6</a>, and the species tree
      <a class="lede-chip" href="#speciesTree">§7</a>. Each demo runs against the public
      <code>HuggingFaceBio/Carbon-3B</code> checkpoint behind a live inference endpoint.
    </p>
  </div>
</div>
<div class="container wide">
<!-- ============================================================ -->
<!-- §1 · GENE COMPLETION + ANNOTATION OVERLAY -->
<!-- ============================================================ -->
<section id="completion" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§1 · Autocomplete</div>
<div class="section-title">Autocomplete for the genome</div>
<p class="lede">
Same idea as GPT completing a sentence, but for DNA. We feed the model a DNA sequence
as input and the model produces an output sequence. The model streams the bases one
6-base token at a time. The model is better at predicting sequences of a gene's exons
because they are the protein-coding parts of a gene and are under strong evolutionary
constraint. As such they should be the most predictable stretches of DNA. The introns
serve regulatory purposes on the other hand and are harder to predict. We overlay the
<em>real</em> exon/intron annotations on top of the output so you can compare what
Carbon produces to what's actually there.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo1">
<div class="demo-toolbar">
<span>gene</span>
<span id="d1-pills" class="pills"></span>
<span class="spacer"></span>
<!-- Status sits BEFORE the buttons so that when its text width changes
(idle → generating… → done · 432 bp), the slack is absorbed by the
flex spacer to its left rather than shifting the buttons leftward
on every state transition. Buttons stay pinned to the right edge. -->
<span class="status is-hidden" id="d1-status"><span class="dot"></span><span></span></span>
<button id="d1-go" class="action primary">▶ generate</button>
<button id="d1-stop" class="action" disabled>stop</button>
</div>
<div class="gene-info" id="d1-info">loading genes…</div>
<svg class="gene-track draggable" id="d1-track" viewBox="0 0 1000 52" preserveAspectRatio="none"></svg>
<div class="track-axis-label" style="justify-content:flex-end;gap:20px;align-items:center">
<span class="legend-tip"
data-tip="Exon: coding segment of the gene. Stays in the mature mRNA and gets translated into protein."
style="display:inline-flex;align-items:center;gap:6px">
<svg width="44" height="12" viewBox="0 0 44 12" style="overflow:visible">
<line x1="0" y1="6" x2="14" y2="6" stroke="#aaa" stroke-width="1"/>
<rect x="14" y="0" width="16" height="12" fill="#317f3f"/>
<line x1="30" y1="6" x2="44" y2="6" stroke="#aaa" stroke-width="1"/>
</svg>
exon
</span>
<span class="legend-tip"
data-tip="Intron: non-coding stretch between exons. Spliced out of the pre-mRNA before translation."
style="display:inline-flex;align-items:center;gap:6px">
<svg width="44" height="12" viewBox="0 0 44 12" style="overflow:visible">
<rect x="0" y="0" width="6" height="12" fill="#317f3f"/>
<line x1="6" y1="6" x2="38" y2="6" stroke="#aaa" stroke-width="1"/>
<rect x="38" y="0" width="6" height="12" fill="#317f3f"/>
</svg>
intron
</span>
<span class="legend-tip"
data-tip="Drag the dark ▼ and ▲ markers to set the DNA window fed to the model (the prompt). Drag the green ▼ marker to set where generation stops. The model fills in the green region."
style="display:inline-flex;align-items:center;gap:6px">
<svg width="100" height="20" viewBox="0 0 100 20" style="overflow:visible">
<!-- prompt-region (faint dark) between start and end -->
<rect x="10" y="4" width="30" height="12" fill="#1f1f1d" opacity="0.06"/>
<!-- gen-region (muted green) between end and gen-end -->
<rect x="40" y="4" width="50" height="12" fill="#317f3f" opacity="0.15"/>
<!-- start handle: ▼ on top, line through body -->
<line x1="10" y1="4" x2="10" y2="16" stroke="#1f1f1d" stroke-width="1.5"/>
<polygon points="7,0 13,0 10,4" fill="#1f1f1d"/>
<!-- end handle: ▲ on bottom, line through body -->
<line x1="40" y1="4" x2="40" y2="16" stroke="#1f1f1d" stroke-width="1.5"/>
<polygon points="40,16 37,20 43,20" fill="#1f1f1d"/>
<!-- gen-end handle: ▼ on top, GREEN, line through body -->
<line x1="90" y1="4" x2="90" y2="16" stroke="#317f3f" stroke-width="1.5"/>
<polygon points="87,0 93,0 90,4" fill="#317f3f"/>
</svg>
prompt → generated
</span>
</div>
<div class="seq-block" id="d1-seq">pick a gene and hit generate</div>
<div class="seq-label">model output · <span style="color:#aaa">prompt in gray</span> · <span>generated colored by logprob (red = uncertain)</span> · <span><span style="color:#317f3f;font-weight:600">_</span> match</span> · <span><span style="color:#b00020;font-weight:600">_</span> mismatch</span></div>
<div class="stat-row" id="d1-stats">
<div class="stat-pair"><span class="stat-pair-label">identity</span><span class="stat-pair-val muted" id="d1-id">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">in-exon</span><span class="stat-pair-val muted" id="d1-id-exon">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">in-intron</span><span class="stat-pair-val muted" id="d1-id-intron">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">tokens</span><span class="stat-pair-val muted" id="d1-tok">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">mean logprob</span><span class="stat-pair-val muted" id="d1-lp">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">perplexity</span><span class="stat-pair-val muted" id="d1-ppl">·</span></div>
</div>
</div>
<div class="takeaway">
<p>
<strong>Try it</strong>
Drag the dark ▼ ▲ markers to slide the prompt window and the green ▼ to set
where generation stops, then hit ▶ generate. Land the green-shaded region
inside an exon (dark green block) and note the count of green-underlined matches;
repeat with a similar-length window over an intron and compare.
</p>
<p>
<strong>What to look for</strong>
Exons are under selection pressure, so getting them right takes real biological
understanding, not just DNA statistics. Boundaries between high- and low-confidence
stretches in Carbon's output also tend to fall near real exon/intron edges, even
though the model has never seen a single annotation.
</p>
</div>
<details class="code-snippet">
<summary>Run this from code</summary>
<div class="code-snippet__body">
<div class="code-snippet__tabs">
<button class="code-snippet__tab active" data-tab="endpoint" type="button">API</button>
<button class="code-snippet__tab" data-tab="local" type="button">transformers</button>
</div>
<button class="code-snippet__copy" type="button">Copy</button>
<!-- §1 snippet, API tab. Everything inside <pre><code> is whitespace-sensitive and is
     shown verbatim: "<" and ">" in the Python source must stay entity-escaped, otherwise
     the browser parses <dna> and <your-endpoint> as unknown tags and drops them from the
     rendered code.
     NOTE(review): the prompt is labelled as the HBB opening; it looks like the
     human INS gene opening instead. Confirm against the gene data before changing. -->
<div class="code-snippet__panel active" data-tab="endpoint"><pre><code>from huggingface_hub import get_token
from openai import OpenAI

# Carbon-3B can be served behind any OpenAI-compatible API (vLLM, TGI, an
# HF inference endpoint, etc.). Point base_url at your deployment.
client = OpenAI(
    base_url="https://&lt;your-endpoint&gt;/v1/",
    api_key=get_token(),
)

# First ~60 bp of HBB. Replace with whatever gene opening you want.
prompt = "&lt;dna&gt;AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGGTCTGTTCCAAGGGCCTT"
r = client.completions.create(
    model="HuggingFaceBio/Carbon-3B",
    prompt=prompt,
    max_tokens=10,  # 10 6-mer tokens ~= 60 bp of continuation
    temperature=0.5, top_p=0.9,
)
print(r.choices[0].text)</code></pre></div>
<!-- §1 snippet, transformers tab. The <pre><code> content is whitespace-sensitive:
     Python block bodies must be indented and "<" / ">" must be entity-escaped so the
     <dna> prefix survives HTML parsing. -->
<div class="code-snippet__panel" data-tab="local"><pre><code>from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

tok = AutoTokenizer.from_pretrained(
    "HuggingFaceBio/Carbon-3B", trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceBio/Carbon-3B",
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to("cuda").eval()

prompt = "&lt;dna&gt;AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGGTCTGTTCCAAGGGCCTT"
inputs = tok(prompt, return_tensors="pt", add_special_tokens=False).to("cuda")
with torch.inference_mode():
    out = model.generate(
        **inputs,
        max_new_tokens=10,  # ~60 bp at 6 bp / token
        temperature=0.5, top_p=0.9, do_sample=True,
    )

# Slice off the prompt so we just print the continuation.
new_ids = out[0, inputs["input_ids"].shape[1]:]
print(tok.decode(new_ids))</code></pre></div>
</div>
</details>
</div>
</section>
<!-- ============================================================ -->
<!-- §2 · LIKELIHOOD TRACK ACROSS A REAL GENE -->
<!-- ============================================================ -->
<section id="track" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§2 · Structure</div>
<div class="section-title">Recognizing gene structure</div>
<p class="lede">
The Carbon model assigns every 6-base chunk a log-probability under the surrounding
context: how "expected" or "likely" that stretch of DNA is. Plotted along a real gene,
the scores trace a curve that dips and rises. We overlay the exon/intron annotation
on top: confidence reliably climbs in protein-coding regions and falls in repetitive or
unconstrained intronic stretches, even though the model never saw a single label. The
same score, summed up, is what powers the variant-effect call in §3 below.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo3">
<!-- Likelihood tracks are precomputed (each gene ships with its
token logprobs in data/genes.json), so this toolbar is just
the gene selector: selecting a pill renders the track from
cache instantly; no live /score call is needed. -->
<div class="demo-toolbar">
<span>gene</span>
<span id="d3-pills" class="pills"></span>
</div>
<div class="gene-info" id="d3-info">loading genes…</div>
<svg class="gene-track" id="d3-track" viewBox="0 0 1000 40" preserveAspectRatio="none"></svg>
<svg id="d3-chart" style="display:block;width:100%;height:140px;background:#fff;border:1px solid #eee;margin-top:6px" preserveAspectRatio="none" viewBox="0 0 1000 140"></svg>
<div class="track-axis-label" style="padding-top:8px">
<span><span class="legend-swatch" style="background:#317f3f"></span>exon (shaded)</span>
<span style="color:#aaa">y-axis: log P per 6-bp token (higher = more confident)</span>
<span id="d3-bp-label" style="color:#888">0 bp</span>
</div>
<div class="stat-row" id="d3-stats">
<div class="stat-pair"><span class="stat-pair-label">mean (exon)</span><span class="stat-pair-val muted" id="d3-mean-exon">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">mean (intron)</span><span class="stat-pair-val muted" id="d3-mean-intron">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">Δ (exon − intron)</span><span class="stat-pair-val muted" id="d3-delta">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">tokens</span><span class="stat-pair-val muted" id="d3-tokens">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">mean (overall)</span><span class="stat-pair-val muted" id="d3-mean">·</span></div>
</div>
</div>
<div class="takeaway">
<p>
<strong>Try it</strong>
Pick a gene and watch its per-token confidence curve. Each gene's exons are
highlighted in green; the curve underneath is Carbon's log-probability for each 6-base
token along the sequence.
</p>
<p>
<strong>What to look for</strong>
Exons, especially the protein-coding portions, tend to score noticeably higher than
introns because they're evolutionarily conserved and full of constrained patterns the
model has learned to predict. The Δ tells you how strongly Carbon "noticed" the
difference for this gene. Keep this curve in mind for §3: a variant that flips a base
inside a high-confidence exon stretch is the kind of edit that should make Carbon
surprised.
</p>
</div>
<details class="code-snippet">
<summary>Run this from code</summary>
<div class="code-snippet__body">
<div class="code-snippet__tabs">
<button class="code-snippet__tab active" data-tab="endpoint" type="button">API</button>
<button class="code-snippet__tab" data-tab="local" type="button">transformers</button>
</div>
<button class="code-snippet__copy" type="button">Copy</button>
<!-- §2 snippet, API tab. The <pre><code> content is whitespace-sensitive: Python block
     bodies must be indented and "<" / ">" must be entity-escaped so <dna> and
     <your-endpoint> are rendered as text instead of being parsed as tags. -->
<div class="code-snippet__panel active" data-tab="endpoint"><pre><code>from huggingface_hub import get_token
from openai import OpenAI

client = OpenAI(
    base_url="https://&lt;your-endpoint&gt;/v1/",
    api_key=get_token(),
)

# Echoed scoring: forward-pass the prompt and return per-token logprobs
# (no generation). The score per 6-mer chunk is what the per-base
# confidence track is built from.
# gene_sequence: your DNA string, without the &lt;dna&gt; prefix (added below).
prompt = "&lt;dna&gt;" + gene_sequence  # full gene, up to ~32k tokens
r = client.completions.create(
    model="HuggingFaceBio/Carbon-3B",
    prompt=prompt,
    max_tokens=0, echo=True, logprobs=1, temperature=0,
)
for tok, lp in zip(r.choices[0].logprobs.tokens,
                   r.choices[0].logprobs.token_logprobs):
    print(f"{tok}\t{lp}")</code></pre></div>
<!-- §2 snippet, transformers tab. The <pre><code> content is whitespace-sensitive:
     Python block bodies must be indented and "<" / ">" must be entity-escaped so the
     <dna> prefix survives HTML parsing. -->
<div class="code-snippet__panel" data-tab="local"><pre><code>from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import torch.nn.functional as F

tok = AutoTokenizer.from_pretrained("HuggingFaceBio/Carbon-3B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceBio/Carbon-3B",
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to("cuda").eval()

# gene_sequence: your DNA string, without the &lt;dna&gt; prefix (added below).
ids = tok("&lt;dna&gt;" + gene_sequence, return_tensors="pt",
          add_special_tokens=False).input_ids.to("cuda")
with torch.inference_mode():
    logits = model(ids).logits

# Per-token log-prob of the actual next token (the standard "echo" pattern).
logp = F.log_softmax(logits.float(), dim=-1)[:, :-1, :]
per_tok_lp = logp.gather(2, ids[:, 1:].unsqueeze(-1)).squeeze(-1)[0]
for t, lp in zip(tok.convert_ids_to_tokens(ids[0, 1:].tolist()),
                 per_tok_lp.tolist()):
    print(f"{t}\t{lp:.3f}")</code></pre></div>
</div>
</details>
</div>
</section>
<!-- ============================================================ -->
<!-- §3 · VEP: original vs mutation likelihood -->
<!-- ============================================================ -->
<section id="vep" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§3 · Variant effect</div>
<div class="section-title">Predicting mutation effects</div>
<p class="lede">
§2 showed that Carbon's per-base confidence rises and falls in step with gene structure.
Now we use the same log-likelihood, but as a measure for individual mutations. For a
real ClinVar variant we score a ~4 kb window of human DNA two ways: once with the
original base, once with the mutation. Then we check which version looks more like
real, functioning human sequence. Carbon was never trained on what "pathogenic" means;
it just learned what natural DNA looks like. Variants that disrupt protein-coding or
regulatory function show up as less likely sequence under the model's distribution.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo2">
<div class="demo-toolbar">
<span>variant</span>
<span id="d2-pills" class="pills"></span>
</div>
<div class="vep-gene-box" id="d2-gene-box">loading variants…</div>
<div class="vep-window">
<!-- Status pill: hidden by default, surfaces when an edit triggers
a live rescore (or on the initial auto-score for a variant that
isn't yet in the precomputed cache). Lives outside the content
div below so it survives the innerHTML rebuilds in vep.js. -->
<span class="status is-hidden" id="d2-status"><span class="dot"></span><span></span></span>
<div id="d2-window"></div>
</div>
<svg id="d2-bars" style="display:block;width:100%;height:auto;background:#fff;border:1px solid #eee;margin-top:12px" preserveAspectRatio="xMinYMin meet"></svg>
</div>
<div class="takeaway">
<p>
<strong>Try it</strong>
Pick a known variant from the pills, then click any base in the mutation row to
introduce a different change. The model re-scores on every edit.
</p>
<p>
<strong>What to look for</strong>
Read each row two ways: the <em>dot color</em> is what ClinVar says (red = pathogenic,
orange = risk, green = benign); the <em>bar direction</em> is what Carbon says (red bar
pointing left = mutation less likely than original; charcoal bar pointing right =
mutation looks fine or more likely). Watch the two VHL rows for the cleanest
demonstration: a premature stop codon (c.475A>T) swings the bar hundreds of nats to
the left, while a common 3' UTR variant (c.*820A>G) in the very same gene sits at
zero. Same model, same window length, opposite verdicts. Carbon learned the
distinction from raw sequence alone, with no labels.
</p>
</div>
<details class="code-snippet">
<summary>Run this from code</summary>
<div class="code-snippet__body">
<div class="code-snippet__tabs">
<button class="code-snippet__tab active" data-tab="endpoint" type="button">API</button>
<button class="code-snippet__tab" data-tab="local" type="button">transformers</button>
</div>
<button class="code-snippet__copy" type="button">Copy</button>
<!-- §3 snippet, API tab. The <pre><code> content is whitespace-sensitive: the function
     body must be indented and "<" / ">" must be entity-escaped so <dna> and
     <your-endpoint> are rendered as text instead of being parsed as tags. -->
<div class="code-snippet__panel active" data-tab="endpoint"><pre><code>from huggingface_hub import get_token
from openai import OpenAI

client = OpenAI(
    base_url="https://&lt;your-endpoint&gt;/v1/",
    api_key=get_token(),
)

def score_sum(seq):
    """Sum of per-token log-probs for the given DNA sequence."""
    r = client.completions.create(
        model="HuggingFaceBio/Carbon-3B",
        prompt="&lt;dna&gt;" + seq,
        max_tokens=0, echo=True, logprobs=1, temperature=0,
    )
    return sum(lp for lp in r.choices[0].logprobs.token_logprobs if lp is not None)

# Score the same ~4 kb window two ways: original vs the one-base mutation.
delta = score_sum(var_seq) - score_sum(ref_seq)
print(f"delta = {delta:+.2f} (less likely if negative)")</code></pre></div>
<!-- §3 snippet, transformers tab. The <pre><code> content is whitespace-sensitive:
     Python block bodies must be indented and "<" / ">" must be entity-escaped so the
     <dna> prefix survives HTML parsing. -->
<div class="code-snippet__panel" data-tab="local"><pre><code>from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import torch.nn.functional as F

tok = AutoTokenizer.from_pretrained("HuggingFaceBio/Carbon-3B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceBio/Carbon-3B",
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to("cuda").eval()

def score_sum(seq):
    """Sum of per-token log-probs for the given DNA sequence."""
    ids = tok("&lt;dna&gt;" + seq, return_tensors="pt",
              add_special_tokens=False).input_ids.to("cuda")
    with torch.inference_mode():
        logits = model(ids).logits
    logp = F.log_softmax(logits.float(), dim=-1)[:, :-1, :]
    return logp.gather(2, ids[:, 1:].unsqueeze(-1)).sum().item()

# Score the same ~4 kb window two ways: original vs the one-base mutation.
delta = score_sum(var_seq) - score_sum(ref_seq)
print(f"delta = {delta:+.2f} (less likely if negative)")</code></pre></div>
</div>
</details>
</div>
</section>
<!-- ============================================================ -->
<!-- §4 · SAME GENE, DIFFERENT SPECIES -->
<!-- ============================================================ -->
<section id="species" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§4 · Species</div>
<div class="section-title">Species-specific generation</div>
<p class="lede">
The same gene (insulin, p53) exists in human, mouse and chicken, but the surrounding
sequence has accumulated different mutations along each lineage for hundreds of millions
of years. For each species we feed Carbon a prefix of a few hundred bp (400 by default)
and ask it to continue. Each continuation should match that species' real DNA better
than another species' would. The model handles relatively close species well (mouse, and
even chicken, which sits ~300 My from human); the further you go back in evolutionary
time, the more the surrounding sequence drifts and the harder this setup becomes.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo4">
<!-- §4 toolbar: gene pills (#d4-pills is empty in the markup), prefix-length pills,
     generation-length pills, the "run all" action and its status pill.
     Every button declares type="button", matching the code-snippet buttons in this
     file, so none of them can act as a submit button if the markup is ever placed
     inside a form. -->
<div class="demo-toolbar">
  <span>gene</span>
  <span id="d4-pills" class="pills"></span>
  <span>prefix</span>
  <span id="d4-prefix-pills" class="pills">
    <button class="pill" data-prefix="200" type="button">200</button>
    <button class="pill active" data-prefix="400" type="button">400</button>
    <button class="pill" data-prefix="600" type="button">600</button>
  </span>
  <span>generate</span>
  <span id="d4-gen-pills" class="pills">
    <button class="pill active" data-gen="60" type="button">60</button>
    <button class="pill" data-gen="200" type="button">200</button>
  </span>
  <span class="spacer"></span>
  <button id="d4-go" class="action primary" type="button">▶ run all</button>
  <span class="status is-hidden" id="d4-status"><span class="dot"></span><span></span></span>
</div>
<div class="gene-info" id="d4-info">loading species…</div>
<div id="d4-rows"></div>
<div class="track-axis-label" style="margin-top:14px">
<span style="color:#aaa">prompt in gray</span>
<span style="color:#1f1f1d">generated colored by logprob</span>
<span style="color:#b00020">mismatches in reference highlighted</span>
</div>
</div>
<div class="takeaway">
<p>
<strong>Try it</strong>
Pick a gene shared across species, set the prefix length, then hit <kbd>run all</kbd>
to score every species in parallel. Try the same gene at prefix 200 vs 400 and watch
the per-species identity respond.
</p>
<p>
<strong>What to look for</strong>
With 400 bp of context the model usually recognises which species' DNA it's been
given and continues in that species' style; identity to that species' reference often
runs 65–90% on the next 60 bp. Cut the prefix to 200 and the signal collapses to
near-random: a few hundred bases is what it takes to "lock in" on a lineage.
The gap between mouse and chicken is where you can read the evolutionary signal: 300+
My since the last common ancestor is enough drift that a 400 bp prefix still locks
Carbon in, but the per-base identity sits a notch below mouse.
</p>
</div>
<details class="code-snippet">
<summary>Run this from code</summary>
<div class="code-snippet__body">
<div class="code-snippet__tabs">
<button class="code-snippet__tab active" data-tab="endpoint" type="button">API</button>
<button class="code-snippet__tab" data-tab="local" type="button">transformers</button>
</div>
<button class="code-snippet__copy" type="button">Copy</button>
<!-- §4 snippet, API tab. The <pre><code> content is whitespace-sensitive: Python block
     bodies must be indented and "<" / ">" must be entity-escaped so <dna> and
     <your-endpoint> are rendered as text instead of being parsed as tags. -->
<div class="code-snippet__panel active" data-tab="endpoint"><pre><code>from huggingface_hub import get_token
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor

client = OpenAI(
    base_url="https://&lt;your-endpoint&gt;/v1/",
    api_key=get_token(),
)

def continue_species(species_prefix):
    r = client.completions.create(
        model="HuggingFaceBio/Carbon-3B",
        prompt="&lt;dna&gt;" + species_prefix,
        max_tokens=10,
        temperature=0.5, top_p=0.9,
    )
    return r.choices[0].text

# species_prefixes = { "human": ..., "mouse": ..., "chicken": ... }
with ThreadPoolExecutor() as pool:
    results = dict(zip(species_prefixes, pool.map(continue_species, species_prefixes.values())))
for name, cont in results.items():
    print(f"{name:10s} {cont}")</code></pre></div>
<!-- §4 snippet, transformers tab. The <pre><code> content is whitespace-sensitive:
     Python block bodies must be indented and "<" / ">" must be entity-escaped so the
     <dna> prefix survives HTML parsing. -->
<div class="code-snippet__panel" data-tab="local"><pre><code>from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

tok = AutoTokenizer.from_pretrained("HuggingFaceBio/Carbon-3B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceBio/Carbon-3B",
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to("cuda").eval()
tok.padding_side = "left"
if tok.pad_token is None: tok.pad_token = tok.eos_token

# species_prefixes = { "human": ..., "mouse": ..., "chicken": ... }
# Batch all species in one forward pass via left-padding.
prompts = ["&lt;dna&gt;" + p for p in species_prefixes.values()]
enc = tok(prompts, return_tensors="pt", padding=True, add_special_tokens=False).to("cuda")
with torch.inference_mode():
    out = model.generate(
        **enc, max_new_tokens=10,
        temperature=0.5, top_p=0.9, do_sample=True,
    )
new_ids = out[:, enc["input_ids"].shape[1]:]
for name, ids in zip(species_prefixes, new_ids):
    print(f"{name:10s} {tok.decode(ids)}")</code></pre></div>
</div>
</details>
</div>
</section>
<!-- ============================================================ -->
<!-- §5 · FOLDING (DNA → protein → 3D structure via ESMFold) -->
<!-- ============================================================ -->
<section id="folding" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§5 · Folding</div>
<div class="section-title">From DNA to proteins</div>
<p class="lede">
When Carbon completes a protein-coding region in a gene, the resulting bases translate
to a protein: a protein that folds. We feed the resulting sequence into ESMFold
(similar to AlphaFold) and render the 3D structure inline, alongside the same protein
folded from the reference sequence so you can see whether Carbon's continuation
produced something similar.
</p>
</div>
<div class="section-body">
<div class="demo" id="demoFold">
<!-- Cached-only UI: live fold UI (prefix selector, ▶ fold button,
status indicator) is intentionally not rendered. The pipeline
JS (runFold/streamGenerate/postFold) and the backend /fold
endpoint are still in place, see commit history or app.py if
you want to wire interactivity back in. -->
<div class="demo-toolbar">
<span>gene</span>
<span id="dfold-pills" class="pills"></span>
</div>
<div class="gene-info" id="dfold-info">loading genes…</div>
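<!-- Materialises the "75% prompt → 25% prediction → fold" pipeline for the
currently selected gene, so the visitor sees how many bp Carbon was given
vs how many it had to predict before any folding happens.
NOTE(review): this comment used to attribute the 75% / 25% split to the §5
lede, but the lede does not state it; confirm the split and either add it to
the lede or drop it here. -->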
<div class="mrna-info" id="dfold-mrna">·</div>
<!-- Amino-acid sequences side by side: Carbon's completion (left) and the reference
     (right). Both blocks start with the same neutral "·" placeholder: this is a
     cached-only UI with no fold button rendered, so the placeholder must not ask the
     visitor to click one. -->
<div class="fold-aa-grid">
  <div class="fold-aa-col">
    <div class="seq-label" id="dfold-aa-label">
      <span class="seq-tag carbon">carbon</span>
      <span class="aa-len-tag">· aa</span>
    </div>
    <div class="seq-block" id="dfold-aa">·</div>
  </div>
  <div class="fold-aa-col">
    <div class="seq-label" id="dfold-ref-aa-label">
      <span class="seq-tag ref">reference</span>
      <span class="aa-len-tag">· aa</span>
    </div>
    <div class="seq-block" id="dfold-ref-aa">·</div>
  </div>
</div>
<div class="fold-aa-legend">
<span class="fold-aa-legend-swatch" aria-hidden="true"></span>
<span>mismatches vs reference</span>
<span class="fold-aa-legend-sep" aria-hidden="true">·</span>
<span>aligned position by position</span>
</div>
<div class="fold-grid">
<div class="fold-viewer-col">
<div class="fold-viewer-label">carbon completion</div>
<div class="fold-viewer" id="dfold-viewer-carbon">
<div class="fold-empty">no structure yet</div>
</div>
</div>
<div class="fold-viewer-col">
<div class="fold-viewer-label">reference</div>
<div class="fold-viewer" id="dfold-viewer-ref">
<div class="fold-empty">no structure yet</div>
</div>
</div>
</div>
<div class="fold-legend">
pLDDT
<span class="fold-legend-bar" aria-hidden="true"></span>
low → high · drag to rotate
</div>
<div class="stat-row" id="dfold-stats">
<div class="stat-pair"><span class="stat-pair-label">residues</span><span class="stat-pair-val muted" id="dfold-n">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">pLDDT mean (carbon)</span><span class="stat-pair-val muted" id="dfold-plddt-c">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">pLDDT mean (ref)</span><span class="stat-pair-val muted" id="dfold-plddt-r">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">identity (1D)</span><span class="stat-pair-val muted" id="dfold-id">·</span></div>
</div>
</div>
<div class="takeaway">
<strong>What to look for</strong>
A high <em>pLDDT</em> means ESMFold is confident in the predicted structure
at that residue. The interesting case is when Carbon's completion <em>diverges
at the base level</em> — sometimes drastically, like CFTR at ~22% identity —
but still folds with high confidence into a shape that mirrors the reference
backbone. That's the model reaching past memorization for the structural
grammar underneath the sequence.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §6 · UMAP (interactive scatter) -->
<!-- ============================================================ -->
<section id="umap" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§6 · Embedding space</div>
<div class="section-title">Mapping out genomes</div>
<p class="lede">
We embed 571,810 genes from 27 species across six kingdoms (vertebrates,
invertebrates, plants, fungi, bacteria, viruses) with Carbon, project them to 2D with UMAP,
and color the points by a chosen attribute. Depending on the attribute, different kinds of
organization emerge from the same points: the model's embedding space encodes multiple axes of
biology at once, most of which were never labeled.
</p>
</div>
<div class="section-body">
<div class="demo" id="demoUmap">
<!-- §6 toolbar: "color by" pills (species is the initially active one) and the
     reset-view action, which starts disabled. Every button declares type="button",
     matching the code-snippet buttons in this file, so none of them can act as a
     submit button if the markup is ever placed inside a form. -->
<div class="demo-toolbar">
  <span>color by</span>
  <span id="dumap-color-pills" class="pills">
    <button class="pill active" data-color="species" type="button">species</button>
    <button class="pill" data-color="biotype" type="button">biotype</button>
    <button class="pill" data-color="strand" type="button">strand</button>
    <button class="pill" data-color="gc" type="button">gc content</button>
    <button class="pill" data-color="length" type="button">gene length</button>
  </span>
  <span class="spacer"></span>
  <button id="dumap-reset" class="action" type="button" disabled>↺ reset view</button>
</div>
<div class="demo-toolbar umap-highlight-toolbar">
<span>highlights</span>
<span id="dumap-highlight-pills" class="pills"></span>
</div>
<p class="umap-mode-desc" id="dumap-mode-desc"></p>
<div class="umap-frame">
<canvas class="umap-canvas" id="dumap-canvas"></canvas>
<div class="umap-annotations" id="dumap-annotations"></div>
<div class="umap-tooltip" id="dumap-tooltip"></div>
<div class="umap-status-overlay" id="dumap-overlay">loading 571K points · ~5.8 MB gzipped</div>
</div>
<div class="umap-legend" id="dumap-legend"></div>
<div class="stat-row" id="dumap-stats">
<div class="stat-pair"><span class="stat-pair-label">points</span><span class="stat-pair-val muted" id="dumap-n">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">species</span><span class="stat-pair-val muted" id="dumap-nsp">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">embedding dim</span><span class="stat-pair-val muted">3072</span></div>
<div class="stat-pair"><span class="stat-pair-label">render</span><span class="stat-pair-val muted" id="dumap-fps">·</span></div>
<div class="umap-nav-hint">drag to pan · wheel to zoom · hover for details</div>
</div>
</div>
<div class="takeaway">
<strong>What to look for</strong>
Switch coloring from <em>species</em> to <em>biotype</em>: same points, completely
different organization emerges. The macro-clusters trace six kingdoms (vertebrates,
invertebrates, plants, fungi, bacteria, viruses), discovered from raw sequence alone.
Switch again to <em>gc content</em> and a perpendicular axis appears: AT-rich (cool
blue) vs GC-rich (warm amber) regions cut across the species clusters, revealing the
composition gradient the model has internalised. <em>Points: 571,810 real Carbon 3B
embeddings, projected to 2D via UMAP.</em>
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §7 · SPECIES TREE (Carbon-derived phylogeny) -->
<!-- ============================================================ -->
<section id="speciesTree" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§7 · Species tree</div>
<div class="section-title">How Carbon groups species from DNA</div>
<p class="lede">
If we take 571,789 of the sequences from §6 (excluding the two viruses, which are not
part of the tree of life) and average each species' embeddings into a single 3072-dim
vector, then cluster those 25 centroids with hierarchical clustering,
we can find species the model regards as closely related. This dendrogram is not
intended as a phylogenetic tree; instead, it asks a simpler question: whether a model
trained only on DNA sequences learns representations whose geometry reflects broad
biological structure. Carbon was never told how organisms are related to one another.
Yet the resulting tree groups vertebrates together, separates bacteria from fungi,
and pairs sister clades (primates with primates, rodents with rodents, monocots with
monocots).
</p>
</div>
<div class="section-body">
<div class="demo" id="demoSpeciesTree">
<!-- §7 toolbar: linkage pills (ward / upgma), comparison-scope pills (kingdom-level /
     sister-level) and the match-score readout, whose values start as "·" placeholders.
     Every button declares type="button", matching the code-snippet buttons in this
     file, so none of them can act as a submit button if the markup is ever placed
     inside a form. -->
<div class="tree-toolbar">
  <span>linkage</span>
  <span id="dtree-link-pills" class="pills">
    <button class="pill active" data-link="ward" type="button">ward</button>
    <button class="pill" data-link="upgma" type="button">upgma</button>
  </span>
  <span style="margin-left: 14px;">vs ncbi</span>
  <span id="dtree-scope-pills" class="pills">
    <button class="pill active" data-scope="kingdom" type="button">kingdom-level</button>
    <button class="pill" data-scope="sister" type="button">sister-level</button>
  </span>
  <span class="spacer"></span>
  <div class="tree-score">
    <div class="tree-score-headline">
      <span class="tree-score-pct" id="dtree-score-pct">·</span>
      <span class="tree-score-ratio" id="dtree-score">·</span>
    </div>
    <div class="tree-score-label" id="dtree-score-suffix">match · ncbi kingdom</div>
  </div>
</div>
<div class="gene-info" id="dtree-info">hover a row to see its top neighbours · toggle linkage / scope above</div>
<div class="tree-frame">
<div class="tree-grid" id="dtree-grid">
<div class="tree-spine" id="dtree-spine">
<svg id="dtree-svg" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="none"></svg>
<div class="axis-label">cosine distance ←</div>
</div>
<div class="tree-rows" id="dtree-rows"></div>
</div>
<div class="tree-tooltip" id="dtree-tooltip"></div>
</div>
<div class="tree-legend">
<span class="tree-legend-item"><span class="tree-legend-swatch" style="background:#1f1f1d"></span>vertebrates</span>
<span class="tree-legend-item"><span class="tree-legend-swatch" style="background:#7a6242"></span>invertebrates</span>
<span class="tree-legend-item"><span class="tree-legend-swatch" style="background:#317f3f"></span>plants</span>
<span class="tree-legend-item"><span class="tree-legend-swatch" style="background:#a9762f"></span>fungi</span>
<span class="tree-legend-item"><span class="tree-legend-swatch" style="background:#b00020"></span>bacteria</span>
<span style="flex:1;"></span>
<span class="tree-legend-item"><span class="tree-legend-glyph" style="color:#317f3f">✓</span>nearest carbon neighbour shares the ncbi group</span>
<span class="tree-legend-item"><span class="tree-legend-glyph" style="color:#b00020">✗</span>doesn't</span>
<span class="tree-legend-item"><span class="tree-legend-glyph" style="color:#c8c5b9">·</span>solo (no ncbi sibling in the dataset)</span>
</div>
<div class="stat-row" id="dtree-stats">
<div class="stat-pair"><span class="stat-pair-label">species</span><span class="stat-pair-val" id="dtree-n">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">sequences</span><span class="stat-pair-val" id="dtree-nseq">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">embedding dim</span><span class="stat-pair-val">3072</span></div>
<div class="stat-pair"><span class="stat-pair-label">distance</span><span class="stat-pair-val">cosine</span></div>
</div>
</div>
<div class="takeaway">
<strong>What to look for</strong>
Toggle <em>kingdom-level</em> vs <em>sister-level</em>. At the kingdom scale the
signal is strong and stable: animals cluster with animals, bacteria with
bacteria. At the sister scale (primate-with-primate, etc.) the match score is lower because
distances are extremely small, so the nearest neighbour can change with sampling, pooling, or
linkage choice. The model nails the broad strokes but blurs the fine branches at
this resolution. Switch <em>linkage</em> from Ward to UPGMA to see how much of the
structure is method-independent. <em>Tree built from species centroids of mean-pooled
Carbon-3B embeddings.</em>
</div>
</div>
</section>
</div>
</div> <!-- /panel-dna-lab -->
<div class="tab-panel" id="panel-recipe" data-tab="recipe">
<div class="tab-lede">
<div class="tab-lede__rail">
<span class="tab-lede__eyebrow">Intro</span>
<p>
Carbon's architecture is deliberately vanilla. What's <em>not</em> vanilla, and what
gets the headline numbers in the DNA Lab tab, is three things: a <strong>6-mer
tokenizer</strong> that lets the model see ~6× more genomic context per
forward pass, a <strong>Factorized Nucleotide Supervision (FNS)</strong> loss
that gives the model partial credit for near-miss tokens once cross-entropy
training starts to wobble, and a <strong>multi-stage curated data mixture</strong>,
biased toward functional genomic regions. Everything else (architecture, optimizer)
is standard recipe. The technical report details each choice and the ablations
behind it.
</p>
<p class="tab-lede__note">
The sections below walk through each of those choices: how the tokenizer changes
what a "token" means in DNA <a class="lede-chip" href="#tokenizer">§1</a>, how
FNS rescues training in the BF16 regime <a class="lede-chip" href="#loss">§2</a>,
how bp-level generation and scoring fall out of the same marginalization
<a class="lede-chip" href="#bpinference">§3</a>, what's in the training corpus
<a class="lede-chip" href="#data">§4</a>, what the architecture looks like
<a class="lede-chip" href="#architecture">§5</a>, how 8k-token pretraining reaches
786 kbp at inference <a class="lede-chip" href="#longcontext">§6</a>, how Carbon
stacks up against Evo2-7B and GENERator-v2 on the full training-free suite
<a class="lede-chip" href="#results">§7</a>, and why the model runs so fast
<a class="lede-chip" href="#efficiency">§8</a>.
</p>
</div>
</div>
<div class="container wide">
<!-- ============================================================ -->
<!-- Recipe §1 · TOKENIZER -->
<!-- ============================================================ -->
<section id="tokenizer" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§1 · Tokenizer</div>
<div class="section-title">Read DNA in 6-base chunks</div>
<p class="lede">
The most direct way to model DNA is one base per token. It works, but for an
<code>L</code>-base sequence Transformer attention costs <code>L²</code>, and DNA contexts
are long. Carbon instead reads in fixed 6-base blocks. Same DNA span, ⅙ the tokens, and
because attention is quadratic, up to <strong>36× cheaper</strong> at the same coverage.
BPE was a tempting middle ground, but its variable-length tokens collide badly with
autoregressive next-token prediction: DNA doesn't have stable "words."
</p>
</div>
<div class="section-body">
<div class="demo" id="demo7">
<!-- Tokenizer demo input row: a free-text DNA field plus a length readout (#d7-len).
     The visible "type DNA" caption is a plain span, not a <label>, so the input
     carries its own aria-label to give assistive technology an accessible name;
     autocomplete is switched off because browser suggestions are meaningless for
     DNA strings. -->
<div class="demo-toolbar">
  <span>type DNA</span>
  <input id="d7-input" type="text" spellcheck="false" autocapitalize="characters" autocomplete="off"
         value="ATGGCCAAGCTGACCAGCGAGCTGCTGGCC"
         aria-label="DNA sequence to tokenize"
         style="font-family:'JetBrains Mono',monospace;font-size:12px;padding:6px 10px;border:1px solid #ccc;border-radius:3px;flex:1 1 auto;min-width:0;letter-spacing:1px;text-transform:uppercase">
  <span class="status"><span class="dot" style="background:#317f3f"></span><span id="d7-len">30 bp</span></span>
</div>
<div id="d7-cols" style="display:grid;grid-template-columns:1fr;gap:16px;margin-top:8px">
<div>
<div class="seq-label" style="margin-top:0">1-mer · one token per base</div>
<div class="seq-block" id="d7-1mer" style="min-height:60px"></div>
</div>
<div>
<div class="seq-label" style="margin-top:0">6-mer (carbon) · one token per 6 bases</div>
<div class="seq-block" id="d7-6mer" style="min-height:60px"></div>
</div>
</div>
<!-- Stats for both tokenisers, grouped under the two sequences so the
eye can compare them in one glance. Labels are prefixed with
"1-mer" / "6-mer" since the row no longer sits directly below its
own sequence block. -->
<div class="stat-row" style="margin-top:14px;padding-top:12px">
<div class="stat-pair"><span class="stat-pair-label">1-mer tokens</span><span class="stat-pair-val" id="d7-1mer-tok">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">1-mer attention</span><span class="stat-pair-val" id="d7-1mer-att">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">1-mer vocab</span><span class="stat-pair-val">4</span></div>
<div class="stat-pair"><span class="stat-pair-label">6-mer tokens</span><span class="stat-pair-val" id="d7-6mer-tok">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">6-mer attention</span><span class="stat-pair-val" id="d7-6mer-att">·</span></div>
<div class="stat-pair"><span class="stat-pair-label">6-mer vocab</span><span class="stat-pair-val">4,096</span></div>
</div>
<svg id="d7-bars" preserveAspectRatio="xMinYMin meet" style="display:block;width:100%;background:#fff;border:1px solid #eee;margin-top:14px"></svg>
<div class="track-axis-label" style="padding-top:10px">
<span>same DNA span</span>
<span style="color:#317f3f">▼ shorter token sequence = cheaper attention</span>
<span id="d7-speedup" style="color:#317f3f;font-weight:500">36× cheaper</span>
</div>
</div>
<div class="takeaway">
<strong>Why not BPE</strong>
BPE works for English because words have stable boundaries. DNA motifs don't:
the TATA box is a <em>family</em> of patterns (<code>TATATA</code>, <code>TATATT</code>, …),
not a single string. Worse, in autoregressive mode, BPE penalizes the model for predicting
a valid <em>prefix</em> of the target token. 6-mer is a deterministic, neutral compression
that avoids this trap.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §8 · TRAINING OBJECTIVE (CE → FNS) -->
<!-- ============================================================ -->
<section id="loss" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§2 · Training objective</div>
<div class="section-title">Partial credit for near-misses</div>
<p class="lede">
Cross-entropy treats every 6-mer token as atomic: predict <code>TATATT</code> when the
target was <code>TATATA</code>, get zero credit even though five of six bases matched.
That gets brittle late in training. Carbon switches to <strong>Factorized Nucleotide
Supervision</strong>: instead of one 4096-way classification, the model is supervised on
six parallel 4-way nucleotide marginals derived from the same logits. Near-miss tokens
get partial credit proportional to how many bases they got right.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo8">
  <div class="demo-toolbar">
    <span>target 6-mer</span>
    <!-- Target selector. Each pill carries its 6-mer in data-target.
         type="button" is explicit so a pill can never act as a submit
         button, matching the code-snippet buttons elsewhere on the page. -->
    <span id="d8-target-pills" class="pills">
      <button type="button" class="pill active" data-target="TATATA">TATATA</button>
      <button type="button" class="pill" data-target="ATGGCC">ATGGCC</button>
      <button type="button" class="pill" data-target="GCATCG">GCATCG</button>
    </span>
  </div>
  <!-- Empty in the markup; presumably the loss visualisation is rendered
       here by script (confirm against the page JS). -->
  <div id="d8-canvas" style="margin-top:12px"></div>
</div>
<div class="takeaway">
<strong>What the switch buys you</strong>
CE first: the model learns the joint structure of bases inside each 6-mer (codon
constraints, splice signals, motif composition). FNS later: once CE turns brittle
(the "loss staircase" appears and BF16 inference starts diverging from FP32), FNS smooths the
objective and restores numerical robustness without giving up the joint prior CE built.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §8.5 · BP-LEVEL INFERENCE -->
<!-- ============================================================ -->
<section id="bpinference" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§3 · BP-level inference</div>
<div class="section-title">Bases, not 6-mers</div>
<p class="lede">
The 6-mer tokenizer makes Carbon fast, but it's coarse in both directions
of inference. When <em>generating</em>, each step advances the sequence by
6 bases at once and temperature acts on a 4,096-way distribution rather
than per nucleotide. When <em>scoring</em> an existing sequence, the raw
next-token likelihood answers "how likely is this 6-mer in context?", not
"how likely is this exact base at this exact position?", which is the
version you want for variant-effect prediction. The same marginalization
that powers FNS at training time fixes both: softmax over the 6-mer
logits, then for each position <code>p</code> sum the probabilities of
every 6-mer that shares a given base at <code>p</code>, and you recover
six per-position 4-way base distributions. To generate, sample (or argmax)
each independently and force the matching 6-mer token. To score, read
<em>P(actual base | context)</em> directly off the marginals at every
position. Same logits, same math, two endpoints.
</p>
</div>
<div class="section-body">
<div class="demo" id="demobp">
<div class="seq-label" style="margin-top:0">per-step pipeline · 4,096-way 6-mer logits → 6 × 4-way base marginals → reassembled token</div>
<div style="display:grid;gap:12px;padding:14px;background:#fff;border:1px solid #eee;font-family:'JetBrains Mono',monospace">
<div>
<div style="font-size:10px;color:#888;letter-spacing:1px;text-transform:uppercase;margin-bottom:6px">step 1 · softmax over 4,096 DNA tokens</div>
<!-- Static illustration of the step-1 softmax: 50 hard-coded bars, 8 units
     wide on a 16-unit pitch, all bottom-aligned at y=30. One bar (x=224) is
     drawn tall and green to stand for the dominant token. The graphic has no
     text of its own, so role="img" plus aria-label gives it a name. -->
<svg viewBox="0 0 800 30" preserveAspectRatio="none" role="img" aria-label="Illustrative softmax distribution over 6-mer tokens: many low bars and one dominant token highlighted" style="display:block;width:100%;height:30px;background:#fafaf6;border:1px solid #eee">
  <rect x="0" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="16" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="32" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="48" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="64" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="80" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="96" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="112" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="128" y="24" width="8" height="6" fill="#c4c0b3"/>
  <rect x="144" y="22" width="8" height="8" fill="#c4c0b3"/>
  <rect x="160" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="176" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="192" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="208" y="27" width="8" height="3" fill="#c4c0b3"/>
  <!-- dominant token -->
  <rect x="224" y="2" width="8" height="28" fill="#1A7A40"/>
  <rect x="240" y="20" width="8" height="10" fill="#c4c0b3"/>
  <rect x="256" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="272" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="288" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="304" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="320" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="336" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="352" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="368" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="384" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="400" y="22" width="8" height="8" fill="#c4c0b3"/>
  <rect x="416" y="18" width="8" height="12" fill="#c4c0b3"/>
  <rect x="432" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="448" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="464" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="480" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="496" y="14" width="8" height="16" fill="#c4c0b3"/>
  <rect x="512" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="528" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="544" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="560" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="576" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="592" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="608" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="624" y="24" width="8" height="6" fill="#c4c0b3"/>
  <rect x="640" y="20" width="8" height="10" fill="#c4c0b3"/>
  <rect x="656" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="672" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="688" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="704" y="27" width="8" height="3" fill="#c4c0b3"/>
  <rect x="720" y="26" width="8" height="4" fill="#c4c0b3"/>
  <rect x="736" y="22" width="8" height="8" fill="#c4c0b3"/>
  <rect x="752" y="28" width="8" height="2" fill="#c4c0b3"/>
  <rect x="768" y="25" width="8" height="5" fill="#c4c0b3"/>
  <rect x="784" y="27" width="8" height="3" fill="#c4c0b3"/>
</svg>
</div>
<div style="text-align:center;color:#888;font-size:11px">▼ sum over 6-mers sharing a base at position <em>p</em></div>
<div>
<div style="font-size:10px;color:#888;letter-spacing:1px;text-transform:uppercase;margin-bottom:6px">step 2 · six 4-way per-base distributions</div>
<div style="display:grid;grid-template-columns:repeat(6,1fr);gap:6px">
  <!-- Six cards, one per position inside the 6-mer. Each card holds a title,
       four bottom-aligned bars in A / T / C / G order, and a letter row in
       which the tallest bar's base is green and bold. Heights are hard-coded.
       Bar rows and letter rows are flex containers, so putting each child on
       its own line does not affect layout. -->

  <!-- position 1: A is the tall bar -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 1</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:30px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:5px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:3px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:3px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">A</span>
      <span style="width:7px;text-align:center">T</span>
      <span style="width:7px;text-align:center">C</span>
      <span style="width:7px;text-align:center">G</span>
    </div>
  </div>

  <!-- position 2: C is the tall bar -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 2</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:28px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center">A</span>
      <span style="width:7px;text-align:center">T</span>
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">C</span>
      <span style="width:7px;text-align:center">G</span>
    </div>
  </div>

  <!-- position 3: G is the tall bar -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 3</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:3px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:3px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:28px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center">A</span>
      <span style="width:7px;text-align:center">T</span>
      <span style="width:7px;text-align:center">C</span>
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">G</span>
    </div>
  </div>

  <!-- position 4: T is the tall bar -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 4</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:28px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center">A</span>
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">T</span>
      <span style="width:7px;text-align:center">C</span>
      <span style="width:7px;text-align:center">G</span>
    </div>
  </div>

  <!-- position 5: A leads only narrowly over T (the least peaked card) -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 5</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:20px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:14px;background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">A</span>
      <span style="width:7px;text-align:center">T</span>
      <span style="width:7px;text-align:center">C</span>
      <span style="width:7px;text-align:center">G</span>
    </div>
  </div>

  <!-- position 6: T is the tall bar -->
  <div style="background:#fafaf6;border:1px solid #eee;border-radius:2px;padding:6px 4px">
    <div style="font-size:10px;color:#888;text-align:center;margin-bottom:4px">pos 6</div>
    <div style="display:flex;align-items:flex-end;justify-content:center;gap:3px;height:34px">
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:26px;background:#1A7A40;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:4px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
      <div style="width:7px;height:8px; background:#c4c0b3;border-radius:1px 1px 0 0"></div>
    </div>
    <div style="display:flex;justify-content:center;gap:3px;margin-top:3px;font-size:9px;color:#888">
      <span style="width:7px;text-align:center">A</span>
      <span style="width:7px;text-align:center;color:#1A7A40;font-weight:700">T</span>
      <span style="width:7px;text-align:center">C</span>
      <span style="width:7px;text-align:center">G</span>
    </div>
  </div>
</div>
</div>
<div style="text-align:center;color:#888;font-size:11px">▼ same marginals feed two endpoints: generate (force a token) or score (read off P(base))</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:10px">
  <!-- Left column, step 3a: the generation endpoint. Shows the six bases
       that are highlighted in the step-2 cards, reassembled into one 6-mer. -->
  <div>
    <div style="font-size:10px;color:#888;letter-spacing:1px;text-transform:uppercase;margin-bottom:6px">step 3a · generate</div>
    <div style="display:flex;flex-direction:column;align-items:center;justify-content:center;gap:6px;padding:12px;background:#fafaf6;border:1px solid #eee;height:88px;box-sizing:border-box">
      <div style="display:flex;gap:6px;font-size:18px;font-weight:700;color:#1A7A40;letter-spacing:2px">
        <span>A</span>
        <span>C</span>
        <span>G</span>
        <span>T</span>
        <span>A</span>
        <span>T</span>
      </div>
      <div style="font-size:10px;color:#666;text-align:center;line-height:1.4">
        argmax / multinomial → force matching 6-mer token
      </div>
    </div>
  </div>
  <!-- Right column, step 3b: the scoring endpoint. Six hard-coded example
       probabilities, one per position. -->
  <div>
    <div style="font-size:10px;color:#888;letter-spacing:1px;text-transform:uppercase;margin-bottom:6px">step 3b · score</div>
    <div style="display:flex;flex-direction:column;align-items:center;justify-content:center;gap:6px;padding:12px;background:#fafaf6;border:1px solid #eee;height:88px;box-sizing:border-box">
      <div style="display:flex;gap:8px;font-size:11px;color:#1A7A40;font-weight:600;font-feature-settings:'tnum'">
        <span>.83</span>
        <span>.71</span>
        <span>.92</span>
        <span>.67</span>
        <span>.48</span>
        <span>.79</span>
      </div>
      <div style="font-size:10px;color:#666;text-align:center;line-height:1.4">
        read P(actual base | context) at each position
      </div>
    </div>
  </div>
</div>
</div>
</div>
<div class="takeaway">
<strong>When to switch on bp-level</strong>
Use plain 6-mer decoding when 6-base granularity is fine: throughput-bound
generation, long retrieval haystacks, large-scale screening. Reach for
bp-level <em>generation</em> when you need exact base counts, per-position
masks, or temperature applied at the base axis rather than the 4,096-way
6-mer axis. Reach for bp-level <em>scoring</em> whenever the task is about
a specific base: variant-effect prediction, single-nucleotide mutational
scans, comparing the likelihood of a reference and an alternate allele at
one position. Both paths ship together on the <code>fns</code> revision of
the <code>Carbon-3B</code>/<code>8B</code>/<code>500M</code> checkpoints:
plain <code>.generate()</code> already produces bp-resolution output (the
tokenizer exposes the kmer width as <code>tokenizer.k</code>), and the
model gains a <code>score_sequence(seqs)</code> method that batches a list
of sequences and returns per-base distributions plus the probability of
the observed base at every position.
</div>
<!-- Collapsible "run this from code" examples with two tabs (generate / score).
     Everything inside <pre><code> is whitespace-sensitive and is what the Copy
     button is meant to hand to the user, so:
       * Python indentation must be real leading spaces, and
       * literal "<" and ">" must be written as &lt; / &gt;. A bare <dna> would
         be parsed as an element and disappear from the displayed code.
     The markup lines stay flush left so no stray indentation leaks into <pre>. -->
<details class="code-snippet">
<summary>Run this from code</summary>
<div class="code-snippet__body">
<div class="code-snippet__tabs">
<button class="code-snippet__tab active" data-tab="generate" type="button">generate</button>
<button class="code-snippet__tab" data-tab="score" type="button">score</button>
</div>
<button class="code-snippet__copy" type="button">Copy</button>
<div class="code-snippet__panel active" data-tab="generate"><pre><code>import math
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "HuggingFaceBio/Carbon-3B"
revision = "fns"
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to(device).eval()
context = "ATGCGCTAGCTACGATCGATCGTAGCTAGCTAGCTAGCTACG"
n_bp = 60
inputs = tokenizer(f"&lt;dna&gt;{context}", return_tensors="pt", add_special_tokens=False).to(device)
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=math.ceil(n_bp / tokenizer.k),
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )
generated_ids = output_ids[0, inputs.input_ids.shape[1]:]
generated_dna = tokenizer.decode(generated_ids, skip_special_tokens=True)[:n_bp]
print(generated_dna)</code></pre></div>
<div class="code-snippet__panel" data-tab="score"><pre><code>import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "HuggingFaceBio/Carbon-3B"
revision = "fns"
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    dtype=torch.bfloat16,
).to(device).eval()
reference = "GGGCTATAAAGGCCATCGATCGATCGATCGATCGATCGATCG"
perturbed = "GGGCGCGCGCGGCCATCGATCGATCGATCGATCGATCGATCG"
# score_sequence accepts a list of sequences and returns, for each one,
# the [seq_len, 4] marginal P(A/T/C/G | context) and the [seq_len]
# probability of the observed base.
with torch.no_grad():
    bp_probs, actual_probs = model.score_sequence([reference, perturbed])
scores = [torch.log(p.clamp_min(1e-12)).mean().item() for p in actual_probs]
print(f"reference mean bp logp: {scores[0]:.4f}")
print(f"perturbed mean bp logp: {scores[1]:.4f}")
print(f"reference preferred: {scores[0] &gt; scores[1]}")</code></pre></div>
</div>
</details>
</div>
</section>
<!-- ============================================================ -->
<!-- §9 · DATA -->
<!-- ============================================================ -->
<section id="data" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§4 · Data</div>
<div class="section-title">Genomes are mostly background</div>
<p class="lede">
A naive read of "more data is better" misses something specific to DNA: most of a
eukaryotic genome is repeats, low-complexity, and weakly-constrained background.
Train on raw sequence and a lot of your loss is dominated by easy-to-predict noise.
Carbon's corpus is an annotation-aware mixture, biased toward gene-centric, transcript,
and bacterial sequence, so the model spends more of its gradient updates on biologically
meaningful sequence.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo9">
  <!-- Three stacked panels: corpus composition (#d9-bars), signal-to-noise
       strip (#d9-snr) with its legend, and metadata templates
       (#d9-templates). All three targets are empty in the markup; presumably
       they are populated by script (confirm against the page JS). -->
  <div class="seq-label" style="margin-top:0">corpus composition · 1T tokens (6T base pairs)</div>
  <div class="d9-bars" id="d9-bars" style="margin-bottom:22px"></div>

  <div class="seq-label">signal-to-noise · raw genome vs annotation-aware curation</div>
  <svg id="d9-snr" viewBox="0 0 1000 100" preserveAspectRatio="none" style="display:block;width:100%;height:90px;background:#fff;border:1px solid #eee"></svg>
  <div class="track-axis-label" style="padding-top:10px">
    <span><span class="legend-swatch" style="background:#317f3f"></span>functional / annotated</span>
    <span><span class="legend-swatch" style="background:#ddd"></span>background</span>
    <span style="color:#888">curating raises the density of biological signal in the gradient</span>
  </div>

  <div class="seq-label" style="margin-top:18px">metadata templates · the model sees mixed contexts so it works with or without labels</div>
  <div id="d9-templates" style="display:grid;grid-template-columns:80px 1fr;gap:6px 14px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#333"></div>
</div>
<div class="takeaway">
<strong>The signal-to-noise math</strong>
If only 5% of a raw corpus is informative, but you keep 80% of informative regions while
discarding 95% of background, the effective informative fraction jumps from 5% to ≈ 46%.
Same training compute, ~9× more learning signal per gradient step.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §10 · ARCHITECTURE -->
<!-- ============================================================ -->
<section id="architecture" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§5 · Architecture</div>
<div class="section-title">A deliberately vanilla transformer</div>
<p class="lede">
Decoder-only, RMSNorm + SwiGLU + RoPE + grouped-query attention, tied I/O embeddings,
8k-token context. Nothing exotic. The architectural surface is intentionally familiar so
that any improvement Carbon shows on genomic tasks is attributable to the data, the
tokenizer, and the loss, not to a custom block or a hand-crafted attention variant.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo10">
  <!-- Architecture table: no rows in the markup; presumably built by script
       (confirm against the page JS). The vocabulary footnote is static. -->
  <table id="d10-arch" style="width:100%;border-collapse:collapse;font-family:'JetBrains Mono',monospace;font-size:12px"></table>
  <div style="margin-top:14px;font-size:11px;color:#666;font-family:'JetBrains Mono',monospace">
    vocabulary = 4,096 6-mer DNA tokens + small set of special / metadata tokens · total 155,776
  </div>
</div>
<div class="takeaway">
<strong>Why this matters</strong>
Architecture innovation is one of the cheapest things to claim and one of the hardest things
to attribute. Carbon's results (competitive with Evo2-7B at 3B parameters, ahead of it on a
majority of tasks at 8B) come from changes that <em>aren't</em> the architecture. That's where
the room for genomic foundation models still is.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §11 · LONG CONTEXT (training-time extension + YaRN) -->
<!-- ============================================================ -->
<section id="longcontext" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§6 · Long context</div>
<div class="section-title">Pretrain at 8k, retrieve at 786 kbp</div>
<p class="lede">
Carbon's nominal training context is short by megabase-scale standards (8k tokens, ≈49 kbp).
The reach comes from a two-step extension. First, a <strong>training-time</strong> long-context
phase lifts the context to 32k tokens (≈197 kbp) with RoPE θ rescaled from 500k to 5M.
Then, at <strong>inference</strong>, YaRN pushes that further: 2× to 65k tokens for the 3B
model, 4× to 131k tokens for the 8B (≈786 kbp, the size of a small bacterial genome).
The 8B has more capacity to absorb the YaRN stretch, which is why it extends further than the 3B.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo11">
  <!-- Two charts sharing one legend: the context-length ladder (#d11-ladder)
       and the Genome-NIAH retrieval chart (#d11-niah). Both SVGs are empty in
       the markup; presumably drawn by script (confirm against the page JS). -->
  <div class="seq-label" style="margin-top:0">context length · log scale, base pairs of DNA reachable in a single forward pass</div>
  <svg id="d11-ladder" preserveAspectRatio="xMinYMin meet" style="display:block;width:100%;background:#fff;border:1px solid #eee"></svg>

  <div class="seq-label" style="margin-top:18px">Genome-NIAH retrieval · plain variant · find a planted 24 bp value inside a real-genome haystack</div>
  <svg id="d11-niah" preserveAspectRatio="xMinYMin meet" style="display:block;width:100%;background:#fff;border:1px solid #eee"></svg>

  <!-- Legend: one swatch per model, followed by the metric note. -->
  <div class="track-axis-label" style="padding-top:10px">
    <span><span class="legend-swatch" style="background:#1A7A40"></span>Carbon 8B (YaRN)</span>
    <span><span class="legend-swatch" style="background:#6DBF7E"></span>Carbon 3B (YaRN)</span>
    <span><span class="legend-swatch" style="background:#8C7355"></span>Evo2-7B (native 1M)</span>
    <span style="color:#888">accuracy at exact-match retrieval, 500 samples per cell</span>
  </div>
</div>
<div class="takeaway">
<strong>The headline number</strong>
At 786 kbp, Carbon-8B retrieves the planted needle at <em>65%</em> accuracy. Evo2-7B,
natively trained at 1M tokens of single-nucleotide context (≈8× more wall-clock per token),
scores <em>53%</em> at the same length. So a 6-mer model trained to 32k tokens
plus YaRN-4× at inference reaches further than a 1M-native single-nucleotide model, which
is the entire bet of the Carbon recipe: nominal context length is not the same as effective
context utilization.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §12 · RESULTS (per-task barplot vs Evo2-7B + GENERator-v2) -->
<!-- ============================================================ -->
<section id="results" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§7 · Results</div>
<div class="section-title">Training-free, head-to-head</div>
<p class="lede">
Eight training-free tasks across four capability axes: generative sequence recovery,
variant-effect prediction (BRCA2, TraitGym, ClinVar coding / non-coding), sequence-level
perturbation (synthetic motif insertion and synonymous codon shuffling), and long-context
retrieval (Genome-NIAH at 393 kbp). No fine-tuning, no head training, all four frozen
pretrained models scored under the same protocol. Carbon-3B is competitive with Evo2-7B
despite less than half the parameters; Carbon-8B is ahead on five of eight.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo12">
  <!-- Per-task bar chart mount point: empty in the markup, presumably filled
       by script (confirm against the page JS). -->
  <div id="d12-bars"></div>
  <!-- Colour key for the four models compared. -->
  <div class="track-axis-label chart-legend">
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#1A7A40"></span>Carbon 8B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#6DBF7E"></span>Carbon 3B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#5A5A56"></span>Evo2-7B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#B5B0A6"></span>GENERator-v2 3B</span>
  </div>
</div>
<div class="takeaway">
<strong>How to read it</strong>
Carbon-8B leads on sequence recovery, BRCA2, ClinVar non-coding, triplet expansion, and
Genome-NIAH at 393 kbp. Evo2-7B holds onto TraitGym Mendelian (a hard non-coding variant set),
and edges Carbon-8B on ClinVar coding and synonymous codon shuffling by a fraction of a point
each — small enough to be effectively a tie. The pattern is broad rather than peaky:
Carbon's gains come from data, tokenizer, and objective design, distributed across tasks,
not from a single specialised benchmark.
</div>
</div>
</section>
<!-- ============================================================ -->
<!-- §13 · EFFICIENCY (placeholder · figure pending) -->
<!-- ============================================================ -->
<section id="efficiency" class="section--two-col">
<div class="section-narrative">
<div class="section-num">§8 · Efficiency</div>
<div class="section-title">Why Carbon is fast</div>
<p class="lede">
The throughput story is a two-factor multiplication, not one big trick. First, the
architecture is deliberately vanilla: a stock Llama-3-shaped decoder. That means
Carbon drops straight into <strong>vLLM</strong> and inherits the same paged-attention,
fused kernels, and CUDA-graph capture that the open-source LLM stack has been
optimizing for two years. Custom blocks would forfeit all of that. Second, 6-mer
tokenization compresses a given DNA span by <strong>6×</strong> at the input, which under
quadratic attention is up to a 36× reduction in prefill cost, and the decode loop
emits 6 bases per step instead of one. Stacking the two: standard-stack inference
speedups, multiplied by tokenizer compression, gets you the order-of-magnitude gap
over Evo2 reported in the paper.
</p>
</div>
<div class="section-body">
<div class="demo" id="demo13">
  <div class="seq-label" style="margin-top:0">Inference throughput · output bp/s · single H100</div>
  <!-- Throughput chart container: no static children in the markup. -->
  <svg id="d13-throughput" preserveAspectRatio="xMinYMin meet" style="display:block;width:100%;background:#fff;border:1px solid #eee"></svg>

  <!-- Seven-model colour key using the larger, sentence-case .chart-legend
       variant (from sequence.css) so it reads as a proper colour reference
       rather than a caption strip. The leading "Legend" label is set in the
       same mono-uppercase editorial register as .seq-label and
       .sb-examples-label, so it acts as a gutter heading and not as an
       eighth item in the row. -->
  <div class="track-axis-label chart-legend" style="justify-content:flex-start">
    <span style="font-family:'JetBrains Mono',monospace;font-size:10px;color:#888;text-transform:uppercase;letter-spacing:1.5px;font-weight:500">Legend</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#1A7A40"></span>Carbon-8B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#6DBF7E"></span>Carbon-3B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#A8DCB4"></span>Carbon-500M</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#C9A06A"></span>Evo2 1B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#8C7355"></span>Evo2 7B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#5A4A38"></span>Evo2 20B</span>
    <span class="chart-legend__item"><span class="legend-swatch" style="background:#2A211A"></span>Evo2 40B</span>
  </div>

  <!-- Footer strip: data source on the left, runner note on the right. -->
  <div style="margin-top:14px;padding-top:10px;border-top:1px solid #eee;font-family:'JetBrains Mono',monospace;font-size:10px;letter-spacing:1px;text-transform:uppercase;color:#888;display:flex;justify-content:space-between;flex-wrap:wrap;gap:14px">
    <span>Source · <a href="https://huggingface.co/datasets/HuggingFaceBio/carbon-inference-evals" style="color:#1f1f1d;text-decoration:underline">carbon-inference-evals</a></span>
    <span style="color:#aaa">vLLM for Carbon · Evo2 native runner</span>
  </div>
</div>
<!-- Takeaway callout: bold lead-in followed by the summary paragraph. -->
<div class="takeaway">
  <strong>The compound effect</strong>
  Neither factor on its own would be a story. Vanilla architecture without 6-mer compression
  would land Carbon at roughly Llama-3 throughput: fine but not remarkable. 6-mer compression
  on a custom architecture would force a hand-rolled inference stack to keep up with vLLM.
  Doing both together is what makes a 3B-parameter DNA model usable for large-scale evaluation
  on commodity hardware.
</div>
</div>
</section>
</div>
</div> <!-- /panel-recipe -->
<!-- ============================================================ -->
<!-- TAB 3 · SANDBOX (the open-ended playground) -->
<!-- ============================================================ -->
<div class="tab-panel" id="panel-sandbox" data-tab="sandbox">
<!-- Sandbox tab lede: eyebrow label plus a one-paragraph description of the playground. -->
<div class="tab-lede">
  <div class="tab-lede__rail">
    <span class="tab-lede__eyebrow">Intro</span>
    <p>Open-ended DNA continuation. Type any prefix in {A, C, G, T}, watch the model continue token by token. Toggle base-coloring or per-token logprob coloring to see where Carbon is confident and where it's guessing. Track GC content, perplexity, and throughput live.</p>
  </div>
</div>
<div class="container" style="max-width:1200px">
<!-- Data-safety notice shown above the sandbox cards. The warning glyph is
     decorative (aria-hidden); the note itself is named via aria-label. -->
<aside class="sb-safety" role="note" aria-label="Data safety">
  <span class="sb-safety__icon" aria-hidden="true">⚠</span>
  <div class="sb-safety__body">
    <strong>Genetic data is highly sensitive.</strong>
    Depending on how this model is used (local download, inference API/endpoints, third-party inference providers, Spaces demos or others), input and output data may be processed or handled differently by different providers or space owners. Please make sure you understand and agree with how your data is handled before using the model.
  </div>
</aside>
<!-- INPUT card: examples → prompt → controls → status.
     The header's meta slot ("Connected to" + #sb-meta) tells you which model
     the playground is talking to. It reuses the same eyebrow + value pattern
     as the card headings so the whole panel reads as a single layered stack
     rather than a flat wall of controls.
     Every <button> in this card carries an explicit type="button": a bare
     <button> defaults to type="submit", which would submit any form this
     markup is ever placed inside. -->
<section class="sb-card">
  <header class="sb-card__header sb-card__header--with-meta">
    <div class="sb-card__heading">
      <span class="sb-card__eyebrow">§ Input</span>
      <h2 class="sb-card__title">Prompt</h2>
      <p class="sb-card__hint">DNA prefix in <code>{A, C, G, T}</code>: pick an example or type your own.</p>
    </div>
    <div class="sb-card__meta">
      <span class="sb-card__eyebrow">Connected to</span>
      <!-- Placeholder text; presumably replaced by script once the model
           config has loaded (verify against sandbox.js / config.js). -->
      <div id="sb-meta" class="sb-header__meta">loading…</div>
    </div>
  </header>
  <div class="sb-card__body">
    <!-- Preset prompts. data-ex holds the full sequence; the visible text may
         be an abbreviation of it (e.g. "CGCG…", "A·16"). -->
    <div class="sb-examples">
      <span class="sb-examples-label">examples</span>
      <button class="sb-ex-btn" data-ex="" type="button">empty<span class="sb-ex-label">unconditional</span></button>
      <button class="sb-ex-btn" data-ex="ATG" type="button">ATG<span class="sb-ex-label">start codon</span></button>
      <button class="sb-ex-btn" data-ex="TATAAA" type="button">TATAAA<span class="sb-ex-label">TATA box</span></button>
      <button class="sb-ex-btn" data-ex="CGCGCGCGCG" type="button">CGCG…<span class="sb-ex-label">CpG island</span></button>
      <button class="sb-ex-btn" data-ex="ATGGCCAAGCTGACCAGCGAGCTGCTG" type="button">ATGGCC…<span class="sb-ex-label">ORF start</span></button>
      <button class="sb-ex-btn" data-ex="AAAAAAAAAAAAAAAA" type="button">A·16<span class="sb-ex-label">poly-A</span></button>
    </div>
    <!-- The textarea has no visible <label>, so aria-label supplies its
         accessible name. -->
    <textarea id="sb-prompt" class="sb-prompt-area" rows="3" spellcheck="false" autocapitalize="characters" aria-label="DNA prompt">AGT</textarea>
    <!-- Controls split into two visual halves: sampling/display params on
         the left, action buttons pinned to the right. The vertical rule
         between them makes the parameter cluster read as one group. -->
    <div class="sb-controls">
      <div class="sb-controls__params">
        <label class="sb-control">max tokens
          <input type="number" id="sb-max-tokens" value="128" min="1" max="2048" step="1">
        </label>
        <label class="sb-control">temperature
          <input type="number" id="sb-temperature" value="1.0" min="0" max="2" step="0.1">
        </label>
        <label class="sb-control">top-p
          <input type="number" id="sb-top-p" value="1.0" min="0" max="1" step="0.05">
        </label>
        <div class="sb-mode-group">color
          <!-- role="group" + aria-label exposes the three buttons as one named
               cluster. NOTE(review): the selected mode is only conveyed by the
               "active" class; consider syncing aria-pressed from script. -->
          <div class="sb-mode-btns" id="sb-mode-btns" role="group" aria-label="Color mode">
            <button class="sb-mode-btn active" data-mode="none" type="button">none</button>
            <button class="sb-mode-btn" data-mode="bases" type="button">bases</button>
            <button class="sb-mode-btn" data-mode="logprob" type="button">logprob</button>
          </div>
        </div>
      </div>
      <div class="sb-controls__actions">
        <button id="sb-clear-btn" class="action" type="button">clear</button>
        <button id="sb-stop-btn" class="action" type="button" disabled>stop</button>
        <button id="sb-generate-btn" class="action primary" type="button">▶ generate</button>
      </div>
    </div>
    <!-- Hidden by setStatus("idle") so the toolbar stays clean until
         something actually happens (connecting / streaming / done).
         role="status" makes it a polite live region so those state changes
         can be announced by assistive technology. -->
    <div class="sb-status is-hidden" id="sb-status" role="status"><span class="dot"></span><span id="sb-status-text">idle</span></div>
  </div>
</section>
<!-- OUTPUT card: streamed sequence + sticky stats sidebar. -->
<section class="sb-card">
  <header class="sb-card__header">
    <span class="sb-card__eyebrow">§ Output</span>
    <h2 class="sb-card__title">Sequence</h2>
    <p class="sb-card__hint">Streams as the model generates · live stats on the right.</p>
  </header>
  <div class="sb-card__body">
    <div class="sb-output-row">
      <!-- Left column: the sequence block with its copy button. -->
      <div class="sb-seq-wrap">
        <!-- Explicit type="button": a bare <button> defaults to submit. -->
        <button id="sb-copy-btn" class="sb-copy-btn" type="button" disabled>copy</button>
        <div class="sb-seq-block empty" id="sb-seq">prompt + generated bases will stream here</div>
      </div>
      <!-- Right column: stat readouts followed by the logprob legend. The
           "0" and "·" values are initial placeholders. -->
      <div>
        <div class="sb-stats" id="sb-stats">
          <div class="sb-stat"><span class="sb-stat-label">prompt</span><span class="sb-stat-value" id="sb-stat-prompt">0<span class="sb-unit">bp</span></span></div>
          <div class="sb-stat"><span class="sb-stat-label">generated</span><span class="sb-stat-value" id="sb-stat-gen">0<span class="sb-unit">bp</span></span></div>
          <div class="sb-stat"><span class="sb-stat-label">tokens</span><span class="sb-stat-value" id="sb-stat-tok">0</span></div>
          <div class="sb-stat"><span class="sb-stat-label">elapsed</span><span class="sb-stat-value" id="sb-stat-time">0.0<span class="sb-unit">s</span></span></div>
          <div class="sb-stat"><span class="sb-stat-label">throughput</span><span class="sb-stat-value" id="sb-stat-rate">0<span class="sb-unit">bp/s</span></span></div>
          <div class="sb-stat"><span class="sb-stat-label">GC content</span><span class="sb-stat-value" id="sb-stat-gc">·</span></div>
          <div class="sb-stat"><span class="sb-stat-label">mean logprob</span><span class="sb-stat-value" id="sb-stat-lp">·</span></div>
          <div class="sb-stat"><span class="sb-stat-label">perplexity</span><span class="sb-stat-value" id="sb-stat-ppl">·</span></div>
        </div>
        <div class="sb-legend" id="sb-legend">
          <div>token logprob</div>
          <div class="sb-legend-bar" id="sb-legend-bar"></div>
          <div class="sb-legend-row"><span id="sb-lp-min">·</span><span id="sb-lp-mid">·</span><span id="sb-lp-max">·</span></div>
          <!-- Empty in the markup; presumably drawn by script. role="img" with
               an aria-label gives the graphic an accessible name instead of
               leaving it as an anonymous SVG. -->
          <svg id="sb-lp-chart" class="sb-lp-chart" preserveAspectRatio="none" role="img" aria-label="Token logprob chart"></svg>
        </div>
      </div>
    </div>
  </div>
</section>
</div>
</div> <!-- /panel-sandbox -->
<!-- ============================================================ -->
<!-- SITE FOOTER · always visible across tabs. -->
<!-- Composition: collaboration block (eyebrow + headline + lede -->
<!-- + 4 partner stamps), then a three-column strip (Carbon -->
<!-- identity / Resources / Sections), then a thin legal hairline -->
<!-- with copyright + license + model spec recap. -->
<!-- ============================================================ -->
<footer class="site-footer" role="contentinfo">
<div class="site-footer__inner">
<!-- 1) Collaboration block: eyebrow + headline + lede, then one linked
     stamp per partner institution. -->
<section class="cb-collab" aria-labelledby="cb-collab-title">
  <div class="cb-collab__head">
    <span class="cb-collab__eyebrow">§ Collaboration</span>
    <h2 id="cb-collab-title" class="cb-collab__title">A joint research effort</h2>
    <p class="cb-collab__lede">
      Carbon was built together by the research teams at
      <em>Hugging Face</em>, the <em>Zhongguancun Academy</em>,
      <em>TIGEM</em> and the <em>Università di Napoli Federico II</em>.
    </p>
  </div>
  <!-- Each <img> uses an aspect-correct width/height pair (height fixed
       at 56, width derived from each logo's natural ratio) to prevent
       CLS while the CSS lets the mark display at its full landscape
       ratio. The .cb-partner__name span is hidden visually because
       each real logo already carries its own wordmark; it stays in
       the DOM as an accessible label for screen readers.
       The logos sit in the page footer, so they are lazy-loaded and
       decoded asynchronously; the explicit dimensions keep the reserved
       space stable while they load. -->
  <ul class="cb-partners">
    <li class="cb-partner">
      <a class="cb-partner__link" href="https://huggingface.co" target="_blank" rel="noopener">
        <span class="cb-partner__mark"><img src="/img/partners/hugging-face.svg" alt="Hugging Face" width="211" height="56" loading="lazy" decoding="async"></span>
        <span class="cb-partner__body">
          <span class="cb-partner__name">Hugging Face</span>
          <span class="cb-partner__sub">open-source AI</span>
        </span>
      </a>
    </li>
    <li class="cb-partner">
      <a class="cb-partner__link" href="https://www.bza.edu.cn/en/" target="_blank" rel="noopener">
        <span class="cb-partner__mark"><img src="/img/partners/zhongguancun.png" alt="Zhongguancun Academy" width="217" height="56" loading="lazy" decoding="async"></span>
        <span class="cb-partner__body">
          <span class="cb-partner__name">Zhongguancun Academy</span>
          <span class="cb-partner__sub">Beijing · China</span>
        </span>
      </a>
    </li>
    <li class="cb-partner">
      <a class="cb-partner__link" href="https://www.tigem.it/" target="_blank" rel="noopener">
        <span class="cb-partner__mark"><img src="/img/partners/tigem.svg" alt="TIGEM, Telethon Institute of Genetics and Medicine" width="80" height="56" loading="lazy" decoding="async"></span>
        <span class="cb-partner__body">
          <span class="cb-partner__name">TIGEM</span>
          <!-- Ampersand written as an entity so it can never be parsed as
               the start of a character reference. -->
          <span class="cb-partner__sub">genetics &amp; medicine</span>
        </span>
      </a>
    </li>
    <li class="cb-partner">
      <a class="cb-partner__link" href="https://www.unina.it/" target="_blank" rel="noopener">
        <span class="cb-partner__mark"><img src="/img/partners/federico-ii.svg" alt="Università degli Studi di Napoli Federico II" width="56" height="56" loading="lazy" decoding="async"></span>
        <span class="cb-partner__body">
          <span class="cb-partner__name">Federico II</span>
          <span class="cb-partner__sub">Napoli · Italy</span>
        </span>
      </a>
    </li>
  </ul>
</section>
<!-- 2) Identity + link columns: brand block, external Resources list,
     in-page Sections list. -->
<div class="site-footer__cols">
  <div class="site-footer__brand">
    <!-- href="#" is intentional here: it navigates to the top of the page.
         The image is decorative (alt=""); the link is named by aria-label. -->
    <a class="logo-card" href="#" aria-label="Carbon, go to top">
      <img class="logo-img" src="/img/logo.svg" alt="" width="44" height="44" loading="lazy" decoding="async">
    </a>
    <div class="site-footer__brand-meta">
      <div class="site-footer__brand-name">CARBON</div>
      <div class="site-footer__brand-path">huggingfacebio/carbon-3b</div>
      <p class="site-footer__brand-lede">
        An autoregressive genomic foundation model — open code, open weights, open data.
      </p>
    </div>
  </div>
  <div class="site-footer__col">
    <h3 class="site-footer__col-title">Resources</h3>
    <ul class="site-footer__list">
      <li><a href="https://huggingface.co/HuggingFaceBio/Carbon-3B" target="_blank" rel="noopener">Model card<span class="arrow" aria-hidden="true">↗</span></a></li>
      <!-- TODO: no tech-report URL exists in this file yet. The previous
           href="#" + target="_blank" opened a duplicate of this page in a
           new tab, so the entry is an inert, disabled-link placeholder
           until a real destination is available. Restore
           href / target="_blank" / rel="noopener" and drop role and
           aria-disabled at that point. -->
      <li><a role="link" aria-disabled="true">Tech report<span class="arrow" aria-hidden="true">↗</span></a></li>
      <li><a href="https://github.com/huggingface/carbon" target="_blank" rel="noopener">GitHub<span class="arrow" aria-hidden="true">↗</span></a></li>
      <li><a href="https://huggingface.co/datasets/HuggingFaceBio/carbon-pretraining-corpus" target="_blank" rel="noopener">Dataset<span class="arrow" aria-hidden="true">↗</span></a></li>
    </ul>
  </div>
  <div class="site-footer__col">
    <h3 class="site-footer__col-title">Sections</h3>
    <!-- Hash links to the page's sections; presumably resolved by the tab
         script rather than by matching element ids (verify in tabs.js). -->
    <ul class="site-footer__list">
      <li><a href="#intro">Intro</a></li>
      <li><a href="#dna-lab">DNA Lab</a></li>
      <li><a href="#recipe">Carbon Recipe</a></li>
      <li><a href="#sandbox">Sandbox</a></li>
    </ul>
  </div>
</div>
<!-- 3) Legal strip: copyright + license link on one side, a one-line model
     spec recap on the other. The .dot spans are separator glyphs. -->
<div class="site-footer__legal">
  <span class="site-footer__copy">
    © 2026 · Carbon <span class="dot">·</span>
    <a href="https://www.apache.org/licenses/LICENSE-2.0" target="_blank" rel="noopener">Apache 2.0</a>
  </span>
  <span class="site-footer__spec">
    393,216 bp context <span class="dot">·</span> 6-mer tokenizer <span class="dot">·</span> 1T train tokens
  </span>
</div>
</div>
</footer>
<!-- Modular JS, served from /assets/js/. These are classic scripts with no
     async/defer, so they execute in document order, and that order matters:
       1. shared/   defines the globals (lerp, logprobRgb, GENES, loadConfig,
                    etc.) that the section IIFEs reference, so it must load
                    first.
       2. sections/ each file ends with its own IIFE, so the order between
                    section files is irrelevant.
       3. tabs.js   runs loadConfig() at the bottom, so it sits last.
     Do not reorder the groups or add async to individual tags. -->
<!-- shared/: must come before everything else. -->
<script src="/assets/js/shared/helpers.js"></script>
<script src="/assets/js/shared/config.js"></script>
<script src="/assets/js/shared/code-snippet.js"></script>
<!-- sections/: independent of one another. -->
<script src="/assets/js/sections/intro.js"></script>
<script src="/assets/js/sections/completion.js"></script>
<script src="/assets/js/sections/vep.js"></script>
<script src="/assets/js/sections/track.js"></script>
<script src="/assets/js/sections/species.js"></script>
<script src="/assets/js/sections/folding.js"></script>
<script src="/assets/js/sections/tokenizer.js"></script>
<script src="/assets/js/sections/loss.js"></script>
<script src="/assets/js/sections/data.js"></script>
<script src="/assets/js/sections/architecture.js"></script>
<script src="/assets/js/sections/longcontext.js"></script>
<script src="/assets/js/sections/results.js"></script>
<script src="/assets/js/sections/efficiency.js"></script>
<script src="/assets/js/sections/sandbox.js"></script>
<script src="/assets/js/sections/umap.js"></script>
<script src="/assets/js/sections/tree.js"></script>
<!-- Page-level scripts; tabs.js stays last. -->
<script src="/assets/js/banner.js"></script>
<script src="/assets/js/tabs.js"></script>
</body>
</html>
|