Upload add_sub_baseline_50K
Browse files- add_sub_baseline_50K/metrics.json +1081 -1081
- add_sub_baseline_50K/model.safetensors +1 -1
- add_sub_baseline_50K/train_config.json +64 -15
add_sub_baseline_50K/metrics.json
CHANGED
|
@@ -315,946 +315,946 @@
|
|
| 315 |
15600
|
| 316 |
],
|
| 317 |
"loss": [
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
1.
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
0.
|
| 343 |
-
0.
|
| 344 |
-
0.
|
| 345 |
-
0.
|
| 346 |
-
0.
|
| 347 |
-
0.
|
| 348 |
-
0.
|
| 349 |
-
0.
|
| 350 |
-
0.
|
| 351 |
-
0.
|
| 352 |
-
0.
|
| 353 |
-
0.
|
| 354 |
-
0.
|
| 355 |
-
0.
|
| 356 |
-
0.
|
| 357 |
-
0.
|
| 358 |
-
0.
|
| 359 |
-
0.
|
| 360 |
-
0.
|
| 361 |
-
0.
|
| 362 |
-
0.
|
| 363 |
-
0.
|
| 364 |
-
0.
|
| 365 |
-
0.
|
| 366 |
-
0.
|
| 367 |
-
0.
|
| 368 |
-
0.
|
| 369 |
-
0.
|
| 370 |
-
0.
|
| 371 |
-
0.
|
| 372 |
-
0.
|
| 373 |
-
0.
|
| 374 |
-
0.
|
| 375 |
-
0.
|
| 376 |
-
0.
|
| 377 |
-
0.
|
| 378 |
-
0.
|
| 379 |
-
0.
|
| 380 |
-
0.
|
| 381 |
-
0.
|
| 382 |
-
0.
|
| 383 |
-
0.
|
| 384 |
-
0.
|
| 385 |
-
0.
|
| 386 |
-
0.
|
| 387 |
-
0.
|
| 388 |
-
0.
|
| 389 |
-
0.
|
| 390 |
-
0.
|
| 391 |
-
0.
|
| 392 |
-
0.
|
| 393 |
-
0.
|
| 394 |
-
0.
|
| 395 |
-
0.
|
| 396 |
-
0.
|
| 397 |
-
0.
|
| 398 |
-
0.
|
| 399 |
-
0.
|
| 400 |
-
0.
|
| 401 |
-
0.
|
| 402 |
-
0.
|
| 403 |
-
0.
|
| 404 |
-
0.
|
| 405 |
-
0.
|
| 406 |
-
0.
|
| 407 |
-
0.
|
| 408 |
-
0.
|
| 409 |
-
0.
|
| 410 |
-
0.
|
| 411 |
-
0.
|
| 412 |
-
0.
|
| 413 |
-
0.
|
| 414 |
-
0.
|
| 415 |
-
0.
|
| 416 |
-
0.
|
| 417 |
-
0.
|
| 418 |
-
0.
|
| 419 |
-
0.
|
| 420 |
-
0.
|
| 421 |
-
0.
|
| 422 |
-
0.
|
| 423 |
-
0.
|
| 424 |
-
0.
|
| 425 |
-
0.
|
| 426 |
-
0.
|
| 427 |
-
0.
|
| 428 |
-
0.
|
| 429 |
-
0.
|
| 430 |
-
0.
|
| 431 |
-
0.
|
| 432 |
-
0.
|
| 433 |
-
0.
|
| 434 |
-
0.
|
| 435 |
-
0.
|
| 436 |
-
0.
|
| 437 |
-
0.
|
| 438 |
-
0.
|
| 439 |
-
0.
|
| 440 |
-
0.
|
| 441 |
-
0.
|
| 442 |
-
0.
|
| 443 |
-
0.
|
| 444 |
-
0.
|
| 445 |
-
0.
|
| 446 |
-
0.
|
| 447 |
-
0.
|
| 448 |
-
0.
|
| 449 |
-
0.
|
| 450 |
-
0.
|
| 451 |
-
0.
|
| 452 |
-
0.
|
| 453 |
-
0.
|
| 454 |
-
0.
|
| 455 |
-
0.
|
| 456 |
-
0.
|
| 457 |
-
0.
|
| 458 |
-
0.
|
| 459 |
-
0.
|
| 460 |
-
0.
|
| 461 |
-
0.
|
| 462 |
-
0.
|
| 463 |
-
0.
|
| 464 |
-
0.
|
| 465 |
-
0.
|
| 466 |
-
0.
|
| 467 |
-
0.
|
| 468 |
-
0.
|
| 469 |
-
0.
|
| 470 |
-
0.
|
| 471 |
-
0.
|
| 472 |
-
0.
|
| 473 |
-
0.
|
| 474 |
-
0.
|
| 475 |
-
0.
|
| 476 |
-
0.
|
| 477 |
-
0.
|
| 478 |
-
0.
|
| 479 |
-
0.
|
| 480 |
-
0.
|
| 481 |
-
0.
|
| 482 |
-
0.
|
| 483 |
-
0.
|
| 484 |
-
0.
|
| 485 |
-
0.
|
| 486 |
-
0.
|
| 487 |
-
0.
|
| 488 |
-
0.
|
| 489 |
-
0.
|
| 490 |
-
0.
|
| 491 |
-
0.
|
| 492 |
-
0.
|
| 493 |
-
0.
|
| 494 |
-
0.
|
| 495 |
-
0.
|
| 496 |
-
0.
|
| 497 |
-
0.
|
| 498 |
-
0.
|
| 499 |
-
0.
|
| 500 |
-
0.
|
| 501 |
-
0.
|
| 502 |
-
0.
|
| 503 |
-
0.
|
| 504 |
-
0.
|
| 505 |
-
0.
|
| 506 |
-
0.
|
| 507 |
-
0.
|
| 508 |
-
0.
|
| 509 |
-
0.
|
| 510 |
-
0.
|
| 511 |
-
0.
|
| 512 |
-
0.
|
| 513 |
-
0.
|
| 514 |
-
0.
|
| 515 |
-
0.
|
| 516 |
-
0.
|
| 517 |
-
0.
|
| 518 |
-
0.
|
| 519 |
-
0.
|
| 520 |
-
0.
|
| 521 |
-
0.
|
| 522 |
-
0.
|
| 523 |
-
0.
|
| 524 |
-
0.
|
| 525 |
-
0.
|
| 526 |
-
0.
|
| 527 |
-
0.
|
| 528 |
-
0.
|
| 529 |
-
0.
|
| 530 |
-
0.
|
| 531 |
-
0.
|
| 532 |
-
0.
|
| 533 |
-
0.
|
| 534 |
-
|
| 535 |
-
0.
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
0.
|
| 539 |
-
0.
|
| 540 |
-
0.
|
| 541 |
-
0.
|
| 542 |
-
0.
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
0.
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
0.
|
| 556 |
-
0.
|
| 557 |
-
|
| 558 |
-
0.
|
| 559 |
-
|
| 560 |
-
0.
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
0.
|
| 573 |
-
0.
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
0.
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
0.
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
0.
|
| 630 |
],
|
| 631 |
"base_loss": [
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
1.
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
0.
|
| 657 |
-
0.
|
| 658 |
-
0.
|
| 659 |
-
0.
|
| 660 |
-
0.
|
| 661 |
-
0.
|
| 662 |
-
0.
|
| 663 |
-
0.
|
| 664 |
-
0.
|
| 665 |
-
0.
|
| 666 |
-
0.
|
| 667 |
-
0.
|
| 668 |
-
0.
|
| 669 |
-
0.
|
| 670 |
-
0.
|
| 671 |
-
0.
|
| 672 |
-
0.
|
| 673 |
-
0.
|
| 674 |
-
0.
|
| 675 |
-
0.
|
| 676 |
-
0.
|
| 677 |
-
0.
|
| 678 |
-
0.
|
| 679 |
-
0.
|
| 680 |
-
0.
|
| 681 |
-
0.
|
| 682 |
-
0.
|
| 683 |
-
0.
|
| 684 |
-
0.
|
| 685 |
-
0.
|
| 686 |
-
0.
|
| 687 |
-
0.
|
| 688 |
-
0.
|
| 689 |
-
0.
|
| 690 |
-
0.
|
| 691 |
-
0.
|
| 692 |
-
0.
|
| 693 |
-
0.
|
| 694 |
-
0.
|
| 695 |
-
0.
|
| 696 |
-
0.
|
| 697 |
-
0.
|
| 698 |
-
0.
|
| 699 |
-
0.
|
| 700 |
-
0.
|
| 701 |
-
0.
|
| 702 |
-
0.
|
| 703 |
-
0.
|
| 704 |
-
0.
|
| 705 |
-
0.
|
| 706 |
-
0.
|
| 707 |
-
0.
|
| 708 |
-
0.
|
| 709 |
-
0.
|
| 710 |
-
0.
|
| 711 |
-
0.
|
| 712 |
-
0.
|
| 713 |
-
0.
|
| 714 |
-
0.
|
| 715 |
-
0.
|
| 716 |
-
0.
|
| 717 |
-
0.
|
| 718 |
-
0.
|
| 719 |
-
0.
|
| 720 |
-
0.
|
| 721 |
-
0.
|
| 722 |
-
0.
|
| 723 |
-
0.
|
| 724 |
-
0.
|
| 725 |
-
0.
|
| 726 |
-
0.
|
| 727 |
-
0.
|
| 728 |
-
0.
|
| 729 |
-
0.
|
| 730 |
-
0.
|
| 731 |
-
0.
|
| 732 |
-
0.
|
| 733 |
-
0.
|
| 734 |
-
0.
|
| 735 |
-
0.
|
| 736 |
-
0.
|
| 737 |
-
0.
|
| 738 |
-
0.
|
| 739 |
-
0.
|
| 740 |
-
0.
|
| 741 |
-
0.
|
| 742 |
-
0.
|
| 743 |
-
0.
|
| 744 |
-
0.
|
| 745 |
-
0.
|
| 746 |
-
0.
|
| 747 |
-
0.
|
| 748 |
-
0.
|
| 749 |
-
0.
|
| 750 |
-
0.
|
| 751 |
-
0.
|
| 752 |
-
0.
|
| 753 |
-
0.
|
| 754 |
-
0.
|
| 755 |
-
0.
|
| 756 |
-
0.
|
| 757 |
-
0.
|
| 758 |
-
0.
|
| 759 |
-
0.
|
| 760 |
-
0.
|
| 761 |
-
0.
|
| 762 |
-
0.
|
| 763 |
-
0.
|
| 764 |
-
0.
|
| 765 |
-
0.
|
| 766 |
-
0.
|
| 767 |
-
0.
|
| 768 |
-
0.
|
| 769 |
-
0.
|
| 770 |
-
0.
|
| 771 |
-
0.
|
| 772 |
-
0.
|
| 773 |
-
0.
|
| 774 |
-
0.
|
| 775 |
-
0.
|
| 776 |
-
0.
|
| 777 |
-
0.
|
| 778 |
-
0.
|
| 779 |
-
0.
|
| 780 |
-
0.
|
| 781 |
-
0.
|
| 782 |
-
0.
|
| 783 |
-
0.
|
| 784 |
-
0.
|
| 785 |
-
0.
|
| 786 |
-
0.
|
| 787 |
-
0.
|
| 788 |
-
0.
|
| 789 |
-
0.
|
| 790 |
-
0.
|
| 791 |
-
0.
|
| 792 |
-
0.
|
| 793 |
-
0.
|
| 794 |
-
0.
|
| 795 |
-
0.
|
| 796 |
-
0.
|
| 797 |
-
0.
|
| 798 |
-
0.
|
| 799 |
-
0.
|
| 800 |
-
0.
|
| 801 |
-
0.
|
| 802 |
-
0.
|
| 803 |
-
0.
|
| 804 |
-
0.
|
| 805 |
-
0.
|
| 806 |
-
0.
|
| 807 |
-
0.
|
| 808 |
-
0.
|
| 809 |
-
0.
|
| 810 |
-
0.
|
| 811 |
-
0.
|
| 812 |
-
0.
|
| 813 |
-
0.
|
| 814 |
-
0.
|
| 815 |
-
0.
|
| 816 |
-
0.
|
| 817 |
-
0.
|
| 818 |
-
0.
|
| 819 |
-
0.
|
| 820 |
-
0.
|
| 821 |
-
0.
|
| 822 |
-
0.
|
| 823 |
-
0.
|
| 824 |
-
0.
|
| 825 |
-
0.
|
| 826 |
-
0.
|
| 827 |
-
0.
|
| 828 |
-
0.
|
| 829 |
-
0.
|
| 830 |
-
0.
|
| 831 |
-
0.
|
| 832 |
-
0.
|
| 833 |
-
0.
|
| 834 |
-
0.
|
| 835 |
-
0.
|
| 836 |
-
0.
|
| 837 |
-
0.
|
| 838 |
-
0.
|
| 839 |
-
0.
|
| 840 |
-
0.
|
| 841 |
-
0.
|
| 842 |
-
0.
|
| 843 |
-
0.
|
| 844 |
-
0.
|
| 845 |
-
0.
|
| 846 |
-
0.
|
| 847 |
-
0.
|
| 848 |
-
|
| 849 |
-
0.
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
0.
|
| 853 |
-
0.
|
| 854 |
-
0.
|
| 855 |
-
0.
|
| 856 |
-
0.
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
0.
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
0.
|
| 870 |
-
0.
|
| 871 |
-
|
| 872 |
-
0.
|
| 873 |
-
|
| 874 |
-
0.
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
0.
|
| 887 |
-
0.
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
0.
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
0.
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
0.
|
| 944 |
],
|
| 945 |
"lr": [
|
| 946 |
-
|
| 947 |
-
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
3.
|
| 970 |
-
3.
|
| 971 |
-
3.
|
| 972 |
-
3.
|
| 973 |
-
3.
|
| 974 |
-
3.
|
| 975 |
-
3.
|
| 976 |
-
3.
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
-
|
| 1077 |
-
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
|
| 1107 |
-
|
| 1108 |
-
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
-
|
| 1219 |
-
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
-
|
| 1226 |
-
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
],
|
| 1259 |
"eval_step": [
|
| 1260 |
781,
|
|
@@ -1301,189 +1301,189 @@
|
|
| 1301 |
20
|
| 1302 |
],
|
| 1303 |
"eval_accuracy": [
|
| 1304 |
-
0.
|
| 1305 |
-
0.
|
| 1306 |
-
0.
|
| 1307 |
-
0.
|
| 1308 |
-
0.
|
| 1309 |
-
0.
|
| 1310 |
-
0.
|
| 1311 |
-
0.
|
| 1312 |
-
0.
|
| 1313 |
-
0.
|
| 1314 |
-
0.
|
| 1315 |
-
0.
|
| 1316 |
-
0.
|
| 1317 |
-
0.
|
| 1318 |
-
0.
|
| 1319 |
-
0.
|
| 1320 |
-
|
| 1321 |
-
|
| 1322 |
-
|
| 1323 |
-
|
| 1324 |
]
|
| 1325 |
},
|
| 1326 |
-
"final_accuracy":
|
| 1327 |
"sft_eval": {
|
| 1328 |
"config": {
|
| 1329 |
"ops": "add_sub",
|
| 1330 |
"K": null,
|
| 1331 |
"mode": "sft",
|
| 1332 |
"n_digits": 6,
|
| 1333 |
-
"n_per_split":
|
| 1334 |
},
|
| 1335 |
"splits": {
|
| 1336 |
"add_S0": {
|
| 1337 |
"full_accuracy": 1.0,
|
| 1338 |
-
"n_examples":
|
| 1339 |
"per_subtask": {
|
| 1340 |
"SA": {
|
| 1341 |
"accuracy": 1.0,
|
| 1342 |
-
"count":
|
| 1343 |
},
|
| 1344 |
"SS": {
|
| 1345 |
"accuracy": 1.0,
|
| 1346 |
-
"count":
|
| 1347 |
}
|
| 1348 |
}
|
| 1349 |
},
|
| 1350 |
"add_S1": {
|
| 1351 |
"full_accuracy": 1.0,
|
| 1352 |
-
"n_examples":
|
| 1353 |
"per_subtask": {
|
| 1354 |
"SA": {
|
| 1355 |
"accuracy": 1.0,
|
| 1356 |
-
"count":
|
| 1357 |
},
|
| 1358 |
"SC": {
|
| 1359 |
"accuracy": 1.0,
|
| 1360 |
-
"count":
|
| 1361 |
},
|
| 1362 |
"SS": {
|
| 1363 |
"accuracy": 1.0,
|
| 1364 |
-
"count":
|
| 1365 |
},
|
| 1366 |
"UC": {
|
| 1367 |
"accuracy": 1.0,
|
| 1368 |
-
"count":
|
| 1369 |
}
|
| 1370 |
}
|
| 1371 |
},
|
| 1372 |
"add_S2": {
|
| 1373 |
"full_accuracy": 1.0,
|
| 1374 |
-
"n_examples":
|
| 1375 |
"per_subtask": {
|
| 1376 |
"SA": {
|
| 1377 |
"accuracy": 1.0,
|
| 1378 |
-
"count":
|
| 1379 |
},
|
| 1380 |
"SC": {
|
| 1381 |
"accuracy": 1.0,
|
| 1382 |
-
"count":
|
| 1383 |
},
|
| 1384 |
"SS": {
|
| 1385 |
"accuracy": 1.0,
|
| 1386 |
-
"count":
|
| 1387 |
},
|
| 1388 |
"UC": {
|
| 1389 |
"accuracy": 1.0,
|
| 1390 |
-
"count":
|
| 1391 |
},
|
| 1392 |
"US": {
|
| 1393 |
"accuracy": 1.0,
|
| 1394 |
-
"count":
|
| 1395 |
}
|
| 1396 |
}
|
| 1397 |
},
|
| 1398 |
"add_S3": {
|
| 1399 |
"full_accuracy": 1.0,
|
| 1400 |
-
"n_examples":
|
| 1401 |
"per_subtask": {
|
| 1402 |
"SA": {
|
| 1403 |
"accuracy": 1.0,
|
| 1404 |
-
"count":
|
| 1405 |
},
|
| 1406 |
"SC": {
|
| 1407 |
"accuracy": 1.0,
|
| 1408 |
-
"count":
|
| 1409 |
},
|
| 1410 |
"SS": {
|
| 1411 |
"accuracy": 1.0,
|
| 1412 |
-
"count":
|
| 1413 |
},
|
| 1414 |
"UC": {
|
| 1415 |
"accuracy": 1.0,
|
| 1416 |
-
"count":
|
| 1417 |
},
|
| 1418 |
"US": {
|
| 1419 |
"accuracy": 1.0,
|
| 1420 |
-
"count":
|
| 1421 |
}
|
| 1422 |
}
|
| 1423 |
},
|
| 1424 |
"add_S4": {
|
| 1425 |
"full_accuracy": 1.0,
|
| 1426 |
-
"n_examples":
|
| 1427 |
"per_subtask": {
|
| 1428 |
"SA": {
|
| 1429 |
"accuracy": 1.0,
|
| 1430 |
-
"count":
|
| 1431 |
},
|
| 1432 |
"SC": {
|
| 1433 |
"accuracy": 1.0,
|
| 1434 |
-
"count":
|
| 1435 |
},
|
| 1436 |
"SS": {
|
| 1437 |
"accuracy": 1.0,
|
| 1438 |
-
"count":
|
| 1439 |
},
|
| 1440 |
"UC": {
|
| 1441 |
"accuracy": 1.0,
|
| 1442 |
-
"count":
|
| 1443 |
},
|
| 1444 |
"US": {
|
| 1445 |
"accuracy": 1.0,
|
| 1446 |
-
"count":
|
| 1447 |
}
|
| 1448 |
}
|
| 1449 |
},
|
| 1450 |
"add_S5": {
|
| 1451 |
-
"full_accuracy":
|
| 1452 |
-
"n_examples":
|
| 1453 |
"per_subtask": {
|
| 1454 |
"SA": {
|
| 1455 |
"accuracy": 1.0,
|
| 1456 |
-
"count":
|
| 1457 |
},
|
| 1458 |
"SC": {
|
| 1459 |
"accuracy": 1.0,
|
| 1460 |
-
"count":
|
| 1461 |
},
|
| 1462 |
"UC": {
|
| 1463 |
-
"accuracy":
|
| 1464 |
-
"count":
|
| 1465 |
},
|
| 1466 |
"US": {
|
| 1467 |
-
"accuracy":
|
| 1468 |
-
"count":
|
| 1469 |
}
|
| 1470 |
}
|
| 1471 |
},
|
| 1472 |
"add_S6": {
|
| 1473 |
-
"full_accuracy":
|
| 1474 |
-
"n_examples":
|
| 1475 |
"per_subtask": {
|
| 1476 |
"SC": {
|
| 1477 |
"accuracy": 1.0,
|
| 1478 |
-
"count":
|
| 1479 |
},
|
| 1480 |
"UC": {
|
| 1481 |
-
"accuracy":
|
| 1482 |
-
"count":
|
| 1483 |
},
|
| 1484 |
"US": {
|
| 1485 |
-
"accuracy":
|
| 1486 |
-
"count":
|
| 1487 |
}
|
| 1488 |
}
|
| 1489 |
},
|
|
@@ -1493,239 +1493,239 @@
|
|
| 1493 |
"per_subtask": {
|
| 1494 |
"SA": {
|
| 1495 |
"accuracy": 1.0,
|
| 1496 |
-
"count":
|
| 1497 |
},
|
| 1498 |
"SC": {
|
| 1499 |
"accuracy": 1.0,
|
| 1500 |
-
"count":
|
| 1501 |
},
|
| 1502 |
"SS": {
|
| 1503 |
"accuracy": 1.0,
|
| 1504 |
-
"count":
|
| 1505 |
},
|
| 1506 |
"UC": {
|
| 1507 |
"accuracy": 1.0,
|
| 1508 |
-
"count":
|
| 1509 |
},
|
| 1510 |
"US": {
|
| 1511 |
"accuracy": 1.0,
|
| 1512 |
-
"count":
|
| 1513 |
}
|
| 1514 |
}
|
| 1515 |
},
|
| 1516 |
"add_C3": {
|
| 1517 |
"full_accuracy": 1.0,
|
| 1518 |
-
"n_examples":
|
| 1519 |
"per_subtask": {
|
| 1520 |
"SA": {
|
| 1521 |
"accuracy": 1.0,
|
| 1522 |
-
"count":
|
| 1523 |
},
|
| 1524 |
"SC": {
|
| 1525 |
"accuracy": 1.0,
|
| 1526 |
-
"count":
|
| 1527 |
},
|
| 1528 |
"UC": {
|
| 1529 |
"accuracy": 1.0,
|
| 1530 |
-
"count":
|
| 1531 |
},
|
| 1532 |
"US": {
|
| 1533 |
"accuracy": 1.0,
|
| 1534 |
-
"count":
|
| 1535 |
}
|
| 1536 |
}
|
| 1537 |
},
|
| 1538 |
"add_C4": {
|
| 1539 |
"full_accuracy": 1.0,
|
| 1540 |
-
"n_examples":
|
| 1541 |
"per_subtask": {
|
| 1542 |
"SA": {
|
| 1543 |
"accuracy": 1.0,
|
| 1544 |
-
"count":
|
| 1545 |
},
|
| 1546 |
"SC": {
|
| 1547 |
"accuracy": 1.0,
|
| 1548 |
-
"count":
|
| 1549 |
},
|
| 1550 |
"UC": {
|
| 1551 |
"accuracy": 1.0,
|
| 1552 |
-
"count":
|
| 1553 |
},
|
| 1554 |
"US": {
|
| 1555 |
"accuracy": 1.0,
|
| 1556 |
-
"count":
|
| 1557 |
}
|
| 1558 |
}
|
| 1559 |
},
|
| 1560 |
"add_C5": {
|
| 1561 |
-
"full_accuracy":
|
| 1562 |
-
"n_examples":
|
| 1563 |
"per_subtask": {
|
| 1564 |
"SA": {
|
| 1565 |
"accuracy": 1.0,
|
| 1566 |
-
"count":
|
| 1567 |
},
|
| 1568 |
"SC": {
|
| 1569 |
"accuracy": 1.0,
|
| 1570 |
-
"count":
|
| 1571 |
},
|
| 1572 |
"UC": {
|
| 1573 |
-
"accuracy":
|
| 1574 |
-
"count":
|
| 1575 |
},
|
| 1576 |
"US": {
|
| 1577 |
"accuracy": 1.0,
|
| 1578 |
-
"count":
|
| 1579 |
}
|
| 1580 |
}
|
| 1581 |
},
|
| 1582 |
"add_C6": {
|
| 1583 |
"full_accuracy": 1.0,
|
| 1584 |
-
"n_examples":
|
| 1585 |
"per_subtask": {
|
| 1586 |
"SC": {
|
| 1587 |
"accuracy": 1.0,
|
| 1588 |
-
"count":
|
| 1589 |
},
|
| 1590 |
"UC": {
|
| 1591 |
"accuracy": 1.0,
|
| 1592 |
-
"count":
|
| 1593 |
},
|
| 1594 |
"US": {
|
| 1595 |
"accuracy": 1.0,
|
| 1596 |
-
"count":
|
| 1597 |
}
|
| 1598 |
}
|
| 1599 |
},
|
| 1600 |
"sub_M0": {
|
| 1601 |
"full_accuracy": 1.0,
|
| 1602 |
-
"n_examples":
|
| 1603 |
"per_subtask": {
|
| 1604 |
"MD": {
|
| 1605 |
"accuracy": 1.0,
|
| 1606 |
-
"count":
|
| 1607 |
},
|
| 1608 |
"ME": {
|
| 1609 |
"accuracy": 1.0,
|
| 1610 |
-
"count":
|
| 1611 |
}
|
| 1612 |
}
|
| 1613 |
},
|
| 1614 |
"sub_M1": {
|
| 1615 |
"full_accuracy": 1.0,
|
| 1616 |
-
"n_examples":
|
| 1617 |
"per_subtask": {
|
| 1618 |
"MD": {
|
| 1619 |
"accuracy": 1.0,
|
| 1620 |
-
"count":
|
| 1621 |
},
|
| 1622 |
"MB": {
|
| 1623 |
"accuracy": 1.0,
|
| 1624 |
-
"count":
|
| 1625 |
},
|
| 1626 |
"ME": {
|
| 1627 |
"accuracy": 1.0,
|
| 1628 |
-
"count":
|
| 1629 |
},
|
| 1630 |
"UB": {
|
| 1631 |
"accuracy": 1.0,
|
| 1632 |
-
"count":
|
| 1633 |
}
|
| 1634 |
}
|
| 1635 |
},
|
| 1636 |
"sub_M2": {
|
| 1637 |
"full_accuracy": 1.0,
|
| 1638 |
-
"n_examples":
|
| 1639 |
"per_subtask": {
|
| 1640 |
"MD": {
|
| 1641 |
"accuracy": 1.0,
|
| 1642 |
-
"count":
|
| 1643 |
},
|
| 1644 |
"MB": {
|
| 1645 |
"accuracy": 1.0,
|
| 1646 |
-
"count":
|
| 1647 |
},
|
| 1648 |
"ME": {
|
| 1649 |
"accuracy": 1.0,
|
| 1650 |
-
"count":
|
| 1651 |
},
|
| 1652 |
"UB": {
|
| 1653 |
"accuracy": 1.0,
|
| 1654 |
-
"count":
|
| 1655 |
},
|
| 1656 |
"UD": {
|
| 1657 |
"accuracy": 1.0,
|
| 1658 |
-
"count":
|
| 1659 |
}
|
| 1660 |
}
|
| 1661 |
},
|
| 1662 |
"sub_M3": {
|
| 1663 |
"full_accuracy": 1.0,
|
| 1664 |
-
"n_examples":
|
| 1665 |
"per_subtask": {
|
| 1666 |
"MD": {
|
| 1667 |
"accuracy": 1.0,
|
| 1668 |
-
"count":
|
| 1669 |
},
|
| 1670 |
"MB": {
|
| 1671 |
"accuracy": 1.0,
|
| 1672 |
-
"count":
|
| 1673 |
},
|
| 1674 |
"ME": {
|
| 1675 |
"accuracy": 1.0,
|
| 1676 |
-
"count":
|
| 1677 |
},
|
| 1678 |
"UB": {
|
| 1679 |
"accuracy": 1.0,
|
| 1680 |
-
"count":
|
| 1681 |
},
|
| 1682 |
"UD": {
|
| 1683 |
"accuracy": 1.0,
|
| 1684 |
-
"count":
|
| 1685 |
}
|
| 1686 |
}
|
| 1687 |
},
|
| 1688 |
"sub_M4": {
|
| 1689 |
-
"full_accuracy":
|
| 1690 |
-
"n_examples":
|
| 1691 |
"per_subtask": {
|
| 1692 |
"MD": {
|
| 1693 |
"accuracy": 1.0,
|
| 1694 |
-
"count":
|
| 1695 |
},
|
| 1696 |
"MB": {
|
| 1697 |
"accuracy": 1.0,
|
| 1698 |
-
"count":
|
| 1699 |
},
|
| 1700 |
"UB": {
|
| 1701 |
-
"accuracy":
|
| 1702 |
-
"count":
|
| 1703 |
},
|
| 1704 |
"UD": {
|
| 1705 |
"accuracy": 1.0,
|
| 1706 |
-
"count":
|
| 1707 |
}
|
| 1708 |
}
|
| 1709 |
},
|
| 1710 |
"sub_M5": {
|
| 1711 |
-
"full_accuracy":
|
| 1712 |
-
"n_examples":
|
| 1713 |
"per_subtask": {
|
| 1714 |
"MD": {
|
| 1715 |
"accuracy": 1.0,
|
| 1716 |
-
"count":
|
| 1717 |
},
|
| 1718 |
"MB": {
|
| 1719 |
"accuracy": 1.0,
|
| 1720 |
-
"count":
|
| 1721 |
},
|
| 1722 |
"UB": {
|
| 1723 |
"accuracy": 1.0,
|
| 1724 |
-
"count":
|
| 1725 |
},
|
| 1726 |
"UD": {
|
| 1727 |
-
"accuracy":
|
| 1728 |
-
"count":
|
| 1729 |
}
|
| 1730 |
}
|
| 1731 |
},
|
|
@@ -1735,96 +1735,96 @@
|
|
| 1735 |
"per_subtask": {
|
| 1736 |
"MD": {
|
| 1737 |
"accuracy": 1.0,
|
| 1738 |
-
"count":
|
| 1739 |
},
|
| 1740 |
"MB": {
|
| 1741 |
"accuracy": 1.0,
|
| 1742 |
-
"count":
|
| 1743 |
},
|
| 1744 |
"ME": {
|
| 1745 |
"accuracy": 1.0,
|
| 1746 |
-
"count":
|
| 1747 |
},
|
| 1748 |
"UB": {
|
| 1749 |
"accuracy": 1.0,
|
| 1750 |
-
"count":
|
| 1751 |
},
|
| 1752 |
"UD": {
|
| 1753 |
"accuracy": 1.0,
|
| 1754 |
-
"count":
|
| 1755 |
}
|
| 1756 |
}
|
| 1757 |
},
|
| 1758 |
"sub_B3": {
|
| 1759 |
"full_accuracy": 1.0,
|
| 1760 |
-
"n_examples":
|
| 1761 |
"per_subtask": {
|
| 1762 |
"MD": {
|
| 1763 |
"accuracy": 1.0,
|
| 1764 |
-
"count":
|
| 1765 |
},
|
| 1766 |
"MB": {
|
| 1767 |
"accuracy": 1.0,
|
| 1768 |
-
"count":
|
| 1769 |
},
|
| 1770 |
"UB": {
|
| 1771 |
"accuracy": 1.0,
|
| 1772 |
-
"count":
|
| 1773 |
},
|
| 1774 |
"UD": {
|
| 1775 |
"accuracy": 1.0,
|
| 1776 |
-
"count":
|
| 1777 |
}
|
| 1778 |
}
|
| 1779 |
},
|
| 1780 |
"sub_B4": {
|
| 1781 |
"full_accuracy": 1.0,
|
| 1782 |
-
"n_examples":
|
| 1783 |
"per_subtask": {
|
| 1784 |
"MD": {
|
| 1785 |
"accuracy": 1.0,
|
| 1786 |
-
"count":
|
| 1787 |
},
|
| 1788 |
"MB": {
|
| 1789 |
"accuracy": 1.0,
|
| 1790 |
-
"count":
|
| 1791 |
},
|
| 1792 |
"UB": {
|
| 1793 |
"accuracy": 1.0,
|
| 1794 |
-
"count":
|
| 1795 |
},
|
| 1796 |
"UD": {
|
| 1797 |
"accuracy": 1.0,
|
| 1798 |
-
"count":
|
| 1799 |
}
|
| 1800 |
}
|
| 1801 |
},
|
| 1802 |
"sub_B5": {
|
| 1803 |
"full_accuracy": 1.0,
|
| 1804 |
-
"n_examples":
|
| 1805 |
"per_subtask": {
|
| 1806 |
"MD": {
|
| 1807 |
"accuracy": 1.0,
|
| 1808 |
-
"count":
|
| 1809 |
},
|
| 1810 |
"MB": {
|
| 1811 |
"accuracy": 1.0,
|
| 1812 |
-
"count":
|
| 1813 |
},
|
| 1814 |
"UB": {
|
| 1815 |
"accuracy": 1.0,
|
| 1816 |
-
"count":
|
| 1817 |
},
|
| 1818 |
"UD": {
|
| 1819 |
"accuracy": 1.0,
|
| 1820 |
-
"count":
|
| 1821 |
}
|
| 1822 |
}
|
| 1823 |
}
|
| 1824 |
},
|
| 1825 |
"summary": {
|
| 1826 |
-
"overall_accuracy":
|
| 1827 |
-
"total_examples":
|
| 1828 |
"n_splits": 22
|
| 1829 |
}
|
| 1830 |
}
|
|
|
|
| 315 |
15600
|
| 316 |
],
|
| 317 |
"loss": [
|
| 318 |
+
10.58569622039795,
|
| 319 |
+
7.936840534210205,
|
| 320 |
+
7.2051615715026855,
|
| 321 |
+
6.292751789093018,
|
| 322 |
+
5.380630016326904,
|
| 323 |
+
4.158393383026123,
|
| 324 |
+
2.91589093208313,
|
| 325 |
+
2.2648446559906006,
|
| 326 |
+
1.9414230585098267,
|
| 327 |
+
1.8107651472091675,
|
| 328 |
+
1.800335168838501,
|
| 329 |
+
1.7123852968215942,
|
| 330 |
+
1.710773229598999,
|
| 331 |
+
1.6536633968353271,
|
| 332 |
+
1.5563157796859741,
|
| 333 |
+
1.4610322713851929,
|
| 334 |
+
1.153622031211853,
|
| 335 |
+
0.8404688835144043,
|
| 336 |
+
0.6217839121818542,
|
| 337 |
+
0.5448878407478333,
|
| 338 |
+
0.4241843819618225,
|
| 339 |
+
0.34047311544418335,
|
| 340 |
+
0.2867976725101471,
|
| 341 |
+
0.2169194221496582,
|
| 342 |
+
0.13546958565711975,
|
| 343 |
+
0.12438952922821045,
|
| 344 |
+
0.11186210811138153,
|
| 345 |
+
0.0944901555776596,
|
| 346 |
+
0.1039363369345665,
|
| 347 |
+
0.08501303195953369,
|
| 348 |
+
0.11829841136932373,
|
| 349 |
+
0.04217684641480446,
|
| 350 |
+
0.09959305822849274,
|
| 351 |
+
0.030621467158198357,
|
| 352 |
+
0.047111596912145615,
|
| 353 |
+
0.06351909786462784,
|
| 354 |
+
0.050098877400159836,
|
| 355 |
+
0.03927459940314293,
|
| 356 |
+
0.0296621173620224,
|
| 357 |
+
0.05172724649310112,
|
| 358 |
+
0.025398079305887222,
|
| 359 |
+
0.0191025547683239,
|
| 360 |
+
0.02552829682826996,
|
| 361 |
+
0.014332198537886143,
|
| 362 |
+
0.020101871341466904,
|
| 363 |
+
0.016612058505415916,
|
| 364 |
+
0.02385997399687767,
|
| 365 |
+
0.008157567121088505,
|
| 366 |
+
0.026038693264126778,
|
| 367 |
+
0.01184095162898302,
|
| 368 |
+
0.02082441933453083,
|
| 369 |
+
0.038108620792627335,
|
| 370 |
+
0.031920164823532104,
|
| 371 |
+
0.023155227303504944,
|
| 372 |
+
0.029825836420059204,
|
| 373 |
+
0.009044607169926167,
|
| 374 |
+
0.027161160483956337,
|
| 375 |
+
0.007930578663945198,
|
| 376 |
+
0.021433621644973755,
|
| 377 |
+
0.005527331493794918,
|
| 378 |
+
0.007499660365283489,
|
| 379 |
+
0.017420727759599686,
|
| 380 |
+
0.014103187248110771,
|
| 381 |
+
0.011276716366410255,
|
| 382 |
+
0.012983249500393867,
|
| 383 |
+
0.030153032392263412,
|
| 384 |
+
0.008853526785969734,
|
| 385 |
+
0.03762682154774666,
|
| 386 |
+
0.01117522269487381,
|
| 387 |
+
0.023195743560791016,
|
| 388 |
+
0.013249148614704609,
|
| 389 |
+
0.014523004181683064,
|
| 390 |
+
0.0055778538808226585,
|
| 391 |
+
0.020841525867581367,
|
| 392 |
+
0.014729819260537624,
|
| 393 |
+
0.013070695102214813,
|
| 394 |
+
0.00855342485010624,
|
| 395 |
+
0.011591176502406597,
|
| 396 |
+
0.002833534497767687,
|
| 397 |
+
0.025547100231051445,
|
| 398 |
+
0.012554551474750042,
|
| 399 |
+
0.009271000511944294,
|
| 400 |
+
0.01295425184071064,
|
| 401 |
+
0.010510939173400402,
|
| 402 |
+
0.007462501060217619,
|
| 403 |
+
0.014900215901434422,
|
| 404 |
+
0.014310033991932869,
|
| 405 |
+
0.008596707135438919,
|
| 406 |
+
0.001958919456228614,
|
| 407 |
+
0.0046486821956932545,
|
| 408 |
+
0.018444955348968506,
|
| 409 |
+
0.008556962944567204,
|
| 410 |
+
0.004552661441266537,
|
| 411 |
+
0.027067942544817924,
|
| 412 |
+
0.00958697684109211,
|
| 413 |
+
0.009366557002067566,
|
| 414 |
+
0.004811967723071575,
|
| 415 |
+
0.013344655744731426,
|
| 416 |
+
0.008742153644561768,
|
| 417 |
+
0.008907586336135864,
|
| 418 |
+
0.007083355449140072,
|
| 419 |
+
0.015731360763311386,
|
| 420 |
+
0.00568253081291914,
|
| 421 |
+
0.001037583569996059,
|
| 422 |
+
0.0031601584050804377,
|
| 423 |
+
0.0036910015624016523,
|
| 424 |
+
0.006643303204327822,
|
| 425 |
+
0.020221156999468803,
|
| 426 |
+
0.011893088929355145,
|
| 427 |
+
0.005519048310816288,
|
| 428 |
+
0.023880789056420326,
|
| 429 |
+
0.01644163765013218,
|
| 430 |
+
0.026987669989466667,
|
| 431 |
+
0.0011081791017204523,
|
| 432 |
+
0.0033056442625820637,
|
| 433 |
+
0.000858158920891583,
|
| 434 |
+
0.009378315880894661,
|
| 435 |
+
0.005004131700843573,
|
| 436 |
+
0.002976806601509452,
|
| 437 |
+
0.007938246242702007,
|
| 438 |
+
0.00408149091526866,
|
| 439 |
+
0.010652265511453152,
|
| 440 |
+
0.006073917727917433,
|
| 441 |
+
0.018400464206933975,
|
| 442 |
+
0.0039037999231368303,
|
| 443 |
+
0.01426150742918253,
|
| 444 |
+
0.003052217187359929,
|
| 445 |
+
0.016827749088406563,
|
| 446 |
+
0.0037939571775496006,
|
| 447 |
+
0.000831973273307085,
|
| 448 |
+
0.002118675271049142,
|
| 449 |
+
0.004233058542013168,
|
| 450 |
+
0.0016138197388499975,
|
| 451 |
+
0.010185384191572666,
|
| 452 |
+
0.0007464668597094715,
|
| 453 |
+
0.00029250283841975033,
|
| 454 |
+
0.01994812674820423,
|
| 455 |
+
0.0005010344320908189,
|
| 456 |
+
0.006236571352928877,
|
| 457 |
+
0.002717872615903616,
|
| 458 |
+
0.002377332653850317,
|
| 459 |
+
0.007453155238181353,
|
| 460 |
+
0.0005609996733255684,
|
| 461 |
+
0.0011483365669846535,
|
| 462 |
+
0.00160598277579993,
|
| 463 |
+
0.0012431765208020806,
|
| 464 |
+
0.000852460041642189,
|
| 465 |
+
0.0023972985800355673,
|
| 466 |
+
0.0035680646542459726,
|
| 467 |
+
0.004298224579542875,
|
| 468 |
+
0.0015388120664283633,
|
| 469 |
+
0.0022699700202792883,
|
| 470 |
+
0.0015617648605257273,
|
| 471 |
+
0.0017532998463138938,
|
| 472 |
+
0.0030310507863759995,
|
| 473 |
+
0.002370015950873494,
|
| 474 |
+
0.0003038027498405427,
|
| 475 |
+
0.0020328103564679623,
|
| 476 |
+
0.0005833978648297489,
|
| 477 |
+
0.0011084805009886622,
|
| 478 |
+
0.0037475605495274067,
|
| 479 |
+
0.001984222559258342,
|
| 480 |
+
0.0002572322264313698,
|
| 481 |
+
0.012740428559482098,
|
| 482 |
+
0.013420491479337215,
|
| 483 |
+
0.0002775133471004665,
|
| 484 |
+
0.0007172977202571929,
|
| 485 |
+
0.0003056035202462226,
|
| 486 |
+
0.006681203842163086,
|
| 487 |
+
0.0008299726760014892,
|
| 488 |
+
0.002304975176230073,
|
| 489 |
+
0.004181632772088051,
|
| 490 |
+
0.00016523328667972237,
|
| 491 |
+
0.00445162458345294,
|
| 492 |
+
0.0003404470335226506,
|
| 493 |
+
0.00019811275706160814,
|
| 494 |
+
0.00017772385035641491,
|
| 495 |
+
0.00016012518608476967,
|
| 496 |
+
0.0027634704019874334,
|
| 497 |
+
0.00020777643658220768,
|
| 498 |
+
0.014718319289386272,
|
| 499 |
+
0.000357446086127311,
|
| 500 |
+
0.002793132560327649,
|
| 501 |
+
0.001239171950146556,
|
| 502 |
+
0.0007129187579266727,
|
| 503 |
+
0.0010272933868691325,
|
| 504 |
+
0.00018213230941910297,
|
| 505 |
+
0.000530033721588552,
|
| 506 |
+
0.0005569524364545941,
|
| 507 |
+
0.003903051372617483,
|
| 508 |
+
0.0002563659509178251,
|
| 509 |
+
0.00018147526134271175,
|
| 510 |
+
0.0007504919194616377,
|
| 511 |
+
0.0001638331450521946,
|
| 512 |
+
0.0011349172564223409,
|
| 513 |
+
0.0035133049823343754,
|
| 514 |
+
0.0009539271704852581,
|
| 515 |
+
0.002135826274752617,
|
| 516 |
+
0.0004690276109613478,
|
| 517 |
+
0.005972139071673155,
|
| 518 |
+
0.00011733471183106303,
|
| 519 |
+
0.001328730140812695,
|
| 520 |
+
0.0010840508621186018,
|
| 521 |
+
0.0002192206884501502,
|
| 522 |
+
0.00018484889005776495,
|
| 523 |
+
0.0002260169858345762,
|
| 524 |
+
0.0002009521413128823,
|
| 525 |
+
0.00014640075096394867,
|
| 526 |
+
0.005435463972389698,
|
| 527 |
+
0.00014929195458535105,
|
| 528 |
+
0.00013214684440754354,
|
| 529 |
+
0.003676820080727339,
|
| 530 |
+
0.00011442940012784675,
|
| 531 |
+
0.000258677318925038,
|
| 532 |
+
0.0004960866062901914,
|
| 533 |
+
0.0001674027444096282,
|
| 534 |
+
8.12130092526786e-05,
|
| 535 |
+
0.0003417516709305346,
|
| 536 |
+
6.978169403737411e-05,
|
| 537 |
+
8.650257223052904e-05,
|
| 538 |
+
0.0002306181559106335,
|
| 539 |
+
0.00016351799422409385,
|
| 540 |
+
0.009652246721088886,
|
| 541 |
+
0.0026225128676742315,
|
| 542 |
+
0.00016337752458639443,
|
| 543 |
+
8.328648254973814e-05,
|
| 544 |
+
7.744420872768387e-05,
|
| 545 |
+
9.118799061980098e-05,
|
| 546 |
+
6.463653699029237e-05,
|
| 547 |
+
7.599593664053828e-05,
|
| 548 |
+
8.013672049855813e-05,
|
| 549 |
+
7.97944376245141e-05,
|
| 550 |
+
0.00010155878408113495,
|
| 551 |
+
6.792811473133042e-05,
|
| 552 |
+
6.025990660418756e-05,
|
| 553 |
+
7.2588307375554e-05,
|
| 554 |
+
6.611739081563428e-05,
|
| 555 |
+
0.0003789363254327327,
|
| 556 |
+
0.0001938583591254428,
|
| 557 |
+
6.89834268996492e-05,
|
| 558 |
+
0.00017879356164485216,
|
| 559 |
+
5.964725278317928e-05,
|
| 560 |
+
0.00018611035193316638,
|
| 561 |
+
6.1028571508359164e-05,
|
| 562 |
+
6.326786387944594e-05,
|
| 563 |
+
5.722946298192255e-05,
|
| 564 |
+
6.754681089660153e-05,
|
| 565 |
+
5.179006984690204e-05,
|
| 566 |
+
6.45291293039918e-05,
|
| 567 |
+
6.92441753926687e-05,
|
| 568 |
+
5.522249921341427e-05,
|
| 569 |
+
6.423047307180241e-05,
|
| 570 |
+
5.416608109953813e-05,
|
| 571 |
+
5.762785440310836e-05,
|
| 572 |
+
0.0005954610533080995,
|
| 573 |
+
0.00022168297437019646,
|
| 574 |
+
7.994456973392516e-05,
|
| 575 |
+
5.1874576456611976e-05,
|
| 576 |
+
8.403260289924219e-05,
|
| 577 |
+
6.857109838165343e-05,
|
| 578 |
+
5.5658929341007024e-05,
|
| 579 |
+
4.719209027825855e-05,
|
| 580 |
+
5.400797090260312e-05,
|
| 581 |
+
6.980347825447097e-05,
|
| 582 |
+
6.258647044887766e-05,
|
| 583 |
+
5.260522084427066e-05,
|
| 584 |
+
5.064627112005837e-05,
|
| 585 |
+
4.657195677282289e-05,
|
| 586 |
+
4.758452996611595e-05,
|
| 587 |
+
5.012748806620948e-05,
|
| 588 |
+
4.798350346391089e-05,
|
| 589 |
+
4.9020447477232665e-05,
|
| 590 |
+
4.789638114743866e-05,
|
| 591 |
+
4.8645579227013513e-05,
|
| 592 |
+
8.214709669118747e-05,
|
| 593 |
+
6.254074833123013e-05,
|
| 594 |
+
4.200910188956186e-05,
|
| 595 |
+
4.811974213225767e-05,
|
| 596 |
+
4.6095901780063286e-05,
|
| 597 |
+
4.615750003722496e-05,
|
| 598 |
+
4.747844286612235e-05,
|
| 599 |
+
5.03574192407541e-05,
|
| 600 |
+
4.847695890930481e-05,
|
| 601 |
+
4.824926145374775e-05,
|
| 602 |
+
4.547606658888981e-05,
|
| 603 |
+
6.05795175943058e-05,
|
| 604 |
+
4.456207170733251e-05,
|
| 605 |
+
0.00015212806465569884,
|
| 606 |
+
4.549638106254861e-05,
|
| 607 |
+
5.563519516726956e-05,
|
| 608 |
+
4.517229172051884e-05,
|
| 609 |
+
5.8811048802454025e-05,
|
| 610 |
+
5.0130507588619366e-05,
|
| 611 |
+
5.230798706179485e-05,
|
| 612 |
+
4.3880845623789355e-05,
|
| 613 |
+
4.618477032636292e-05,
|
| 614 |
+
4.3583720980677754e-05,
|
| 615 |
+
5.8016888942802325e-05,
|
| 616 |
+
4.766209167428315e-05,
|
| 617 |
+
5.783405140391551e-05,
|
| 618 |
+
0.002386096864938736,
|
| 619 |
+
4.821366383112036e-05,
|
| 620 |
+
4.6168104745447636e-05,
|
| 621 |
+
4.6418874262599275e-05,
|
| 622 |
+
4.2866351577686146e-05,
|
| 623 |
+
4.370551687316038e-05,
|
| 624 |
+
4.045507375849411e-05,
|
| 625 |
+
4.6614575694547966e-05,
|
| 626 |
+
5.645145211019553e-05,
|
| 627 |
+
4.3131029087817296e-05,
|
| 628 |
+
4.016207094537094e-05,
|
| 629 |
+
0.0004487961414270103
|
| 630 |
],
|
| 631 |
"base_loss": [
|
| 632 |
+
10.58569622039795,
|
| 633 |
+
7.936840534210205,
|
| 634 |
+
7.2051615715026855,
|
| 635 |
+
6.292751789093018,
|
| 636 |
+
5.380630016326904,
|
| 637 |
+
4.158393383026123,
|
| 638 |
+
2.91589093208313,
|
| 639 |
+
2.2648446559906006,
|
| 640 |
+
1.9414230585098267,
|
| 641 |
+
1.8107651472091675,
|
| 642 |
+
1.800335168838501,
|
| 643 |
+
1.7123852968215942,
|
| 644 |
+
1.710773229598999,
|
| 645 |
+
1.6536633968353271,
|
| 646 |
+
1.5563157796859741,
|
| 647 |
+
1.4610322713851929,
|
| 648 |
+
1.153622031211853,
|
| 649 |
+
0.8404688835144043,
|
| 650 |
+
0.6217839121818542,
|
| 651 |
+
0.5448878407478333,
|
| 652 |
+
0.4241843819618225,
|
| 653 |
+
0.34047311544418335,
|
| 654 |
+
0.2867976725101471,
|
| 655 |
+
0.2169194221496582,
|
| 656 |
+
0.13546958565711975,
|
| 657 |
+
0.12438952922821045,
|
| 658 |
+
0.11186210811138153,
|
| 659 |
+
0.0944901555776596,
|
| 660 |
+
0.1039363369345665,
|
| 661 |
+
0.08501303195953369,
|
| 662 |
+
0.11829841136932373,
|
| 663 |
+
0.04217684641480446,
|
| 664 |
+
0.09959305822849274,
|
| 665 |
+
0.030621467158198357,
|
| 666 |
+
0.047111596912145615,
|
| 667 |
+
0.06351909786462784,
|
| 668 |
+
0.050098877400159836,
|
| 669 |
+
0.03927459940314293,
|
| 670 |
+
0.0296621173620224,
|
| 671 |
+
0.05172724649310112,
|
| 672 |
+
0.025398079305887222,
|
| 673 |
+
0.0191025547683239,
|
| 674 |
+
0.02552829682826996,
|
| 675 |
+
0.014332198537886143,
|
| 676 |
+
0.020101871341466904,
|
| 677 |
+
0.016612058505415916,
|
| 678 |
+
0.02385997399687767,
|
| 679 |
+
0.008157567121088505,
|
| 680 |
+
0.026038693264126778,
|
| 681 |
+
0.01184095162898302,
|
| 682 |
+
0.02082441933453083,
|
| 683 |
+
0.038108620792627335,
|
| 684 |
+
0.031920164823532104,
|
| 685 |
+
0.023155227303504944,
|
| 686 |
+
0.029825836420059204,
|
| 687 |
+
0.009044607169926167,
|
| 688 |
+
0.027161160483956337,
|
| 689 |
+
0.007930578663945198,
|
| 690 |
+
0.021433621644973755,
|
| 691 |
+
0.005527331493794918,
|
| 692 |
+
0.007499660365283489,
|
| 693 |
+
0.017420727759599686,
|
| 694 |
+
0.014103187248110771,
|
| 695 |
+
0.011276716366410255,
|
| 696 |
+
0.012983249500393867,
|
| 697 |
+
0.030153032392263412,
|
| 698 |
+
0.008853526785969734,
|
| 699 |
+
0.03762682154774666,
|
| 700 |
+
0.01117522269487381,
|
| 701 |
+
0.023195743560791016,
|
| 702 |
+
0.013249148614704609,
|
| 703 |
+
0.014523004181683064,
|
| 704 |
+
0.0055778538808226585,
|
| 705 |
+
0.020841525867581367,
|
| 706 |
+
0.014729819260537624,
|
| 707 |
+
0.013070695102214813,
|
| 708 |
+
0.00855342485010624,
|
| 709 |
+
0.011591176502406597,
|
| 710 |
+
0.002833534497767687,
|
| 711 |
+
0.025547100231051445,
|
| 712 |
+
0.012554551474750042,
|
| 713 |
+
0.009271000511944294,
|
| 714 |
+
0.01295425184071064,
|
| 715 |
+
0.010510939173400402,
|
| 716 |
+
0.007462501060217619,
|
| 717 |
+
0.014900215901434422,
|
| 718 |
+
0.014310033991932869,
|
| 719 |
+
0.008596707135438919,
|
| 720 |
+
0.001958919456228614,
|
| 721 |
+
0.0046486821956932545,
|
| 722 |
+
0.018444955348968506,
|
| 723 |
+
0.008556962944567204,
|
| 724 |
+
0.004552661441266537,
|
| 725 |
+
0.027067942544817924,
|
| 726 |
+
0.00958697684109211,
|
| 727 |
+
0.009366557002067566,
|
| 728 |
+
0.004811967723071575,
|
| 729 |
+
0.013344655744731426,
|
| 730 |
+
0.008742153644561768,
|
| 731 |
+
0.008907586336135864,
|
| 732 |
+
0.007083355449140072,
|
| 733 |
+
0.015731360763311386,
|
| 734 |
+
0.00568253081291914,
|
| 735 |
+
0.001037583569996059,
|
| 736 |
+
0.0031601584050804377,
|
| 737 |
+
0.0036910015624016523,
|
| 738 |
+
0.006643303204327822,
|
| 739 |
+
0.020221156999468803,
|
| 740 |
+
0.011893088929355145,
|
| 741 |
+
0.005519048310816288,
|
| 742 |
+
0.023880789056420326,
|
| 743 |
+
0.01644163765013218,
|
| 744 |
+
0.026987669989466667,
|
| 745 |
+
0.0011081791017204523,
|
| 746 |
+
0.0033056442625820637,
|
| 747 |
+
0.000858158920891583,
|
| 748 |
+
0.009378315880894661,
|
| 749 |
+
0.005004131700843573,
|
| 750 |
+
0.002976806601509452,
|
| 751 |
+
0.007938246242702007,
|
| 752 |
+
0.00408149091526866,
|
| 753 |
+
0.010652265511453152,
|
| 754 |
+
0.006073917727917433,
|
| 755 |
+
0.018400464206933975,
|
| 756 |
+
0.0039037999231368303,
|
| 757 |
+
0.01426150742918253,
|
| 758 |
+
0.003052217187359929,
|
| 759 |
+
0.016827749088406563,
|
| 760 |
+
0.0037939571775496006,
|
| 761 |
+
0.000831973273307085,
|
| 762 |
+
0.002118675271049142,
|
| 763 |
+
0.004233058542013168,
|
| 764 |
+
0.0016138197388499975,
|
| 765 |
+
0.010185384191572666,
|
| 766 |
+
0.0007464668597094715,
|
| 767 |
+
0.00029250283841975033,
|
| 768 |
+
0.01994812674820423,
|
| 769 |
+
0.0005010344320908189,
|
| 770 |
+
0.006236571352928877,
|
| 771 |
+
0.002717872615903616,
|
| 772 |
+
0.002377332653850317,
|
| 773 |
+
0.007453155238181353,
|
| 774 |
+
0.0005609996733255684,
|
| 775 |
+
0.0011483365669846535,
|
| 776 |
+
0.00160598277579993,
|
| 777 |
+
0.0012431765208020806,
|
| 778 |
+
0.000852460041642189,
|
| 779 |
+
0.0023972985800355673,
|
| 780 |
+
0.0035680646542459726,
|
| 781 |
+
0.004298224579542875,
|
| 782 |
+
0.0015388120664283633,
|
| 783 |
+
0.0022699700202792883,
|
| 784 |
+
0.0015617648605257273,
|
| 785 |
+
0.0017532998463138938,
|
| 786 |
+
0.0030310507863759995,
|
| 787 |
+
0.002370015950873494,
|
| 788 |
+
0.0003038027498405427,
|
| 789 |
+
0.0020328103564679623,
|
| 790 |
+
0.0005833978648297489,
|
| 791 |
+
0.0011084805009886622,
|
| 792 |
+
0.0037475605495274067,
|
| 793 |
+
0.001984222559258342,
|
| 794 |
+
0.0002572322264313698,
|
| 795 |
+
0.012740428559482098,
|
| 796 |
+
0.013420491479337215,
|
| 797 |
+
0.0002775133471004665,
|
| 798 |
+
0.0007172977202571929,
|
| 799 |
+
0.0003056035202462226,
|
| 800 |
+
0.006681203842163086,
|
| 801 |
+
0.0008299726760014892,
|
| 802 |
+
0.002304975176230073,
|
| 803 |
+
0.004181632772088051,
|
| 804 |
+
0.00016523328667972237,
|
| 805 |
+
0.00445162458345294,
|
| 806 |
+
0.0003404470335226506,
|
| 807 |
+
0.00019811275706160814,
|
| 808 |
+
0.00017772385035641491,
|
| 809 |
+
0.00016012518608476967,
|
| 810 |
+
0.0027634704019874334,
|
| 811 |
+
0.00020777643658220768,
|
| 812 |
+
0.014718319289386272,
|
| 813 |
+
0.000357446086127311,
|
| 814 |
+
0.002793132560327649,
|
| 815 |
+
0.001239171950146556,
|
| 816 |
+
0.0007129187579266727,
|
| 817 |
+
0.0010272933868691325,
|
| 818 |
+
0.00018213230941910297,
|
| 819 |
+
0.000530033721588552,
|
| 820 |
+
0.0005569524364545941,
|
| 821 |
+
0.003903051372617483,
|
| 822 |
+
0.0002563659509178251,
|
| 823 |
+
0.00018147526134271175,
|
| 824 |
+
0.0007504919194616377,
|
| 825 |
+
0.0001638331450521946,
|
| 826 |
+
0.0011349172564223409,
|
| 827 |
+
0.0035133049823343754,
|
| 828 |
+
0.0009539271704852581,
|
| 829 |
+
0.002135826274752617,
|
| 830 |
+
0.0004690276109613478,
|
| 831 |
+
0.005972139071673155,
|
| 832 |
+
0.00011733471183106303,
|
| 833 |
+
0.001328730140812695,
|
| 834 |
+
0.0010840508621186018,
|
| 835 |
+
0.0002192206884501502,
|
| 836 |
+
0.00018484889005776495,
|
| 837 |
+
0.0002260169858345762,
|
| 838 |
+
0.0002009521413128823,
|
| 839 |
+
0.00014640075096394867,
|
| 840 |
+
0.005435463972389698,
|
| 841 |
+
0.00014929195458535105,
|
| 842 |
+
0.00013214684440754354,
|
| 843 |
+
0.003676820080727339,
|
| 844 |
+
0.00011442940012784675,
|
| 845 |
+
0.000258677318925038,
|
| 846 |
+
0.0004960866062901914,
|
| 847 |
+
0.0001674027444096282,
|
| 848 |
+
8.12130092526786e-05,
|
| 849 |
+
0.0003417516709305346,
|
| 850 |
+
6.978169403737411e-05,
|
| 851 |
+
8.650257223052904e-05,
|
| 852 |
+
0.0002306181559106335,
|
| 853 |
+
0.00016351799422409385,
|
| 854 |
+
0.009652246721088886,
|
| 855 |
+
0.0026225128676742315,
|
| 856 |
+
0.00016337752458639443,
|
| 857 |
+
8.328648254973814e-05,
|
| 858 |
+
7.744420872768387e-05,
|
| 859 |
+
9.118799061980098e-05,
|
| 860 |
+
6.463653699029237e-05,
|
| 861 |
+
7.599593664053828e-05,
|
| 862 |
+
8.013672049855813e-05,
|
| 863 |
+
7.97944376245141e-05,
|
| 864 |
+
0.00010155878408113495,
|
| 865 |
+
6.792811473133042e-05,
|
| 866 |
+
6.025990660418756e-05,
|
| 867 |
+
7.2588307375554e-05,
|
| 868 |
+
6.611739081563428e-05,
|
| 869 |
+
0.0003789363254327327,
|
| 870 |
+
0.0001938583591254428,
|
| 871 |
+
6.89834268996492e-05,
|
| 872 |
+
0.00017879356164485216,
|
| 873 |
+
5.964725278317928e-05,
|
| 874 |
+
0.00018611035193316638,
|
| 875 |
+
6.1028571508359164e-05,
|
| 876 |
+
6.326786387944594e-05,
|
| 877 |
+
5.722946298192255e-05,
|
| 878 |
+
6.754681089660153e-05,
|
| 879 |
+
5.179006984690204e-05,
|
| 880 |
+
6.45291293039918e-05,
|
| 881 |
+
6.92441753926687e-05,
|
| 882 |
+
5.522249921341427e-05,
|
| 883 |
+
6.423047307180241e-05,
|
| 884 |
+
5.416608109953813e-05,
|
| 885 |
+
5.762785440310836e-05,
|
| 886 |
+
0.0005954610533080995,
|
| 887 |
+
0.00022168297437019646,
|
| 888 |
+
7.994456973392516e-05,
|
| 889 |
+
5.1874576456611976e-05,
|
| 890 |
+
8.403260289924219e-05,
|
| 891 |
+
6.857109838165343e-05,
|
| 892 |
+
5.5658929341007024e-05,
|
| 893 |
+
4.719209027825855e-05,
|
| 894 |
+
5.400797090260312e-05,
|
| 895 |
+
6.980347825447097e-05,
|
| 896 |
+
6.258647044887766e-05,
|
| 897 |
+
5.260522084427066e-05,
|
| 898 |
+
5.064627112005837e-05,
|
| 899 |
+
4.657195677282289e-05,
|
| 900 |
+
4.758452996611595e-05,
|
| 901 |
+
5.012748806620948e-05,
|
| 902 |
+
4.798350346391089e-05,
|
| 903 |
+
4.9020447477232665e-05,
|
| 904 |
+
4.789638114743866e-05,
|
| 905 |
+
4.8645579227013513e-05,
|
| 906 |
+
8.214709669118747e-05,
|
| 907 |
+
6.254074833123013e-05,
|
| 908 |
+
4.200910188956186e-05,
|
| 909 |
+
4.811974213225767e-05,
|
| 910 |
+
4.6095901780063286e-05,
|
| 911 |
+
4.615750003722496e-05,
|
| 912 |
+
4.747844286612235e-05,
|
| 913 |
+
5.03574192407541e-05,
|
| 914 |
+
4.847695890930481e-05,
|
| 915 |
+
4.824926145374775e-05,
|
| 916 |
+
4.547606658888981e-05,
|
| 917 |
+
6.05795175943058e-05,
|
| 918 |
+
4.456207170733251e-05,
|
| 919 |
+
0.00015212806465569884,
|
| 920 |
+
4.549638106254861e-05,
|
| 921 |
+
5.563519516726956e-05,
|
| 922 |
+
4.517229172051884e-05,
|
| 923 |
+
5.8811048802454025e-05,
|
| 924 |
+
5.0130507588619366e-05,
|
| 925 |
+
5.230798706179485e-05,
|
| 926 |
+
4.3880845623789355e-05,
|
| 927 |
+
4.618477032636292e-05,
|
| 928 |
+
4.3583720980677754e-05,
|
| 929 |
+
5.8016888942802325e-05,
|
| 930 |
+
4.766209167428315e-05,
|
| 931 |
+
5.783405140391551e-05,
|
| 932 |
+
0.002386096864938736,
|
| 933 |
+
4.821366383112036e-05,
|
| 934 |
+
4.6168104745447636e-05,
|
| 935 |
+
4.6418874262599275e-05,
|
| 936 |
+
4.2866351577686146e-05,
|
| 937 |
+
4.370551687316038e-05,
|
| 938 |
+
4.045507375849411e-05,
|
| 939 |
+
4.6614575694547966e-05,
|
| 940 |
+
5.645145211019553e-05,
|
| 941 |
+
4.3131029087817296e-05,
|
| 942 |
+
4.016207094537094e-05,
|
| 943 |
+
0.0004487961414270103
|
| 944 |
],
|
| 945 |
"lr": [
|
| 946 |
+
4.188034188034189e-06,
|
| 947 |
+
8.461538461538462e-06,
|
| 948 |
+
1.2735042735042738e-05,
|
| 949 |
+
1.700854700854701e-05,
|
| 950 |
+
2.1282051282051282e-05,
|
| 951 |
+
2.5555555555555557e-05,
|
| 952 |
+
2.9829059829059833e-05,
|
| 953 |
+
3.4102564102564105e-05,
|
| 954 |
+
3.837606837606838e-05,
|
| 955 |
+
3.999958796323024e-05,
|
| 956 |
+
3.999718697278298e-05,
|
| 957 |
+
3.9992642503923525e-05,
|
| 958 |
+
3.998595504376894e-05,
|
| 959 |
+
3.997712530914136e-05,
|
| 960 |
+
3.996615424649119e-05,
|
| 961 |
+
3.995304303179564e-05,
|
| 962 |
+
3.993779307043264e-05,
|
| 963 |
+
3.992040599703026e-05,
|
| 964 |
+
3.990088367529147e-05,
|
| 965 |
+
3.9879228197794335e-05,
|
| 966 |
+
3.9855441885767774e-05,
|
| 967 |
+
3.982952728884272e-05,
|
| 968 |
+
3.98014871847788e-05,
|
| 969 |
+
3.977132457916666e-05,
|
| 970 |
+
3.9739042705105735e-05,
|
| 971 |
+
3.970464502285772e-05,
|
| 972 |
+
3.9668135219475686e-05,
|
| 973 |
+
3.962951720840881e-05,
|
| 974 |
+
3.9588795129082965e-05,
|
| 975 |
+
3.9545973346457e-05,
|
| 976 |
+
3.950105645055483e-05,
|
| 977 |
+
3.9454049255973464e-05,
|
| 978 |
+
3.9404956801366954e-05,
|
| 979 |
+
3.9353784348906246e-05,
|
| 980 |
+
3.930053738371519e-05,
|
| 981 |
+
3.924522161328258e-05,
|
| 982 |
+
3.9187842966850365e-05,
|
| 983 |
+
3.912840759477808e-05,
|
| 984 |
+
3.9066921867883654e-05,
|
| 985 |
+
3.900339237676047e-05,
|
| 986 |
+
3.893782593107095e-05,
|
| 987 |
+
3.8870229558816636e-05,
|
| 988 |
+
3.880061050558488e-05,
|
| 989 |
+
3.8728976233772144e-05,
|
| 990 |
+
3.865533442178418e-05,
|
| 991 |
+
3.857969296321293e-05,
|
| 992 |
+
3.8502059965990464e-05,
|
| 993 |
+
3.842244375151989e-05,
|
| 994 |
+
3.8340852853783366e-05,
|
| 995 |
+
3.825729601842738e-05,
|
| 996 |
+
3.817178220182529e-05,
|
| 997 |
+
3.8084320570117316e-05,
|
| 998 |
+
3.799492049822804e-05,
|
| 999 |
+
3.7903591568861476e-05,
|
| 1000 |
+
3.7810343571473957e-05,
|
| 1001 |
+
3.771518650122478e-05,
|
| 1002 |
+
3.7618130557904865e-05,
|
| 1003 |
+
3.75191861448434e-05,
|
| 1004 |
+
3.7418363867792776e-05,
|
| 1005 |
+
3.7315674533791735e-05,
|
| 1006 |
+
3.7211129150006987e-05,
|
| 1007 |
+
3.7104738922553335e-05,
|
| 1008 |
+
3.6996515255292544e-05,
|
| 1009 |
+
3.688646974861095e-05,
|
| 1010 |
+
3.677461419817603e-05,
|
| 1011 |
+
3.666096059367202e-05,
|
| 1012 |
+
3.654552111751479e-05,
|
| 1013 |
+
3.642830814354598e-05,
|
| 1014 |
+
3.6309334235706705e-05,
|
| 1015 |
+
3.618861214669079e-05,
|
| 1016 |
+
3.606615481657787e-05,
|
| 1017 |
+
3.594197537144631e-05,
|
| 1018 |
+
3.5816087121966275e-05,
|
| 1019 |
+
3.5688503561972944e-05,
|
| 1020 |
+
3.5559238367020136e-05,
|
| 1021 |
+
3.542830539291442e-05,
|
| 1022 |
+
3.529571867422996e-05,
|
| 1023 |
+
3.516149242280414e-05,
|
| 1024 |
+
3.502564102621419e-05,
|
| 1025 |
+
3.488817904623504e-05,
|
| 1026 |
+
3.474912121727844e-05,
|
| 1027 |
+
3.4608482444813575e-05,
|
| 1028 |
+
3.446627780376941e-05,
|
| 1029 |
+
3.432252253691874e-05,
|
| 1030 |
+
3.4177232053244447e-05,
|
| 1031 |
+
3.403042192628771e-05,
|
| 1032 |
+
3.388210789247879e-05,
|
| 1033 |
+
3.373230584945018e-05,
|
| 1034 |
+
3.358103185433261e-05,
|
| 1035 |
+
3.342830212203387e-05,
|
| 1036 |
+
3.3274133023500764e-05,
|
| 1037 |
+
3.311854108396431e-05,
|
| 1038 |
+
3.2961542981168435e-05,
|
| 1039 |
+
3.280315554358229e-05,
|
| 1040 |
+
3.264339574859642e-05,
|
| 1041 |
+
3.248228072070302e-05,
|
| 1042 |
+
3.2319827729660285e-05,
|
| 1043 |
+
3.2156054188641376e-05,
|
| 1044 |
+
3.1990977652367865e-05,
|
| 1045 |
+
3.1824615815228095e-05,
|
| 1046 |
+
3.165698650938051e-05,
|
| 1047 |
+
3.148810770284225e-05,
|
| 1048 |
+
3.131799749756318e-05,
|
| 1049 |
+
3.114667412748557e-05,
|
| 1050 |
+
3.0974155956589594e-05,
|
| 1051 |
+
3.0800461476924934e-05,
|
| 1052 |
+
3.062560930662865e-05,
|
| 1053 |
+
3.0449618187929455e-05,
|
| 1054 |
+
3.027250698513884e-05,
|
| 1055 |
+
3.0094294682628963e-05,
|
| 1056 |
+
2.9915000382797757e-05,
|
| 1057 |
+
2.973464330402138e-05,
|
| 1058 |
+
2.9553242778594188e-05,
|
| 1059 |
+
2.9370818250656534e-05,
|
| 1060 |
+
2.918738927411057e-05,
|
| 1061 |
+
2.900297551052429e-05,
|
| 1062 |
+
2.8817596727024034e-05,
|
| 1063 |
+
2.863127279417565e-05,
|
| 1064 |
+
2.8444023683854588e-05,
|
| 1065 |
+
2.8255869467105155e-05,
|
| 1066 |
+
2.806683031198911e-05,
|
| 1067 |
+
2.787692648142386e-05,
|
| 1068 |
+
2.76861783310105e-05,
|
| 1069 |
+
2.7494606306851945e-05,
|
| 1070 |
+
2.730223094336128e-05,
|
| 1071 |
+
2.7109072861060756e-05,
|
| 1072 |
+
2.6915152764371454e-05,
|
| 1073 |
+
2.6720491439394013e-05,
|
| 1074 |
+
2.6525109751680584e-05,
|
| 1075 |
+
2.6329028643998294e-05,
|
| 1076 |
+
2.613226913408438e-05,
|
| 1077 |
+
2.593485231239333e-05,
|
| 1078 |
+
2.5736799339836247e-05,
|
| 1079 |
+
2.5538131445512574e-05,
|
| 1080 |
+
2.5338869924434622e-05,
|
| 1081 |
+
2.5139036135244954e-05,
|
| 1082 |
+
2.493865149792698e-05,
|
| 1083 |
+
2.4737737491508967e-05,
|
| 1084 |
+
2.4536315651761724e-05,
|
| 1085 |
+
2.433440756889019e-05,
|
| 1086 |
+
2.4132034885219254e-05,
|
| 1087 |
+
2.3929219292873862e-05,
|
| 1088 |
+
2.3725982531453916e-05,
|
| 1089 |
+
2.3522346385703997e-05,
|
| 1090 |
+
2.3318332683178304e-05,
|
| 1091 |
+
2.3113963291900965e-05,
|
| 1092 |
+
2.290926011802202e-05,
|
| 1093 |
+
2.2704245103469335e-05,
|
| 1094 |
+
2.2498940223596676e-05,
|
| 1095 |
+
2.229336748482816e-05,
|
| 1096 |
+
2.2087548922299454e-05,
|
| 1097 |
+
2.1881506597495808e-05,
|
| 1098 |
+
2.1675262595887345e-05,
|
| 1099 |
+
2.1468839024561703e-05,
|
| 1100 |
+
2.1262258009854425e-05,
|
| 1101 |
+
2.1055541694977263e-05,
|
| 1102 |
+
2.0848712237644633e-05,
|
| 1103 |
+
2.0641791807698616e-05,
|
| 1104 |
+
2.0434802584732507e-05,
|
| 1105 |
+
2.022776675571351e-05,
|
| 1106 |
+
2.0020706512604437e-05,
|
| 1107 |
+
1.9813644049985047e-05,
|
| 1108 |
+
1.9606601562673005e-05,
|
| 1109 |
+
1.939960124334484e-05,
|
| 1110 |
+
1.919266528015713e-05,
|
| 1111 |
+
1.8985815854368193e-05,
|
| 1112 |
+
1.8779075137960494e-05,
|
| 1113 |
+
1.8572465291264022e-05,
|
| 1114 |
+
1.8366008460581004e-05,
|
| 1115 |
+
1.815972677581202e-05,
|
| 1116 |
+
1.7953642348083943e-05,
|
| 1117 |
+
1.774777726737984e-05,
|
| 1118 |
+
1.7542153600171213e-05,
|
| 1119 |
+
1.7336793387052705e-05,
|
| 1120 |
+
1.7131718640379524e-05,
|
| 1121 |
+
1.6926951341908083e-05,
|
| 1122 |
+
1.672251344043969e-05,
|
| 1123 |
+
1.651842684946793e-05,
|
| 1124 |
+
1.6314713444829764e-05,
|
| 1125 |
+
1.611139506236069e-05,
|
| 1126 |
+
1.5908493495554186e-05,
|
| 1127 |
+
1.5706030493225642e-05,
|
| 1128 |
+
1.5504027757181196e-05,
|
| 1129 |
+
1.5302506939891503e-05,
|
| 1130 |
+
1.5101489642170806e-05,
|
| 1131 |
+
1.4900997410861609e-05,
|
| 1132 |
+
1.4701051736525065e-05,
|
| 1133 |
+
1.4501674051137457e-05,
|
| 1134 |
+
1.4302885725792858e-05,
|
| 1135 |
+
1.4104708068412472e-05,
|
| 1136 |
+
1.3907162321460597e-05,
|
| 1137 |
+
1.371026965966768e-05,
|
| 1138 |
+
1.3514051187760642e-05,
|
| 1139 |
+
1.3318527938200655e-05,
|
| 1140 |
+
1.3123720868928707e-05,
|
| 1141 |
+
1.292965086111913e-05,
|
| 1142 |
+
1.2736338716941403e-05,
|
| 1143 |
+
1.2543805157330346e-05,
|
| 1144 |
+
1.2352070819765072e-05,
|
| 1145 |
+
1.2161156256056894e-05,
|
| 1146 |
+
1.1971081930146396e-05,
|
| 1147 |
+
1.1781868215909893e-05,
|
| 1148 |
+
1.1593535394975626e-05,
|
| 1149 |
+
1.1406103654549742e-05,
|
| 1150 |
+
1.1219593085252485e-05,
|
| 1151 |
+
1.103402367896469e-05,
|
| 1152 |
+
1.0849415326684864e-05,
|
| 1153 |
+
1.0665787816397109e-05,
|
| 1154 |
+
1.0483160830950054e-05,
|
| 1155 |
+
1.030155394594707e-05,
|
| 1156 |
+
1.0120986627648004e-05,
|
| 1157 |
+
9.941478230882551e-06,
|
| 1158 |
+
9.763047996975699e-06,
|
| 1159 |
+
9.585715051685247e-06,
|
| 1160 |
+
9.409498403151716e-06,
|
| 1161 |
+
9.234416939860887e-06,
|
| 1162 |
+
9.060489428619184e-06,
|
| 1163 |
+
8.887734512542072e-06,
|
| 1164 |
+
8.716170709055716e-06,
|
| 1165 |
+
8.545816407912107e-06,
|
| 1166 |
+
8.376689869217913e-06,
|
| 1167 |
+
8.208809221477138e-06,
|
| 1168 |
+
8.042192459648035e-06,
|
| 1169 |
+
7.87685744321416e-06,
|
| 1170 |
+
7.712821894270087e-06,
|
| 1171 |
+
7.550103395621737e-06,
|
| 1172 |
+
7.388719388901766e-06,
|
| 1173 |
+
7.228687172699982e-06,
|
| 1174 |
+
7.070023900709091e-06,
|
| 1175 |
+
6.912746579886067e-06,
|
| 1176 |
+
6.756872068629164e-06,
|
| 1177 |
+
6.6024170749708814e-06,
|
| 1178 |
+
6.4493981547870566e-06,
|
| 1179 |
+
6.297831710022247e-06,
|
| 1180 |
+
6.147733986931628e-06,
|
| 1181 |
+
5.999121074339575e-06,
|
| 1182 |
+
5.8520089019151116e-06,
|
| 1183 |
+
5.706413238464439e-06,
|
| 1184 |
+
5.562349690240656e-06,
|
| 1185 |
+
5.419833699270991e-06,
|
| 1186 |
+
5.278880541701565e-06,
|
| 1187 |
+
5.139505326159946e-06,
|
| 1188 |
+
5.0017229921356696e-06,
|
| 1189 |
+
4.8655483083789115e-06,
|
| 1190 |
+
4.730995871317427e-06,
|
| 1191 |
+
4.598080103491973e-06,
|
| 1192 |
+
4.4668152520103745e-06,
|
| 1193 |
+
4.33721538702039e-06,
|
| 1194 |
+
4.209294400201533e-06,
|
| 1195 |
+
4.083066003276077e-06,
|
| 1196 |
+
3.958543726539259e-06,
|
| 1197 |
+
3.835740917409019e-06,
|
| 1198 |
+
3.714670738995274e-06,
|
| 1199 |
+
3.595346168689e-06,
|
| 1200 |
+
3.477779996771207e-06,
|
| 1201 |
+
3.361984825041915e-06,
|
| 1202 |
+
3.2479730654694342e-06,
|
| 1203 |
+
3.135756938859904e-06,
|
| 1204 |
+
3.0253484735473714e-06,
|
| 1205 |
+
2.9167595041044805e-06,
|
| 1206 |
+
2.8100016700739385e-06,
|
| 1207 |
+
2.705086414720892e-06,
|
| 1208 |
+
2.6020249838063037e-06,
|
| 1209 |
+
2.500828424381587e-06,
|
| 1210 |
+
2.4015075836044345e-06,
|
| 1211 |
+
2.3040731075761303e-06,
|
| 1212 |
+
2.208535440200428e-06,
|
| 1213 |
+
2.11490482206405e-06,
|
| 1214 |
+
2.0231912893390504e-06,
|
| 1215 |
+
1.9334046727069866e-06,
|
| 1216 |
+
1.8455545963052347e-06,
|
| 1217 |
+
1.7596504766953605e-06,
|
| 1218 |
+
1.6757015218537743e-06,
|
| 1219 |
+
1.5937167301847356e-06,
|
| 1220 |
+
1.513704889555827e-06,
|
| 1221 |
+
1.4356745763559742e-06,
|
| 1222 |
+
1.3596341545761815e-06,
|
| 1223 |
+
1.2855917749129866e-06,
|
| 1224 |
+
1.2135553738947903e-06,
|
| 1225 |
+
1.1435326730311536e-06,
|
| 1226 |
+
1.075531177985145e-06,
|
| 1227 |
+
1.0095581777688036e-06,
|
| 1228 |
+
9.456207439618459e-07,
|
| 1229 |
+
8.837257299536639e-07,
|
| 1230 |
+
8.238797702087242e-07,
|
| 1231 |
+
7.660892795554131e-07,
|
| 1232 |
+
7.103604524984597e-07,
|
| 1233 |
+
6.566992625549318e-07,
|
| 1234 |
+
6.051114616139542e-07,
|
| 1235 |
+
5.556025793201581e-07,
|
| 1236 |
+
5.081779224809702e-07,
|
| 1237 |
+
4.628425744977927e-07,
|
| 1238 |
+
4.196013948210942e-07,
|
| 1239 |
+
3.784590184295511e-07,
|
| 1240 |
+
3.394198553332162e-07,
|
| 1241 |
+
3.024880901008209e-07,
|
| 1242 |
+
2.676676814112367e-07,
|
| 1243 |
+
2.3496236162914076e-07,
|
| 1244 |
+
2.0437563640495206e-07,
|
| 1245 |
+
1.7591078429906706e-07,
|
| 1246 |
+
1.495708564304299e-07,
|
| 1247 |
+
1.2535867614948739e-07,
|
| 1248 |
+
1.0327683873555317e-07,
|
| 1249 |
+
8.332771111863037e-08,
|
| 1250 |
+
6.551343162569667e-08,
|
| 1251 |
+
4.983590975150554e-08,
|
| 1252 |
+
3.629682595389428e-08,
|
| 1253 |
+
2.4897631473679297e-08,
|
| 1254 |
+
1.563954817907831e-08,
|
| 1255 |
+
8.523568434752882e-09,
|
| 1256 |
+
3.550454995435715e-09,
|
| 1257 |
+
7.207409241671848e-10
|
| 1258 |
],
|
| 1259 |
"eval_step": [
|
| 1260 |
781,
|
|
|
|
| 1301 |
20
|
| 1302 |
],
|
| 1303 |
"eval_accuracy": [
|
| 1304 |
+
0.0011111111111111111,
|
| 1305 |
+
0.8111111111111111,
|
| 1306 |
+
0.8677777777777778,
|
| 1307 |
+
0.8966666666666666,
|
| 1308 |
+
0.8977777777777778,
|
| 1309 |
+
0.9522222222222222,
|
| 1310 |
+
0.9411111111111111,
|
| 1311 |
+
0.9588888888888889,
|
| 1312 |
+
0.9788888888888889,
|
| 1313 |
+
0.9755555555555555,
|
| 1314 |
+
0.9944444444444445,
|
| 1315 |
+
0.9888888888888889,
|
| 1316 |
+
0.9966666666666667,
|
| 1317 |
+
0.9811111111111112,
|
| 1318 |
+
0.9955555555555555,
|
| 1319 |
+
0.9977777777777778,
|
| 1320 |
+
1.0,
|
| 1321 |
+
1.0,
|
| 1322 |
+
1.0,
|
| 1323 |
+
1.0
|
| 1324 |
]
|
| 1325 |
},
|
| 1326 |
+
"final_accuracy": 1.0,
|
| 1327 |
"sft_eval": {
|
| 1328 |
"config": {
|
| 1329 |
"ops": "add_sub",
|
| 1330 |
"K": null,
|
| 1331 |
"mode": "sft",
|
| 1332 |
"n_digits": 6,
|
| 1333 |
+
"n_per_split": 100
|
| 1334 |
},
|
| 1335 |
"splits": {
|
| 1336 |
"add_S0": {
|
| 1337 |
"full_accuracy": 1.0,
|
| 1338 |
+
"n_examples": 100,
|
| 1339 |
"per_subtask": {
|
| 1340 |
"SA": {
|
| 1341 |
"accuracy": 1.0,
|
| 1342 |
+
"count": 605
|
| 1343 |
},
|
| 1344 |
"SS": {
|
| 1345 |
"accuracy": 1.0,
|
| 1346 |
+
"count": 95
|
| 1347 |
}
|
| 1348 |
}
|
| 1349 |
},
|
| 1350 |
"add_S1": {
|
| 1351 |
"full_accuracy": 1.0,
|
| 1352 |
+
"n_examples": 100,
|
| 1353 |
"per_subtask": {
|
| 1354 |
"SA": {
|
| 1355 |
"accuracy": 1.0,
|
| 1356 |
+
"count": 204
|
| 1357 |
},
|
| 1358 |
"SC": {
|
| 1359 |
"accuracy": 1.0,
|
| 1360 |
+
"count": 169
|
| 1361 |
},
|
| 1362 |
"SS": {
|
| 1363 |
"accuracy": 1.0,
|
| 1364 |
+
"count": 31
|
| 1365 |
},
|
| 1366 |
"UC": {
|
| 1367 |
"accuracy": 1.0,
|
| 1368 |
+
"count": 296
|
| 1369 |
}
|
| 1370 |
}
|
| 1371 |
},
|
| 1372 |
"add_S2": {
|
| 1373 |
"full_accuracy": 1.0,
|
| 1374 |
+
"n_examples": 100,
|
| 1375 |
"per_subtask": {
|
| 1376 |
"SA": {
|
| 1377 |
"accuracy": 1.0,
|
| 1378 |
+
"count": 163
|
| 1379 |
},
|
| 1380 |
"SC": {
|
| 1381 |
"accuracy": 1.0,
|
| 1382 |
+
"count": 130
|
| 1383 |
},
|
| 1384 |
"SS": {
|
| 1385 |
"accuracy": 1.0,
|
| 1386 |
+
"count": 87
|
| 1387 |
},
|
| 1388 |
"UC": {
|
| 1389 |
"accuracy": 1.0,
|
| 1390 |
+
"count": 203
|
| 1391 |
},
|
| 1392 |
"US": {
|
| 1393 |
"accuracy": 1.0,
|
| 1394 |
+
"count": 117
|
| 1395 |
}
|
| 1396 |
}
|
| 1397 |
},
|
| 1398 |
"add_S3": {
|
| 1399 |
"full_accuracy": 1.0,
|
| 1400 |
+
"n_examples": 100,
|
| 1401 |
"per_subtask": {
|
| 1402 |
"SA": {
|
| 1403 |
"accuracy": 1.0,
|
| 1404 |
+
"count": 121
|
| 1405 |
},
|
| 1406 |
"SC": {
|
| 1407 |
"accuracy": 1.0,
|
| 1408 |
+
"count": 121
|
| 1409 |
},
|
| 1410 |
"SS": {
|
| 1411 |
"accuracy": 1.0,
|
| 1412 |
+
"count": 49
|
| 1413 |
},
|
| 1414 |
"UC": {
|
| 1415 |
"accuracy": 1.0,
|
| 1416 |
+
"count": 186
|
| 1417 |
},
|
| 1418 |
"US": {
|
| 1419 |
"accuracy": 1.0,
|
| 1420 |
+
"count": 223
|
| 1421 |
}
|
| 1422 |
}
|
| 1423 |
},
|
| 1424 |
"add_S4": {
|
| 1425 |
"full_accuracy": 1.0,
|
| 1426 |
+
"n_examples": 100,
|
| 1427 |
"per_subtask": {
|
| 1428 |
"SA": {
|
| 1429 |
"accuracy": 1.0,
|
| 1430 |
+
"count": 104
|
| 1431 |
},
|
| 1432 |
"SC": {
|
| 1433 |
"accuracy": 1.0,
|
| 1434 |
+
"count": 106
|
| 1435 |
},
|
| 1436 |
"SS": {
|
| 1437 |
"accuracy": 1.0,
|
| 1438 |
+
"count": 23
|
| 1439 |
},
|
| 1440 |
"UC": {
|
| 1441 |
"accuracy": 1.0,
|
| 1442 |
+
"count": 160
|
| 1443 |
},
|
| 1444 |
"US": {
|
| 1445 |
"accuracy": 1.0,
|
| 1446 |
+
"count": 307
|
| 1447 |
}
|
| 1448 |
}
|
| 1449 |
},
|
| 1450 |
"add_S5": {
|
| 1451 |
+
"full_accuracy": 1.0,
|
| 1452 |
+
"n_examples": 100,
|
| 1453 |
"per_subtask": {
|
| 1454 |
"SA": {
|
| 1455 |
"accuracy": 1.0,
|
| 1456 |
+
"count": 100
|
| 1457 |
},
|
| 1458 |
"SC": {
|
| 1459 |
"accuracy": 1.0,
|
| 1460 |
+
"count": 100
|
| 1461 |
},
|
| 1462 |
"UC": {
|
| 1463 |
+
"accuracy": 1.0,
|
| 1464 |
+
"count": 100
|
| 1465 |
},
|
| 1466 |
"US": {
|
| 1467 |
+
"accuracy": 1.0,
|
| 1468 |
+
"count": 400
|
| 1469 |
}
|
| 1470 |
}
|
| 1471 |
},
|
| 1472 |
"add_S6": {
|
| 1473 |
+
"full_accuracy": 1.0,
|
| 1474 |
+
"n_examples": 100,
|
| 1475 |
"per_subtask": {
|
| 1476 |
"SC": {
|
| 1477 |
"accuracy": 1.0,
|
| 1478 |
+
"count": 100
|
| 1479 |
},
|
| 1480 |
"UC": {
|
| 1481 |
+
"accuracy": 1.0,
|
| 1482 |
+
"count": 100
|
| 1483 |
},
|
| 1484 |
"US": {
|
| 1485 |
+
"accuracy": 1.0,
|
| 1486 |
+
"count": 500
|
| 1487 |
}
|
| 1488 |
}
|
| 1489 |
},
|
|
|
|
| 1493 |
"per_subtask": {
|
| 1494 |
"SA": {
|
| 1495 |
"accuracy": 1.0,
|
| 1496 |
+
"count": 447
|
| 1497 |
},
|
| 1498 |
"SC": {
|
| 1499 |
"accuracy": 1.0,
|
| 1500 |
+
"count": 320
|
| 1501 |
},
|
| 1502 |
"SS": {
|
| 1503 |
"accuracy": 1.0,
|
| 1504 |
+
"count": 56
|
| 1505 |
},
|
| 1506 |
"UC": {
|
| 1507 |
"accuracy": 1.0,
|
| 1508 |
+
"count": 529
|
| 1509 |
},
|
| 1510 |
"US": {
|
| 1511 |
"accuracy": 1.0,
|
| 1512 |
+
"count": 48
|
| 1513 |
}
|
| 1514 |
}
|
| 1515 |
},
|
| 1516 |
"add_C3": {
|
| 1517 |
"full_accuracy": 1.0,
|
| 1518 |
+
"n_examples": 100,
|
| 1519 |
"per_subtask": {
|
| 1520 |
"SA": {
|
| 1521 |
"accuracy": 1.0,
|
| 1522 |
+
"count": 300
|
| 1523 |
},
|
| 1524 |
"SC": {
|
| 1525 |
"accuracy": 1.0,
|
| 1526 |
+
"count": 100
|
| 1527 |
},
|
| 1528 |
"UC": {
|
| 1529 |
"accuracy": 1.0,
|
| 1530 |
+
"count": 193
|
| 1531 |
},
|
| 1532 |
"US": {
|
| 1533 |
"accuracy": 1.0,
|
| 1534 |
+
"count": 107
|
| 1535 |
}
|
| 1536 |
}
|
| 1537 |
},
|
| 1538 |
"add_C4": {
|
| 1539 |
"full_accuracy": 1.0,
|
| 1540 |
+
"n_examples": 100,
|
| 1541 |
"per_subtask": {
|
| 1542 |
"SA": {
|
| 1543 |
"accuracy": 1.0,
|
| 1544 |
+
"count": 200
|
| 1545 |
},
|
| 1546 |
"SC": {
|
| 1547 |
"accuracy": 1.0,
|
| 1548 |
+
"count": 100
|
| 1549 |
},
|
| 1550 |
"UC": {
|
| 1551 |
"accuracy": 1.0,
|
| 1552 |
+
"count": 256
|
| 1553 |
},
|
| 1554 |
"US": {
|
| 1555 |
"accuracy": 1.0,
|
| 1556 |
+
"count": 144
|
| 1557 |
}
|
| 1558 |
}
|
| 1559 |
},
|
| 1560 |
"add_C5": {
|
| 1561 |
+
"full_accuracy": 1.0,
|
| 1562 |
+
"n_examples": 100,
|
| 1563 |
"per_subtask": {
|
| 1564 |
"SA": {
|
| 1565 |
"accuracy": 1.0,
|
| 1566 |
+
"count": 100
|
| 1567 |
},
|
| 1568 |
"SC": {
|
| 1569 |
"accuracy": 1.0,
|
| 1570 |
+
"count": 100
|
| 1571 |
},
|
| 1572 |
"UC": {
|
| 1573 |
+
"accuracy": 1.0,
|
| 1574 |
+
"count": 306
|
| 1575 |
},
|
| 1576 |
"US": {
|
| 1577 |
"accuracy": 1.0,
|
| 1578 |
+
"count": 194
|
| 1579 |
}
|
| 1580 |
}
|
| 1581 |
},
|
| 1582 |
"add_C6": {
|
| 1583 |
"full_accuracy": 1.0,
|
| 1584 |
+
"n_examples": 100,
|
| 1585 |
"per_subtask": {
|
| 1586 |
"SC": {
|
| 1587 |
"accuracy": 1.0,
|
| 1588 |
+
"count": 100
|
| 1589 |
},
|
| 1590 |
"UC": {
|
| 1591 |
"accuracy": 1.0,
|
| 1592 |
+
"count": 366
|
| 1593 |
},
|
| 1594 |
"US": {
|
| 1595 |
"accuracy": 1.0,
|
| 1596 |
+
"count": 234
|
| 1597 |
}
|
| 1598 |
}
|
| 1599 |
},
|
| 1600 |
"sub_M0": {
|
| 1601 |
"full_accuracy": 1.0,
|
| 1602 |
+
"n_examples": 100,
|
| 1603 |
"per_subtask": {
|
| 1604 |
"MD": {
|
| 1605 |
"accuracy": 1.0,
|
| 1606 |
+
"count": 601
|
| 1607 |
},
|
| 1608 |
"ME": {
|
| 1609 |
"accuracy": 1.0,
|
| 1610 |
+
"count": 99
|
| 1611 |
}
|
| 1612 |
}
|
| 1613 |
},
|
| 1614 |
"sub_M1": {
|
| 1615 |
"full_accuracy": 1.0,
|
| 1616 |
+
"n_examples": 100,
|
| 1617 |
"per_subtask": {
|
| 1618 |
"MD": {
|
| 1619 |
"accuracy": 1.0,
|
| 1620 |
+
"count": 279
|
| 1621 |
},
|
| 1622 |
"MB": {
|
| 1623 |
"accuracy": 1.0,
|
| 1624 |
+
"count": 145
|
| 1625 |
},
|
| 1626 |
"ME": {
|
| 1627 |
"accuracy": 1.0,
|
| 1628 |
+
"count": 24
|
| 1629 |
},
|
| 1630 |
"UB": {
|
| 1631 |
"accuracy": 1.0,
|
| 1632 |
+
"count": 252
|
| 1633 |
}
|
| 1634 |
}
|
| 1635 |
},
|
| 1636 |
"sub_M2": {
|
| 1637 |
"full_accuracy": 1.0,
|
| 1638 |
+
"n_examples": 100,
|
| 1639 |
"per_subtask": {
|
| 1640 |
"MD": {
|
| 1641 |
"accuracy": 1.0,
|
| 1642 |
+
"count": 213
|
| 1643 |
},
|
| 1644 |
"MB": {
|
| 1645 |
"accuracy": 1.0,
|
| 1646 |
+
"count": 113
|
| 1647 |
},
|
| 1648 |
"ME": {
|
| 1649 |
"accuracy": 1.0,
|
| 1650 |
+
"count": 85
|
| 1651 |
},
|
| 1652 |
"UB": {
|
| 1653 |
"accuracy": 1.0,
|
| 1654 |
+
"count": 181
|
| 1655 |
},
|
| 1656 |
"UD": {
|
| 1657 |
"accuracy": 1.0,
|
| 1658 |
+
"count": 108
|
| 1659 |
}
|
| 1660 |
}
|
| 1661 |
},
|
| 1662 |
"sub_M3": {
|
| 1663 |
"full_accuracy": 1.0,
|
| 1664 |
+
"n_examples": 100,
|
| 1665 |
"per_subtask": {
|
| 1666 |
"MD": {
|
| 1667 |
"accuracy": 1.0,
|
| 1668 |
+
"count": 179
|
| 1669 |
},
|
| 1670 |
"MB": {
|
| 1671 |
"accuracy": 1.0,
|
| 1672 |
+
"count": 103
|
| 1673 |
},
|
| 1674 |
"ME": {
|
| 1675 |
"accuracy": 1.0,
|
| 1676 |
+
"count": 56
|
| 1677 |
},
|
| 1678 |
"UB": {
|
| 1679 |
"accuracy": 1.0,
|
| 1680 |
+
"count": 149
|
| 1681 |
},
|
| 1682 |
"UD": {
|
| 1683 |
"accuracy": 1.0,
|
| 1684 |
+
"count": 213
|
| 1685 |
}
|
| 1686 |
}
|
| 1687 |
},
|
| 1688 |
"sub_M4": {
|
| 1689 |
+
"full_accuracy": 1.0,
|
| 1690 |
+
"n_examples": 100,
|
| 1691 |
"per_subtask": {
|
| 1692 |
"MD": {
|
| 1693 |
"accuracy": 1.0,
|
| 1694 |
+
"count": 200
|
| 1695 |
},
|
| 1696 |
"MB": {
|
| 1697 |
"accuracy": 1.0,
|
| 1698 |
+
"count": 100
|
| 1699 |
},
|
| 1700 |
"UB": {
|
| 1701 |
+
"accuracy": 1.0,
|
| 1702 |
+
"count": 100
|
| 1703 |
},
|
| 1704 |
"UD": {
|
| 1705 |
"accuracy": 1.0,
|
| 1706 |
+
"count": 300
|
| 1707 |
}
|
| 1708 |
}
|
| 1709 |
},
|
| 1710 |
"sub_M5": {
|
| 1711 |
+
"full_accuracy": 1.0,
|
| 1712 |
+
"n_examples": 100,
|
| 1713 |
"per_subtask": {
|
| 1714 |
"MD": {
|
| 1715 |
"accuracy": 1.0,
|
| 1716 |
+
"count": 100
|
| 1717 |
},
|
| 1718 |
"MB": {
|
| 1719 |
"accuracy": 1.0,
|
| 1720 |
+
"count": 100
|
| 1721 |
},
|
| 1722 |
"UB": {
|
| 1723 |
"accuracy": 1.0,
|
| 1724 |
+
"count": 100
|
| 1725 |
},
|
| 1726 |
"UD": {
|
| 1727 |
+
"accuracy": 1.0,
|
| 1728 |
+
"count": 400
|
| 1729 |
}
|
| 1730 |
}
|
| 1731 |
},
|
|
|
|
| 1735 |
"per_subtask": {
|
| 1736 |
"MD": {
|
| 1737 |
"accuracy": 1.0,
|
| 1738 |
+
"count": 600
|
| 1739 |
},
|
| 1740 |
"MB": {
|
| 1741 |
"accuracy": 1.0,
|
| 1742 |
+
"count": 267
|
| 1743 |
},
|
| 1744 |
"ME": {
|
| 1745 |
"accuracy": 1.0,
|
| 1746 |
+
"count": 53
|
| 1747 |
},
|
| 1748 |
"UB": {
|
| 1749 |
"accuracy": 1.0,
|
| 1750 |
+
"count": 439
|
| 1751 |
},
|
| 1752 |
"UD": {
|
| 1753 |
"accuracy": 1.0,
|
| 1754 |
+
"count": 41
|
| 1755 |
}
|
| 1756 |
}
|
| 1757 |
},
|
| 1758 |
"sub_B3": {
|
| 1759 |
"full_accuracy": 1.0,
|
| 1760 |
+
"n_examples": 100,
|
| 1761 |
"per_subtask": {
|
| 1762 |
"MD": {
|
| 1763 |
"accuracy": 1.0,
|
| 1764 |
+
"count": 300
|
| 1765 |
},
|
| 1766 |
"MB": {
|
| 1767 |
"accuracy": 1.0,
|
| 1768 |
+
"count": 100
|
| 1769 |
},
|
| 1770 |
"UB": {
|
| 1771 |
"accuracy": 1.0,
|
| 1772 |
+
"count": 197
|
| 1773 |
},
|
| 1774 |
"UD": {
|
| 1775 |
"accuracy": 1.0,
|
| 1776 |
+
"count": 103
|
| 1777 |
}
|
| 1778 |
}
|
| 1779 |
},
|
| 1780 |
"sub_B4": {
|
| 1781 |
"full_accuracy": 1.0,
|
| 1782 |
+
"n_examples": 100,
|
| 1783 |
"per_subtask": {
|
| 1784 |
"MD": {
|
| 1785 |
"accuracy": 1.0,
|
| 1786 |
+
"count": 200
|
| 1787 |
},
|
| 1788 |
"MB": {
|
| 1789 |
"accuracy": 1.0,
|
| 1790 |
+
"count": 100
|
| 1791 |
},
|
| 1792 |
"UB": {
|
| 1793 |
"accuracy": 1.0,
|
| 1794 |
+
"count": 247
|
| 1795 |
},
|
| 1796 |
"UD": {
|
| 1797 |
"accuracy": 1.0,
|
| 1798 |
+
"count": 153
|
| 1799 |
}
|
| 1800 |
}
|
| 1801 |
},
|
| 1802 |
"sub_B5": {
|
| 1803 |
"full_accuracy": 1.0,
|
| 1804 |
+
"n_examples": 100,
|
| 1805 |
"per_subtask": {
|
| 1806 |
"MD": {
|
| 1807 |
"accuracy": 1.0,
|
| 1808 |
+
"count": 100
|
| 1809 |
},
|
| 1810 |
"MB": {
|
| 1811 |
"accuracy": 1.0,
|
| 1812 |
+
"count": 100
|
| 1813 |
},
|
| 1814 |
"UB": {
|
| 1815 |
"accuracy": 1.0,
|
| 1816 |
+
"count": 298
|
| 1817 |
},
|
| 1818 |
"UD": {
|
| 1819 |
"accuracy": 1.0,
|
| 1820 |
+
"count": 202
|
| 1821 |
}
|
| 1822 |
}
|
| 1823 |
}
|
| 1824 |
},
|
| 1825 |
"summary": {
|
| 1826 |
+
"overall_accuracy": 1.0,
|
| 1827 |
+
"total_examples": 2400,
|
| 1828 |
"n_splits": 22
|
| 1829 |
}
|
| 1830 |
}
|
add_sub_baseline_50K/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 650266922
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1d271990035c114502ee453f5a961d61cc2caa9996b45559249726b42c2cb6a
|
| 3 |
size 650266922
|
add_sub_baseline_50K/train_config.json
CHANGED
|
@@ -1,35 +1,84 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"ops": "add_sub",
|
| 4 |
-
"n_digits": 6,
|
| 5 |
-
"n_layer": 2,
|
| 6 |
-
"n_head": 3,
|
| 7 |
-
"n_embd": 510,
|
| 8 |
-
"abs_vocab": 0,
|
| 9 |
"K": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"alpha_info_gain": 10.0,
|
| 11 |
"alpha_abs": 0.1,
|
| 12 |
"alpha_soft_zipf": 1.0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"batch_size": 64,
|
|
|
|
| 14 |
"num_epochs": 20,
|
| 15 |
-
"
|
| 16 |
-
"
|
|
|
|
|
|
|
|
|
|
| 17 |
"output_dir": "ckpt/sweep/add_sub_baseline_50K",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
"device": "cuda",
|
| 19 |
"push_to_hub": true,
|
| 20 |
"no_wandb": false,
|
| 21 |
"n_params": 162490082,
|
| 22 |
"run_name": "add_sub_baseline_50K",
|
| 23 |
-
"git_commit": "
|
| 24 |
-
"timestamp": "2026-04-
|
| 25 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 26 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 27 |
"dataset_config": "add_sub_6digit",
|
| 28 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 29 |
"trainer_version": "sft",
|
| 30 |
-
"wandb_run_id": "
|
| 31 |
-
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/
|
| 32 |
-
"final_accuracy":
|
| 33 |
-
"sft_accuracy":
|
| 34 |
"eval_method": "ArithmeticEvaluator"
|
| 35 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"num_rollouts": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"K": 4,
|
| 4 |
+
"max_iterations": 2,
|
| 5 |
+
"memory_span_abs": 1792,
|
| 6 |
+
"memory_span_traj": 1792,
|
| 7 |
+
"temperature": 1.0,
|
| 8 |
+
"ar_search": false,
|
| 9 |
+
"response_only_abs": false,
|
| 10 |
"alpha_info_gain": 10.0,
|
| 11 |
"alpha_abs": 0.1,
|
| 12 |
"alpha_soft_zipf": 1.0,
|
| 13 |
+
"alpha_ortho": 0.0,
|
| 14 |
+
"alpha_anchor": 0.0,
|
| 15 |
+
"alpha_jacobi": 0.0,
|
| 16 |
+
"decay": 0.8,
|
| 17 |
+
"target_vocab_util": 0.8,
|
| 18 |
+
"min_abs_ppl": 0.0,
|
| 19 |
+
"zipf_alpha": 1.0,
|
| 20 |
+
"lr": 4e-05,
|
| 21 |
+
"emb_lr_mult": 1.0,
|
| 22 |
+
"weight_decay": 0.01,
|
| 23 |
+
"warmup_steps": 468,
|
| 24 |
+
"cooldown_frac": 0.4,
|
| 25 |
+
"max_grad_norm": 1.0,
|
| 26 |
+
"vq_abs_pretrain_steps": 0,
|
| 27 |
+
"vq_abs_pretrain_lr": 0.001,
|
| 28 |
+
"vq_abs_pretrain_layer": -1,
|
| 29 |
+
"vq_abs_pretrain_batch_size": 256,
|
| 30 |
+
"vq_abs_pretrain_target_vectors": 20000,
|
| 31 |
"batch_size": 64,
|
| 32 |
+
"gradient_accumulation_steps": 1,
|
| 33 |
"num_epochs": 20,
|
| 34 |
+
"emb_warmup_steps": 0,
|
| 35 |
+
"log_every": 50,
|
| 36 |
+
"eval_every": 781,
|
| 37 |
+
"save_every": 999999,
|
| 38 |
+
"eval_samples": 100,
|
| 39 |
"output_dir": "ckpt/sweep/add_sub_baseline_50K",
|
| 40 |
+
"eval_K": 4,
|
| 41 |
+
"alpha_traj": 0.0,
|
| 42 |
+
"corrupt_method": "shuffle",
|
| 43 |
+
"corrupt_ratio": 0.3,
|
| 44 |
+
"alpha_contrastive": 1.0,
|
| 45 |
+
"gamma_contrastive": 0.5,
|
| 46 |
+
"alpha_masked_traj": 0.0,
|
| 47 |
+
"mask_nl_ratio": 0.3,
|
| 48 |
+
"mask_nl_mode": "fixed",
|
| 49 |
+
"mask_nl_fixed_id": 0,
|
| 50 |
+
"use_ste": true,
|
| 51 |
+
"n_inner": 1,
|
| 52 |
+
"random_K": null,
|
| 53 |
+
"strip_suffix": null,
|
| 54 |
+
"compress_prefix": null,
|
| 55 |
+
"random_mem_span": null,
|
| 56 |
+
"warmup_ratio": 0.03,
|
| 57 |
+
"beta2": 0.999,
|
| 58 |
+
"seed": 42,
|
| 59 |
+
"n_digits": 6,
|
| 60 |
+
"n_layer": 2,
|
| 61 |
+
"n_head": 3,
|
| 62 |
+
"n_embd": 510,
|
| 63 |
+
"ops": "add_sub",
|
| 64 |
+
"abs_vocab": 0,
|
| 65 |
+
"dataset_size": 50000,
|
| 66 |
+
"mode": "baseline",
|
| 67 |
"device": "cuda",
|
| 68 |
"push_to_hub": true,
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 162490082,
|
| 71 |
"run_name": "add_sub_baseline_50K",
|
| 72 |
+
"git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
|
| 73 |
+
"timestamp": "2026-04-12T17:38:15.462759+00:00",
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
| 77 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 78 |
"trainer_version": "sft",
|
| 79 |
+
"wandb_run_id": "2phtgprv",
|
| 80 |
+
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/2phtgprv",
|
| 81 |
+
"final_accuracy": 1.0,
|
| 82 |
+
"sft_accuracy": 1.0,
|
| 83 |
"eval_method": "ArithmeticEvaluator"
|
| 84 |
}
|