diff --git "a/frontend/src/components/DocumentProcessor.jsx" "b/frontend/src/components/DocumentProcessor.jsx" --- "a/frontend/src/components/DocumentProcessor.jsx" +++ "b/frontend/src/components/DocumentProcessor.jsx" @@ -11,12 +11,44 @@ import LoadingAnimation from './LoadingAnimation'; import DocumentViewer from './DocumentViewer'; import ChunkPanel from './ChunkPanel'; import ProgressBar from './ProgressBar'; +import WelcomeScreen from './WelcomeScreen'; function DocumentProcessor() { // State for PDF navigation const [pdfNavigation, setPdfNavigation] = useState(null); // State for first LLM response loading const [waitingForFirstResponse, setWaitingForFirstResponse] = useState(false); + // State for welcome screen visibility + const [showWelcomeScreen, setShowWelcomeScreen] = useState(true); + // State for document controls (like scrollToPage) + const [documentControls, setDocumentControls] = useState(null); + + // Function to get the page number of the first chunk + const getFirstChunkPage = () => { + if (testPreloadedHighlights && testPreloadedHighlights[0] && testPreloadedHighlights[0].length > 0) { + return testPreloadedHighlights[0][0].position.boundingRect.pageNumber; + } + return 1; // Default to page 1 if no highlights found + }; + + // Function to handle "Let's start" click + const handleGetStarted = () => { + setShowWelcomeScreen(false); + + // Scroll to the first chunk after a short delay to ensure the PDF viewer is ready + if (documentControls && documentControls.scrollToFirstChunk) { + setTimeout(() => { + documentControls.scrollToFirstChunk(); + }, 500); + } else { + // Retry after a longer delay + setTimeout(() => { + if (documentControls && documentControls.scrollToFirstChunk) { + documentControls.scrollToFirstChunk(); + } + }, 2000); + } + }; // Custom hooks const { fileInputRef, @@ -59,1441 +91,3384 @@ function DocumentProcessor() { } = usePanelResize(50); // Add test preloaded highlights data - keyed by chunk index - // Cian version + // Lennart version const testPreloadedHighlights = { 0: [{ - "id": "highlight_1755504511620", + "id": "highlight_1755510124371", "position": { "boundingRect": { - "x1": 177.89999389648438, - "y1": 160.8833465576172, - "x2": 829.5500183105469, - "y2": 252.83331298828125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.35000610351562, + "y1": 875.3666687011719, + "x2": 793.3666534423828, + "y2": 1116.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 160.8833465576172, - "x2": 829.3500366210938, - "y2": 180.9833526611328, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 178.8833465576172, - "x2": 826.1000061035156, - "y2": 198.9833526611328, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 196.7333221435547, - "x2": 829.5500183105469, - "y2": 216.83331298828125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 214.7333221435547, - "x2": 826.4333190917969, - "y2": 234.83331298828125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 232.7333221435547, - "x2": 345.4499969482422, - "y2": 252.83331298828125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.96665954589844, + "y1": 875.3666687011719, + "x2": 303.3500061035156, + "y2": 895.3666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.96665954589844, + "y1": 909.0333251953125, + "x2": 791.4333343505859, + "y2": 929.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.35000610351562, + "y1": 929.5, + "x2": 791.5333557128906, + "y2": 949.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.96665954589844, + "y1": 949.9666748046875, + "x2": 510.78334045410156, + "y2": 969.9666748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 436.2833251953125, + "y1": 976.316650390625, + "x2": 439.34999084472656, + "y2": 993.316650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 571.2166748046875, + "y1": 976.316650390625, + "x2": 574.2833557128906, + "y2": 993.316650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 369.66668701171875, + "y1": 986.2166748046875, + "x2": 401.4166717529297, + "y2": 1003.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 444, + "y1": 986.566650390625, + "x2": 513.4833679199219, + "y2": 1003.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 578.9500122070312, + "y1": 986.566650390625, + "x2": 648.4166564941406, + "y2": 1003.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 471.33331298828125, + "y1": 992.7666625976562, + "x2": 476.95001220703125, + "y2": 1002.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 498.2166748046875, + "y1": 992.7666625976562, + "x2": 510.566650390625, + "y2": 1002.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 606.2833251953125, + "y1": 992.7666625976562, + "x2": 611.88330078125, + "y2": 1002.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 633.1500244140625, + "y1": 992.7666625976562, + "x2": 645.5166625976562, + "y2": 1002.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 452.3666687011719, + "y1": 993.2833251953125, + "x2": 460.76666259765625, + "y2": 1003.3833160400391, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 587.316650390625, + "y1": 993.2833251953125, + "x2": 595.7000122070312, + "y2": 1003.3833160400391, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 145, + "y1": 995.5, + "x2": 316.9666748046875, + "y2": 1014.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 329.83331298828125, + "y1": 995.5, + "x2": 366.43333435058594, + "y2": 1012.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 404.26666259765625, + "y1": 995.5, + "x2": 428.1166687011719, + "y2": 1012.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 656.9833374023438, + "y1": 995.5, + "x2": 757.6833648681641, + "y2": 1012.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 777.5333251953125, + "y1": 995.5, + "x2": 793.3666534423828, + "y2": 1012.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 523.0333251953125, + "y1": 995.7166748046875, + "x2": 562.9500122070312, + "y2": 1011.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 153.71665954589844, + "y1": 1001.9166870117188, + "x2": 175.3333282470703, + "y2": 1012.0166778564453, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 530.6666870117188, + "y1": 1001.9166870117188, + "x2": 536.3666534423828, + "y2": 1012.0166778564453, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 738.25, + "y1": 1001.9166870117188, + "x2": 741.6000061035156, + "y2": 1012.0166778564453, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 310.95001220703125, + "y1": 1002.5499877929688, + "x2": 328.9499969482422, + "y2": 1012.6499786376953, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 366.6000061035156, + "y1": 1004.7833251953125, + "x2": 383, + "y2": 1021.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 465.58331298828125, + "y1": 1004.7833251953125, + "x2": 522.6833648681641, + "y2": 1021.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 600.5166625976562, + "y1": 1004.7833251953125, + "x2": 657.6166534423828, + "y2": 1021.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 438.3500061035156, + "y1": 1005, + "x2": 452.7166748046875, + "y2": 1021, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 573.2833251953125, + "y1": 1005, + "x2": 587.6667022705078, + "y2": 1021, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 477, + "y1": 1011.2000122070312, + "x2": 482.70001220703125, + "y2": 1021.3000183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 503.9666748046875, + "y1": 1011.2000122070312, + "x2": 516.2333526611328, + "y2": 1021.3000183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 611.933349609375, + "y1": 1011.2000122070312, + "x2": 617.63330078125, + "y2": 1021.3000183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 638.9166870117188, + "y1": 1011.2000122070312, + "x2": 651.1666412353516, + "y2": 1021.3000183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 446.70001220703125, + "y1": 1011.8333129882812, + "x2": 464.6999969482422, + "y2": 1021.9333190917969, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 581.6500244140625, + "y1": 1011.8333129882812, + "x2": 599.6333465576172, + "y2": 1021.9333190917969, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 385.1166687011719, + "y1": 1014.2166748046875, + "x2": 400.93333435058594, + "y2": 1028.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.35000610351562, + "y1": 1035.25, + "x2": 167.4166717529297, + "y2": 1055.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 179.43333435058594, + "y1": 1035.25, + "x2": 222.78334045410156, + "y2": 1055.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 247.39999389648438, + "y1": 1035.25, + "x2": 791.8333129882812, + "y2": 1055.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 167.38333129882812, + "y1": 1041.1500244140625, + "x2": 176.53334045410156, + "y2": 1056.1500244140625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 222.76666259765625, + "y1": 1041.1500244140625, + "x2": 240.68333435058594, + "y2": 1056.1500244140625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 229.23333740234375, + "y1": 1047.683349609375, + "x2": 242.6666717529297, + "y2": 1056.1333465576172, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.96665954589844, + "y1": 1055.5833129882812, + "x2": 511.36668395996094, + "y2": 1075.5833129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 531.2999877929688, + "y1": 1055.5833129882812, + "x2": 791.9499969482422, + "y2": 1075.5833129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 511.3499755859375, + "y1": 1061.5999755859375, + "x2": 529.2666473388672, + "y2": 1076.5999755859375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 517.816650390625, + "y1": 1068.13330078125, + "x2": 531.2333221435547, + "y2": 1076.5832977294922, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.96665954589844, + "y1": 1076.0499877929688, + "x2": 581.6666870117188, + "y2": 1096.0499877929688, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 592.61669921875, + "y1": 1076.0499877929688, + "x2": 791.7833709716797, + "y2": 1096.0499877929688, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 581.816650390625, + "y1": 1082.066650390625, + "x2": 588.0166625976562, + "y2": 1097.066650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 + }, + { + "x1": 106.96665954589844, + "y1": 1096.5166625976562, + "x2": 791.4333343505859, + "y2": 1116.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 11 } ] }, "content": { - "text": "Recurrent neural networks, long short-term memory [13] and gated recurrent [7] neural networks\r in particular, have been firmly established as state of the art approaches in sequence modeling and\r transduction problems such as language modeling and machine translation [ 35 , 2 , 5]. Numerous\r efforts have since continued to push the boundaries of recurrent language models and encoder-decoder\r architectures [38, 24, 15]." + "text": "4.1.1. From PPO to GRPO\r Proximal Policy Optimization (PPO) (Schulman et al., 2017) is an actor-critic RL algorithm that is\r widely used in the RL fine-tuning stage of LLMs (Ouyang et al., 2022). In particular, it optimizes\r LLMs by maximizing the following surrogate objective:\r Jπππ (π) = E[π βΌ π(π), π βΌ πππππ (π|π)] 1\r |π|\r |π|βοΈ\r π‘=1\r min\r \u0014 ππ (ππ‘ |π, π<π‘ )\r πππππ (ππ‘ |π, π<π‘ ) π΄π‘ , clip\r \u0012 ππ (ππ‘ |π, π<π‘ )\r πππππ (ππ‘ |π, π<π‘ ) , 1 β π, 1 + π\r \u0013\r π΄π‘\r \u0015\r , (1)\r where ππ and πππππ are the current and old policy models, and π, π are questions and outputs\r sampled from the question dataset and the old policy πππππ , respectively. π is a clipping-related\r hyper-parameter introduced in PPO for stabilizing training. π΄π‘ is the advantage, which is\r computed by applying Generalized Advantage Estimation (GAE) (Schulman et al., 2015), based" } -}], - 1: [{ - "id": "highlight_1755504528640", +}, { + "id": "highlight_1755510198577", "position": { "boundingRect": { - "x1": 177.89999389648438, - "y1": 259.71665954589844, - "x2": 829.9999694824219, - "y2": 405.5333251953125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.35000610351562, + "y1": 489.98333740234375, + "x2": 793.6666564941406, + "y2": 678.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 259.71665954589844, - "x2": 826.1333312988281, - "y2": 279.81666564941406, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 277.71665954589844, - "x2": 826.0833435058594, - "y2": 297.81666564941406, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 295.5833282470703, - "x2": 520.6666564941406, - "y2": 315.68333435058594, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 548.25, - "y1": 295.5833282470703, - "x2": 829.9999694824219, - "y2": 315.68333435058594, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 227.5, - "y1": 302.2999725341797, - "x2": 231.35000610351562, - "y2": 316.81663513183594, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 521.5333251953125, - "y1": 302.2999725341797, - "x2": 545.8999786376953, - "y2": 316.81663513183594, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 313.5832977294922, - "x2": 826.3999938964844, - "y2": 333.6833038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 331.5666961669922, - "x2": 826.1166687011719, - "y2": 351.66668701171875, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 349.5666961669922, - "x2": 829.4666442871094, - "y2": 369.66668701171875, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 367.43333435058594, - "x2": 826.7833251953125, - "y2": 387.5333251953125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 385.43333435058594, - "x2": 542.1999816894531, - "y2": 405.5333251953125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.96665954589844, + "y1": 489.98333740234375, + "x2": 234.93333435058594, + "y2": 509.98333740234375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 249.2833251953125, + "y1": 489.98333740234375, + "x2": 791.88330078125, + "y2": 509.98333740234375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 483.75, + "y1": 495.8833312988281, + "x2": 491.76666259765625, + "y2": 510.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 235.7166748046875, + "y1": 496.6000061035156, + "x2": 250.25, + "y2": 510.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 510.45001220703125, + "x2": 793.4833221435547, + "y2": 530.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 530.7833251953125, + "x2": 791.5166473388672, + "y2": 550.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 551.25, + "x2": 372.1999969482422, + "y2": 571.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 481.1333312988281, + "y1": 582.7666625976562, + "x2": 545.3000183105469, + "y2": 602.7666625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 557.7333374023438, + "y1": 583.4166870117188, + "x2": 562.7333374023438, + "y2": 603.4166870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 491.38330078125, + "y1": 589.433349609375, + "x2": 500.45001220703125, + "y2": 604.433349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 513.5, + "y1": 589.433349609375, + "x2": 519.683349609375, + "y2": 604.433349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 545.5999755859375, + "y1": 589.433349609375, + "x2": 559.066650390625, + "y2": 604.433349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 324.26666259765625, + "y1": 593.9500122070312, + "x2": 360.1499938964844, + "y2": 613.9500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 370.73333740234375, + "y1": 594.6000061035156, + "x2": 399.3000183105469, + "y2": 614.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 413.26666259765625, + "y1": 594.6000061035156, + "x2": 475.6166687011719, + "y2": 614.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 570.5833129882812, + "y1": 594.6000061035156, + "x2": 578.4500122070312, + "y2": 614.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 774.38330078125, + "y1": 594.6000061035156, + "x2": 793.5832977294922, + "y2": 614.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 330.3666687011719, + "y1": 600.6166687011719, + "x2": 336.566650390625, + "y2": 615.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 360.3999938964844, + "y1": 600.6166687011719, + "x2": 371.5333251953125, + "y2": 615.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 400.04998779296875, + "y1": 601.3333129882812, + "x2": 414.6000061035156, + "y2": 615.3333129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 476.4666748046875, + "y1": 605.25, + "x2": 486.65000915527344, + "y2": 625.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 504.23333740234375, + "y1": 605.8999938964844, + "x2": 550.0666809082031, + "y2": 625.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 562.4000244140625, + "y1": 605.8999938964844, + "x2": 571.6166381835938, + "y2": 625.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 486.6166687011719, + "y1": 611.933349609375, + "x2": 503.76666259765625, + "y2": 626.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 518.183349609375, + "y1": 611.933349609375, + "x2": 524.36669921875, + "y2": 626.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 550.2666625976562, + "y1": 611.933349609375, + "x2": 563.8333129882812, + "y2": 626.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.35000610351562, + "y1": 638.566650390625, + "x2": 160.76666259765625, + "y2": 658.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 174.6666717529297, + "y1": 638.566650390625, + "x2": 335.5166473388672, + "y2": 658.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 356.3500061035156, + "y1": 638.566650390625, + "x2": 793.6666564941406, + "y2": 658.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 161.10000610351562, + "y1": 644.4666748046875, + "x2": 172.21665954589844, + "y2": 659.4666748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 335.5, + "y1": 644.4666748046875, + "x2": 352.6499938964844, + "y2": 659.4666748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 658.8999938964844, + "x2": 402.93333435058594, + "y2": 678.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 } ] }, "content": { - "text": "Recurrent models typically factor computation along the symbol positions of the input and output\r sequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\r states ht, as a function of the previous hidden state htβ1 and the input for position t. This inherently\r sequential nature precludes parallelization within training examples, which becomes critical at longer\r sequence lengths, as memory constraints limit batching across examples. Recent work has achieved\r significant improvements in computational efficiency through factorization tricks [ 21 ] and conditional\r computation [ 32 ], while also improving model performance in case of the latter. The fundamental\r constraint of sequential computation, however, remains." + "text": "on the rewards {πβ₯π‘ } and a learned value function ππ. Thus, in PPO, a value function needs to\r be trained alongside the policy model and to mitigate over-optimization of the reward model,\r the standard approach is to add a per-token KL penalty from a reference model in the reward at\r each token (Ouyang et al., 2022), i.e.,\r ππ‘ = ππ (π, πβ€π‘ ) β π½ log ππ (ππ‘ |π, π<π‘ )\r πππ π (ππ‘ |π, π<π‘ ) , (2)\r where ππ is the reward model, πππ π is the reference model, which is usually the initial SFT model,\r and π½ is the coefficient of the KL penalty." } }], - 2: [{ - "id": "highlight_1755504541270", + 1: [{ + "id": "highlight_1755510258186", "position": { "boundingRect": { - "x1": 177.3000030517578, - "y1": 412.4166717529297, - "x2": 828.8333282470703, - "y2": 486.3833312988281, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.96665954589844, + "y1": 689.6666717529297, + "x2": 793.5833587646484, + "y2": 811.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 }, "rects": [ { - "x1": 177.3000030517578, - "y1": 412.4166717529297, - "x2": 828.8333282470703, - "y2": 432.51666259765625, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 430.4166717529297, - "x2": 826.1333312988281, - "y2": 450.51666259765625, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 448.28334045410156, - "x2": 828.5833282470703, - "y2": 468.3833312988281, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 466.28334045410156, - "x2": 498.3333435058594, - "y2": 486.3833312988281, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 132.60000610351562, + "y1": 689.6666717529297, + "x2": 791.6000061035156, + "y2": 709.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 710.1333312988281, + "x2": 793.5833587646484, + "y2": 730.1333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 730.6000061035156, + "x2": 791.4833221435547, + "y2": 750.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 751.0500030517578, + "x2": 791.5666351318359, + "y2": 771.0500030517578, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 771.5166625976562, + "x2": 791.4499969482422, + "y2": 791.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 791.9833374023438, + "x2": 270.6999969482422, + "y2": 811.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 } ] }, "content": { - "text": "Attention mechanisms have become an integral part of compelling sequence modeling and transduc-\r tion models in various tasks, allowing modeling of dependencies without regard to their distance in\r the input or output sequences [ 2, 19 ]. In all but a few cases [ 27 ], however, such attention mechanisms\r are used in conjunction with a recurrent network." + "text": "As the value function employed in PPO is typically another model of comparable size as\r the policy model, it brings a substantial memory and computational burden. Additionally,\r during RL training, the value function is treated as a baseline in the calculation of the advantage\r for variance reduction. While in the LLM context, usually only the last token is assigned a\r reward score by the reward model, which may complicate the training of a value function that is\r accurate at each token" } }], - 3: [{ - "id": "highlight_1755504577785", + 2: [{ + "id": "highlight_1755510434611", "position": { "boundingRect": { - "x1": 177.39999389648438, - "y1": 638.9333190917969, - "x2": 828.9333343505859, - "y2": 784.7333221435547, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.96665954589844, + "y1": 791.9833374023438, + "x2": 792.183349609375, + "y2": 873.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 }, "rects": [ { - "x1": 177.39999389648438, - "y1": 638.9333190917969, - "x2": 826.1166687011719, - "y2": 659.0333099365234, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 656.7999877929688, - "x2": 827.5333557128906, - "y2": 676.8999938964844, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 674.7833251953125, - "x2": 828.2166442871094, - "y2": 694.8833312988281, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 692.7833251953125, - "x2": 826.1166687011719, - "y2": 712.8833312988281, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 710.6499938964844, - "x2": 826.1999816894531, - "y2": 730.75, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 728.6499938964844, - "x2": 828.9333343505859, - "y2": 748.75, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 746.6333312988281, - "x2": 826.0833435058594, - "y2": 766.7333221435547, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 764.6333312988281, - "x2": 826.1000061035156, - "y2": 784.7333221435547, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 275.2166748046875, + "y1": 791.9833374023438, + "x2": 792.183349609375, + "y2": 811.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 812.4499969482422, + "x2": 791.4999847412109, + "y2": 832.4499969482422, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 832.7833404541016, + "x2": 791.5166473388672, + "y2": 852.7833404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 853.25, + "x2": 379.56666564941406, + "y2": 873.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 } ] }, "content": { - "text": "The goal of reducing sequential computation also forms the foundation of the Extended Neural GPU\r [16 ], ByteNet [ 18 ] and ConvS2S [ 9], all of which use convolutional neural networks as basic building\r block, computing hidden representations in parallel for all input and output positions. In these models,\r the number of operations required to relate signals from two arbitrary input or output positions grows\r in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes\r it more difficult to learn dependencies between distant positions [12 ]. In the Transformer this is\r reduced to a constant number of operations, albeit at the cost of reduced effective resolution due\r to averaging attention-weighted positions, an effect we counteract with Multi-Head Attention as\r described in section 3.2." + "text": " To address this, as shown in Figure 4, we propose Group Relative Policy\r Optimization (GRPO), which obviates the need for additional value function approximation as\r in PPO, and instead uses the average reward of multiple sampled outputs, produced in response\r to the same question, as the baseline" } }], - 4: [{ - "id": "highlight_1755504661834", + 3: [{ + "id": "highlight_1755510464773", "position": { "boundingRect": { - "x1": 177.39999389648438, - "y1": 809.4833374023438, - "x2": 828.8500061035156, - "y2": 1027.2833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.35000610351562, + "y1": 853.25, + "x2": 794.2499847412109, + "y2": 1066.183349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 809.4833374023438, - "x2": 826.1499938964844, - "y2": 829.5833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 827.4833374023438, - "x2": 826.0833435058594, - "y2": 847.5833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 845.4833374023438, - "x2": 828.1833190917969, - "y2": 865.5833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 863.4833374023438, - "x2": 759.1999816894531, - "y2": 883.5833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 890.4666748046875, - "x2": 828.7833557128906, - "y2": 910.5666809082031, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 908.3333129882812, - "x2": 826.1333312988281, - "y2": 928.4333190917969, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 926.3333129882812, - "x2": 374.46665954589844, - "y2": 946.4333190917969, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.39999389648438, - "y1": 953.316650390625, - "x2": 826.1333312988281, - "y2": 973.4166564941406, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 971.316650390625, - "x2": 828.8500061035156, - "y2": 991.4166564941406, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 989.183349609375, - "x2": 826.1000061035156, - "y2": 1009.2833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 1007.183349609375, - "x2": 684.2666625976562, - "y2": 1027.2833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 388.20001220703125, + "y1": 853.25, + "x2": 791.8666839599609, + "y2": 873.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 873.7000122070312, + "x2": 499.58335876464844, + "y2": 893.7000122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 523.6666870117188, + "y1": 873.7000122070312, + "x2": 791.7833557128906, + "y2": 893.7000122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 324.7166748046875, + "y1": 879.7333374023438, + "x2": 334.58331298828125, + "y2": 894.7333374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 499.566650390625, + "y1": 879.7333374023438, + "x2": 517.4833221435547, + "y2": 894.7333374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 272.2166748046875, + "y1": 880.4500122070312, + "x2": 278.3166809082031, + "y2": 894.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 250.7166748046875, + "y1": 880.566650390625, + "x2": 256.8166809082031, + "y2": 894.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 506.0333251953125, + "y1": 886.2666625976562, + "x2": 519.4666442871094, + "y2": 894.7166595458984, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 894.1666870117188, + "x2": 390.5833282470703, + "y2": 914.1666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 306.6333312988281, + "y1": 923.7166748046875, + "x2": 327.3666534423828, + "y2": 942.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 121.71665954589844, + "y1": 925.7166748046875, + "x2": 293.46665954589844, + "y2": 944.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 346.2833251953125, + "y1": 925.7166748046875, + "x2": 379.933349609375, + "y2": 942.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 306.6333312988281, + "y1": 927.433349609375, + "x2": 333.4166717529297, + "y2": 940.9833526611328, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 129.89999389648438, + "y1": 932.1499938964844, + "x2": 159.3333282470703, + "y2": 942.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 276.8833312988281, + "y1": 932.5166625976562, + "x2": 304.6333312988281, + "y2": 942.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 327.3999938964844, + "y1": 932.7833251953125, + "x2": 345.40000915527344, + "y2": 942.8833160400391, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 293.8666687011719, + "y1": 944.5333251953125, + "x2": 296.93333435058594, + "y2": 961.5333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 446.70001220703125, + "y1": 944.5333251953125, + "x2": 449.7666778564453, + "y2": 961.5333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 777.5333251953125, + "y1": 951.0166625976562, + "x2": 793.3666534423828, + "y2": 968.0166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 745.5333251953125, + "y1": 952.6666870117188, + "x2": 751.566650390625, + "y2": 969.6666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 301.70001220703125, + "y1": 953.7666625976562, + "x2": 383.0333557128906, + "y2": 969.7666625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 454.5333251953125, + "y1": 953.7666625976562, + "x2": 535.8666687011719, + "y2": 969.7666625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 244.88333129882812, + "y1": 954.1333312988281, + "x2": 248.74998474121094, + "y2": 963.1333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 178.98333740234375, + "y1": 954.4500122070312, + "x2": 202.76666259765625, + "y2": 971.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 218.26666259765625, + "y1": 954.4500122070312, + "x2": 253.6999969482422, + "y2": 971.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 329.20001220703125, + "y1": 960.2333374023438, + "x2": 340.566650390625, + "y2": 970.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 361.83331298828125, + "y1": 960.2333374023438, + "x2": 380.1166534423828, + "y2": 970.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 482.0333251953125, + "y1": 960.2333374023438, + "x2": 493.4000244140625, + "y2": 970.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 514.6666870117188, + "y1": 960.2333374023438, + "x2": 532.9500122070312, + "y2": 970.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 310.04998779296875, + "y1": 960.4833374023438, + "x2": 318.3666534423828, + "y2": 970.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 462.8833312988281, + "y1": 960.4833374023438, + "x2": 471.20001220703125, + "y2": 970.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 261.8666687011719, + "y1": 963.6000061035156, + "x2": 285.71665954589844, + "y2": 980.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 544.5166625976562, + "y1": 963.6000061035156, + "x2": 635.0666809082031, + "y2": 980.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 645.11669921875, + "y1": 963.6000061035156, + "x2": 743.1999664306641, + "y2": 980.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 760.5499877929688, + "y1": 963.6000061035156, + "x2": 767.0333557128906, + "y2": 980.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 392.58331298828125, + "y1": 963.933349609375, + "x2": 438.53334045410156, + "y2": 980.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 677.38330078125, + "y1": 970.1499938964844, + "x2": 689.6999664306641, + "y2": 980.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 400.5, + "y1": 970.3999938964844, + "x2": 411.95001220703125, + "y2": 980.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 625.9666748046875, + "y1": 970.3999938964844, + "x2": 635.0666809082031, + "y2": 980.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 707.1500244140625, + "y1": 970.6499938964844, + "x2": 715.4500122070312, + "y2": 983.6499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 729.88330078125, + "y1": 970.6499938964844, + "x2": 743.1999664306641, + "y2": 980.75, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 213.23333740234375, + "y1": 972.8833312988281, + "x2": 233.6666717529297, + "y2": 989.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 323.26666259765625, + "y1": 972.8833312988281, + "x2": 392.2333526611328, + "y2": 989.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 476.1000061035156, + "y1": 972.8833312988281, + "x2": 545.0666656494141, + "y2": 989.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 177.81666564941406, + "y1": 973.2166748046875, + "x2": 186.06666564941406, + "y2": 989.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 295.95001220703125, + "y1": 973.2166748046875, + "x2": 310.316650390625, + "y2": 989.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 448.8666687011719, + "y1": 973.2166748046875, + "x2": 463.2333526611328, + "y2": 989.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 223.11666870117188, + "y1": 979.683349609375, + "x2": 251.6166534423828, + "y2": 992.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 334.8666687011719, + "y1": 979.683349609375, + "x2": 346.316650390625, + "y2": 989.7833557128906, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 367.5, + "y1": 979.683349609375, + "x2": 385.8833312988281, + "y2": 992.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 487.70001220703125, + "y1": 979.683349609375, + "x2": 499.1500244140625, + "y2": 989.7833557128906, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 520.4166870117188, + "y1": 979.683349609375, + "x2": 538.7000122070312, + "y2": 992.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 304.29998779296875, + "y1": 979.933349609375, + "x2": 322.3833465576172, + "y2": 990.0333557128906, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 457.2166748046875, + "y1": 979.933349609375, + "x2": 475.21665954589844, + "y2": 990.0333557128906, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 192.28334045410156, + "y1": 982.433349609375, + "x2": 207.38333129882812, + "y2": 996.433349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 235.79998779296875, + "y1": 984.8833312988281, + "x2": 239.1499786376953, + "y2": 994.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.35000610351562, + "y1": 1005.25, + "x2": 436.316650390625, + "y2": 1025.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 452.8999938964844, + "y1": 1005.25, + "x2": 791.7166442871094, + "y2": 1025.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 436.2833251953125, + "y1": 1011.1499938964844, + "x2": 448.75, + "y2": 1026.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 1025.7166748046875, + "x2": 794.2499847412109, + "y2": 1045.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 1046.183349609375, + "x2": 147.31666564941406, + "y2": 1066.183349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 } ] }, "content": { - "text": "Self-attention, sometimes called intra-attention is an attention mechanism relating different positions\r of a single sequence in order to compute a representation of the sequence. Self-attention has been\r used successfully in a variety of tasks including reading comprehension, abstractive summarization,\r textual entailment and learning task-independent sentence representations [4, 27, 28, 22].\r End-to-end memory networks are based on a recurrent attention mechanism instead of sequence-\r aligned recurrence and have been shown to perform well on simple-language question answering and\r language modeling tasks [34].\r To the best of our knowledge, however, the Transformer is the first transduction model relying\r entirely on self-attention to compute representations of its input and output without using sequence-\r aligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate\r self-attention and discuss its advantages over models such as [17, 18] and [9]." + "text": "More specifically, for each question π, GRPO samples a\r group of outputs {π1, π2, Β· Β· Β· , ππΊ } from the old policy πππππ and then optimizes the policy model\r by maximizing the following objective:\r JπΊπ ππ (π) = E[π βΌ π(π), {ππ }πΊ\r π=1 βΌ πππππ (π|π)]\r 1\r πΊ\r πΊβοΈ\r π=1\r 1\r |ππ |\r |ππ |βοΈ\r π‘=1\r \u001a\r min\r \u0014 ππ (ππ,π‘ |π, ππ,<π‘ )\r πππππ (ππ,π‘ |π, ππ,<π‘ ) Λπ΄π,π‘ , clip\r \u0012 ππ (ππ,π‘ |π, ππ,<π‘ )\r πππππ (ππ,π‘ |π, ππ,<π‘ ) , 1 β π, 1 + π\r \u0013\r Λπ΄π,π‘\r \u0015\r β π½DπΎπΏ\r \u0002ππ ||πππ π\r \u0003 \u001b\r , (3)\r where π and π½ are hyper-parameters, and Λπ΄π,π‘ is the advantage calculated based on relative\r rewards of the outputs inside each group only, which will be detailed in the following subsec-\r tions." } }], - 5: [{ - "id": "highlight_1755504696254", + 4: [{ + "id": "highlight_1755510508789", "position": { "boundingRect": { - "x1": 177.89999389648438, - "y1": 1098.8499755859375, - "x2": 832.8833465576172, - "y2": 1190.7999725341797, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 106.51666259765625, + "y1": 1046.183349609375, + "x2": 794.9166717529297, + "y2": 1148.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 1098.8499755859375, - "x2": 832.8833465576172, - "y2": 1118.9499816894531, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 1116.8499755859375, - "x2": 829.5166931152344, - "y2": 1136.9499816894531, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 671.316650390625, - "y1": 1123.566650390625, - "x2": 677.1333160400391, - "y2": 1138.0833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 716.2833251953125, - "y1": 1123.566650390625, - "x2": 722.316650390625, - "y2": 1138.0833282470703, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 1134.8333129882812, - "x2": 828.5333251953125, - "y2": 1154.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 436.45001220703125, - "y1": 1141.4332885742188, - "x2": 442.2666778564453, - "y2": 1155.949966430664, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 479.70001220703125, - "y1": 1141.4332885742188, - "x2": 485.7333526611328, - "y2": 1155.949966430664, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 1152.8333129882812, - "x2": 301.25001525878906, - "y2": 1172.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 313.98333740234375, - "y1": 1152.8333129882812, - "x2": 826.9166564941406, - "y2": 1172.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 257.83331298828125, - "y1": 1159.4332885742188, - "x2": 263.6499786376953, - "y2": 1173.949966430664, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 301.48333740234375, - "y1": 1159.4332885742188, - "x2": 310.6499938964844, - "y2": 1173.949966430664, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 - }, - { - "x1": 177.89999389648438, - "y1": 1170.6999816894531, - "x2": 804.1000061035156, - "y2": 1190.7999725341797, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 2 + "x1": 151.38333129882812, + "y1": 1046.183349609375, + "x2": 791.4166564941406, + "y2": 1066.183349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 1066.6499938964844, + "x2": 791.4999847412109, + "y2": 1086.6499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 1086.9833374023438, + "x2": 792.1000213623047, + "y2": 1106.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.51666259765625, + "y1": 1107.4500122070312, + "x2": 791.4166870117188, + "y2": 1127.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 106.96665954589844, + "y1": 1127.8999938964844, + "x2": 779.6499633789062, + "y2": 1147.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 790.75, + "y1": 1127.8999938964844, + "x2": 794.9166717529297, + "y2": 1147.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 + }, + { + "x1": 779.5999755859375, + "y1": 1133.933349609375, + "x2": 792.0833129882812, + "y2": 1148.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 13 } ] }, "content": { - "text": "Most competitive neural sequence transduction models have an encoder-decoder structure [ 5, 2 , 35].\r Here, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence\r of continuous representations z = (z1, ..., zn). Given z, the decoder then generates an output\r sequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regressive\r [10], consuming the previously generated symbols as additional input when generating the next." + "text": "The group relative way that GRPO leverages to calculate the advantages, aligns well with\r the comparative nature of rewards models, as reward models are typically trained on datasets\r of comparisons between outputs on the same question. Also note that, instead of adding KL\r penalty in the reward, GRPO regularizes by directly adding the KL divergence between the\r trained policy and the reference policy to the loss, avoiding complicating the calculation of Λπ΄π,π‘ ." } }], - 6: [{ - "id": "highlight_1755504745750", + 5: [{ + "id": "highlight_1755510538733", "position": { "boundingRect": { - "x1": 177.3000030517578, - "y1": 826.0499877929688, - "x2": 829.4333343505859, - "y2": 1099.6499938964844, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 + "x1": 106.35000610351562, + "y1": 458.6000061035156, + "x2": 793.5333557128906, + "y2": 584.8500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 826.0499877929688, - "x2": 247.6333465576172, - "y2": 846.1499938964844, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 260.25, - "y1": 826.0499877929688, - "x2": 828.5832977294922, - "y2": 846.1499938964844, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 843.9166564941406, - "x2": 828.8666687011719, - "y2": 864.0166473388672, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.3000030517578, - "y1": 861.9166564941406, - "x2": 829.4333343505859, - "y2": 882.0166473388672, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 879.8999938964844, - "x2": 828.3499755859375, - "y2": 899.9999847412109, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 897.8999938964844, - "x2": 828.4833374023438, - "y2": 917.9999847412109, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 915.7666625976562, - "x2": 826.1833190917969, - "y2": 935.8666687011719, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 933.7666625976562, - "x2": 433.48333740234375, - "y2": 953.8666687011719, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 467.5, - "y1": 933.7666625976562, - "x2": 513.5666809082031, - "y2": 953.8666687011719, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 433.433349609375, - "y1": 940.3499755859375, - "x2": 464.7666778564453, - "y2": 954.8666381835938, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 971.8333129882812, - "x2": 244.10000610351562, - "y2": 991.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 256.83331298828125, - "y1": 971.8333129882812, - "x2": 828.5499877929688, - "y2": 991.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 989.8333129882812, - "x2": 826.1333312988281, - "y2": 1009.9333038330078, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 1007.6999816894531, - "x2": 826.1333312988281, - "y2": 1027.7999877929688, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 1025.6999816894531, - "x2": 826.1000061035156, - "y2": 1045.7999877929688, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 1043.6833190917969, - "x2": 826.1833190917969, - "y2": 1063.7833251953125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 1061.6833190917969, - "x2": 826.1000061035156, - "y2": 1081.7833251953125, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.89999389648438, - "y1": 1079.5499877929688, - "x2": 752.1666564941406, - "y2": 1099.6499938964844, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 + "x1": 106.35000610351562, + "y1": 458.6000061035156, + "x2": 791.3333435058594, + "y2": 478.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 479.06666564941406, + "x2": 453.6999969482422, + "y2": 499.06666564941406, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 373.433349609375, + "y1": 511.18333435058594, + "x2": 382.70001220703125, + "y2": 530.1833343505859, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 512.3333129882812, + "y1": 511.18333435058594, + "x2": 521.5999908447266, + "y2": 530.1833343505859, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 353.0333251953125, + "y1": 511.5, + "x2": 356.4333190917969, + "y2": 529.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 422.70001220703125, + "y1": 512.2666625976562, + "x2": 468.6166534423828, + "y2": 531.2666625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 561.5999755859375, + "y1": 512.2666625976562, + "x2": 607.5999908447266, + "y2": 531.2666625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 373.433349609375, + "y1": 514.75, + "x2": 423.28334045410156, + "y2": 528.75, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 512.3333129882812, + "y1": 514.75, + "x2": 562.1666412353516, + "y2": 528.75, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 446.9666748046875, + "y1": 519.8333282470703, + "x2": 465.26666259765625, + "y2": 529.9333343505859, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 521.5833129882812, + "y1": 519.8333282470703, + "x2": 537.2666625976562, + "y2": 529.9333343505859, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 585.86669921875, + "y1": 519.8333282470703, + "x2": 604.1500244140625, + "y2": 529.9333343505859, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 298.4666748046875, + "y1": 522.3666687011719, + "x2": 332.183349609375, + "y2": 541.3666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 261.683349609375, + "y1": 523.5833282470703, + "x2": 270.95001220703125, + "y2": 541.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 474.566650390625, + "y1": 523.5833282470703, + "x2": 511.4166717529297, + "y2": 541.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 613.4666748046875, + "y1": 523.5833282470703, + "x2": 640.6999816894531, + "y2": 541.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 775.9166870117188, + "y1": 523.5833282470703, + "x2": 793.5333557128906, + "y2": 541.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 358.51666259765625, + "y1": 524.7833404541016, + "x2": 398.3666534423828, + "y2": 539.8333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 276.16668701171875, + "y1": 528.6999969482422, + "x2": 345.5833282470703, + "y2": 542.6999969482422, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 377.75, + "y1": 532.6666717529297, + "x2": 407.24998474121094, + "y2": 551.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 516.6500244140625, + "y1": 532.6666717529297, + "x2": 546.1500091552734, + "y2": 551.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 418.29998779296875, + "y1": 533.8666687011719, + "x2": 442.40000915527344, + "y2": 552.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 557.2833251953125, + "y1": 533.8666687011719, + "x2": 581.3000335693359, + "y2": 552.8666687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 377.75, + "y1": 536.2333374023438, + "x2": 468.1666717529297, + "y2": 554.2333374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 516.6500244140625, + "y1": 536.2333374023438, + "x2": 607.0666809082031, + "y2": 554.2333374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 387.01666259765625, + "y1": 541.4333343505859, + "x2": 395.316650390625, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 407.51666259765625, + "y1": 541.4333343505859, + "x2": 418.8666534423828, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 442.66668701171875, + "y1": 541.4333343505859, + "x2": 460.95001220703125, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 526, + "y1": 541.4333343505859, + "x2": 534.3000030517578, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 546.4000244140625, + "y1": 541.4333343505859, + "x2": 557.8499908447266, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 581.5499877929688, + "y1": 541.4333343505859, + "x2": 599.8333129882812, + "y2": 551.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.35000610351562, + "y1": 564.8500061035156, + "x2": 360.01666259765625, + "y2": 584.8500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 } ] }, "content": { - "text": "Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two\r sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-\r wise fully connected feed-forward network. We employ a residual connection [ 11 ] around each of\r the two sub-layers, followed by layer normalization [1]. That is, the output of each sub-layer is\r LayerNorm(x + Sublayer(x)), where Sublayer(x) is the function implemented by the sub-layer\r itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding\r layers, produce outputs of dimension dmodel = 512.\r Decoder: The decoder is also composed of a stack of N = 6 identical layers. In addition to the two\r sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head\r attention over the output of the encoder stack. Similar to the encoder, we employ residual connections\r around each of the sub-layers, followed by layer normalization. We also modify the self-attention\r sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This\r masking, combined with fact that the output embeddings are offset by one position, ensures that the\r predictions for position i can depend only on the known outputs at positions less than i." + "text": "And different from the KL penalty term used in (2), we estimate the KL divergence with the\r following unbiased estimator (Schulman, 2020):\r DπΎπΏ\r \u0002ππ ||πππ π\r \u0003 = πππ π (ππ,π‘ |π, ππ,<π‘ )\r ππ (ππ,π‘ |π, ππ,<π‘ ) β log πππ π (ππ,π‘ |π, ππ,<π‘ )\r ππ (ππ,π‘ |π, ππ,<π‘ ) β 1, (4)\r which is guaranteed to be positive." } }], - 7: [{ - "id": "highlight_1755504777374", + 6: [{ + "id": "highlight_1755510571191", "position": { "boundingRect": { - "x1": 177.3000030517578, - "y1": 1119.8500213623047, - "x2": 828.1166534423828, - "y2": 1190.8000030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 + "x1": 106.51666259765625, + "y1": 644.6666717529297, + "x2": 792.4666442871094, + "y2": 789.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 1119.8500213623047, - "x2": 202.06666564941406, - "y2": 1139.9500274658203, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 214.8000030517578, - "y1": 1119.8500213623047, - "x2": 280.93333435058594, - "y2": 1139.9500274658203, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.3000030517578, - "y1": 1152.8333740234375, - "x2": 828.1166534423828, - "y2": 1172.9333801269531, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 - }, - { - "x1": 177.3000030517578, - "y1": 1170.7000122070312, - "x2": 826.1166534423828, - "y2": 1190.8000030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 3 + "x1": 106.96665954589844, + "y1": 644.6666717529297, + "x2": 791.8499755859375, + "y2": 664.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 576.61669921875, + "y1": 650.6999969482422, + "x2": 586.4833374023438, + "y2": 665.6999969482422, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 524.0166625976562, + "y1": 651.4166717529297, + "x2": 530.1166687011719, + "y2": 665.4166717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 502.5333251953125, + "y1": 651.5333404541016, + "x2": 508.63331604003906, + "y2": 665.5333404541016, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.51666259765625, + "y1": 665.1333312988281, + "x2": 221.53334045410156, + "y2": 685.1333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 241.4666748046875, + "y1": 665.1333312988281, + "x2": 791.9000091552734, + "y2": 685.1333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 221.5, + "y1": 671.1499938964844, + "x2": 239.43333435058594, + "y2": 686.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 227.98333740234375, + "y1": 677.5666656494141, + "x2": 241.39999389648438, + "y2": 686.0166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 685.6000061035156, + "x2": 791.6000061035156, + "y2": 705.6000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 216.28334045410156, + "y1": 691.6166687011719, + "x2": 226.1666717529297, + "y2": 706.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 165.5833282470703, + "y1": 692.3333282470703, + "x2": 171.68333435058594, + "y2": 706.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 145.89999389648438, + "y1": 692.4666595458984, + "x2": 152, + "y2": 706.4666595458984, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 706.066650390625, + "x2": 791.5833587646484, + "y2": 726.066650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 726.5166625976562, + "x2": 669.5166625976562, + "y2": 746.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 684.7666625976562, + "y1": 726.5166625976562, + "x2": 791.9833374023438, + "y2": 746.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 472.683349609375, + "y1": 732.5499877929688, + "x2": 478.33331298828125, + "y2": 747.5499877929688, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 669.4833374023438, + "y1": 732.5499877929688, + "x2": 681.9500122070312, + "y2": 747.5499877929688, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 482.75, + "y1": 744.75, + "x2": 543.1333312988281, + "y2": 759.75, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 749.1499938964844, + "x2": 421.75, + "y2": 769.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 546.316650390625, + "y1": 749.1499938964844, + "x2": 792.4666442871094, + "y2": 769.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 487.33331298828125, + "y1": 751.0333251953125, + "x2": 491.68333435058594, + "y2": 759.4833221435547, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 421.7166748046875, + "y1": 755.1666870117188, + "x2": 486.3000030517578, + "y2": 770.1666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 497.5, + "y1": 759.316650390625, + "x2": 531.4999847412109, + "y2": 773.316650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 769.6166687011719, + "x2": 463.21665954589844, + "y2": 789.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 } ] }, "content": { - "text": "3.2 Attention\r An attention function can be described as mapping a query and a set of key-value pairs to an output,\r where the query, keys, values, and output are all vectors. The output is computed as a weighted sum" + "text": "Formally, for each question π, a group of outputs {π1, π2, Β· Β· Β· , ππΊ } are sampled from the old\r policy model πππππ . A reward model is then used to score the outputs, yielding πΊ rewards\r r = {π1, π2, Β· Β· Β· , ππΊ } correspondingly. Subsequently, these rewards are normalized by subtracting\r the group average and dividing by the group standard deviation. Outcome supervision provides\r the normalized reward at the end of each output ππ and sets the advantages Λπ΄π,π‘ of all tokens in\r the output as the normalized reward, i.e., Λπ΄π,π‘ = eππ = ππ βmean(r)\r std(r) , and then optimizes the policy by\r maximizing the objective defined in equation (3)." } -}, { - "id": "highlight_1755504793920", +}], + 7: [{ + "id": "highlight_1755510596028", "position": { "boundingRect": { - "x1": 177.89999389648438, - "y1": 520, - "x2": 826.0500183105469, - "y2": 557.9666748046875, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.51666259765625, + "y1": 849.433349609375, + "x2": 793.9666290283203, + "y2": 1087.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 520, - "x2": 826.0500183105469, - "y2": 540.1000061035156, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 537.8666687011719, - "x2": 399.73333740234375, - "y2": 557.9666748046875, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.96665954589844, + "y1": 849.433349609375, + "x2": 791.4833221435547, + "y2": 869.433349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 869.8999938964844, + "x2": 791.4333343505859, + "y2": 889.8999938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 890.3500061035156, + "x2": 791.4666595458984, + "y2": 910.3500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 910.816650390625, + "x2": 791.9500122070312, + "y2": 930.816650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 758.75, + "y1": 916.8333129882812, + "x2": 768.61669921875, + "y2": 931.8333129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 706.1500244140625, + "y1": 917.566650390625, + "x2": 712.2500305175781, + "y2": 931.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 684.5833129882812, + "y1": 917.683349609375, + "x2": 690.6833190917969, + "y2": 931.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.51666259765625, + "y1": 931.2833251953125, + "x2": 793.6499633789062, + "y2": 951.2833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 159.56666564941406, + "y1": 949.3833312988281, + "x2": 202.5833282470703, + "y2": 964.3833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 249.36666870117188, + "y1": 949.3833312988281, + "x2": 299.2333221435547, + "y2": 964.3833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 361.2166748046875, + "y1": 949.3833312988281, + "x2": 404.2333221435547, + "y2": 964.3833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 451.01666259765625, + "y1": 949.3833312988281, + "x2": 502.2333221435547, + "y2": 964.3833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 954.0333251953125, + "x2": 158.78334045410156, + "y2": 974.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 204.6999969482422, + "y1": 954.0333251953125, + "x2": 248.59999084472656, + "y2": 974.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 301.7833251953125, + "y1": 954.0333251953125, + "x2": 360.5166778564453, + "y2": 974.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 406.33331298828125, + "y1": 954.0333251953125, + "x2": 450.33331298828125, + "y2": 974.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 504.683349609375, + "y1": 954.0333251953125, + "x2": 792.2833251953125, + "y2": 974.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 289.6499938964844, + "y1": 954.6333312988281, + "x2": 296.31666564941406, + "y2": 965.6333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 491.29998779296875, + "y1": 955.7833251953125, + "x2": 498.7833709716797, + "y2": 964.2333221435547, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 360.316650390625, + "y1": 962.9833374023438, + "x2": 370.183349609375, + "y2": 977.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 450.1166687011719, + "y1": 962.9833374023438, + "x2": 460, + "y2": 977.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 158.75, + "y1": 964.5833129882812, + "x2": 167.5, + "y2": 978.5833129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 248.64999389648438, + "y1": 964.5833129882812, + "x2": 257.29998779296875, + "y2": 978.5833129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 974.5, + "x2": 791.7666625976562, + "y2": 994.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 266.183349609375, + "y1": 980.5166625976562, + "x2": 271.83331298828125, + "y2": 995.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 588.2000122070312, + "y1": 996.7833251953125, + "x2": 683.1999816894531, + "y2": 1011.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 523.8333129882812, + "y1": 998.7000122070312, + "x2": 565.4333190917969, + "y2": 1013.7000122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 1003.3500061035156, + "x2": 523.4166870117188, + "y2": 1023.3500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 686.38330078125, + "y1": 1003.3500061035156, + "x2": 793.9666290283203, + "y2": 1023.3500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 592.7999877929688, + "y1": 1003.9666748046875, + "x2": 597.1333465576172, + "y2": 1014.9666748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 572.11669921875, + "y1": 1004.8500061035156, + "x2": 591.7499847412109, + "y2": 1021.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 523.38330078125, + "y1": 1012.6666870117188, + "x2": 529.0333251953125, + "y2": 1027.6666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 618.2333374023438, + "y1": 1013.5166625976562, + "x2": 656.2833251953125, + "y2": 1027.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 1023.816650390625, + "x2": 791.4666595458984, + "y2": 1043.816650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 424.316650390625, + "y1": 1035.25, + "x2": 428.4666442871094, + "y2": 1055.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 504.23333740234375, + "y1": 1042.0333251953125, + "x2": 545.7333221435547, + "y2": 1057.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 1046.683349609375, + "x2": 394.33331298828125, + "y2": 1066.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 495.51666259765625, + "y1": 1046.683349609375, + "x2": 503.71665954589844, + "y2": 1066.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 547.8499755859375, + "y1": 1046.683349609375, + "x2": 792.4333038330078, + "y2": 1066.683349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 394.29998779296875, + "y1": 1052.7166748046875, + "x2": 509.3499755859375, + "y2": 1067.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 1067.1499938964844, + "x2": 463.21665954589844, + "y2": 1087.1499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 } ] }, "content": { - "text": "of the values, where the weight assigned to each value is computed by a compatibility function of the\r query with the corresponding key." + "text": "Outcome supervision only provides a reward at the end of each output, which may not be\r sufficient and efficient to supervise the policy in complex mathematical tasks. Following Wang\r et al. (2023b), we also explore process supervision, which provides a reward at the end of\r each reasoning step. Formally, given the question π and πΊ sampled outputs {π1, π2, Β· Β· Β· , ππΊ }, a\r process reward model is used to score each step of the outputs, yielding corresponding rewards:\r R = {{ππππππ₯ (1)\r 1 , Β· Β· Β· , ππππππ₯ (πΎ1 )\r 1 }, Β· Β· Β· , {ππππππ₯ (1)\r πΊ , Β· Β· Β· , ππππππ₯ (πΎπΊ )\r πΊ }}, where πππππ₯ ( π) is the end token index\r of the π-th step, and πΎπ is the total number of steps in the π-th output. We also normalize these\r rewards with the average and the standard deviation, i.e., eππππππ₯ ( π)\r π = ππππππ₯ ( π)\r π βmean(R)\r std(R) . Subsequently,\r the process supervision calculates the advantage of each token as the sum of the normalized\r rewards from the following steps, i.e., Λπ΄π,π‘ = Γπππππ₯ ( π) β₯π‘ eππππππ₯ ( π)\r π , and then optimizes the policy by\r maximizing the objective defined in equation (3)." } }], 8: [{ - "id": "highlight_1755504830102", + "id": "highlight_1755510638776", "position": { "boundingRect": { - "x1": 177.10000610351562, - "y1": 606.5833282470703, - "x2": 831.0499877929688, - "y2": 810.25, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.35000610351562, + "y1": 163.98333740234375, + "x2": 791.8666839599609, + "y2": 306.6333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 }, "rects": [ { - "x1": 177.10000610351562, - "y1": 606.5833282470703, - "x2": 826.0666809082031, - "y2": 626.6833343505859, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 624.4499969482422, - "x2": 828.5833587646484, - "y2": 644.5500030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 411.8666687011719, - "y1": 629.6666717529297, - "x2": 420.03334045410156, - "y2": 649.7666625976562, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 390.08331298828125, - "y1": 631.1833343505859, - "x2": 395.29998779296875, - "y2": 645.6999969482422, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 578.88330078125, - "y1": 631.1833343505859, - "x2": 588.0333557128906, - "y2": 645.6999969482422, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 642.4499969482422, - "x2": 411.43333435058594, - "y2": 662.5500030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 425.4666748046875, - "y1": 642.4499969482422, - "x2": 827.6499938964844, - "y2": 662.5500030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 434.0333251953125, - "y1": 649.1666717529297, - "x2": 439.24998474121094, - "y2": 663.6833343505859, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.5, - "y1": 660.4499969482422, - "x2": 222.8333282470703, - "y2": 680.5500030517578, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 687.4333343505859, - "x2": 826.3833312988281, - "y2": 707.5333404541016, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 705.4333343505859, - "x2": 829.4833221435547, - "y2": 725.5333404541016, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 723.2999877929688, - "x2": 336.43333435058594, - "y2": 743.3999786376953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 613.7666625976562, - "y1": 764.0499877929688, - "x2": 619.3666687011719, - "y2": 778.566650390625, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 585.7333374023438, - "y1": 765.7999877929688, - "x2": 612.4833374023438, - "y2": 785.8999786376953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 362.26666259765625, - "y1": 776.8833312988281, - "x2": 597.9333343505859, - "y2": 796.9833374023438, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 626.066650390625, - "y1": 776.8833312988281, - "x2": 645.7833251953125, - "y2": 796.9833374023438, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 812.0333251953125, - "y1": 776.8833312988281, - "x2": 831.0499877929688, - "y2": 796.9833374023438, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 603.4833374023438, - "y1": 789.1499938964844, - "x2": 612.1000061035156, - "y2": 809.25, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 611.9500122070312, - "y1": 795.7333374023438, - "x2": 617.1666870117188, - "y2": 810.25, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.35000610351562, + "y1": 163.98333740234375, + "x2": 791.4333190917969, + "y2": 183.98333740234375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.96665954589844, + "y1": 184.45001220703125, + "x2": 791.8666839599609, + "y2": 204.45001220703125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.35000610351562, + "y1": 204.89999389648438, + "x2": 791.4833068847656, + "y2": 224.89999389648438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.96665954589844, + "y1": 225.36666870117188, + "x2": 791.5166473388672, + "y2": 245.36666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.96665954589844, + "y1": 245.83331298828125, + "x2": 791.5333099365234, + "y2": 265.83331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.96665954589844, + "y1": 266.16668701171875, + "x2": 791.4166717529297, + "y2": 286.16668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 + }, + { + "x1": 106.96665954589844, + "y1": 286.6333312988281, + "x2": 249.06666564941406, + "y2": 306.6333312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 15 } ] }, "content": { - "text": "We call our particular attention \"Scaled Dot-Product Attention\" (Figure 2). The input consists of\r queries and keys of dimension dk, and values of dimension dv . We compute the dot products of the\r query with all keys, divide each by βdk, and apply a softmax function to obtain the weights on the\r values.\r In practice, we compute the attention function on a set of queries simultaneously, packed together\r into a matrix Q. The keys and values are also packed together into matrices K and V . We compute\r the matrix of outputs as:\r Attention(Q, K, V ) = softmax( QKT\r βdk\r )V (1)" + "text": "As the reinforcement learning training process progresses, the old reward model may not be\r sufficient to supervise the current policy model. Therefore, we also explore the iterative RL\r with GRPO. As shown in Algorithm 1, in iterative GRPO, we generate new training sets for the\r reward model based on the sampling results from the policy model and continually train the\r old reward model using a replay mechanism that incorporates 10% of historical data. Then, we\r set the reference model as the policy model, and continually train the policy model with the\r new reward model." } -}], - 9: [{ - "id": "highlight_1755504856607", +}, { + "id": "highlight_1755510653135", "position": { "boundingRect": { - "x1": 177.10000610351562, - "y1": 820.183349609375, - "x2": 831.7500305175781, - "y2": 1020.2000122070312, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.35000610351562, + "y1": 129.78334045410156, + "x2": 718.7999877929688, + "y2": 425, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 }, "rects": [ { - "x1": 177.39999389648438, - "y1": 820.183349609375, - "x2": 831.7500305175781, - "y2": 840.2833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 838.183349609375, - "x2": 826.3999938964844, - "y2": 858.2833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 856.0499877929688, - "x2": 195.4166717529297, - "y2": 876.1499786376953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 224.46665954589844, - "y1": 856.0499877929688, - "x2": 826.3500213623047, - "y2": 876.1499786376953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 197.76666259765625, - "y1": 858.3333129882812, - "x2": 212.64999389648438, - "y2": 872.8499908447266, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 208.5500030517578, - "y1": 867.2000122070312, - "x2": 219.03334045410156, - "y2": 881.7166748046875, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 215.3000030517578, - "y1": 873.0166625976562, - "x2": 219.03334045410156, - "y2": 881.2166595458984, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 877.816650390625, - "x2": 826.1000061035156, - "y2": 897.9166564941406, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 895.683349609375, - "x2": 826.0833435058594, - "y2": 915.7833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 913.683349609375, - "x2": 355.03334045410156, - "y2": 933.7833557128906, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.10000610351562, - "y1": 940.6666870117188, - "x2": 357.0666809082031, - "y2": 960.7666778564453, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 369.1166687011719, - "y1": 940.6666870117188, - "x2": 827.2833557128906, - "y2": 960.7666778564453, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 357.0333251953125, - "y1": 947.2666625976562, - "x2": 366.5833282470703, - "y2": 961.7833404541016, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 958.6666870117188, - "x2": 562.6999816894531, - "y2": 978.7666778564453, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 574.75, - "y1": 958.6666870117188, - "x2": 828.6000213623047, - "y2": 978.7666778564453, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 562.5499877929688, - "y1": 965.2666625976562, - "x2": 572.1166687011719, - "y2": 979.7833404541016, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 976.6666870117188, - "x2": 826.1999664306641, - "y2": 996.7666778564453, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 186.36666870117188, - "y1": 983.25, - "x2": 191.5833282470703, - "y2": 997.7666625976562, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 351.2833251953125, - "y1": 992.7833251953125, - "x2": 740.8499908447266, - "y2": 1007.2999877929688, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 994.5333251953125, - "x2": 723.4999847412109, - "y2": 1014.6333160400391, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 752.6666870117188, - "y1": 994.5333251953125, - "x2": 756.8000183105469, - "y2": 1014.6333160400391, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 725.8499755859375, - "y1": 998.2000122070312, - "x2": 731.5666351318359, - "y2": 1009.683349609375, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 736.63330078125, - "y1": 1005.683349609375, - "x2": 747.2333374023438, - "y2": 1020.2000122070312, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 743.5, - "y1": 1011.5, - "x2": 747.2333374023438, - "y2": 1019.6999969482422, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.35000610351562, + "y1": 129.78334045410156, + "x2": 528.1833343505859, + "y2": 149.78334045410156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 153.3333282470703, + "x2": 291.816650390625, + "y2": 172.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 311.5, + "y1": 153.3333282470703, + "x2": 715.3333129882812, + "y2": 172.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 422.9666748046875, + "y1": 155.81666564941406, + "x2": 436.6499786376953, + "y2": 169.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 676.0333251953125, + "y1": 155.81666564941406, + "x2": 715.3333129882812, + "y2": 169.6666717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 291.79998779296875, + "y1": 158.5833282470703, + "x2": 311.8333435058594, + "y2": 172.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 428.816650390625, + "y1": 161.03334045410156, + "x2": 436.6499786376953, + "y2": 171.13333129882812, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 297.6499938964844, + "y1": 164.3333282470703, + "x2": 311.8333435058594, + "y2": 172.61666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 175.3333282470703, + "x2": 290.7666778564453, + "y2": 192.3333282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 232.75, + "y1": 182, + "x2": 241.0500030517578, + "y2": 195, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 278.41668701171875, + "y1": 185.3000030517578, + "x2": 290.7666778564453, + "y2": 193.5833282470703, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 193.36666870117188, + "x2": 296.3666687011719, + "y2": 210.36666870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 155.1666717529297, + "y1": 210.5333251953125, + "x2": 274.816650390625, + "y2": 228.5333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 294.5, + "y1": 210.5333251953125, + "x2": 323.3666687011719, + "y2": 228.5333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 211.41668701171875, + "x2": 128.11666870117188, + "y2": 228.41668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 274.8999938964844, + "y1": 215.64999389648438, + "x2": 290.50001525878906, + "y2": 229.64999389648438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 323.3666687011719, + "y1": 215.64999389648438, + "x2": 329.3833312988281, + "y2": 229.64999389648438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 155.1666717529297, + "y1": 228.58331298828125, + "x2": 299.4333190917969, + "y2": 246.58331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 229.4666748046875, + "x2": 128.11666870117188, + "y2": 246.4666748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 246.61666870117188, + "x2": 291.6333312988281, + "y2": 264.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 305.01666259765625, + "y1": 246.61666870117188, + "x2": 350.51666259765625, + "y2": 264.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 247.51666259765625, + "x2": 128.11666870117188, + "y2": 264.51666259765625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 295.3999938964844, + "y1": 251.75, + "x2": 302.816650390625, + "y2": 265.75, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 264.66668701171875, + "x2": 381.34999084472656, + "y2": 282.66668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 403.816650390625, + "y1": 264.66668701171875, + "x2": 432.59999084472656, + "y2": 282.66668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 265.566650390625, + "x2": 128.11666870117188, + "y2": 282.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 381.3500061035156, + "y1": 269.7833251953125, + "x2": 397.6499786376953, + "y2": 283.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 432.683349609375, + "y1": 269.7833251953125, + "x2": 438.70001220703125, + "y2": 283.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 387.2833251953125, + "y1": 275.933349609375, + "x2": 399.433349609375, + "y2": 283.5500183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 344.66668701171875, + "y1": 280.7166748046875, + "x2": 367.683349609375, + "y2": 300.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 282.7166748046875, + "x2": 331.1333465576172, + "y2": 300.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 386.6499938964844, + "y1": 282.7166748046875, + "x2": 580.9166564941406, + "y2": 301.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 283.6166687011719, + "x2": 128.11666870117188, + "y2": 300.6166687011719, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 358.41668701171875, + "y1": 285.20001220703125, + "x2": 373.70001220703125, + "y2": 299.20001220703125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 408.95001220703125, + "y1": 285.20001220703125, + "x2": 416.0166778564453, + "y2": 299.0500183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 543.5333251953125, + "y1": 285.20001220703125, + "x2": 553.6666564941406, + "y2": 299.0500183105469, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 313.83331298828125, + "y1": 287.83331298828125, + "x2": 342.20001220703125, + "y2": 301.83331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 580.933349609375, + "y1": 287.83331298828125, + "x2": 586.0333557128906, + "y2": 301.83331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 367.683349609375, + "y1": 290.2833251953125, + "x2": 385.7666473388672, + "y2": 300.38331604003906, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 324.7166748046875, + "y1": 293.7166748046875, + "x2": 342.20001220703125, + "y2": 303.8166809082031, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 325.066650390625, + "y1": 299.7833251953125, + "x2": 331.7666473388672, + "y2": 313.7833251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 302.29998779296875, + "x2": 323.3333282470703, + "y2": 321.29998779296875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 344.933349609375, + "y1": 302.29998779296875, + "x2": 609.9833374023438, + "y2": 320.29998779296875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 303.183349609375, + "x2": 128.11666870117188, + "y2": 320.183349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 314.4666748046875, + "y1": 307.41668701171875, + "x2": 342.933349609375, + "y2": 321.41668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 518.9000244140625, + "y1": 307.41668701171875, + "x2": 524.0500030517578, + "y2": 321.41668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 610.316650390625, + "y1": 307.41668701171875, + "x2": 618.1499786376953, + "y2": 321.41668701171875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 325.433349609375, + "y1": 313.29998779296875, + "x2": 342.933349609375, + "y2": 323.3999786376953, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 322, + "x2": 253.31668090820312, + "y2": 341, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 114.88333129882812, + "y1": 322.8833312988281, + "x2": 128.11666870117188, + "y2": 339.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 244.9666748046875, + "y1": 324.3500061035156, + "x2": 718.7999877929688, + "y2": 338.3500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 253.683349609375, + "y1": 329.566650390625, + "x2": 265.1499938964844, + "y2": 339.6666564941406, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 177.71665954589844, + "y1": 340.04998779296875, + "x2": 391.6666564941406, + "y2": 359.04998779296875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 108.13333129882812, + "y1": 340.933349609375, + "x2": 128.11666870117188, + "y2": 357.933349609375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 200.28334045410156, + "y1": 358.08331298828125, + "x2": 378.3833312988281, + "y2": 376.08331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 388.98333740234375, + "y1": 358.08331298828125, + "x2": 716.6166687011719, + "y2": 376.08331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 108.13333129882812, + "y1": 358.98333740234375, + "x2": 128.11666870117188, + "y2": 375.98333740234375, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 378.3833312988281, + "y1": 363.2166748046875, + "x2": 386.78334045410156, + "y2": 377.2166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 155.1666717529297, + "y1": 380.33331298828125, + "x2": 213.96665954589844, + "y2": 398.33331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 226.81666564941406, + "y1": 380.33331298828125, + "x2": 598.2666778564453, + "y2": 398.33331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 108.13333129882812, + "y1": 381.3500061035156, + "x2": 128.11666870117188, + "y2": 398.3500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 214.31666564941406, + "y1": 385.58331298828125, + "x2": 224.5, + "y2": 399.58331298828125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 106.96665954589844, + "y1": 405.8833312988281, + "x2": 170.18333435058594, + "y2": 423.8833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 + }, + { + "x1": 170.1666717529297, + "y1": 411, + "x2": 176.18333435058594, + "y2": 425, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 14 } ] }, "content": { - "text": "The two most commonly used attention functions are additive attention [ 2], and dot-product (multi-\r plicative) attention. Dot-product attention is identical to our algorithm, except for the scaling factor\r of 1βdk\r . Additive attention computes the compatibility function using a feed-forward network with\r a single hidden layer. While the two are similar in theoretical complexity, dot-product attention is\r much faster and more space-efficient in practice, since it can be implemented using highly optimized\r matrix multiplication code.\r While for small values of dk the two mechanisms perform similarly, additive attention outperforms\r dot product attention without scaling for larger values of dk [3 ]. We suspect that for large values of\r dk, the dot products grow large in magnitude, pushing the softmax function into regions where it has\r extremely small gradients 4. To counteract this effect, we scale the dot products by 1βdk\r ." + "text": "Algorithm 1 Iterative Group Relative Policy Optimization\r Input initial policy model ππinit ; reward models ππ; task prompts D; hyperparameters π, π½, π\r 1: policy model ππ β ππinit\r 2: for iteration = 1, . . . , I do\r 3: reference model πππ π β ππ\r 4: for step = 1, . . . , M do\r 5: Sample a batch Dπ from D\r 6: Update the old policy model πππππ β ππ\r 7: Sample πΊ outputs \r {ππ }πΊ\r π=1 βΌ πππππ (Β· | π) for each question π β Dπ\r 8: Compute rewards {ππ }πΊ\r π=1 for each sampled output ππ by running ππ\r 9: Compute Λπ΄π,π‘ for the π‘-th token of ππ through group relative advantage estimation.\r 10: for GRPO iteration = 1, . . . , π do\r 11: Update the policy model ππ by maximizing the GRPO objective (Equation 21)\r 12: Update ππ through continuous training using a replay mechanism.\r Output ππ" } }], - 10: [{ - "id": "highlight_1755504882387", + 9: [{ + "id": "highlight_1755510779442", "position": { "boundingRect": { - "x1": 177.3000030517578, - "y1": 1067.1666870117188, - "x2": 830.683349609375, - "y2": 1142.1333770751953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.96665954589844, + "y1": 691.9500122070312, + "x2": 791.3999786376953, + "y2": 732.4166870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 19 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 1067.1666870117188, - "x2": 546.6833190917969, - "y2": 1087.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 576.1666870117188, - "y1": 1067.1666870117188, - "x2": 830.683349609375, - "y2": 1087.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 546.5333251953125, - "y1": 1073.7500305175781, - "x2": 575.1999816894531, - "y2": 1088.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.3000030517578, - "y1": 1085.0333557128906, - "x2": 828.9999847412109, - "y2": 1105.1333618164062, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 1103.0166931152344, - "x2": 347.1999816894531, - "y2": 1123.116683959961, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 359.25, - "y1": 1103.0166931152344, - "x2": 395.7833557128906, - "y2": 1123.116683959961, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 407.6333312988281, - "y1": 1103.0166931152344, - "x2": 827.6000061035156, - "y2": 1123.116683959961, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 322.25, - "y1": 1109.7500305175781, - "x2": 327.46665954589844, - "y2": 1124.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 347.04998779296875, - "y1": 1109.7500305175781, - "x2": 356.6999969482422, - "y2": 1124.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 395.73333740234375, - "y1": 1109.7500305175781, - "x2": 404.88331604003906, - "y2": 1124.2666931152344, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 177.89999389648438, - "y1": 1121.0166931152344, - "x2": 829.6333465576172, - "y2": 1141.116683959961, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 - }, - { - "x1": 736.8333129882812, - "y1": 1127.61669921875, - "x2": 746.1000061035156, - "y2": 1142.1333770751953, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 4 + "x1": 106.96665954589844, + "y1": 691.9500122070312, + "x2": 791.3999786376953, + "y2": 711.9500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 19 + }, + { + "x1": 106.96665954589844, + "y1": 712.4166870117188, + "x2": 429.93333435058594, + "y2": 732.4166870117188, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 19 } ] }, "content": { - "text": "Instead of performing a single attention function with dmodel-dimensional keys, values and queries,\r we found it beneficial to linearly project the queries, keys and values h times with different, learned\r linear projections to dk, dk and dv dimensions, respectively. On each of these projected versions of\r queries, keys and values we then perform the attention function in parallel, yielding dv -dimensional" + "text": "Figure 5 | Performance of the DeepSeekMath-Instruct 1.3B model, which was further trained\r using various methods, on two benchmarks." } }, { - "id": "highlight_1755504918851", + "id": "highlight_1755510798001", "position": { "boundingRect": { - "x1": 177.89999389648438, - "y1": 120.20001220703125, - "x2": 826.1166687011719, - "y2": 158.28335571289062, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 5 + "x1": 106.51666259765625, + "y1": 737.4499969482422, + "x2": 794.6166839599609, + "y2": 1074.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 }, "rects": [ { - "x1": 177.89999389648438, - "y1": 120.20001220703125, - "x2": 826.1166687011719, - "y2": 140.30001831054688, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 5 - }, - { - "x1": 177.89999389648438, - "y1": 138.183349609375, - "x2": 312.8666687011719, - "y2": 158.28335571289062, - "width": 1008, - "height": 1304.4705882352941, - "pageNumber": 5 + "x1": 106.96665954589844, + "y1": 737.4499969482422, + "x2": 416.2833251953125, + "y2": 757.4499969482422, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 428.8999938964844, + "y1": 737.4499969482422, + "x2": 791.7166442871094, + "y2": 757.4499969482422, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 757.9166717529297, + "x2": 794.6166839599609, + "y2": 777.9166717529297, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 778.3833312988281, + "x2": 791.5833587646484, + "y2": 798.3833312988281, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 798.8500061035156, + "x2": 791.4499969482422, + "y2": 818.8500061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 819.3000030517578, + "x2": 791.4833221435547, + "y2": 839.3000030517578, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 839.6499938964844, + "x2": 791.4666595458984, + "y2": 859.6499938964844, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 860.1000061035156, + "x2": 791.7499847412109, + "y2": 880.1000061035156, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 880.566650390625, + "x2": 794.3333587646484, + "y2": 900.566650390625, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 901.0333251953125, + "x2": 792.1333465576172, + "y2": 921.0333251953125, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 921.5, + "x2": 644.6333465576172, + "y2": 941.5, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 132.60000610351562, + "y1": 952.25, + "x2": 792.1499938964844, + "y2": 972.25, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 972.7166748046875, + "x2": 791.8500213623047, + "y2": 992.7166748046875, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.51666259765625, + "y1": 993.0499877929688, + "x2": 791.566650390625, + "y2": 1013.0499877929688, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 1013.5166625976562, + "x2": 791.2666473388672, + "y2": 1033.5166625976562, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 1033.9833374023438, + "x2": 792.0833587646484, + "y2": 1053.9833374023438, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 + }, + { + "x1": 106.96665954589844, + "y1": 1054.4500122070312, + "x2": 526.6666717529297, + "y2": 1074.4500122070312, + "width": 899, + "height": 1271.4423393518302, + "pageNumber": 20 } ] }, "content": { - "text": "output values. These are concatenated and once again projected, resulting in the final values, as\r depicted in Figure 2." + "text": "Observation about Gradient Coefficient The algorithm processes the reward signal to the\r gradient coefficient to update the model parameter. We divide the reward function as βRuleβ\r and βModelβ in our experiments. Rule refers to judging the quality of a response based on\r the correctness of the answer, and Model denotes that we train a reward model to score each\r response. The training data of the reward model is based on the rule judgment. Equations 10\r and 21 highlight a key difference between GRPO and Online RFT: GRPO uniquely adjusts its\r gradient coefficient based on the reward value provided by the reward model. This allows for\r differential reinforcement and penalization of responses according to their varying magnitudes.\r In contrast, Online RFT lacks this feature; it does not penalize incorrect responses and uniformly\r reinforces all responses with correct answers at the same level of intensity.\r As demonstrated in Figure 5, GRPO surpasses online RFT, thereby highlighting the efficiency\r of altering positive and negative gradient coefficients. In addition, GRPO+PS shows superior\r performance compared to GRPO+OS, indicating the benefits of using fine-grained, step-aware\r gradient coefficients. Furthermore, we explore the iterative RL, in our experiments, we conduct\r two rounds of iteration. As shown in Figure 6, we notice that the iterative RL significantly\r improves the performance, especially at the first iteration." } -}], +}], }; // Temporarily inject test highlights into documentData for testing @@ -1577,7 +3552,8 @@ function DocumentProcessor() { documentData={documentDataWithHighlights} onPageChange={setPdfNavigation} preloadedHighlights={documentDataWithHighlights?.preloadedHighlights || null} - currentChatId={currentChunkIndex} + currentChunkIndex={currentChunkIndex} + onDocumentReady={setDocumentControls} /> @@ -1610,24 +3586,28 @@ function DocumentProcessor() { /> - {/* Chunk Panel */} + {/* Right Panel Content - Welcome Screen or Chunk Panel */}