3v324v23 commited on
Commit
5cf9bb7
·
1 Parent(s): 66ad25b

Update codebase with latest fixes and improvements

Browse files
mp1/.env DELETED
@@ -1,15 +0,0 @@
1
- # NVIDIA NIM Multi-model Keys
2
- NVIDIA_API_KEY_NANO=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
3
- NVIDIA_API_KEY_SUPER=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
4
- NVIDIA_API_KEY_VL=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
5
- NVIDIA_API_KEY_EMBED=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
6
- NVIDIA_API_KEY_RERANK=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
7
- NVIDIA_API_KEY_ULTRA=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
8
-
9
- # Global fallback (defaults to Super if specific not found)
10
- NVIDIA_API_KEY=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
11
-
12
- # Keep Groq as fallback
13
- GROQ_API_KEY=gsk_xxxxxxxxxxxxxxxxxxxx
14
- MISTRAL_API_KEY=...
15
- GOOGLE_API_KEY=AIzaSyDp-mzHD9Nyk1T3xCPRyrc1RCiVLZzkNy8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
mp1/corpus/.extraction_cache.json CHANGED
@@ -1878,5 +1878,1013 @@
1878
  "chunk_summary": "The text discusses two main challenges in the integration of neural and symbolic AI systems: Interoperability & Integration (difficulty in integrating with real-world data and software ecosystems) and Governance & Accountability (liability and regulatory challenges with emergent behaviors). Proposed solutions include developing bridging standards/APIs for the former and paradigm-specific regulatory models for the latter."
1879
  },
1880
  "cached_at": "2026-03-30T12:54:43.367750+00:00"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1881
  }
1882
  }
 
1878
  "chunk_summary": "The text discusses two main challenges in the integration of neural and symbolic AI systems: Interoperability & Integration (difficulty in integrating with real-world data and software ecosystems) and Governance & Accountability (liability and regulatory challenges with emergent behaviors). Proposed solutions include developing bridging standards/APIs for the former and paradigm-specific regulatory models for the latter."
1879
  },
1880
  "cached_at": "2026-03-30T12:54:43.367750+00:00"
1881
+ },
1882
+ "11dc4fc7fef5e33d8f4a9919ff21db44c12efb5f1e7335e92503dae98125ce6d": {
1883
+ "stage": "extract",
1884
+ "doc_id": "m3",
1885
+ "chunk_id": "C100",
1886
+ "chunk_hash": "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba",
1887
+ "chunk_type": "figure",
1888
+ "mode_used": "MODE_VISION",
1889
+ "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
1890
+ "extracted": {
1891
+ "claims": [
1892
+ {
1893
+ "claim_id": "m3-C100-CL1",
1894
+ "text": "The paper aims to explore how sub-quadratic models can balance performance with inference speed.",
1895
+ "importance": "high",
1896
+ "support_type": "implicit",
1897
+ "numbers": [
1898
+ "440M",
1899
+ "2",
1900
+ "16",
1901
+ "32",
1902
+ "64",
1903
+ "128"
1904
+ ],
1905
+ "entities": [
1906
+ "validation perplexity",
1907
+ "model performance",
1908
+ "Mamba models",
1909
+ "Fineweb-Edu dataset",
1910
+ "Chinchilla optimal tokens",
1911
+ "𝑑state"
1912
+ ],
1913
+ "dependencies": [],
1914
+ "evidence": {
1915
+ "doc_id": "m3",
1916
+ "chunk_id": "C100",
1917
+ "where": "chunk C100",
1918
+ "quote": "By plotting the validation perplexity (a proxy for model performance) as a function of 𝑑state, we aim to formulate a holistic picture about how sub-quadratic models can trade off performance with infe"
1919
+ }
1920
+ },
1921
+ {
1922
+ "claim_id": "m3-C100-CL2",
1923
+ "text": "The paper trains 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values (16, 32, 64, 128) and observes an inverse correlation between validation loss and 𝑑state.",
1924
+ "importance": "medium",
1925
+ "support_type": "explicit",
1926
+ "numbers": [
1927
+ "440M",
1928
+ "16",
1929
+ "32",
1930
+ "64",
1931
+ "128"
1932
+ ],
1933
+ "entities": [
1934
+ "validation loss",
1935
+ "𝑑state"
1936
+ ],
1937
+ "dependencies": [],
1938
+ "evidence": {
1939
+ "doc_id": "m3",
1940
+ "chunk_id": "C100",
1941
+ "where": "chunk C100",
1942
+ "quote": "For each data point, we train a 440M parameter model to 2×Chinchilla optimal tokens on the Fineweb-Edu dataset, where the model is configured with a 𝑑state of {16, 32, 64, 128}. As expected, we observ"
1943
+ }
1944
+ }
1945
+ ],
1946
+ "definitions": [
1947
+ {
1948
+ "term": "validation perplexity",
1949
+ "definition": "A proxy for model performance used in the paper."
1950
+ },
1951
+ {
1952
+ "term": "Mamba models",
1953
+ "definition": "A specific type of model used in the paper to explore the trade-off between performance and inference speed."
1954
+ },
1955
+ {
1956
+ "term": "Fineweb-Edu dataset",
1957
+ "definition": "The dataset used in the paper to train and evaluate the models."
1958
+ },
1959
+ {
1960
+ "term": "Chinchilla optimal tokens",
1961
+ "definition": "A specific type of token used in the paper to configure the models."
1962
+ },
1963
+ {
1964
+ "term": "𝑑state",
1965
+ "definition": "A parameter used in the paper to configure the models and explore the trade-off between performance and inference speed."
1966
+ }
1967
+ ],
1968
+ "math": [
1969
+ {
1970
+ "expression": "2×Chinchilla optimal tokens",
1971
+ "interpretation": "A specific configuration used in the paper to train the models.",
1972
+ "evidence": null
1973
+ }
1974
+ ],
1975
+ "table": [],
1976
+ "figure": [],
1977
+ "code": [],
1978
+ "chunk_summary": "The paper explores how sub-quadratic models can balance performance with inference speed by training 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values and observing an inverse correlation between validation loss and 𝑑state."
1979
+ },
1980
+ "cached_at": "2026-03-31T06:11:11.513711+00:00"
1981
+ },
1982
+ "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba": {
1983
+ "stage": "extract",
1984
+ "doc_id": "m3",
1985
+ "chunk_id": "C100",
1986
+ "chunk_hash": "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba",
1987
+ "chunk_type": "figure",
1988
+ "mode_used": "MODE_VISION",
1989
+ "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
1990
+ "extracted": {
1991
+ "claims": [
1992
+ {
1993
+ "claim_id": "m3-C100-CL1",
1994
+ "text": "The paper aims to explore how sub-quadratic models can balance performance with inference speed.",
1995
+ "importance": "high",
1996
+ "support_type": "implicit",
1997
+ "numbers": [
1998
+ "440M",
1999
+ "2",
2000
+ "16",
2001
+ "32",
2002
+ "64",
2003
+ "128"
2004
+ ],
2005
+ "entities": [
2006
+ "validation perplexity",
2007
+ "model performance",
2008
+ "Mamba models",
2009
+ "Fineweb-Edu dataset",
2010
+ "Chinchilla optimal tokens",
2011
+ "𝑑state"
2012
+ ],
2013
+ "dependencies": [],
2014
+ "evidence": {
2015
+ "doc_id": "m3",
2016
+ "chunk_id": "C100",
2017
+ "where": "chunk C100",
2018
+ "quote": "By plotting the validation perplexity (a proxy for model performance) as a function of 𝑑state, we aim to formulate a holistic picture about how sub-quadratic models can trade off performance with infe"
2019
+ }
2020
+ },
2021
+ {
2022
+ "claim_id": "m3-C100-CL2",
2023
+ "text": "The paper trains 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values (16, 32, 64, 128) and observes an inverse correlation between validation loss and 𝑑state.",
2024
+ "importance": "medium",
2025
+ "support_type": "explicit",
2026
+ "numbers": [
2027
+ "440M",
2028
+ "16",
2029
+ "32",
2030
+ "64",
2031
+ "128"
2032
+ ],
2033
+ "entities": [
2034
+ "validation loss",
2035
+ "𝑑state"
2036
+ ],
2037
+ "dependencies": [],
2038
+ "evidence": {
2039
+ "doc_id": "m3",
2040
+ "chunk_id": "C100",
2041
+ "where": "chunk C100",
2042
+ "quote": "For each data point, we train a 440M parameter model to 2×Chinchilla optimal tokens on the Fineweb-Edu dataset, where the model is configured with a 𝑑state of {16, 32, 64, 128}. As expected, we observ"
2043
+ }
2044
+ }
2045
+ ],
2046
+ "definitions": [
2047
+ {
2048
+ "term": "validation perplexity",
2049
+ "definition": "A proxy for model performance used in the paper."
2050
+ },
2051
+ {
2052
+ "term": "Mamba models",
2053
+ "definition": "A specific type of model used in the paper to explore the trade-off between performance and inference speed."
2054
+ },
2055
+ {
2056
+ "term": "Fineweb-Edu dataset",
2057
+ "definition": "The dataset used in the paper to train and evaluate the models."
2058
+ },
2059
+ {
2060
+ "term": "Chinchilla optimal tokens",
2061
+ "definition": "A specific type of token used in the paper to configure the models."
2062
+ },
2063
+ {
2064
+ "term": "𝑑state",
2065
+ "definition": "A parameter used in the paper to configure the models and explore the trade-off between performance and inference speed."
2066
+ }
2067
+ ],
2068
+ "math": [
2069
+ {
2070
+ "expression": "2×Chinchilla optimal tokens",
2071
+ "interpretation": "A specific configuration used in the paper to train the models.",
2072
+ "evidence": null
2073
+ }
2074
+ ],
2075
+ "table": [],
2076
+ "figure": [],
2077
+ "code": [],
2078
+ "chunk_summary": "The paper explores how sub-quadratic models can balance performance with inference speed by training 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values and observing an inverse correlation between validation loss and 𝑑state."
2079
+ },
2080
+ "cached_at": "2026-03-31T06:11:11.513711+00:00"
2081
+ },
2082
+ "8c8d3a749758cf0155178011770f7a5dad2cc169a562482015bccc0564fc0e7d": {
2083
+ "stage": "extract",
2084
+ "doc_id": "m3",
2085
+ "chunk_id": "C35",
2086
+ "chunk_hash": "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8",
2087
+ "chunk_type": "table",
2088
+ "mode_used": "MODE_REASONING",
2089
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2090
+ "extracted": {
2091
+ "claims": [
2092
+ {
2093
+ "claim_id": "m3-C35-CL1",
2094
+ "text": "This paper discusses the difference in representation of parameter B_t between the current work and Mamba-2.",
2095
+ "importance": "high",
2096
+ "support_type": "explicit",
2097
+ "numbers": [],
2098
+ "entities": [
2099
+ "B_t",
2100
+ "Mamba-2",
2101
+ "this paper"
2102
+ ],
2103
+ "dependencies": [],
2104
+ "evidence": {
2105
+ "doc_id": "m3",
2106
+ "chunk_id": "C35",
2107
+ "where": "chunk C35",
2108
+ "quote": "B_t represents the continuous parameter, whereas in Mamba-2, B_t represents the discretized parameter"
2109
+ }
2110
+ },
2111
+ {
2112
+ "claim_id": "m3-C35-CL2",
2113
+ "text": "The paper extends the prior Mamba discretization theory by introducing an exponential-trapezoidal method.",
2114
+ "importance": "high",
2115
+ "support_type": "explicit",
2116
+ "numbers": [],
2117
+ "entities": [
2118
+ "Mamba discretization",
2119
+ "exponential-trapezoidal method"
2120
+ ],
2121
+ "dependencies": [
2122
+ "m3-C35-CL1"
2123
+ ],
2124
+ "evidence": {
2125
+ "doc_id": "m3",
2126
+ "chunk_id": "C35",
2127
+ "where": "chunk C35",
2128
+ "quote": "Our theory formalizes the prior Mamba discretization as exponential-Euler and extends it"
2129
+ }
2130
+ },
2131
+ {
2132
+ "claim_id": "m3-C35-CL3",
2133
+ "text": "Mamba-1's reported ZOH discretization differs from its actual implementation (as per a GitHub issue).",
2134
+ "importance": "medium",
2135
+ "support_type": "explicit",
2136
+ "numbers": [
2137
+ "#129"
2138
+ ],
2139
+ "entities": [
2140
+ "Mamba-1",
2141
+ "ZOH discretization",
2142
+ "GitHub"
2143
+ ],
2144
+ "dependencies": [],
2145
+ "evidence": {
2146
+ "doc_id": "m3",
2147
+ "chunk_id": "C35",
2148
+ "where": "chunk C35",
2149
+ "quote": "While the Mamba-1 paper reports ZOH discretization, the implementation follows https://github.com/state-spaces/mamba/issues/129"
2150
+ }
2151
+ },
2152
+ {
2153
+ "claim_id": "m3-C35-CL4",
2154
+ "text": "Table 1 lists canonical linear-time invariant discretizations and custom linear-time varying discretizations from the exponential-adjusted framework.",
2155
+ "importance": "low",
2156
+ "support_type": "explicit",
2157
+ "numbers": [
2158
+ "1"
2159
+ ],
2160
+ "entities": [
2161
+ "Table 1"
2162
+ ],
2163
+ "dependencies": [],
2164
+ "evidence": {
2165
+ "doc_id": "m3",
2166
+ "chunk_id": "C35",
2167
+ "where": "chunk C35",
2168
+ "quote": "Table 1: Table of canonical linear-time invariant discretizations (top) and custom linear-time varying discretizations"
2169
+ }
2170
+ }
2171
+ ],
2172
+ "definitions": [
2173
+ {
2174
+ "term": "B_t",
2175
+ "definition": "A parameter represented differently as continuous in this paper and discretized in Mamba-2 (equivalent to γ_tB_t)"
2176
+ },
2177
+ {
2178
+ "term": "Exponential-Euler (discretization)",
2179
+ "definition": "The formalized representation of the prior Mamba discretization in this paper"
2180
+ },
2181
+ {
2182
+ "term": "Exponential-Trapezoidal Method",
2183
+ "definition": "A new, more expressive discretization method introduced in this paper as an extension to Mamba"
2184
+ }
2185
+ ],
2186
+ "math": [
2187
+ {
2188
+ "expression": "γ_tB_t",
2189
+ "interpretation": "The discretized form of parameter B_t in Mamba-2, where γ_t is a scaling factor",
2190
+ "evidence": null
2191
+ }
2192
+ ],
2193
+ "table": [],
2194
+ "figure": [],
2195
+ "code": [],
2196
+ "chunk_summary": "This paper contrasts its continuous parameter representation with Mamba-2's discretized approach, extends Mamba's discretization theory with a new method, and references a discrepancy in Mamba-1's implementation. It also presents a table on various discretizations."
2197
+ },
2198
+ "cached_at": "2026-03-31T06:11:33.529241+00:00"
2199
+ },
2200
+ "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8": {
2201
+ "stage": "extract",
2202
+ "doc_id": "m3",
2203
+ "chunk_id": "C35",
2204
+ "chunk_hash": "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8",
2205
+ "chunk_type": "table",
2206
+ "mode_used": "MODE_REASONING",
2207
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2208
+ "extracted": {
2209
+ "claims": [
2210
+ {
2211
+ "claim_id": "m3-C35-CL1",
2212
+ "text": "This paper discusses the difference in representation of parameter B_t between the current work and Mamba-2.",
2213
+ "importance": "high",
2214
+ "support_type": "explicit",
2215
+ "numbers": [],
2216
+ "entities": [
2217
+ "B_t",
2218
+ "Mamba-2",
2219
+ "this paper"
2220
+ ],
2221
+ "dependencies": [],
2222
+ "evidence": {
2223
+ "doc_id": "m3",
2224
+ "chunk_id": "C35",
2225
+ "where": "chunk C35",
2226
+ "quote": "B_t represents the continuous parameter, whereas in Mamba-2, B_t represents the discretized parameter"
2227
+ }
2228
+ },
2229
+ {
2230
+ "claim_id": "m3-C35-CL2",
2231
+ "text": "The paper extends the prior Mamba discretization theory by introducing an exponential-trapezoidal method.",
2232
+ "importance": "high",
2233
+ "support_type": "explicit",
2234
+ "numbers": [],
2235
+ "entities": [
2236
+ "Mamba discretization",
2237
+ "exponential-trapezoidal method"
2238
+ ],
2239
+ "dependencies": [
2240
+ "m3-C35-CL1"
2241
+ ],
2242
+ "evidence": {
2243
+ "doc_id": "m3",
2244
+ "chunk_id": "C35",
2245
+ "where": "chunk C35",
2246
+ "quote": "Our theory formalizes the prior Mamba discretization as exponential-Euler and extends it"
2247
+ }
2248
+ },
2249
+ {
2250
+ "claim_id": "m3-C35-CL3",
2251
+ "text": "Mamba-1's reported ZOH discretization differs from its actual implementation (as per a GitHub issue).",
2252
+ "importance": "medium",
2253
+ "support_type": "explicit",
2254
+ "numbers": [
2255
+ "#129"
2256
+ ],
2257
+ "entities": [
2258
+ "Mamba-1",
2259
+ "ZOH discretization",
2260
+ "GitHub"
2261
+ ],
2262
+ "dependencies": [],
2263
+ "evidence": {
2264
+ "doc_id": "m3",
2265
+ "chunk_id": "C35",
2266
+ "where": "chunk C35",
2267
+ "quote": "While the Mamba-1 paper reports ZOH discretization, the implementation follows https://github.com/state-spaces/mamba/issues/129"
2268
+ }
2269
+ },
2270
+ {
2271
+ "claim_id": "m3-C35-CL4",
2272
+ "text": "Table 1 lists canonical linear-time invariant discretizations and custom linear-time varying discretizations from the exponential-adjusted framework.",
2273
+ "importance": "low",
2274
+ "support_type": "explicit",
2275
+ "numbers": [
2276
+ "1"
2277
+ ],
2278
+ "entities": [
2279
+ "Table 1"
2280
+ ],
2281
+ "dependencies": [],
2282
+ "evidence": {
2283
+ "doc_id": "m3",
2284
+ "chunk_id": "C35",
2285
+ "where": "chunk C35",
2286
+ "quote": "Table 1: Table of canonical linear-time invariant discretizations (top) and custom linear-time varying discretizations"
2287
+ }
2288
+ }
2289
+ ],
2290
+ "definitions": [
2291
+ {
2292
+ "term": "B_t",
2293
+ "definition": "A parameter represented differently as continuous in this paper and discretized in Mamba-2 (equivalent to γ_tB_t)"
2294
+ },
2295
+ {
2296
+ "term": "Exponential-Euler (discretization)",
2297
+ "definition": "The formalized representation of the prior Mamba discretization in this paper"
2298
+ },
2299
+ {
2300
+ "term": "Exponential-Trapezoidal Method",
2301
+ "definition": "A new, more expressive discretization method introduced in this paper as an extension to Mamba"
2302
+ }
2303
+ ],
2304
+ "math": [
2305
+ {
2306
+ "expression": "γ_tB_t",
2307
+ "interpretation": "The discretized form of parameter B_t in Mamba-2, where γ_t is a scaling factor",
2308
+ "evidence": null
2309
+ }
2310
+ ],
2311
+ "table": [],
2312
+ "figure": [],
2313
+ "code": [],
2314
+ "chunk_summary": "This paper contrasts its continuous parameter representation with Mamba-2's discretized approach, extends Mamba's discretization theory with a new method, and references a discrepancy in Mamba-1's implementation. It also presents a table on various discretizations."
2315
+ },
2316
+ "cached_at": "2026-03-31T06:11:33.529241+00:00"
2317
+ },
2318
+ "5640580cd928bf6ac61d7eb47ebdee7c1c7cde531b46d5bb55c3af4610a556f1": {
2319
+ "stage": "extract",
2320
+ "doc_id": "m3",
2321
+ "chunk_id": "C64",
2322
+ "chunk_hash": "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467",
2323
+ "chunk_type": "table",
2324
+ "mode_used": "MODE_REASONING",
2325
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2326
+ "extracted": {
2327
+ "claims": [
2328
+ {
2329
+ "claim_id": "m3-C64-CL1",
2330
+ "text": "The paper discusses the efficiency of Mamba and compares its arithmetic intensity to NVIDIA H100-SXM5's bfloat16 matmul.",
2331
+ "importance": "high",
2332
+ "support_type": "explicit",
2333
+ "numbers": [
2334
+ "2.5",
2335
+ "295"
2336
+ ],
2337
+ "entities": [
2338
+ "Mamba",
2339
+ "NVIDIA H100-SXM5"
2340
+ ],
2341
+ "dependencies": [],
2342
+ "evidence": {
2343
+ "doc_id": "m3",
2344
+ "chunk_id": "C64",
2345
+ "where": "chunk C64",
2346
+ "quote": "More concretely, the arithmetic intensity for a single generation in Mamba is around 2.5ops per byte (Table 2a), while the arithmetic intensity for bfloat16 matmul is about 295ops per byte for NVIDIA "
2347
+ }
2348
+ },
2349
+ {
2350
+ "claim_id": "m3-C64-CL2",
2351
+ "text": "SSM decoding in Mamba falls short of a compute-bound regime, with unclear parameter adjustments for mitigation.",
2352
+ "importance": "high",
2353
+ "support_type": "explicit",
2354
+ "numbers": [],
2355
+ "entities": [
2356
+ "SSM decoding",
2357
+ "Mamba"
2358
+ ],
2359
+ "dependencies": [
2360
+ "m3-C64-CL1"
2361
+ ],
2362
+ "evidence": {
2363
+ "doc_id": "m3",
2364
+ "chunk_id": "C64",
2365
+ "where": "chunk C64",
2366
+ "quote": "Consequently, SSM decoding falls far short of a compute-bound regime, and moreover it is not clear how one can adjust the existing parameters in Mamba to mitigate the lack of hardware efficiency."
2367
+ }
2368
+ },
2369
+ {
2370
+ "claim_id": "m3-C64-CL3",
2371
+ "text": "The observation about inefficiency applies to other sub-quadratic models like causal linear attention.",
2372
+ "importance": "medium",
2373
+ "support_type": "explicit",
2374
+ "numbers": [],
2375
+ "entities": [
2376
+ "sub-quadratic models",
2377
+ "causal linear attention"
2378
+ ],
2379
+ "dependencies": [
2380
+ "m3-C64-CL2"
2381
+ ],
2382
+ "evidence": {
2383
+ "doc_id": "m3",
2384
+ "chunk_id": "C64",
2385
+ "where": "chunk C64",
2386
+ "quote": "We note that this observation applies generally to other sub-quadratic models, such as causal linear attention."
2387
+ }
2388
+ },
2389
+ {
2390
+ "claim_id": "m3-C64-CL4",
2391
+ "text": "The paper technically analyzes SSM, transitioning from SISO to MIMO, involving specific mathematical formulations.",
2392
+ "importance": "high",
2393
+ "support_type": "explicit",
2394
+ "numbers": [],
2395
+ "entities": [
2396
+ "SSM",
2397
+ "SISO",
2398
+ "MIMO"
2399
+ ],
2400
+ "dependencies": [],
2401
+ "evidence": {
2402
+ "doc_id": "m3",
2403
+ "chunk_id": "C64",
2404
+ "where": "chunk C64",
2405
+ "quote": "From SISO to MIMO. Consider a single head of a typical SSM..."
2406
+ }
2407
+ },
2408
+ {
2409
+ "claim_id": "m3-C64-CL5",
2410
+ "text": "In MIMO SSM, memory traffic is dominated by state hₜ, and computation by the outer product Bₜx⊤ₜ with FLOPs proportional to N×P.",
2411
+ "importance": "medium",
2412
+ "support_type": "explicit",
2413
+ "numbers": [],
2414
+ "entities": [
2415
+ "MIMO SSM",
2416
+ "hₜ",
2417
+ "Bₜx⊤ₜ"
2418
+ ],
2419
+ "dependencies": [
2420
+ "m3-C64-CL4"
2421
+ ],
2422
+ "evidence": {
2423
+ "doc_id": "m3",
2424
+ "chunk_id": "C64",
2425
+ "where": "chunk C64",
2426
+ "quote": "Note that the memory traffic... is dominated by the state hₜ, while the computation mainly comprises the outer product Bₜx⊤ₜ which has FLOPs proportional to N𝑃."
2427
+ }
2428
+ }
2429
+ ],
2430
+ "definitions": [
2431
+ {
2432
+ "term": "Arithmetic Intensity",
2433
+ "definition": "Ratio of computational operations (ops) to memory traffic (bytes), implicitly defined in the context."
2434
+ },
2435
+ {
2436
+ "term": "SSM",
2437
+ "definition": "Implicitly defined as a model type within the paper's context, possibly 'State Space Model' based on external knowledge."
2438
+ },
2439
+ {
2440
+ "term": "SISO/MIMO",
2441
+ "definition": "Single-Input Single-Output / Multiple-Input Multiple-Output, contextual to the model's architecture."
2442
+ }
2443
+ ],
2444
+ "math": [
2445
+ {
2446
+ "expression": "2.5ops/byte, 295ops/byte",
2447
+ "interpretation": "Arithmetic intensity measurements for Mamba and NVIDIA H100-SXM5's bfloat16 matmul, respectively.",
2448
+ "evidence": null
2449
+ },
2450
+ {
2451
+ "expression": "N×P (FLOPs proportionality)",
2452
+ "interpretation": "Proportionality of Floating Point Operations to the product of dimensions N and P in MIMO SSM computation.",
2453
+ "evidence": null
2454
+ }
2455
+ ],
2456
+ "table": [],
2457
+ "figure": [],
2458
+ "code": [],
2459
+ "chunk_summary": "This introduction discusses the inefficiency of Mamba's SSM decoding compared to NVIDIA H100-SXM5, its broader applicability to sub-quadratic models, and delves into the technical analysis of transitioning SSM from SISO to MIMO, highlighting memory and computation aspects."
2460
+ },
2461
+ "cached_at": "2026-03-31T06:12:01.423458+00:00"
2462
+ },
2463
+ "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467": {
2464
+ "stage": "extract",
2465
+ "doc_id": "m3",
2466
+ "chunk_id": "C64",
2467
+ "chunk_hash": "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467",
2468
+ "chunk_type": "table",
2469
+ "mode_used": "MODE_REASONING",
2470
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2471
+ "extracted": {
2472
+ "claims": [
2473
+ {
2474
+ "claim_id": "m3-C64-CL1",
2475
+ "text": "The paper discusses the efficiency of Mamba and compares its arithmetic intensity to NVIDIA H100-SXM5's bfloat16 matmul.",
2476
+ "importance": "high",
2477
+ "support_type": "explicit",
2478
+ "numbers": [
2479
+ "2.5",
2480
+ "295"
2481
+ ],
2482
+ "entities": [
2483
+ "Mamba",
2484
+ "NVIDIA H100-SXM5"
2485
+ ],
2486
+ "dependencies": [],
2487
+ "evidence": {
2488
+ "doc_id": "m3",
2489
+ "chunk_id": "C64",
2490
+ "where": "chunk C64",
2491
+ "quote": "More concretely, the arithmetic intensity for a single generation in Mamba is around 2.5ops per byte (Table 2a), while the arithmetic intensity for bfloat16 matmul is about 295ops per byte for NVIDIA "
2492
+ }
2493
+ },
2494
+ {
2495
+ "claim_id": "m3-C64-CL2",
2496
+ "text": "SSM decoding in Mamba falls short of a compute-bound regime, with unclear parameter adjustments for mitigation.",
2497
+ "importance": "high",
2498
+ "support_type": "explicit",
2499
+ "numbers": [],
2500
+ "entities": [
2501
+ "SSM decoding",
2502
+ "Mamba"
2503
+ ],
2504
+ "dependencies": [
2505
+ "m3-C64-CL1"
2506
+ ],
2507
+ "evidence": {
2508
+ "doc_id": "m3",
2509
+ "chunk_id": "C64",
2510
+ "where": "chunk C64",
2511
+ "quote": "Consequently, SSM decoding falls far short of a compute-bound regime, and moreover it is not clear how one can adjust the existing parameters in Mamba to mitigate the lack of hardware efficiency."
2512
+ }
2513
+ },
2514
+ {
2515
+ "claim_id": "m3-C64-CL3",
2516
+ "text": "The observation about inefficiency applies to other sub-quadratic models like causal linear attention.",
2517
+ "importance": "medium",
2518
+ "support_type": "explicit",
2519
+ "numbers": [],
2520
+ "entities": [
2521
+ "sub-quadratic models",
2522
+ "causal linear attention"
2523
+ ],
2524
+ "dependencies": [
2525
+ "m3-C64-CL2"
2526
+ ],
2527
+ "evidence": {
2528
+ "doc_id": "m3",
2529
+ "chunk_id": "C64",
2530
+ "where": "chunk C64",
2531
+ "quote": "We note that this observation applies generally to other sub-quadratic models, such as causal linear attention."
2532
+ }
2533
+ },
2534
+ {
2535
+ "claim_id": "m3-C64-CL4",
2536
+ "text": "The paper technically analyzes SSM, transitioning from SISO to MIMO, involving specific mathematical formulations.",
2537
+ "importance": "high",
2538
+ "support_type": "explicit",
2539
+ "numbers": [],
2540
+ "entities": [
2541
+ "SSM",
2542
+ "SISO",
2543
+ "MIMO"
2544
+ ],
2545
+ "dependencies": [],
2546
+ "evidence": {
2547
+ "doc_id": "m3",
2548
+ "chunk_id": "C64",
2549
+ "where": "chunk C64",
2550
+ "quote": "From SISO to MIMO. Consider a single head of a typical SSM..."
2551
+ }
2552
+ },
2553
+ {
2554
+ "claim_id": "m3-C64-CL5",
2555
+ "text": "In MIMO SSM, memory traffic is dominated by state hₜ, and computation by the outer product Bₜx⊤ₜ with FLOPs proportional to N×P.",
2556
+ "importance": "medium",
2557
+ "support_type": "explicit",
2558
+ "numbers": [],
2559
+ "entities": [
2560
+ "MIMO SSM",
2561
+ "hₜ",
2562
+ "Bₜx⊤ₜ"
2563
+ ],
2564
+ "dependencies": [
2565
+ "m3-C64-CL4"
2566
+ ],
2567
+ "evidence": {
2568
+ "doc_id": "m3",
2569
+ "chunk_id": "C64",
2570
+ "where": "chunk C64",
2571
+ "quote": "Note that the memory traffic... is dominated by the state hₜ, while the computation mainly comprises the outer product Bₜx⊤ₜ which has FLOPs proportional to N𝑃."
2572
+ }
2573
+ }
2574
+ ],
2575
+ "definitions": [
2576
+ {
2577
+ "term": "Arithmetic Intensity",
2578
+ "definition": "Ratio of computational operations (ops) to memory traffic (bytes), implicitly defined in the context."
2579
+ },
2580
+ {
2581
+ "term": "SSM",
2582
+ "definition": "Implicitly defined as a model type within the paper's context, possibly 'State Space Model' based on external knowledge."
2583
+ },
2584
+ {
2585
+ "term": "SISO/MIMO",
2586
+ "definition": "Single-Input Single-Output / Multiple-Input Multiple-Output, contextual to the model's architecture."
2587
+ }
2588
+ ],
2589
+ "math": [
2590
+ {
2591
+ "expression": "2.5ops/byte, 295ops/byte",
2592
+ "interpretation": "Arithmetic intensity measurements for Mamba and NVIDIA H100-SXM5's bfloat16 matmul, respectively.",
2593
+ "evidence": null
2594
+ },
2595
+ {
2596
+ "expression": "N×P (FLOPs proportionality)",
2597
+ "interpretation": "Proportionality of Floating Point Operations to the product of dimensions N and P in MIMO SSM computation.",
2598
+ "evidence": null
2599
+ }
2600
+ ],
2601
+ "table": [],
2602
+ "figure": [],
2603
+ "code": [],
2604
+ "chunk_summary": "This introduction discusses the inefficiency of Mamba's SSM decoding compared to NVIDIA H100-SXM5, its broader applicability to sub-quadratic models, and delves into the technical analysis of transitioning SSM from SISO to MIMO, highlighting memory and computation aspects."
2605
+ },
2606
+ "cached_at": "2026-03-31T06:12:01.423458+00:00"
2607
+ },
2608
+ "5b323bb2c1e955746b99089ab8edd1708edd9608e962335c1c324fd33d060dc4": {
2609
+ "stage": "extract",
2610
+ "doc_id": "m3",
2611
+ "chunk_id": "C77",
2612
+ "chunk_hash": "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c",
2613
+ "chunk_type": "figure",
2614
+ "mode_used": "MODE_VISION",
2615
+ "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
2616
+ "extracted": {
2617
+ "claims": [
2618
+ {
2619
+ "claim_id": "m3-C77-CL1",
2620
+ "text": "The paper keeps the original SISO projection and scales each dimension of the projected output to size R with a learnable, data-independent vector.",
2621
+ "importance": "high",
2622
+ "support_type": "explicit",
2623
+ "numbers": [
2624
+ "R"
2625
+ ],
2626
+ "entities": [
2627
+ "SISO projection",
2628
+ "learnable vector"
2629
+ ],
2630
+ "dependencies": [],
2631
+ "evidence": {
2632
+ "doc_id": "m3",
2633
+ "chunk_id": "C77",
2634
+ "where": "chunk C77",
2635
+ "quote": "Instead, we keep the original SISO projection and element-wise scale each dimension of the projected output to size 𝑅with a learnable, data-independent vector, resulting in 𝐷𝑃+𝑃𝑅parameters for each he"
2636
+ }
2637
+ },
2638
+ {
2639
+ "claim_id": "m3-C77-CL2",
2640
+ "text": "The paper introduces exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases to mitigate the multiplicative increase in parameters.",
2641
+ "importance": "medium",
2642
+ "support_type": "explicit",
2643
+ "numbers": [],
2644
+ "entities": [
2645
+ "exponential-trapezoidal discretization",
2646
+ "data-dependent RoPE embeddings",
2647
+ "MIMO projections",
2648
+ "QK normalization",
2649
+ "learnable biases"
2650
+ ],
2651
+ "dependencies": [],
2652
+ "evidence": {
2653
+ "doc_id": "m3",
2654
+ "chunk_id": "C77",
2655
+ "where": "chunk C77",
2656
+ "quote": "This mitigates the multiplicative increase to a more reasonable additive parameter count increase."
2657
+ }
2658
+ },
2659
+ {
2660
+ "claim_id": "m3-C77-CL3",
2661
+ "text": "The paper parameter-matches all MIMO-variants to their SISO counterparts by reducing the MLP width.",
2662
+ "importance": "medium",
2663
+ "support_type": "explicit",
2664
+ "numbers": [],
2665
+ "entities": [
2666
+ "MIMO-variants",
2667
+ "SISO counterparts",
2668
+ "MLP width"
2669
+ ],
2670
+ "dependencies": [],
2671
+ "evidence": {
2672
+ "doc_id": "m3",
2673
+ "chunk_id": "C77",
2674
+ "where": "chunk C77",
2675
+ "quote": "Appendix C details the parameterization, and all MIMO-variants in our paper are parameter-matched to their SISO counterparts by reducing the MLP width."
2676
+ }
2677
+ }
2678
+ ],
2679
+ "definitions": [
2680
+ {
2681
+ "term": "SISO projection",
2682
+ "definition": "A projection method that processes data sequentially, one dimension at a time."
2683
+ },
2684
+ {
2685
+ "term": "RoPE embeddings",
2686
+ "definition": "A method for generating embeddings that uses relative position encoding."
2687
+ },
2688
+ {
2689
+ "term": "MIMO projection",
2690
+ "definition": "A projection method that processes multiple dimensions simultaneously."
2691
+ },
2692
+ {
2693
+ "term": "QK normalization",
2694
+ "definition": "A normalization method that uses a query-key-value attention mechanism."
2695
+ }
2696
+ ],
2697
+ "math": [
2698
+ {
2699
+ "expression": "DP + PR",
2700
+ "interpretation": "The total number of parameters for each head after scaling and adding learnable vectors.",
2701
+ "evidence": null
2702
+ }
2703
+ ],
2704
+ "table": [],
2705
+ "figure": [],
2706
+ "code": [],
2707
+ "chunk_summary": "The paper introduces several updates to the original SISO projection, including scaling, exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases. These updates aim to mitigate the multiplicative increase in parameters, and all MIMO-variants are parameter-matched to their SISO counterparts by reducing the MLP width."
2708
+ },
2709
+ "cached_at": "2026-03-31T06:12:20.641235+00:00"
2710
+ },
2711
+ "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c": {
2712
+ "stage": "extract",
2713
+ "doc_id": "m3",
2714
+ "chunk_id": "C77",
2715
+ "chunk_hash": "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c",
2716
+ "chunk_type": "figure",
2717
+ "mode_used": "MODE_VISION",
2718
+ "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
2719
+ "extracted": {
2720
+ "claims": [
2721
+ {
2722
+ "claim_id": "m3-C77-CL1",
2723
+ "text": "The paper keeps the original SISO projection and scales each dimension of the projected output to size R with a learnable, data-independent vector.",
2724
+ "importance": "high",
2725
+ "support_type": "explicit",
2726
+ "numbers": [
2727
+ "R"
2728
+ ],
2729
+ "entities": [
2730
+ "SISO projection",
2731
+ "learnable vector"
2732
+ ],
2733
+ "dependencies": [],
2734
+ "evidence": {
2735
+ "doc_id": "m3",
2736
+ "chunk_id": "C77",
2737
+ "where": "chunk C77",
2738
+ "quote": "Instead, we keep the original SISO projection and element-wise scale each dimension of the projected output to size 𝑅with a learnable, data-independent vector, resulting in 𝐷𝑃+𝑃𝑅parameters for each he"
2739
+ }
2740
+ },
2741
+ {
2742
+ "claim_id": "m3-C77-CL2",
2743
+ "text": "The paper introduces exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases to mitigate the multiplicative increase in parameters.",
2744
+ "importance": "medium",
2745
+ "support_type": "explicit",
2746
+ "numbers": [],
2747
+ "entities": [
2748
+ "exponential-trapezoidal discretization",
2749
+ "data-dependent RoPE embeddings",
2750
+ "MIMO projections",
2751
+ "QK normalization",
2752
+ "learnable biases"
2753
+ ],
2754
+ "dependencies": [],
2755
+ "evidence": {
2756
+ "doc_id": "m3",
2757
+ "chunk_id": "C77",
2758
+ "where": "chunk C77",
2759
+ "quote": "This mitigates the multiplicative increase to a more reasonable additive parameter count increase."
2760
+ }
2761
+ },
2762
+ {
2763
+ "claim_id": "m3-C77-CL3",
2764
+ "text": "The paper parameter-matches all MIMO-variants to their SISO counterparts by reducing the MLP width.",
2765
+ "importance": "medium",
2766
+ "support_type": "explicit",
2767
+ "numbers": [],
2768
+ "entities": [
2769
+ "MIMO-variants",
2770
+ "SISO counterparts",
2771
+ "MLP width"
2772
+ ],
2773
+ "dependencies": [],
2774
+ "evidence": {
2775
+ "doc_id": "m3",
2776
+ "chunk_id": "C77",
2777
+ "where": "chunk C77",
2778
+ "quote": "Appendix C details the parameterization, and all MIMO-variants in our paper are parameter-matched to their SISO counterparts by reducing the MLP width."
2779
+ }
2780
+ }
2781
+ ],
2782
+ "definitions": [
2783
+ {
2784
+ "term": "SISO projection",
2785
+ "definition": "A projection method that processes data sequentially, one dimension at a time."
2786
+ },
2787
+ {
2788
+ "term": "RoPE embeddings",
2789
+ "definition": "A method for generating embeddings that uses relative position encoding."
2790
+ },
2791
+ {
2792
+ "term": "MIMO projection",
2793
+ "definition": "A projection method that processes multiple dimensions simultaneously."
2794
+ },
2795
+ {
2796
+ "term": "QK normalization",
2797
+ "definition": "A normalization method that uses a query-key-value attention mechanism."
2798
+ }
2799
+ ],
2800
+ "math": [
2801
+ {
2802
+ "expression": "DP + PR",
2803
+ "interpretation": "The total number of parameters for each head after scaling and adding learnable vectors.",
2804
+ "evidence": null
2805
+ }
2806
+ ],
2807
+ "table": [],
2808
+ "figure": [],
2809
+ "code": [],
2810
+ "chunk_summary": "The paper introduces several updates to the original SISO projection, including scaling, exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases. These updates aim to mitigate the multiplicative increase in parameters, and all MIMO-variants are parameter-matched to their SISO counterparts by reducing the MLP width."
2811
+ },
2812
+ "cached_at": "2026-03-31T06:12:20.641235+00:00"
2813
+ },
2814
+ "a06cce7e93a52c44a5d98615a3323cbf3ac07b900073919927bce7d091f2cda8": {
2815
+ "stage": "extract",
2816
+ "doc_id": "m3",
2817
+ "chunk_id": "C99",
2818
+ "chunk_hash": "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae",
2819
+ "chunk_type": "text",
2820
+ "mode_used": "MODE_REASONING",
2821
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2822
+ "extracted": {
2823
+ "claims": [],
2824
+ "definitions": [],
2825
+ "math": [],
2826
+ "table": [],
2827
+ "figure": [],
2828
+ "code": [],
2829
+ "chunk_summary": "Here is the extracted JSON in the requested schema, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n \"claims\": [\n {\n \"claim_id\": \"m3-C0-CL1\",\n \"text\": \"The paper discusses the tradeoff between inference efficiency and performance in sub-quadratic models.\",\n \"importance\": \"high\",\n \"support_type\": \"explicit\",\n \"numbers\": [],\n \"entities\": [\"inference efficiency\", \"performance\", \"sub-quadratic models\"],\n \"dependencies\": []"
2830
+ },
2831
+ "cached_at": "2026-03-31T06:12:39.008804+00:00"
2832
+ },
2833
+ "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae": {
2834
+ "stage": "extract",
2835
+ "doc_id": "m3",
2836
+ "chunk_id": "C99",
2837
+ "chunk_hash": "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae",
2838
+ "chunk_type": "text",
2839
+ "mode_used": "MODE_REASONING",
2840
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2841
+ "extracted": {
2842
+ "claims": [],
2843
+ "definitions": [],
2844
+ "math": [],
2845
+ "table": [],
2846
+ "figure": [],
2847
+ "code": [],
2848
+ "chunk_summary": "Here is the extracted JSON in the requested schema, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n \"claims\": [\n {\n \"claim_id\": \"m3-C0-CL1\",\n \"text\": \"The paper discusses the tradeoff between inference efficiency and performance in sub-quadratic models.\",\n \"importance\": \"high\",\n \"support_type\": \"explicit\",\n \"numbers\": [],\n \"entities\": [\"inference efficiency\", \"performance\", \"sub-quadratic models\"],\n \"dependencies\": []"
2849
+ },
2850
+ "cached_at": "2026-03-31T06:12:39.008804+00:00"
2851
+ },
2852
+ "d65db15fea30f37d4ebae83e0d6535c1b7c04ae0cea7935f606036123ac7679d": {
2853
+ "stage": "extract",
2854
+ "doc_id": "m3",
2855
+ "chunk_id": "C0",
2856
+ "chunk_hash": "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce",
2857
+ "chunk_type": "text",
2858
+ "mode_used": "MODE_REASONING",
2859
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2860
+ "extracted": {
2861
+ "claims": [],
2862
+ "definitions": [],
2863
+ "math": [],
2864
+ "table": [],
2865
+ "figure": [],
2866
+ "code": [],
2867
+ "chunk_summary": "Here is the extraction in the requested JSON format, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n \"claims\": [\n {\n \"claim_id\": \"m3-C0-CL1\",\n \"text\": \"The paper introduces Mamba-3, an improved sequence modeling approach based on state space principles.\",\n \"importance\": \"high\",\n \"support_type\": \"explicit\",\n \"numbers\": [],\n \"entities\": [\"Mamba-3\", \"sequence modeling\", \"state space principles\"],\n \"dependencies\": [],\n "
2868
+ },
2869
+ "cached_at": "2026-03-31T06:12:56.425042+00:00"
2870
+ },
2871
+ "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce": {
2872
+ "stage": "extract",
2873
+ "doc_id": "m3",
2874
+ "chunk_id": "C0",
2875
+ "chunk_hash": "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce",
2876
+ "chunk_type": "text",
2877
+ "mode_used": "MODE_REASONING",
2878
+ "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
2879
+ "extracted": {
2880
+ "claims": [],
2881
+ "definitions": [],
2882
+ "math": [],
2883
+ "table": [],
2884
+ "figure": [],
2885
+ "code": [],
2886
+ "chunk_summary": "Here is the extraction in the requested JSON format, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n \"claims\": [\n {\n \"claim_id\": \"m3-C0-CL1\",\n \"text\": \"The paper introduces Mamba-3, an improved sequence modeling approach based on state space principles.\",\n \"importance\": \"high\",\n \"support_type\": \"explicit\",\n \"numbers\": [],\n \"entities\": [\"Mamba-3\", \"sequence modeling\", \"state space principles\"],\n \"dependencies\": [],\n "
2887
+ },
2888
+ "cached_at": "2026-03-31T06:12:56.425042+00:00"
2889
  }
2890
  }
mp1/pluto/models.py CHANGED
@@ -10,7 +10,9 @@ import hashlib
10
  from enum import Enum
11
  from typing import Optional
12
 
13
- from pydantic import BaseModel, Field
 
 
14
 
15
 
16
  # ── Enums ──────────────────────────────────────────────────────────────────────
@@ -63,6 +65,11 @@ class Evidence(BaseModel):
63
  where: str = ""
64
  quote: str = Field(default="", max_length=200)
65
 
 
 
 
 
 
66
 
67
  # ── S0 ROUTE ───────────────────────────────────────────────────────────────────
68
 
@@ -70,6 +77,11 @@ class DocScope(BaseModel):
70
  doc_id: str
71
  reason: str
72
 
 
 
 
 
 
73
 
74
  class ChunkPlan(BaseModel):
75
  doc_id: str
@@ -80,6 +92,11 @@ class ChunkPlan(BaseModel):
80
  priority: Priority = Priority.MEDIUM
81
  task: str = ""
82
 
 
 
 
 
 
83
 
84
  class Budgets(BaseModel):
85
  max_chunks_to_read: int = 200
@@ -106,12 +123,27 @@ class Claim(BaseModel):
106
  dependencies: list[str] = Field(default_factory=list)
107
  evidence: Evidence | None = None
108
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  class MathItem(BaseModel):
111
  expression: str
112
  interpretation: str = ""
113
  evidence: Evidence | None = None
114
 
 
 
 
 
 
115
 
116
  class TableItem(BaseModel):
117
  caption: str = ""
@@ -119,12 +151,35 @@ class TableItem(BaseModel):
119
  rows: list[list[str]] = Field(default_factory=list)
120
  evidence: Evidence | None = None
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  class FigureItem(BaseModel):
124
  caption: str = ""
125
  description: str = ""
126
  evidence: Evidence | None = None
127
 
 
 
 
 
 
128
 
129
  class CodeItem(BaseModel):
130
  language: str = ""
@@ -132,6 +187,11 @@ class CodeItem(BaseModel):
132
  description: str = ""
133
  evidence: Evidence | None = None
134
 
 
 
 
 
 
135
 
136
  class ExtractedContent(BaseModel):
137
  claims: list[Claim] = Field(default_factory=list)
@@ -142,6 +202,11 @@ class ExtractedContent(BaseModel):
142
  code: list[CodeItem] = Field(default_factory=list)
143
  chunk_summary: str = ""
144
 
 
 
 
 
 
145
 
146
  class ExtractOutput(BaseModel):
147
  stage: str = "extract"
@@ -160,18 +225,38 @@ class SectionPoint(BaseModel):
160
  section: str
161
  points: list[str] = Field(default_factory=list)
162
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  class KeyClaim(BaseModel):
165
  claim: str
166
  support: ClaimStatus = ClaimStatus.SUPPORTED
167
  evidence_refs: list[Evidence] = Field(default_factory=list)
168
 
 
 
 
 
 
169
 
170
  class Synthesis(BaseModel):
171
  answer_outline: list[SectionPoint] = Field(default_factory=list)
172
  key_claims: list[KeyClaim] = Field(default_factory=list)
173
  open_gaps: list[str] = Field(default_factory=list)
174
 
 
 
 
 
 
175
 
176
  class MergeOutput(BaseModel):
177
  stage: str = "merge"
@@ -185,12 +270,22 @@ class CheckedClaim(BaseModel):
185
  status: ClaimStatus
186
  evidence: list[Evidence] = Field(default_factory=list)
187
 
 
 
 
 
 
188
 
189
  class Verification(BaseModel):
190
  checked_claims: list[CheckedClaim] = Field(default_factory=list)
191
  unsupported_claims: list[str] = Field(default_factory=list)
192
  required_followups: list[str] = Field(default_factory=list)
193
 
 
 
 
 
 
194
 
195
  class VerifyOutput(BaseModel):
196
  stage: str = "verify"
@@ -203,11 +298,21 @@ class Section(BaseModel):
203
  title: str
204
  content: str
205
 
 
 
 
 
 
206
 
207
  class FinalAnswer(BaseModel):
208
  response: str
209
  sections: list[Section] = Field(default_factory=list)
210
 
 
 
 
 
 
211
 
212
  class FinalEvidence(BaseModel):
213
  doc_id: str
@@ -216,6 +321,11 @@ class FinalEvidence(BaseModel):
216
  supports: str = ""
217
  quote: str = Field(default="", max_length=200)
218
 
 
 
 
 
 
219
 
220
  class TraceSummary(BaseModel):
221
  real_switching: bool = False
@@ -226,6 +336,16 @@ class TraceSummary(BaseModel):
226
  search_queries: list[str] = Field(default_factory=list)
227
  budget_notes: str = ""
228
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  class FinalOutput(BaseModel):
231
  final_answer: FinalAnswer = Field(default_factory=FinalAnswer)
@@ -236,6 +356,11 @@ class FinalOutput(BaseModel):
236
  next_actions: list[str] = Field(default_factory=list)
237
  bus_messages: list[dict] = Field(default_factory=list)
238
 
 
 
 
 
 
239
 
240
  # ── Helpers ────────────────────────────────────────────────────────────────────
241
 
 
10
  from enum import Enum
11
  from typing import Optional
12
 
13
+ from pydantic import BaseModel, Field, field_validator
14
+
15
+ from pluto.utils import coerce_string, coerce_string_list, ensure_list
16
 
17
 
18
  # ── Enums ──────────────────────────────────────────────────────────────────────
 
65
  where: str = ""
66
  quote: str = Field(default="", max_length=200)
67
 
68
+ @field_validator("doc_id", "chunk_id", "where", "quote", mode="before")
69
+ @classmethod
70
+ def _normalize_text_fields(cls, value):
71
+ return coerce_string(value, default="")
72
+
73
 
74
  # ── S0 ROUTE ───────────────────────────────────────────────────────────────────
75
 
 
77
  doc_id: str
78
  reason: str
79
 
80
+ @field_validator("doc_id", "reason", mode="before")
81
+ @classmethod
82
+ def _normalize_doc_scope_fields(cls, value):
83
+ return coerce_string(value, default="")
84
+
85
 
86
  class ChunkPlan(BaseModel):
87
  doc_id: str
 
92
  priority: Priority = Priority.MEDIUM
93
  task: str = ""
94
 
95
+ @field_validator("doc_id", "chunk_id", "where", "task", mode="before")
96
+ @classmethod
97
+ def _normalize_chunk_plan_text_fields(cls, value):
98
+ return coerce_string(value, default="")
99
+
100
 
101
  class Budgets(BaseModel):
102
  max_chunks_to_read: int = 200
 
123
  dependencies: list[str] = Field(default_factory=list)
124
  evidence: Evidence | None = None
125
 
126
+ @field_validator("claim_id", "text", mode="before")
127
+ @classmethod
128
+ def _normalize_claim_text_fields(cls, value):
129
+ return coerce_string(value, default="")
130
+
131
+ @field_validator("numbers", "entities", "dependencies", mode="before")
132
+ @classmethod
133
+ def _normalize_claim_lists(cls, value):
134
+ return coerce_string_list(value)
135
+
136
 
137
  class MathItem(BaseModel):
138
  expression: str
139
  interpretation: str = ""
140
  evidence: Evidence | None = None
141
 
142
+ @field_validator("expression", "interpretation", mode="before")
143
+ @classmethod
144
+ def _normalize_math_fields(cls, value):
145
+ return coerce_string(value, default="")
146
+
147
 
148
  class TableItem(BaseModel):
149
  caption: str = ""
 
151
  rows: list[list[str]] = Field(default_factory=list)
152
  evidence: Evidence | None = None
153
 
154
+ @field_validator("caption", mode="before")
155
+ @classmethod
156
+ def _normalize_table_caption(cls, value):
157
+ return coerce_string(value, default="")
158
+
159
+ @field_validator("headers", mode="before")
160
+ @classmethod
161
+ def _normalize_table_headers(cls, value):
162
+ return coerce_string_list(value)
163
+
164
+ @field_validator("rows", mode="before")
165
+ @classmethod
166
+ def _normalize_table_rows(cls, value):
167
+ rows = []
168
+ for row in ensure_list(value):
169
+ rows.append(coerce_string_list(row))
170
+ return [row for row in rows if row]
171
+
172
 
173
  class FigureItem(BaseModel):
174
  caption: str = ""
175
  description: str = ""
176
  evidence: Evidence | None = None
177
 
178
+ @field_validator("caption", "description", mode="before")
179
+ @classmethod
180
+ def _normalize_figure_fields(cls, value):
181
+ return coerce_string(value, default="")
182
+
183
 
184
  class CodeItem(BaseModel):
185
  language: str = ""
 
187
  description: str = ""
188
  evidence: Evidence | None = None
189
 
190
+ @field_validator("language", "snippet", "description", mode="before")
191
+ @classmethod
192
+ def _normalize_code_fields(cls, value):
193
+ return coerce_string(value, default="")
194
+
195
 
196
  class ExtractedContent(BaseModel):
197
  claims: list[Claim] = Field(default_factory=list)
 
202
  code: list[CodeItem] = Field(default_factory=list)
203
  chunk_summary: str = ""
204
 
205
+ @field_validator("chunk_summary", mode="before")
206
+ @classmethod
207
+ def _normalize_chunk_summary(cls, value):
208
+ return coerce_string(value, default="")
209
+
210
 
211
  class ExtractOutput(BaseModel):
212
  stage: str = "extract"
 
225
  section: str
226
  points: list[str] = Field(default_factory=list)
227
 
228
+ @field_validator("section", mode="before")
229
+ @classmethod
230
+ def _normalize_section_name(cls, value):
231
+ return coerce_string(value, default="")
232
+
233
+ @field_validator("points", mode="before")
234
+ @classmethod
235
+ def _normalize_section_points(cls, value):
236
+ return coerce_string_list(value)
237
+
238
 
239
  class KeyClaim(BaseModel):
240
  claim: str
241
  support: ClaimStatus = ClaimStatus.SUPPORTED
242
  evidence_refs: list[Evidence] = Field(default_factory=list)
243
 
244
+ @field_validator("claim", mode="before")
245
+ @classmethod
246
+ def _normalize_key_claim(cls, value):
247
+ return coerce_string(value, default="")
248
+
249
 
250
  class Synthesis(BaseModel):
251
  answer_outline: list[SectionPoint] = Field(default_factory=list)
252
  key_claims: list[KeyClaim] = Field(default_factory=list)
253
  open_gaps: list[str] = Field(default_factory=list)
254
 
255
+ @field_validator("open_gaps", mode="before")
256
+ @classmethod
257
+ def _normalize_open_gap_list(cls, value):
258
+ return coerce_string_list(value)
259
+
260
 
261
  class MergeOutput(BaseModel):
262
  stage: str = "merge"
 
270
  status: ClaimStatus
271
  evidence: list[Evidence] = Field(default_factory=list)
272
 
273
+ @field_validator("claim", mode="before")
274
+ @classmethod
275
+ def _normalize_checked_claim(cls, value):
276
+ return coerce_string(value, default="")
277
+
278
 
279
  class Verification(BaseModel):
280
  checked_claims: list[CheckedClaim] = Field(default_factory=list)
281
  unsupported_claims: list[str] = Field(default_factory=list)
282
  required_followups: list[str] = Field(default_factory=list)
283
 
284
+ @field_validator("unsupported_claims", "required_followups", mode="before")
285
+ @classmethod
286
+ def _normalize_verification_lists(cls, value):
287
+ return coerce_string_list(value)
288
+
289
 
290
  class VerifyOutput(BaseModel):
291
  stage: str = "verify"
 
298
  title: str
299
  content: str
300
 
301
+ @field_validator("title", "content", mode="before")
302
+ @classmethod
303
+ def _normalize_section_fields(cls, value):
304
+ return coerce_string(value, default="")
305
+
306
 
307
  class FinalAnswer(BaseModel):
308
  response: str
309
  sections: list[Section] = Field(default_factory=list)
310
 
311
+ @field_validator("response", mode="before")
312
+ @classmethod
313
+ def _normalize_response(cls, value):
314
+ return coerce_string(value, default="")
315
+
316
 
317
  class FinalEvidence(BaseModel):
318
  doc_id: str
 
321
  supports: str = ""
322
  quote: str = Field(default="", max_length=200)
323
 
324
+ @field_validator("doc_id", "chunk_id", "where", "supports", "quote", mode="before")
325
+ @classmethod
326
+ def _normalize_final_evidence_fields(cls, value):
327
+ return coerce_string(value, default="")
328
+
329
 
330
  class TraceSummary(BaseModel):
331
  real_switching: bool = False
 
336
  search_queries: list[str] = Field(default_factory=list)
337
  budget_notes: str = ""
338
 
339
+ @field_validator("models_used", "docs_opened", "search_queries", mode="before")
340
+ @classmethod
341
+ def _normalize_trace_lists(cls, value):
342
+ return coerce_string_list(value)
343
+
344
+ @field_validator("budget_notes", mode="before")
345
+ @classmethod
346
+ def _normalize_budget_notes(cls, value):
347
+ return coerce_string(value, default="")
348
+
349
 
350
  class FinalOutput(BaseModel):
351
  final_answer: FinalAnswer = Field(default_factory=FinalAnswer)
 
356
  next_actions: list[str] = Field(default_factory=list)
357
  bus_messages: list[dict] = Field(default_factory=list)
358
 
359
+ @field_validator("missing_info", "next_actions", mode="before")
360
+ @classmethod
361
+ def _normalize_final_output_lists(cls, value):
362
+ return coerce_string_list(value)
363
+
364
 
365
  # ── Helpers ────────────────────────────────────────────────────────────────────
366
 
mp1/pluto/stages/merge.py CHANGED
@@ -27,6 +27,7 @@ from pluto.models import (
27
  Synthesis,
28
  )
29
  from pluto.tracer import Tracer
 
30
 
31
 
32
  _BATCH_PROMPT = """You are synthesizing extracted facts from a document chunk batch. Produce a focused sub-summary for the user's question.
@@ -314,20 +315,18 @@ def _parse_merge(raw: str) -> MergeOutput:
314
  section=sec.get("section", ""),
315
  points=sec.get("points", []),
316
  )
317
- for sec in data.get("answer_outline", [])
318
  if isinstance(sec, dict)
319
  if sec.get("section") or sec.get("points")
320
  ]
321
 
322
  key_claims: list[KeyClaim] = []
323
- for kc in data.get("key_claims", []):
324
  if not isinstance(kc, dict):
325
  continue
326
- evidence_refs = []
327
- for doc_id, chunk_id in zip(kc.get("evidence_doc_ids") or [], kc.get("evidence_chunk_ids") or []):
328
- evidence_refs.append(Evidence(doc_id=doc_id or "", chunk_id=chunk_id or ""))
329
 
330
- support_str = str(kc.get("support", "supported")).lower()
331
  try:
332
  support = ClaimStatus(support_str)
333
  except ValueError:
@@ -369,6 +368,8 @@ def _stabilize_merge(result: MergeOutput, query: str = "", detail_level: str = "
369
  outline = _synthesize_outline_from_claims(key_claims, query=query, detail_level=detail_level)
370
  elif outline:
371
  outline = _top_up_outline(outline, key_claims, detail_level=detail_level)
 
 
372
 
373
  return MergeOutput(
374
  synthesis=Synthesis(
@@ -558,6 +559,73 @@ def _top_up_outline(
558
  return outline
559
 
560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  def _normalize_detail_level(detail_level: str | None) -> str:
562
  return "detailed" if str(detail_level or "").strip().lower() == "detailed" else "standard"
563
 
@@ -638,3 +706,43 @@ def _normalize_open_gaps(raw_open_gaps) -> list[str]:
638
  if text:
639
  normalized.append(text)
640
  return normalized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  Synthesis,
28
  )
29
  from pluto.tracer import Tracer
30
+ from pluto.utils import coerce_string, coerce_string_list, ensure_list, pair_string_lists
31
 
32
 
33
  _BATCH_PROMPT = """You are synthesizing extracted facts from a document chunk batch. Produce a focused sub-summary for the user's question.
 
315
  section=sec.get("section", ""),
316
  points=sec.get("points", []),
317
  )
318
+ for sec in ensure_list(data.get("answer_outline", []))
319
  if isinstance(sec, dict)
320
  if sec.get("section") or sec.get("points")
321
  ]
322
 
323
  key_claims: list[KeyClaim] = []
324
+ for kc in ensure_list(data.get("key_claims", [])):
325
  if not isinstance(kc, dict):
326
  continue
327
+ evidence_refs = _parse_evidence_refs(kc)
 
 
328
 
329
+ support_str = coerce_string(kc.get("support", "supported"), default="supported").lower()
330
  try:
331
  support = ClaimStatus(support_str)
332
  except ValueError:
 
368
  outline = _synthesize_outline_from_claims(key_claims, query=query, detail_level=detail_level)
369
  elif outline:
370
  outline = _top_up_outline(outline, key_claims, detail_level=detail_level)
371
+ if detail_level == "detailed" and key_claims:
372
+ outline = _enrich_detailed_outline(outline, key_claims, query=query)
373
 
374
  return MergeOutput(
375
  synthesis=Synthesis(
 
559
  return outline
560
 
561
 
562
+ def _enrich_detailed_outline(
563
+ outline: list[SectionPoint],
564
+ key_claims: list[KeyClaim],
565
+ query: str = "",
566
+ ) -> list[SectionPoint]:
567
+ """Guarantee richer structure for detailed mode when evidence is available."""
568
+ synthesized = _synthesize_outline_from_claims(key_claims, query=query, detail_level="detailed")
569
+ if not synthesized:
570
+ return outline
571
+ if not outline:
572
+ return synthesized
573
+ return _merge_outline_variants(outline, synthesized, point_cap=7, section_cap=5)
574
+
575
+
576
+ def _merge_outline_variants(
577
+ primary: list[SectionPoint],
578
+ secondary: list[SectionPoint],
579
+ point_cap: int,
580
+ section_cap: int,
581
+ ) -> list[SectionPoint]:
582
+ """Merge outline variants while preserving order and deduplicating points."""
583
+ merged: list[SectionPoint] = []
584
+ title_to_index: dict[str, int] = {}
585
+
586
+ def add_section(section: SectionPoint) -> None:
587
+ title = _clean_text(section.section)
588
+ if not title:
589
+ return
590
+
591
+ title_key = _fingerprint(title)
592
+ clean_points: list[str] = []
593
+ seen_local: set[str] = set()
594
+ for point in section.points:
595
+ text = _clean_text(point)
596
+ fingerprint = _fingerprint(text)
597
+ if not text or fingerprint in seen_local:
598
+ continue
599
+ seen_local.add(fingerprint)
600
+ clean_points.append(text)
601
+ if not clean_points:
602
+ return
603
+
604
+ if title_key in title_to_index:
605
+ existing = merged[title_to_index[title_key]]
606
+ seen_existing = {_fingerprint(point) for point in existing.points}
607
+ for point in clean_points:
608
+ fingerprint = _fingerprint(point)
609
+ if fingerprint in seen_existing or len(existing.points) >= point_cap:
610
+ continue
611
+ existing.points.append(point)
612
+ seen_existing.add(fingerprint)
613
+ return
614
+
615
+ if len(merged) >= section_cap:
616
+ return
617
+
618
+ title_to_index[title_key] = len(merged)
619
+ merged.append(SectionPoint(section=title, points=clean_points[:point_cap]))
620
+
621
+ for section in primary:
622
+ add_section(section)
623
+ for section in secondary:
624
+ add_section(section)
625
+
626
+ return merged or primary or secondary
627
+
628
+
629
  def _normalize_detail_level(detail_level: str | None) -> str:
630
  return "detailed" if str(detail_level or "").strip().lower() == "detailed" else "standard"
631
 
 
706
  if text:
707
  normalized.append(text)
708
  return normalized
709
+
710
+
711
+ def _parse_evidence_refs(raw_item: dict) -> list[Evidence]:
712
+ """Normalize evidence refs from scalar, list, or nested-object shapes."""
713
+ evidence_refs: list[Evidence] = []
714
+
715
+ raw_refs = raw_item.get("evidence_refs") or raw_item.get("evidence") or []
716
+ for ref in ensure_list(raw_refs):
717
+ if not isinstance(ref, dict):
718
+ continue
719
+ for doc_id, chunk_id in pair_string_lists(
720
+ ref.get("doc_id") or ref.get("evidence_doc_id") or ref.get("doc_ids"),
721
+ ref.get("chunk_id") or ref.get("evidence_chunk_id") or ref.get("chunk_ids"),
722
+ ):
723
+ evidence_refs.append(
724
+ Evidence(
725
+ doc_id=doc_id,
726
+ chunk_id=chunk_id,
727
+ where=coerce_string(ref.get("where", ""), default=""),
728
+ quote=coerce_string(ref.get("quote", ""), default="")[:200],
729
+ )
730
+ )
731
+
732
+ if evidence_refs:
733
+ return _dedupe_evidence_refs(evidence_refs)
734
+
735
+ for doc_id, chunk_id in pair_string_lists(
736
+ raw_item.get("evidence_doc_ids") or raw_item.get("evidence_doc_id"),
737
+ raw_item.get("evidence_chunk_ids") or raw_item.get("evidence_chunk_id"),
738
+ ):
739
+ evidence_refs.append(Evidence(doc_id=doc_id, chunk_id=chunk_id))
740
+
741
+ # Last-resort fallback when the model emits one combined evidence object.
742
+ if not evidence_refs:
743
+ chunk_ids = coerce_string_list(raw_item.get("chunk_ids") or raw_item.get("chunk_id"))
744
+ doc_ids = coerce_string_list(raw_item.get("doc_ids") or raw_item.get("doc_id"))
745
+ for doc_id, chunk_id in pair_string_lists(doc_ids, chunk_ids):
746
+ evidence_refs.append(Evidence(doc_id=doc_id, chunk_id=chunk_id))
747
+
748
+ return _dedupe_evidence_refs(evidence_refs)
mp1/pluto/stages/verify.py CHANGED
@@ -24,7 +24,7 @@ from pluto.models import (
24
  VerifyOutput,
25
  )
26
  from pluto.tracer import Tracer
27
- from pluto.utils import extract_json_from_response
28
 
29
  DIRECT_SUPPORT_THRESHOLD = 0.72
30
  LLM_CHECK_THRESHOLD = 0.18
@@ -306,18 +306,8 @@ def _extract_single_verdict(v_data: dict, candidates: list[dict]) -> tuple[Claim
306
  except ValueError:
307
  return None, []
308
 
309
- evidence = []
310
- doc_id = item.get("evidence_doc_id")
311
- chunk_id = item.get("evidence_chunk_id")
312
- if doc_id:
313
- evidence.append(
314
- Evidence(
315
- doc_id=doc_id,
316
- chunk_id=chunk_id or "",
317
- quote=item.get("quote", ""),
318
- )
319
- )
320
- elif candidates and status != ClaimStatus.UNSUPPORTED:
321
  evidence.append(_candidate_to_evidence(candidates[0]))
322
 
323
  return status, evidence
@@ -341,7 +331,7 @@ def _parse_verify(raw: str) -> VerifyOutput:
341
  data = _parse_verify_json(raw)
342
 
343
  checked_claims = []
344
- for item in data.get("checked_claims", []):
345
  if not isinstance(item, dict):
346
  continue
347
  status_raw = str(item.get("status", "unsupported")).lower()
@@ -350,17 +340,7 @@ def _parse_verify(raw: str) -> VerifyOutput:
350
  except ValueError:
351
  status = ClaimStatus.UNSUPPORTED
352
 
353
- evidence = []
354
- doc_id = item.get("evidence_doc_id")
355
- if doc_id:
356
- evidence.append(
357
- Evidence(
358
- doc_id=doc_id,
359
- chunk_id=item.get("evidence_chunk_id", ""),
360
- where=item.get("where", ""),
361
- quote=item.get("quote", ""),
362
- )
363
- )
364
 
365
  checked_claims.append(
366
  CheckedClaim(
@@ -387,6 +367,46 @@ def _parse_verify(raw: str) -> VerifyOutput:
387
  )
388
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  def _should_generate_followups(checked_results: list[CheckedClaim]) -> bool:
391
  unsupported_count = sum(1 for item in checked_results if item.status == ClaimStatus.UNSUPPORTED)
392
  if unsupported_count == 0:
 
24
  VerifyOutput,
25
  )
26
  from pluto.tracer import Tracer
27
+ from pluto.utils import coerce_string, ensure_list, extract_json_from_response, pair_string_lists
28
 
29
  DIRECT_SUPPORT_THRESHOLD = 0.72
30
  LLM_CHECK_THRESHOLD = 0.18
 
306
  except ValueError:
307
  return None, []
308
 
309
+ evidence = _parse_evidence_items(item)
310
+ if not evidence and candidates and status != ClaimStatus.UNSUPPORTED:
 
 
 
 
 
 
 
 
 
 
311
  evidence.append(_candidate_to_evidence(candidates[0]))
312
 
313
  return status, evidence
 
331
  data = _parse_verify_json(raw)
332
 
333
  checked_claims = []
334
+ for item in ensure_list(data.get("checked_claims", [])):
335
  if not isinstance(item, dict):
336
  continue
337
  status_raw = str(item.get("status", "unsupported")).lower()
 
340
  except ValueError:
341
  status = ClaimStatus.UNSUPPORTED
342
 
343
+ evidence = _parse_evidence_items(item)
 
 
 
 
 
 
 
 
 
 
344
 
345
  checked_claims.append(
346
  CheckedClaim(
 
367
  )
368
 
369
 
370
def _parse_evidence_items(raw_item: dict) -> list[Evidence]:
    """Turn one verifier claim dict into a list of ``Evidence`` records.

    Nested references under ``evidence`` / ``evidence_refs`` take priority;
    only when they yield nothing are the flat ``evidence_doc_id(s)`` /
    ``evidence_chunk_id(s)`` fields of the claim itself consulted.
    """

    def _build(source: dict, doc_id: str, chunk_id: str) -> Evidence:
        # ``where`` and ``quote`` are read from the same dict the ids came from;
        # the quote is capped at 200 characters.
        return Evidence(
            doc_id=doc_id,
            chunk_id=chunk_id,
            where=coerce_string(source.get("where", ""), default=""),
            quote=coerce_string(source.get("quote", ""), default="")[:200],
        )

    nested_refs = raw_item.get("evidence") or raw_item.get("evidence_refs") or []
    collected: list[Evidence] = []
    for ref in ensure_list(nested_refs):
        if isinstance(ref, dict):
            id_pairs = pair_string_lists(
                ref.get("doc_id") or ref.get("evidence_doc_id") or ref.get("doc_ids"),
                ref.get("chunk_id") or ref.get("evidence_chunk_id") or ref.get("chunk_ids"),
            )
            for doc_id, chunk_id in id_pairs:
                collected.append(_build(ref, doc_id, chunk_id))

    if collected:
        return collected

    # No usable nested refs: fall back to the flat id fields on the claim.
    flat_pairs = pair_string_lists(
        raw_item.get("evidence_doc_id") or raw_item.get("evidence_doc_ids"),
        raw_item.get("evidence_chunk_id") or raw_item.get("evidence_chunk_ids"),
    )
    for doc_id, chunk_id in flat_pairs:
        collected.append(_build(raw_item, doc_id, chunk_id))
    return collected
408
+
409
+
410
  def _should_generate_followups(checked_results: list[CheckedClaim]) -> bool:
411
  unsupported_count = sum(1 for item in checked_results if item.status == ClaimStatus.UNSUPPORTED)
412
  if unsupported_count == 0:
mp1/pluto/utils.py CHANGED
@@ -4,7 +4,25 @@ pluto/utils.py — Shared utilities for response parsing.
4
 
5
  from __future__ import annotations
6
 
 
7
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def strip_think_block(text: str) -> str:
@@ -28,3 +46,81 @@ def extract_json_from_response(raw: str) -> str:
28
  return brace_match.group(0).strip()
29
 
30
  return cleaned.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  from __future__ import annotations
6
 
7
+ import json
8
  import re
9
+ from itertools import zip_longest
10
+
11
+
12
# Keys probed, in this order, when flatten_string_values() reduces a dict to
# text: the first key present with a value other than None / "" is used and
# the rest of the dict is ignored.
_PREFERRED_TEXT_KEYS = (
    "chunk_id",
    "doc_id",
    "value",
    "text",
    "title",
    "label",
    "name",
    "id",
    "where",
    "quote",
    "claim",
    "section",
)
26
 
27
 
28
  def strip_think_block(text: str) -> str:
 
46
  return brace_match.group(0).strip()
47
 
48
  return cleaned.strip()
49
+
50
+
51
def ensure_list(value):
    """Wrap *value* so callers can always iterate over a list.

    Lists are returned as-is (same object), ``None`` becomes ``[]``, tuples
    and sets are copied into a new list, and anything else is wrapped in a
    one-element list.
    """
    if isinstance(value, list):
        return value
    if value is None:
        return []
    return list(value) if isinstance(value, (tuple, set)) else [value]
60
+
61
+
62
def flatten_string_values(value) -> list[str]:
    """Flatten nested scalars/collections into a list of non-empty strings.

    Rules, applied recursively:

    * ``None`` contributes nothing.
    * A dict contributes the value of the first ``_PREFERRED_TEXT_KEYS`` entry
      it holds with a value other than ``None`` / ``""``; if it has none, the
      whole dict is serialized to one string.
    * Lists, tuples and sets are walked element by element.
    * Anything else is converted with ``str()`` and stripped; empty results
      are dropped.

    Returns:
        The collected strings in traversal order (duplicates are kept).
    """
    values: list[str] = []

    def _walk(item) -> None:
        if item is None:
            return
        if isinstance(item, dict):
            for key in _PREFERRED_TEXT_KEYS:
                if key in item and item[key] not in (None, ""):
                    _walk(item[key])
                    return
            # json.dumps raises TypeError for values it cannot serialize (for
            # example a set) or for mixed-type keys under sort_keys=True, and
            # ValueError for circular references. This helper exists to
            # normalize untidy input, so degrade to repr-style text instead
            # of propagating the error.
            try:
                dumped = json.dumps(item, ensure_ascii=False, sort_keys=True)
            except (TypeError, ValueError):
                dumped = str(item)
            dumped = dumped.strip()
            if dumped:
                values.append(dumped)
            return
        if isinstance(item, (list, tuple, set)):
            for part in item:
                _walk(part)
            return

        text = str(item).strip()
        if text:
            values.append(text)

    _walk(value)
    return values
89
+
90
+
91
def coerce_string(value, default: str = "") -> str:
    """Collapse *value* into a single comma-separated string.

    The input is flattened with ``flatten_string_values``; when that yields
    nothing, *default* is returned instead.
    """
    flattened = flatten_string_values(value)
    if not flattened:
        return default
    return ", ".join(flattened)
95
+
96
+
97
def coerce_string_list(value) -> list[str]:
    """Flatten *value* into strings, keeping only the first occurrence of each."""
    # dict.fromkeys keeps insertion order, so this drops repeats while
    # preserving the order in which strings were first seen.
    return list(dict.fromkeys(flatten_string_values(value)))
107
+
108
+
109
def pair_string_lists(left, right) -> list[tuple[str, str]]:
    """Broadcast or zip mixed scalar/list inputs into string pairs.

    Both inputs are flattened to strings first. Then:

    * both empty -> ``[]``;
    * one side empty -> it is treated as the single value ``""``;
    * one side holds a single distinct value -> that value is paired with
      every distinct value of the other side (broadcast);
    * otherwise the two sides are paired positionally, padding the shorter
      one with ``""``.

    Positional pairing uses the flattened lists *before* de-duplication, so a
    repeated id (e.g. docs ``["a", "b", "a"]`` with chunks
    ``["C1", "C2", "C3"]``) stays aligned with its partner. Duplicate pairs
    are removed from the result, keeping first occurrences.

    Returns:
        Unique ``(left, right)`` string tuples in first-seen order.
    """
    left_raw = flatten_string_values(left)
    right_raw = flatten_string_values(right)

    if not left_raw and not right_raw:
        return []

    # Distinct values decide whether a side is effectively a scalar.
    left_unique = list(dict.fromkeys(left_raw)) or [""]
    right_unique = list(dict.fromkeys(right_raw)) or [""]

    if len(left_unique) == 1:
        pairs = [(left_unique[0], item) for item in right_unique]
    elif len(right_unique) == 1:
        pairs = [(item, right_unique[0]) for item in left_unique]
    else:
        # De-duplicating each side before zipping would shift later items
        # onto the wrong partner, so zip the raw lists.
        pairs = zip_longest(left_raw, right_raw, fillvalue="")

    return list(dict.fromkeys(pairs))
mp1/test_merge.py CHANGED
@@ -9,7 +9,7 @@ from pluto.models import (
9
  Synthesis,
10
  )
11
  from pluto.stages import merge as merge_stage
12
- from pluto.stages.merge import run_merge
13
  from pluto.tracer import Tracer
14
 
15
 
@@ -78,3 +78,117 @@ def test_merge_synthesizes_outline_when_model_returns_only_key_claims(monkeypatc
78
  for section in result.synthesis.answer_outline
79
  for point in section.points
80
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  Synthesis,
10
  )
11
  from pluto.stages import merge as merge_stage
12
+ from pluto.stages.merge import _parse_merge, run_merge
13
  from pluto.tracer import Tracer
14
 
15
 
 
78
  for section in result.synthesis.answer_outline
79
  for point in section.points
80
  )
81
+
82
+
83
def test_parse_merge_normalizes_scalar_doc_and_multi_chunk_evidence():
    """A scalar doc id is paired with every chunk id of a nested chunk list."""
    # Fixture deviates from the tidy shape on purpose: `points` is a bare
    # string, `evidence_doc_ids` is a scalar, and `evidence_chunk_ids` is a
    # list wrapped in another list.
    raw = """
    {
      "answer_outline": [
        {
          "section": "Overview",
          "points": "The method uses evidence from multiple chunks."
        }
      ],
      "key_claims": [
        {
          "claim": "The method is supported across several chunks.",
          "support": "supported",
          "evidence_doc_ids": "paper_a",
          "evidence_chunk_ids": [["C18", "C46", "C81"]]
        }
      ],
      "open_gaps": []
    }
    """

    out = _parse_merge(raw)

    # The scalar `points` value comes back as a one-element list.
    assert out.synthesis.answer_outline[0].points == ["The method uses evidence from multiple chunks."]
    refs = out.synthesis.key_claims[0].evidence_refs
    # One evidence ref per chunk id, each carrying the single doc id.
    assert len(refs) == 3
    assert [ref.doc_id for ref in refs] == ["paper_a", "paper_a", "paper_a"]
    assert [ref.chunk_id for ref in refs] == ["C18", "C46", "C81"]
111
+
112
+
113
def test_merge_detailed_mode_produces_richer_answer_structure(monkeypatch):
    """`detail_level="detailed"` yields more outline points than "standard".

    The model call is stubbed, so both runs receive the same raw merge JSON;
    any difference in the outline comes from `run_merge` itself.
    """
    raw_merge = """
    {
      "answer_outline": [
        {
          "section": "Overview",
          "points": [
            "The paper introduces a multi-agent defense coordinator.",
            "The system reports strong defended-scenario performance."
          ]
        }
      ],
      "key_claims": [
        {
          "claim": "The paper introduces a multi-agent defense coordinator for prompt-injection mitigation.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C1"]
        },
        {
          "claim": "The evaluation reports 0% ASR across defended scenarios.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C2"]
        },
        {
          "claim": "The method routes adversarial prompts through a defense worker.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C3"]
        },
        {
          "claim": "The architecture includes a recovery worker for post-attack repair.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C4"]
        },
        {
          "claim": "The paper discusses limitations and future work for the coordinator pipeline.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C5"]
        },
        {
          "claim": "The benchmark comparison highlights gains over baselines.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C6"]
        }
      ],
      "open_gaps": []
    }
    """

    # Replace the merge stage's `dispatch` so it always returns the fixture.
    monkeypatch.setattr(merge_stage, "dispatch", lambda *args, **kwargs: raw_merge)

    extraction = ExtractOutput(
        doc_id="multi_agent",
        chunk_id="C1",
        chunk_type=ChunkType.TEXT,
        mode_used=ModeName.MODE_REASONING,
        extracted=ExtractedContent(
            claims=[
                Claim(
                    claim_id="cl1",
                    text="The paper introduces a multi-agent defense coordinator for prompt-injection mitigation.",
                    importance=Importance.HIGH,
                    evidence=Evidence(doc_id="multi_agent", chunk_id="C1", where="overview", quote="multi-agent defense coordinator"),
                )
            ],
            chunk_summary="Coordinator overview and results.",
        ),
    )

    # Same question and extraction; only the detail level differs.
    standard = run_merge("Summarize the paper.", [extraction], Tracer(), detail_level="standard")
    detailed = run_merge("Summarize the paper.", [extraction], Tracer(), detail_level="detailed")

    standard_points = sum(len(section.points) for section in standard.synthesis.answer_outline)
    detailed_points = sum(len(section.points) for section in detailed.synthesis.answer_outline)

    # Detailed mode must not lose sections and must add outline points.
    assert len(detailed.synthesis.answer_outline) >= len(standard.synthesis.answer_outline)
    assert detailed_points > standard_points
mp1/test_schema.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pluto.models import Evidence, FinalEvidence, SectionPoint, Verification
2
+
3
+
4
def test_schema_coerces_mixed_scalar_and_list_inputs():
    """Evidence models accept list/dict field values and store plain strings."""
    ev = Evidence(
        doc_id=["paper_a"],
        chunk_id=["C1", "C2"],
        where={"text": "results"},
        quote=["alpha", "beta"],
    )
    expected_evidence = {
        "doc_id": "paper_a",
        "chunk_id": "C1, C2",
        "where": "results",
        "quote": "alpha, beta",
    }
    for field_name, expected in expected_evidence.items():
        assert getattr(ev, field_name) == expected

    final_ev = FinalEvidence(
        doc_id="paper_a",
        chunk_id=["C4", "C5"],
        where=["method"],
        supports=["Main claim"],
        quote=["quoted", "support"],
    )
    expected_final = {
        "chunk_id": "C4, C5",
        "where": "method",
        "supports": "Main claim",
        "quote": "quoted, support",
    }
    for field_name, expected in expected_final.items():
        assert getattr(final_ev, field_name) == expected
29
+
30
+
31
def test_schema_coerces_outline_and_followup_lists():
    """List-typed fields wrap scalars/dicts; string fields unwrap single-item lists."""
    outline_section = SectionPoint(section=["Overview"], points="Single normalized point")
    verdict = Verification(
        unsupported_claims="Missing metric support",
        required_followups={"text": "Where is the metric reported?"},
    )

    assert outline_section.section == "Overview"
    assert outline_section.points == ["Single normalized point"]

    assert verdict.unsupported_claims == ["Missing metric support"]
    assert verdict.required_followups == ["Where is the metric reported?"]
mp1/test_server.py CHANGED
@@ -189,3 +189,27 @@ def test_stream_progress_serializes_pydantic_payloads(monkeypatch):
189
  payload = json.loads(body.removeprefix("data: ").strip())
190
  assert payload["payload"]["plan"][0]["doc_id"] == "paper"
191
  assert payload["payload"]["plan"][0]["chunk_type"] == "text"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  payload = json.loads(body.removeprefix("data: ").strip())
190
  assert payload["payload"]["plan"][0]["doc_id"] == "paper"
191
  assert payload["payload"]["plan"][0]["chunk_type"] == "text"
192
+
193
+
194
def test_server_cache_stats_route_returns_json(monkeypatch):
    """GET /api/cache/stats responds 200 with the cache's stats() dict as JSON."""

    class FakeCache:
        # Stand-in for the server's extraction cache; only stats() is needed.
        def stats(self):
            return {"hits": 7, "misses": 3, "entries": 10}

    monkeypatch.setattr(server, "_extraction_cache", FakeCache())

    response = TestClient(server.app).get("/api/cache/stats")

    assert response.status_code == 200
    assert response.json() == {"hits": 7, "misses": 3, "entries": 10}
206
+
207
+
208
def test_server_result_route_returns_404_when_empty(monkeypatch):
    """GET /api/result responds 404 with an error message when no result is stored."""
    monkeypatch.setattr(server, "_latest_result", None)

    response = TestClient(server.app).get("/api/result")

    assert response.status_code == 404
    error_message = response.json()["error"]
    assert error_message == "No result yet"
mp1/test_verify.py CHANGED
@@ -49,6 +49,31 @@ def test_parse_verify_dump():
49
  assert out.verification.required_followups == ["Upload the appendix for dataset details."]
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def test_verify_directly_supports_matching_claim_without_dispatch(monkeypatch):
53
  def fail_dispatch(*args, **kwargs):
54
  raise AssertionError("dispatch should not be called for an obvious direct evidence match")
 
49
  assert out.verification.required_followups == ["Upload the appendix for dataset details."]
50
 
51
 
52
def test_parse_verify_handles_multi_chunk_evidence_ids():
    """A scalar doc id plus a list of chunk ids expands to one evidence item per chunk."""
    # `evidence_doc_id` is a single string while `evidence_chunk_id` is a list.
    raw = """
    {
      "checked_claims": [
        {
          "claim": "The results are supported across multiple chunks.",
          "status": "supported",
          "evidence_doc_id": "paper_a",
          "evidence_chunk_id": ["C18", "C46", "C81"],
          "quote": "results are supported"
        }
      ],
      "unsupported_claims": [],
      "required_followups": []
    }
    """

    out = _parse_verify(raw)

    evidence = out.verification.checked_claims[0].evidence
    # Every chunk id gets its own evidence entry, all sharing the doc id.
    assert len(evidence) == 3
    assert [item.doc_id for item in evidence] == ["paper_a", "paper_a", "paper_a"]
    assert [item.chunk_id for item in evidence] == ["C18", "C46", "C81"]
75
+
76
+
77
  def test_verify_directly_supports_matching_claim_without_dispatch(monkeypatch):
78
  def fail_dispatch(*args, **kwargs):
79
  raise AssertionError("dispatch should not be called for an obvious direct evidence match")