Spaces:

ayushKishor
/

plutoV2_miniProject_3rd-yr

Sleeping

App Files Files Community

3v324v23 commited on Mar 31

Commit

5cf9bb7

1 Parent(s): 66ad25b

Update codebase with latest fixes and improvements

Browse files

Files changed (10) hide show

mp1/.env +0 -15
mp1/corpus/.extraction_cache.json +1008 -0
mp1/pluto/models.py +126 -1
mp1/pluto/stages/merge.py +114 -6
mp1/pluto/stages/verify.py +45 -25
mp1/pluto/utils.py +96 -0
mp1/test_merge.py +115 -1
mp1/test_schema.py +41 -0
mp1/test_server.py +24 -0
mp1/test_verify.py +25 -0

mp1/.env DELETED Viewed

@@ -1,15 +0,0 @@
-# NVIDIA NIM Multi-model Keys
-NVIDIA_API_KEY_NANO=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
-NVIDIA_API_KEY_SUPER=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
-NVIDIA_API_KEY_VL=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
-NVIDIA_API_KEY_EMBED=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
-NVIDIA_API_KEY_RERANK=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
-NVIDIA_API_KEY_ULTRA=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
-# Global fallback (defaults to Super if specific not found)
-NVIDIA_API_KEY=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
-# Keep Groq as fallback
-GROQ_API_KEY=gsk_xxxxxxxxxxxxxxxxxxxx
-MISTRAL_API_KEY=...
-GOOGLE_API_KEY=AIzaSyDp-mzHD9Nyk1T3xCPRyrc1RCiVLZzkNy8

mp1/corpus/.extraction_cache.json CHANGED Viewed

@@ -1878,5 +1878,1013 @@
       "chunk_summary": "The text discusses two main challenges in the integration of neural and symbolic AI systems: Interoperability & Integration (difficulty in integrating with real-world data and software ecosystems) and Governance & Accountability (liability and regulatory challenges with emergent behaviors). Proposed solutions include developing bridging standards/APIs for the former and paradigm-specific regulatory models for the latter."
     },
     "cached_at": "2026-03-30T12:54:43.367750+00:00"
   }
 }

       "chunk_summary": "The text discusses two main challenges in the integration of neural and symbolic AI systems: Interoperability & Integration (difficulty in integrating with real-world data and software ecosystems) and Governance & Accountability (liability and regulatory challenges with emergent behaviors). Proposed solutions include developing bridging standards/APIs for the former and paradigm-specific regulatory models for the latter."
     },
     "cached_at": "2026-03-30T12:54:43.367750+00:00"
+  },
+  "11dc4fc7fef5e33d8f4a9919ff21db44c12efb5f1e7335e92503dae98125ce6d": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C100",
+    "chunk_hash": "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba",
+    "chunk_type": "figure",
+    "mode_used": "MODE_VISION",
+    "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C100-CL1",
+          "text": "The paper aims to explore how sub-quadratic models can balance performance with inference speed.",
+          "importance": "high",
+          "support_type": "implicit",
+          "numbers": [
+            "440M",
+            "2",
+            "16",
+            "32",
+            "64",
+            "128"
+          ],
+          "entities": [
+            "validation perplexity",
+            "model performance",
+            "Mamba models",
+            "Fineweb-Edu dataset",
+            "Chinchilla optimal tokens",
+            "𝑑state"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C100",
+            "where": "chunk C100",
+            "quote": "By plotting the validation perplexity (a proxy for model performance) as a function of 𝑑state, we aim to formulate a holistic picture about how sub-quadratic models can trade off performance with infe"
+          }
+        },
+        {
+          "claim_id": "m3-C100-CL2",
+          "text": "The paper trains 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values (16, 32, 64, 128) and observes an inverse correlation between validation loss and 𝑑state.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [
+            "440M",
+            "16",
+            "32",
+            "64",
+            "128"
+          ],
+          "entities": [
+            "validation loss",
+            "𝑑state"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C100",
+            "where": "chunk C100",
+            "quote": "For each data point, we train a 440M parameter model to 2×Chinchilla optimal tokens on the Fineweb-Edu dataset, where the model is configured with a 𝑑state of {16, 32, 64, 128}. As expected, we observ"
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "validation perplexity",
+          "definition": "A proxy for model performance used in the paper."
+        },
+        {
+          "term": "Mamba models",
+          "definition": "A specific type of model used in the paper to explore the trade-off between performance and inference speed."
+        },
+        {
+          "term": "Fineweb-Edu dataset",
+          "definition": "The dataset used in the paper to train and evaluate the models."
+        },
+        {
+          "term": "Chinchilla optimal tokens",
+          "definition": "A specific type of token used in the paper to configure the models."
+        },
+        {
+          "term": "𝑑state",
+          "definition": "A parameter used in the paper to configure the models and explore the trade-off between performance and inference speed."
+        }
+      ],
+      "math": [
+        {
+          "expression": "2×Chinchilla optimal tokens",
+          "interpretation": "A specific configuration used in the paper to train the models.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "The paper explores how sub-quadratic models can balance performance with inference speed by training 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values and observing an inverse correlation between validation loss and 𝑑state."
+    },
+    "cached_at": "2026-03-31T06:11:11.513711+00:00"
+  },
+  "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C100",
+    "chunk_hash": "453f2499fc3f008aee11b271057c320105d2bb3767d6027ee7eab88bf68094ba",
+    "chunk_type": "figure",
+    "mode_used": "MODE_VISION",
+    "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C100-CL1",
+          "text": "The paper aims to explore how sub-quadratic models can balance performance with inference speed.",
+          "importance": "high",
+          "support_type": "implicit",
+          "numbers": [
+            "440M",
+            "2",
+            "16",
+            "32",
+            "64",
+            "128"
+          ],
+          "entities": [
+            "validation perplexity",
+            "model performance",
+            "Mamba models",
+            "Fineweb-Edu dataset",
+            "Chinchilla optimal tokens",
+            "𝑑state"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C100",
+            "where": "chunk C100",
+            "quote": "By plotting the validation perplexity (a proxy for model performance) as a function of 𝑑state, we aim to formulate a holistic picture about how sub-quadratic models can trade off performance with infe"
+          }
+        },
+        {
+          "claim_id": "m3-C100-CL2",
+          "text": "The paper trains 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values (16, 32, 64, 128) and observes an inverse correlation between validation loss and 𝑑state.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [
+            "440M",
+            "16",
+            "32",
+            "64",
+            "128"
+          ],
+          "entities": [
+            "validation loss",
+            "𝑑state"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C100",
+            "where": "chunk C100",
+            "quote": "For each data point, we train a 440M parameter model to 2×Chinchilla optimal tokens on the Fineweb-Edu dataset, where the model is configured with a 𝑑state of {16, 32, 64, 128}. As expected, we observ"
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "validation perplexity",
+          "definition": "A proxy for model performance used in the paper."
+        },
+        {
+          "term": "Mamba models",
+          "definition": "A specific type of model used in the paper to explore the trade-off between performance and inference speed."
+        },
+        {
+          "term": "Fineweb-Edu dataset",
+          "definition": "The dataset used in the paper to train and evaluate the models."
+        },
+        {
+          "term": "Chinchilla optimal tokens",
+          "definition": "A specific type of token used in the paper to configure the models."
+        },
+        {
+          "term": "𝑑state",
+          "definition": "A parameter used in the paper to configure the models and explore the trade-off between performance and inference speed."
+        }
+      ],
+      "math": [
+        {
+          "expression": "2×Chinchilla optimal tokens",
+          "interpretation": "A specific configuration used in the paper to train the models.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "The paper explores how sub-quadratic models can balance performance with inference speed by training 440M parameter models on the Fineweb-Edu dataset with different 𝑑state values and observing an inverse correlation between validation loss and 𝑑state."
+    },
+    "cached_at": "2026-03-31T06:11:11.513711+00:00"
+  },
+  "8c8d3a749758cf0155178011770f7a5dad2cc169a562482015bccc0564fc0e7d": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C35",
+    "chunk_hash": "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8",
+    "chunk_type": "table",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C35-CL1",
+          "text": "This paper discusses the difference in representation of parameter B_t between the current work and Mamba-2.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "B_t",
+            "Mamba-2",
+            "this paper"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "B_t represents the continuous parameter, whereas in Mamba-2, B_t represents the discretized parameter"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL2",
+          "text": "The paper extends the prior Mamba discretization theory by introducing an exponential-trapezoidal method.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "Mamba discretization",
+            "exponential-trapezoidal method"
+          ],
+          "dependencies": [
+            "m3-C35-CL1"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "Our theory formalizes the prior Mamba discretization as exponential-Euler and extends it"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL3",
+          "text": "Mamba-1's reported ZOH discretization differs from its actual implementation (as per a GitHub issue).",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [
+            "#129"
+          ],
+          "entities": [
+            "Mamba-1",
+            "ZOH discretization",
+            "GitHub"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "While the Mamba-1 paper reports ZOH discretization, the implementation follows https://github.com/state-spaces/mamba/issues/129"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL4",
+          "text": "Table 1 lists canonical linear-time invariant discretizations and custom linear-time varying discretizations from the exponential-adjusted framework.",
+          "importance": "low",
+          "support_type": "explicit",
+          "numbers": [
+            "1"
+          ],
+          "entities": [
+            "Table 1"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "Table 1: Table of canonical linear-time invariant discretizations (top) and custom linear-time varying discretizations"
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "B_t",
+          "definition": "A parameter represented differently as continuous in this paper and discretized in Mamba-2 (equivalent to γ_tB_t)"
+        },
+        {
+          "term": "Exponential-Euler (discretization)",
+          "definition": "The formalized representation of the prior Mamba discretization in this paper"
+        },
+        {
+          "term": "Exponential-Trapezoidal Method",
+          "definition": "A new, more expressive discretization method introduced in this paper as an extension to Mamba"
+        }
+      ],
+      "math": [
+        {
+          "expression": "γ_tB_t",
+          "interpretation": "The discretized form of parameter B_t in Mamba-2, where γ_t is a scaling factor",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "This paper contrasts its continuous parameter representation with Mamba-2's discretized approach, extends Mamba's discretization theory with a new method, and references a discrepancy in Mamba-1's implementation. It also presents a table on various discretizations."
+    },
+    "cached_at": "2026-03-31T06:11:33.529241+00:00"
+  },
+  "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C35",
+    "chunk_hash": "dc835d283dae7b2755b59d73031fa23ef2af880a4370612763a4ae6c290666e8",
+    "chunk_type": "table",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C35-CL1",
+          "text": "This paper discusses the difference in representation of parameter B_t between the current work and Mamba-2.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "B_t",
+            "Mamba-2",
+            "this paper"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "B_t represents the continuous parameter, whereas in Mamba-2, B_t represents the discretized parameter"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL2",
+          "text": "The paper extends the prior Mamba discretization theory by introducing an exponential-trapezoidal method.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "Mamba discretization",
+            "exponential-trapezoidal method"
+          ],
+          "dependencies": [
+            "m3-C35-CL1"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "Our theory formalizes the prior Mamba discretization as exponential-Euler and extends it"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL3",
+          "text": "Mamba-1's reported ZOH discretization differs from its actual implementation (as per a GitHub issue).",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [
+            "#129"
+          ],
+          "entities": [
+            "Mamba-1",
+            "ZOH discretization",
+            "GitHub"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "While the Mamba-1 paper reports ZOH discretization, the implementation follows https://github.com/state-spaces/mamba/issues/129"
+          }
+        },
+        {
+          "claim_id": "m3-C35-CL4",
+          "text": "Table 1 lists canonical linear-time invariant discretizations and custom linear-time varying discretizations from the exponential-adjusted framework.",
+          "importance": "low",
+          "support_type": "explicit",
+          "numbers": [
+            "1"
+          ],
+          "entities": [
+            "Table 1"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C35",
+            "where": "chunk C35",
+            "quote": "Table 1: Table of canonical linear-time invariant discretizations (top) and custom linear-time varying discretizations"
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "B_t",
+          "definition": "A parameter represented differently as continuous in this paper and discretized in Mamba-2 (equivalent to γ_tB_t)"
+        },
+        {
+          "term": "Exponential-Euler (discretization)",
+          "definition": "The formalized representation of the prior Mamba discretization in this paper"
+        },
+        {
+          "term": "Exponential-Trapezoidal Method",
+          "definition": "A new, more expressive discretization method introduced in this paper as an extension to Mamba"
+        }
+      ],
+      "math": [
+        {
+          "expression": "γ_tB_t",
+          "interpretation": "The discretized form of parameter B_t in Mamba-2, where γ_t is a scaling factor",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "This paper contrasts its continuous parameter representation with Mamba-2's discretized approach, extends Mamba's discretization theory with a new method, and references a discrepancy in Mamba-1's implementation. It also presents a table on various discretizations."
+    },
+    "cached_at": "2026-03-31T06:11:33.529241+00:00"
+  },
+  "5640580cd928bf6ac61d7eb47ebdee7c1c7cde531b46d5bb55c3af4610a556f1": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C64",
+    "chunk_hash": "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467",
+    "chunk_type": "table",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C64-CL1",
+          "text": "The paper discusses the efficiency of Mamba and compares its arithmetic intensity to NVIDIA H100-SXM5's bfloat16 matmul.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [
+            "2.5",
+            "295"
+          ],
+          "entities": [
+            "Mamba",
+            "NVIDIA H100-SXM5"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "More concretely, the arithmetic intensity for a single generation in Mamba is around 2.5ops per byte (Table 2a), while the arithmetic intensity for bfloat16 matmul is about 295ops per byte for NVIDIA "
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL2",
+          "text": "SSM decoding in Mamba falls short of a compute-bound regime, with unclear parameter adjustments for mitigation.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "SSM decoding",
+            "Mamba"
+          ],
+          "dependencies": [
+            "m3-C64-CL1"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "Consequently, SSM decoding falls far short of a compute-bound regime, and moreover it is not clear how one can adjust the existing parameters in Mamba to mitigate the lack of hardware efficiency."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL3",
+          "text": "The observation about inefficiency applies to other sub-quadratic models like causal linear attention.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "sub-quadratic models",
+            "causal linear attention"
+          ],
+          "dependencies": [
+            "m3-C64-CL2"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "We note that this observation applies generally to other sub-quadratic models, such as causal linear attention."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL4",
+          "text": "The paper technically analyzes SSM, transitioning from SISO to MIMO, involving specific mathematical formulations.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "SSM",
+            "SISO",
+            "MIMO"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "From SISO to MIMO. Consider a single head of a typical SSM..."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL5",
+          "text": "In MIMO SSM, memory traffic is dominated by state hₜ, and computation by the outer product Bₜx⊤ₜ with FLOPs proportional to N×P.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "MIMO SSM",
+            "hₜ",
+            "Bₜx⊤ₜ"
+          ],
+          "dependencies": [
+            "m3-C64-CL4"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "Note that the memory traffic... is dominated by the state hₜ, while the computation mainly comprises the outer product Bₜx⊤ₜ which has FLOPs proportional to N𝑃."
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "Arithmetic Intensity",
+          "definition": "Ratio of computational operations (ops) to memory traffic (bytes), implicitly defined in the context."
+        },
+        {
+          "term": "SSM",
+          "definition": "Implicitly defined as a model type within the paper's context, possibly 'State Space Model' based on external knowledge."
+        },
+        {
+          "term": "SISO/MIMO",
+          "definition": "Single-Input Single-Output / Multiple-Input Multiple-Output, contextual to the model's architecture."
+        }
+      ],
+      "math": [
+        {
+          "expression": "2.5ops/byte, 295ops/byte",
+          "interpretation": "Arithmetic intensity measurements for Mamba and NVIDIA H100-SXM5's bfloat16 matmul, respectively.",
+          "evidence": null
+        },
+        {
+          "expression": "N×P (FLOPs proportionality)",
+          "interpretation": "Proportionality of Floating Point Operations to the product of dimensions N and P in MIMO SSM computation.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "This introduction discusses the inefficiency of Mamba's SSM decoding compared to NVIDIA H100-SXM5, its broader applicability to sub-quadratic models, and delves into the technical analysis of transitioning SSM from SISO to MIMO, highlighting memory and computation aspects."
+    },
+    "cached_at": "2026-03-31T06:12:01.423458+00:00"
+  },
+  "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C64",
+    "chunk_hash": "7bb33741c71279b8237c5c73251f09ced4fd43ad5963e8363116a210d46bf467",
+    "chunk_type": "table",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C64-CL1",
+          "text": "The paper discusses the efficiency of Mamba and compares its arithmetic intensity to NVIDIA H100-SXM5's bfloat16 matmul.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [
+            "2.5",
+            "295"
+          ],
+          "entities": [
+            "Mamba",
+            "NVIDIA H100-SXM5"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "More concretely, the arithmetic intensity for a single generation in Mamba is around 2.5ops per byte (Table 2a), while the arithmetic intensity for bfloat16 matmul is about 295ops per byte for NVIDIA "
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL2",
+          "text": "SSM decoding in Mamba falls short of a compute-bound regime, with unclear parameter adjustments for mitigation.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "SSM decoding",
+            "Mamba"
+          ],
+          "dependencies": [
+            "m3-C64-CL1"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "Consequently, SSM decoding falls far short of a compute-bound regime, and moreover it is not clear how one can adjust the existing parameters in Mamba to mitigate the lack of hardware efficiency."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL3",
+          "text": "The observation about inefficiency applies to other sub-quadratic models like causal linear attention.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "sub-quadratic models",
+            "causal linear attention"
+          ],
+          "dependencies": [
+            "m3-C64-CL2"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "We note that this observation applies generally to other sub-quadratic models, such as causal linear attention."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL4",
+          "text": "The paper technically analyzes SSM, transitioning from SISO to MIMO, involving specific mathematical formulations.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "SSM",
+            "SISO",
+            "MIMO"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "From SISO to MIMO. Consider a single head of a typical SSM..."
+          }
+        },
+        {
+          "claim_id": "m3-C64-CL5",
+          "text": "In MIMO SSM, memory traffic is dominated by state hₜ, and computation by the outer product Bₜx⊤ₜ with FLOPs proportional to N×P.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "MIMO SSM",
+            "hₜ",
+            "Bₜx⊤ₜ"
+          ],
+          "dependencies": [
+            "m3-C64-CL4"
+          ],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C64",
+            "where": "chunk C64",
+            "quote": "Note that the memory traffic... is dominated by the state hₜ, while the computation mainly comprises the outer product Bₜx⊤ₜ which has FLOPs proportional to N𝑃."
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "Arithmetic Intensity",
+          "definition": "Ratio of computational operations (ops) to memory traffic (bytes), implicitly defined in the context."
+        },
+        {
+          "term": "SSM",
+          "definition": "Implicitly defined as a model type within the paper's context, possibly 'State Space Model' based on external knowledge."
+        },
+        {
+          "term": "SISO/MIMO",
+          "definition": "Single-Input Single-Output / Multiple-Input Multiple-Output, contextual to the model's architecture."
+        }
+      ],
+      "math": [
+        {
+          "expression": "2.5ops/byte, 295ops/byte",
+          "interpretation": "Arithmetic intensity measurements for Mamba and NVIDIA H100-SXM5's bfloat16 matmul, respectively.",
+          "evidence": null
+        },
+        {
+          "expression": "N×P (FLOPs proportionality)",
+          "interpretation": "Proportionality of Floating Point Operations to the product of dimensions N and P in MIMO SSM computation.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "This introduction discusses the inefficiency of Mamba's SSM decoding compared to NVIDIA H100-SXM5, its broader applicability to sub-quadratic models, and delves into the technical analysis of transitioning SSM from SISO to MIMO, highlighting memory and computation aspects."
+    },
+    "cached_at": "2026-03-31T06:12:01.423458+00:00"
+  },
+  "5b323bb2c1e955746b99089ab8edd1708edd9608e962335c1c324fd33d060dc4": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C77",
+    "chunk_hash": "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c",
+    "chunk_type": "figure",
+    "mode_used": "MODE_VISION",
+    "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C77-CL1",
+          "text": "The paper keeps the original SISO projection and scales each dimension of the projected output to size R with a learnable, data-independent vector.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [
+            "R"
+          ],
+          "entities": [
+            "SISO projection",
+            "learnable vector"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "Instead, we keep the original SISO projection and element-wise scale each dimension of the projected output to size 𝑅with a learnable, data-independent vector, resulting in 𝐷𝑃+𝑃𝑅parameters for each he"
+          }
+        },
+        {
+          "claim_id": "m3-C77-CL2",
+          "text": "The paper introduces exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases to mitigate the multiplicative increase in parameters.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "exponential-trapezoidal discretization",
+            "data-dependent RoPE embeddings",
+            "MIMO projections",
+            "QK normalization",
+            "learnable biases"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "This mitigates the multiplicative increase to a more reasonable additive parameter count increase."
+          }
+        },
+        {
+          "claim_id": "m3-C77-CL3",
+          "text": "The paper parameter-matches all MIMO-variants to their SISO counterparts by reducing the MLP width.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "MIMO-variants",
+            "SISO counterparts",
+            "MLP width"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "Appendix C details the parameterization, and all MIMO-variants in our paper are parameter-matched to their SISO counterparts by reducing the MLP width."
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "SISO projection",
+          "definition": "A projection method that processes data sequentially, one dimension at a time."
+        },
+        {
+          "term": "RoPE embeddings",
+          "definition": "A method for generating embeddings that uses relative position encoding."
+        },
+        {
+          "term": "MIMO projection",
+          "definition": "A projection method that processes multiple dimensions simultaneously."
+        },
+        {
+          "term": "QK normalization",
+          "definition": "A normalization method that uses a query-key-value attention mechanism."
+        }
+      ],
+      "math": [
+        {
+          "expression": "DP + PR",
+          "interpretation": "The total number of parameters for each head after scaling and adding learnable vectors.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "The paper introduces several updates to the original SISO projection, including scaling, exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases. These updates aim to mitigate the multiplicative increase in parameters, and all MIMO-variants are parameter-matched to their SISO counterparts by reducing the MLP width."
+    },
+    "cached_at": "2026-03-31T06:12:20.641235+00:00"
+  },
+  "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C77",
+    "chunk_hash": "b60edb2fa7e49a8eb0690fdbda1edb54c65d05fe38cba79de41ec1ec6475470c",
+    "chunk_type": "figure",
+    "mode_used": "MODE_VISION",
+    "model_id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
+    "extracted": {
+      "claims": [
+        {
+          "claim_id": "m3-C77-CL1",
+          "text": "The paper keeps the original SISO projection and scales each dimension of the projected output to size R with a learnable, data-independent vector.",
+          "importance": "high",
+          "support_type": "explicit",
+          "numbers": [
+            "R"
+          ],
+          "entities": [
+            "SISO projection",
+            "learnable vector"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "Instead, we keep the original SISO projection and element-wise scale each dimension of the projected output to size 𝑅with a learnable, data-independent vector, resulting in 𝐷𝑃+𝑃𝑅parameters for each he"
+          }
+        },
+        {
+          "claim_id": "m3-C77-CL2",
+          "text": "The paper introduces exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases to mitigate the multiplicative increase in parameters.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "exponential-trapezoidal discretization",
+            "data-dependent RoPE embeddings",
+            "MIMO projections",
+            "QK normalization",
+            "learnable biases"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "This mitigates the multiplicative increase to a more reasonable additive parameter count increase."
+          }
+        },
+        {
+          "claim_id": "m3-C77-CL3",
+          "text": "The paper parameter-matches all MIMO-variants to their SISO counterparts by reducing the MLP width.",
+          "importance": "medium",
+          "support_type": "explicit",
+          "numbers": [],
+          "entities": [
+            "MIMO-variants",
+            "SISO counterparts",
+            "MLP width"
+          ],
+          "dependencies": [],
+          "evidence": {
+            "doc_id": "m3",
+            "chunk_id": "C77",
+            "where": "chunk C77",
+            "quote": "Appendix C details the parameterization, and all MIMO-variants in our paper are parameter-matched to their SISO counterparts by reducing the MLP width."
+          }
+        }
+      ],
+      "definitions": [
+        {
+          "term": "SISO projection",
+          "definition": "A projection method that processes data sequentially, one dimension at a time."
+        },
+        {
+          "term": "RoPE embeddings",
+          "definition": "A method for generating embeddings that uses relative position encoding."
+        },
+        {
+          "term": "MIMO projection",
+          "definition": "A projection method that processes multiple dimensions simultaneously."
+        },
+        {
+          "term": "QK normalization",
+          "definition": "A normalization method that uses a query-key-value attention mechanism."
+        }
+      ],
+      "math": [
+        {
+          "expression": "DP + PR",
+          "interpretation": "The total number of parameters for each head after scaling and adding learnable vectors.",
+          "evidence": null
+        }
+      ],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "The paper introduces several updates to the original SISO projection, including scaling, exponential-trapezoidal discretization, data-dependent RoPE embeddings, MIMO projections, QK normalization, and learnable biases. These updates aim to mitigate the multiplicative increase in parameters, and all MIMO-variants are parameter-matched to their SISO counterparts by reducing the MLP width."
+    },
+    "cached_at": "2026-03-31T06:12:20.641235+00:00"
+  },
+  "a06cce7e93a52c44a5d98615a3323cbf3ac07b900073919927bce7d091f2cda8": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C99",
+    "chunk_hash": "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae",
+    "chunk_type": "text",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [],
+      "definitions": [],
+      "math": [],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "Here is the extracted JSON in the requested schema, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n  \"claims\": [\n    {\n      \"claim_id\": \"m3-C0-CL1\",\n      \"text\": \"The paper discusses the tradeoff between inference efficiency and performance in sub-quadratic models.\",\n      \"importance\": \"high\",\n      \"support_type\": \"explicit\",\n      \"numbers\": [],\n      \"entities\": [\"inference efficiency\", \"performance\", \"sub-quadratic models\"],\n      \"dependencies\": []"
+    },
+    "cached_at": "2026-03-31T06:12:39.008804+00:00"
+  },
+  "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C99",
+    "chunk_hash": "41843d338df422981417cd2ef468355284584f7999c0b9fbdb2e9d1026be1bae",
+    "chunk_type": "text",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [],
+      "definitions": [],
+      "math": [],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "Here is the extracted JSON in the requested schema, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n  \"claims\": [\n    {\n      \"claim_id\": \"m3-C0-CL1\",\n      \"text\": \"The paper discusses the tradeoff between inference efficiency and performance in sub-quadratic models.\",\n      \"importance\": \"high\",\n      \"support_type\": \"explicit\",\n      \"numbers\": [],\n      \"entities\": [\"inference efficiency\", \"performance\", \"sub-quadratic models\"],\n      \"dependencies\": []"
+    },
+    "cached_at": "2026-03-31T06:12:39.008804+00:00"
+  },
+  "d65db15fea30f37d4ebae83e0d6535c1b7c04ae0cea7935f606036123ac7679d": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C0",
+    "chunk_hash": "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce",
+    "chunk_type": "text",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [],
+      "definitions": [],
+      "math": [],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "Here is the extraction in the requested JSON format, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n  \"claims\": [\n    {\n      \"claim_id\": \"m3-C0-CL1\",\n      \"text\": \"The paper introduces Mamba-3, an improved sequence modeling approach based on state space principles.\",\n      \"importance\": \"high\",\n      \"support_type\": \"explicit\",\n      \"numbers\": [],\n      \"entities\": [\"Mamba-3\", \"sequence modeling\", \"state space principles\"],\n      \"dependencies\": [],\n   "
+    },
+    "cached_at": "2026-03-31T06:12:56.425042+00:00"
+  },
+  "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce": {
+    "stage": "extract",
+    "doc_id": "m3",
+    "chunk_id": "C0",
+    "chunk_hash": "5fbcc7098a4bb6fc7b9bef508d86a1bc253826726c7059d55be3d82de56187ce",
+    "chunk_type": "text",
+    "mode_used": "MODE_REASONING",
+    "model_id": "nvidia/llama-3.3-nemotron-super-49b-v1",
+    "extracted": {
+      "claims": [],
+      "definitions": [],
+      "math": [],
+      "table": [],
+      "figure": [],
+      "code": [],
+      "chunk_summary": "Here is the extraction in the requested JSON format, prioritizing facts relevant to the user's question \"What is this paper about?\":\n\n```\n{\n  \"claims\": [\n    {\n      \"claim_id\": \"m3-C0-CL1\",\n      \"text\": \"The paper introduces Mamba-3, an improved sequence modeling approach based on state space principles.\",\n      \"importance\": \"high\",\n      \"support_type\": \"explicit\",\n      \"numbers\": [],\n      \"entities\": [\"Mamba-3\", \"sequence modeling\", \"state space principles\"],\n      \"dependencies\": [],\n   "
+    },
+    "cached_at": "2026-03-31T06:12:56.425042+00:00"
   }
 }

mp1/pluto/models.py CHANGED Viewed

@@ -10,7 +10,9 @@ import hashlib
 from enum import Enum
 from typing import Optional
-from pydantic import BaseModel, Field
 # ── Enums ──────────────────────────────────────────────────────────────────────
@@ -63,6 +65,11 @@ class Evidence(BaseModel):
     where: str = ""
     quote: str = Field(default="", max_length=200)
 # ── S0 ROUTE ───────────────────────────────────────────────────────────────────
@@ -70,6 +77,11 @@ class DocScope(BaseModel):
     doc_id: str
     reason: str
 class ChunkPlan(BaseModel):
     doc_id: str
@@ -80,6 +92,11 @@ class ChunkPlan(BaseModel):
     priority: Priority = Priority.MEDIUM
     task: str = ""
 class Budgets(BaseModel):
     max_chunks_to_read: int = 200
@@ -106,12 +123,27 @@ class Claim(BaseModel):
     dependencies: list[str] = Field(default_factory=list)
     evidence: Evidence | None = None
 class MathItem(BaseModel):
     expression: str
     interpretation: str = ""
     evidence: Evidence | None = None
 class TableItem(BaseModel):
     caption: str = ""
@@ -119,12 +151,35 @@ class TableItem(BaseModel):
     rows: list[list[str]] = Field(default_factory=list)
     evidence: Evidence | None = None
 class FigureItem(BaseModel):
     caption: str = ""
     description: str = ""
     evidence: Evidence | None = None
 class CodeItem(BaseModel):
     language: str = ""
@@ -132,6 +187,11 @@ class CodeItem(BaseModel):
     description: str = ""
     evidence: Evidence | None = None
 class ExtractedContent(BaseModel):
     claims: list[Claim] = Field(default_factory=list)
@@ -142,6 +202,11 @@ class ExtractedContent(BaseModel):
     code: list[CodeItem] = Field(default_factory=list)
     chunk_summary: str = ""
 class ExtractOutput(BaseModel):
     stage: str = "extract"
@@ -160,18 +225,38 @@ class SectionPoint(BaseModel):
     section: str
     points: list[str] = Field(default_factory=list)
 class KeyClaim(BaseModel):
     claim: str
     support: ClaimStatus = ClaimStatus.SUPPORTED
     evidence_refs: list[Evidence] = Field(default_factory=list)
 class Synthesis(BaseModel):
     answer_outline: list[SectionPoint] = Field(default_factory=list)
     key_claims: list[KeyClaim] = Field(default_factory=list)
     open_gaps: list[str] = Field(default_factory=list)
 class MergeOutput(BaseModel):
     stage: str = "merge"
@@ -185,12 +270,22 @@ class CheckedClaim(BaseModel):
     status: ClaimStatus
     evidence: list[Evidence] = Field(default_factory=list)
 class Verification(BaseModel):
     checked_claims: list[CheckedClaim] = Field(default_factory=list)
     unsupported_claims: list[str] = Field(default_factory=list)
     required_followups: list[str] = Field(default_factory=list)
 class VerifyOutput(BaseModel):
     stage: str = "verify"
@@ -203,11 +298,21 @@ class Section(BaseModel):
     title: str
     content: str
 class FinalAnswer(BaseModel):
     response: str
     sections: list[Section] = Field(default_factory=list)
 class FinalEvidence(BaseModel):
     doc_id: str
@@ -216,6 +321,11 @@ class FinalEvidence(BaseModel):
     supports: str = ""
     quote: str = Field(default="", max_length=200)
 class TraceSummary(BaseModel):
     real_switching: bool = False
@@ -226,6 +336,16 @@ class TraceSummary(BaseModel):
     search_queries: list[str] = Field(default_factory=list)
     budget_notes: str = ""
 class FinalOutput(BaseModel):
     final_answer: FinalAnswer = Field(default_factory=FinalAnswer)
@@ -236,6 +356,11 @@ class FinalOutput(BaseModel):
     next_actions: list[str] = Field(default_factory=list)
     bus_messages: list[dict] = Field(default_factory=list)
 # ── Helpers ────────────────────────────────────────────────────────────────────

 from enum import Enum
 from typing import Optional
+from pydantic import BaseModel, Field, field_validator
+from pluto.utils import coerce_string, coerce_string_list, ensure_list
 # ── Enums ──────────────────────────────────────────────────────────────────────
     where: str = ""
     quote: str = Field(default="", max_length=200)
+    @field_validator("doc_id", "chunk_id", "where", "quote", mode="before")
+    @classmethod
+    def _normalize_text_fields(cls, value):
+        return coerce_string(value, default="")
 # ── S0 ROUTE ───────────────────────────────────────────────────────────────────
     doc_id: str
     reason: str
+    @field_validator("doc_id", "reason", mode="before")
+    @classmethod
+    def _normalize_doc_scope_fields(cls, value):
+        return coerce_string(value, default="")
 class ChunkPlan(BaseModel):
     doc_id: str
     priority: Priority = Priority.MEDIUM
     task: str = ""
+    @field_validator("doc_id", "chunk_id", "where", "task", mode="before")
+    @classmethod
+    def _normalize_chunk_plan_text_fields(cls, value):
+        return coerce_string(value, default="")
 class Budgets(BaseModel):
     max_chunks_to_read: int = 200
     dependencies: list[str] = Field(default_factory=list)
     evidence: Evidence | None = None
+    @field_validator("claim_id", "text", mode="before")
+    @classmethod
+    def _normalize_claim_text_fields(cls, value):
+        return coerce_string(value, default="")
+    @field_validator("numbers", "entities", "dependencies", mode="before")
+    @classmethod
+    def _normalize_claim_lists(cls, value):
+        return coerce_string_list(value)
 class MathItem(BaseModel):
     expression: str
     interpretation: str = ""
     evidence: Evidence | None = None
+    @field_validator("expression", "interpretation", mode="before")
+    @classmethod
+    def _normalize_math_fields(cls, value):
+        return coerce_string(value, default="")
 class TableItem(BaseModel):
     caption: str = ""
     rows: list[list[str]] = Field(default_factory=list)
     evidence: Evidence | None = None
+    @field_validator("caption", mode="before")
+    @classmethod
+    def _normalize_table_caption(cls, value):
+        return coerce_string(value, default="")
+    @field_validator("headers", mode="before")
+    @classmethod
+    def _normalize_table_headers(cls, value):
+        return coerce_string_list(value)
+    @field_validator("rows", mode="before")
+    @classmethod
+    def _normalize_table_rows(cls, value):
+        rows = []
+        for row in ensure_list(value):
+            rows.append(coerce_string_list(row))
+        return [row for row in rows if row]
 class FigureItem(BaseModel):
     caption: str = ""
     description: str = ""
     evidence: Evidence | None = None
+    @field_validator("caption", "description", mode="before")
+    @classmethod
+    def _normalize_figure_fields(cls, value):
+        return coerce_string(value, default="")
 class CodeItem(BaseModel):
     language: str = ""
     description: str = ""
     evidence: Evidence | None = None
+    @field_validator("language", "snippet", "description", mode="before")
+    @classmethod
+    def _normalize_code_fields(cls, value):
+        return coerce_string(value, default="")
 class ExtractedContent(BaseModel):
     claims: list[Claim] = Field(default_factory=list)
     code: list[CodeItem] = Field(default_factory=list)
     chunk_summary: str = ""
+    @field_validator("chunk_summary", mode="before")
+    @classmethod
+    def _normalize_chunk_summary(cls, value):
+        return coerce_string(value, default="")
 class ExtractOutput(BaseModel):
     stage: str = "extract"
     section: str
     points: list[str] = Field(default_factory=list)
+    @field_validator("section", mode="before")
+    @classmethod
+    def _normalize_section_name(cls, value):
+        return coerce_string(value, default="")
+    @field_validator("points", mode="before")
+    @classmethod
+    def _normalize_section_points(cls, value):
+        return coerce_string_list(value)
 class KeyClaim(BaseModel):
     claim: str
     support: ClaimStatus = ClaimStatus.SUPPORTED
     evidence_refs: list[Evidence] = Field(default_factory=list)
+    @field_validator("claim", mode="before")
+    @classmethod
+    def _normalize_key_claim(cls, value):
+        return coerce_string(value, default="")
 class Synthesis(BaseModel):
     answer_outline: list[SectionPoint] = Field(default_factory=list)
     key_claims: list[KeyClaim] = Field(default_factory=list)
     open_gaps: list[str] = Field(default_factory=list)
+    @field_validator("open_gaps", mode="before")
+    @classmethod
+    def _normalize_open_gap_list(cls, value):
+        return coerce_string_list(value)
 class MergeOutput(BaseModel):
     stage: str = "merge"
     status: ClaimStatus
     evidence: list[Evidence] = Field(default_factory=list)
+    @field_validator("claim", mode="before")
+    @classmethod
+    def _normalize_checked_claim(cls, value):
+        return coerce_string(value, default="")
 class Verification(BaseModel):
     checked_claims: list[CheckedClaim] = Field(default_factory=list)
     unsupported_claims: list[str] = Field(default_factory=list)
     required_followups: list[str] = Field(default_factory=list)
+    @field_validator("unsupported_claims", "required_followups", mode="before")
+    @classmethod
+    def _normalize_verification_lists(cls, value):
+        return coerce_string_list(value)
 class VerifyOutput(BaseModel):
     stage: str = "verify"
     title: str
     content: str
+    @field_validator("title", "content", mode="before")
+    @classmethod
+    def _normalize_section_fields(cls, value):
+        return coerce_string(value, default="")
 class FinalAnswer(BaseModel):
     response: str
     sections: list[Section] = Field(default_factory=list)
+    @field_validator("response", mode="before")
+    @classmethod
+    def _normalize_response(cls, value):
+        return coerce_string(value, default="")
 class FinalEvidence(BaseModel):
     doc_id: str
     supports: str = ""
     quote: str = Field(default="", max_length=200)
+    @field_validator("doc_id", "chunk_id", "where", "supports", "quote", mode="before")
+    @classmethod
+    def _normalize_final_evidence_fields(cls, value):
+        return coerce_string(value, default="")
 class TraceSummary(BaseModel):
     real_switching: bool = False
     search_queries: list[str] = Field(default_factory=list)
     budget_notes: str = ""
+    @field_validator("models_used", "docs_opened", "search_queries", mode="before")
+    @classmethod
+    def _normalize_trace_lists(cls, value):
+        return coerce_string_list(value)
+    @field_validator("budget_notes", mode="before")
+    @classmethod
+    def _normalize_budget_notes(cls, value):
+        return coerce_string(value, default="")
 class FinalOutput(BaseModel):
     final_answer: FinalAnswer = Field(default_factory=FinalAnswer)
     next_actions: list[str] = Field(default_factory=list)
     bus_messages: list[dict] = Field(default_factory=list)
+    @field_validator("missing_info", "next_actions", mode="before")
+    @classmethod
+    def _normalize_final_output_lists(cls, value):
+        return coerce_string_list(value)
 # ── Helpers ────────────────────────────────────────────────────────────────────

mp1/pluto/stages/merge.py CHANGED Viewed

@@ -27,6 +27,7 @@ from pluto.models import (
     Synthesis,
 )
 from pluto.tracer import Tracer
 _BATCH_PROMPT = """You are synthesizing extracted facts from a document chunk batch. Produce a focused sub-summary for the user's question.
@@ -314,20 +315,18 @@ def _parse_merge(raw: str) -> MergeOutput:
             section=sec.get("section", ""),
             points=sec.get("points", []),
         )
-        for sec in data.get("answer_outline", [])
         if isinstance(sec, dict)
         if sec.get("section") or sec.get("points")
     ]
     key_claims: list[KeyClaim] = []
-    for kc in data.get("key_claims", []):
         if not isinstance(kc, dict):
             continue
-        evidence_refs = []
-        for doc_id, chunk_id in zip(kc.get("evidence_doc_ids") or [], kc.get("evidence_chunk_ids") or []):
-            evidence_refs.append(Evidence(doc_id=doc_id or "", chunk_id=chunk_id or ""))
-        support_str = str(kc.get("support", "supported")).lower()
         try:
             support = ClaimStatus(support_str)
         except ValueError:
@@ -369,6 +368,8 @@ def _stabilize_merge(result: MergeOutput, query: str = "", detail_level: str = "
         outline = _synthesize_outline_from_claims(key_claims, query=query, detail_level=detail_level)
     elif outline:
         outline = _top_up_outline(outline, key_claims, detail_level=detail_level)
     return MergeOutput(
         synthesis=Synthesis(
@@ -558,6 +559,73 @@ def _top_up_outline(
     return outline
 def _normalize_detail_level(detail_level: str | None) -> str:
     return "detailed" if str(detail_level or "").strip().lower() == "detailed" else "standard"
@@ -638,3 +706,43 @@ def _normalize_open_gaps(raw_open_gaps) -> list[str]:
         if text:
             normalized.append(text)
     return normalized

     Synthesis,
 )
 from pluto.tracer import Tracer
+from pluto.utils import coerce_string, coerce_string_list, ensure_list, pair_string_lists
 _BATCH_PROMPT = """You are synthesizing extracted facts from a document chunk batch. Produce a focused sub-summary for the user's question.
             section=sec.get("section", ""),
             points=sec.get("points", []),
         )
+        for sec in ensure_list(data.get("answer_outline", []))
         if isinstance(sec, dict)
         if sec.get("section") or sec.get("points")
     ]
     key_claims: list[KeyClaim] = []
+    for kc in ensure_list(data.get("key_claims", [])):
         if not isinstance(kc, dict):
             continue
+        evidence_refs = _parse_evidence_refs(kc)
+        support_str = coerce_string(kc.get("support", "supported"), default="supported").lower()
         try:
             support = ClaimStatus(support_str)
         except ValueError:
         outline = _synthesize_outline_from_claims(key_claims, query=query, detail_level=detail_level)
     elif outline:
         outline = _top_up_outline(outline, key_claims, detail_level=detail_level)
+    if detail_level == "detailed" and key_claims:
+        outline = _enrich_detailed_outline(outline, key_claims, query=query)
     return MergeOutput(
         synthesis=Synthesis(
     return outline
+def _enrich_detailed_outline(
+    outline: list[SectionPoint],
+    key_claims: list[KeyClaim],
+    query: str = "",
+) -> list[SectionPoint]:
+    """Guarantee richer structure for detailed mode when evidence is available."""
+    synthesized = _synthesize_outline_from_claims(key_claims, query=query, detail_level="detailed")
+    if not synthesized:
+        return outline
+    if not outline:
+        return synthesized
+    return _merge_outline_variants(outline, synthesized, point_cap=7, section_cap=5)
+def _merge_outline_variants(
+    primary: list[SectionPoint],
+    secondary: list[SectionPoint],
+    point_cap: int,
+    section_cap: int,
+) -> list[SectionPoint]:
+    """Merge outline variants while preserving order and deduplicating points."""
+    merged: list[SectionPoint] = []
+    title_to_index: dict[str, int] = {}
+    def add_section(section: SectionPoint) -> None:
+        title = _clean_text(section.section)
+        if not title:
+            return
+        title_key = _fingerprint(title)
+        clean_points: list[str] = []
+        seen_local: set[str] = set()
+        for point in section.points:
+            text = _clean_text(point)
+            fingerprint = _fingerprint(text)
+            if not text or fingerprint in seen_local:
+                continue
+            seen_local.add(fingerprint)
+            clean_points.append(text)
+        if not clean_points:
+            return
+        if title_key in title_to_index:
+            existing = merged[title_to_index[title_key]]
+            seen_existing = {_fingerprint(point) for point in existing.points}
+            for point in clean_points:
+                fingerprint = _fingerprint(point)
+                if fingerprint in seen_existing or len(existing.points) >= point_cap:
+                    continue
+                existing.points.append(point)
+                seen_existing.add(fingerprint)
+            return
+        if len(merged) >= section_cap:
+            return
+        title_to_index[title_key] = len(merged)
+        merged.append(SectionPoint(section=title, points=clean_points[:point_cap]))
+    for section in primary:
+        add_section(section)
+    for section in secondary:
+        add_section(section)
+    return merged or primary or secondary
 def _normalize_detail_level(detail_level: str | None) -> str:
     return "detailed" if str(detail_level or "").strip().lower() == "detailed" else "standard"
         if text:
             normalized.append(text)
     return normalized
+def _parse_evidence_refs(raw_item: dict) -> list[Evidence]:
+    """Normalize evidence refs from scalar, list, or nested-object shapes."""
+    evidence_refs: list[Evidence] = []
+    raw_refs = raw_item.get("evidence_refs") or raw_item.get("evidence") or []
+    for ref in ensure_list(raw_refs):
+        if not isinstance(ref, dict):
+            continue
+        for doc_id, chunk_id in pair_string_lists(
+            ref.get("doc_id") or ref.get("evidence_doc_id") or ref.get("doc_ids"),
+            ref.get("chunk_id") or ref.get("evidence_chunk_id") or ref.get("chunk_ids"),
+        ):
+            evidence_refs.append(
+                Evidence(
+                    doc_id=doc_id,
+                    chunk_id=chunk_id,
+                    where=coerce_string(ref.get("where", ""), default=""),
+                    quote=coerce_string(ref.get("quote", ""), default="")[:200],
+                )
+            )
+    if evidence_refs:
+        return _dedupe_evidence_refs(evidence_refs)
+    for doc_id, chunk_id in pair_string_lists(
+        raw_item.get("evidence_doc_ids") or raw_item.get("evidence_doc_id"),
+        raw_item.get("evidence_chunk_ids") or raw_item.get("evidence_chunk_id"),
+    ):
+        evidence_refs.append(Evidence(doc_id=doc_id, chunk_id=chunk_id))
+    # Last-resort fallback when the model emits one combined evidence object.
+    if not evidence_refs:
+        chunk_ids = coerce_string_list(raw_item.get("chunk_ids") or raw_item.get("chunk_id"))
+        doc_ids = coerce_string_list(raw_item.get("doc_ids") or raw_item.get("doc_id"))
+        for doc_id, chunk_id in pair_string_lists(doc_ids, chunk_ids):
+            evidence_refs.append(Evidence(doc_id=doc_id, chunk_id=chunk_id))
+    return _dedupe_evidence_refs(evidence_refs)

mp1/pluto/stages/verify.py CHANGED Viewed

@@ -24,7 +24,7 @@ from pluto.models import (
     VerifyOutput,
 )
 from pluto.tracer import Tracer
-from pluto.utils import extract_json_from_response
 DIRECT_SUPPORT_THRESHOLD = 0.72
 LLM_CHECK_THRESHOLD = 0.18
@@ -306,18 +306,8 @@ def _extract_single_verdict(v_data: dict, candidates: list[dict]) -> tuple[Claim
     except ValueError:
         return None, []
-    evidence = []
-    doc_id = item.get("evidence_doc_id")
-    chunk_id = item.get("evidence_chunk_id")
-    if doc_id:
-        evidence.append(
-            Evidence(
-                doc_id=doc_id,
-                chunk_id=chunk_id or "",
-                quote=item.get("quote", ""),
-            )
-        )
-    elif candidates and status != ClaimStatus.UNSUPPORTED:
         evidence.append(_candidate_to_evidence(candidates[0]))
     return status, evidence
@@ -341,7 +331,7 @@ def _parse_verify(raw: str) -> VerifyOutput:
     data = _parse_verify_json(raw)
     checked_claims = []
-    for item in data.get("checked_claims", []):
         if not isinstance(item, dict):
             continue
         status_raw = str(item.get("status", "unsupported")).lower()
@@ -350,17 +340,7 @@ def _parse_verify(raw: str) -> VerifyOutput:
         except ValueError:
             status = ClaimStatus.UNSUPPORTED
-        evidence = []
-        doc_id = item.get("evidence_doc_id")
-        if doc_id:
-            evidence.append(
-                Evidence(
-                    doc_id=doc_id,
-                    chunk_id=item.get("evidence_chunk_id", ""),
-                    where=item.get("where", ""),
-                    quote=item.get("quote", ""),
-                )
-            )
         checked_claims.append(
             CheckedClaim(
@@ -387,6 +367,46 @@ def _parse_verify(raw: str) -> VerifyOutput:
     )
 def _should_generate_followups(checked_results: list[CheckedClaim]) -> bool:
     unsupported_count = sum(1 for item in checked_results if item.status == ClaimStatus.UNSUPPORTED)
     if unsupported_count == 0:

     VerifyOutput,
 )
 from pluto.tracer import Tracer
+from pluto.utils import coerce_string, ensure_list, extract_json_from_response, pair_string_lists
 DIRECT_SUPPORT_THRESHOLD = 0.72
 LLM_CHECK_THRESHOLD = 0.18
     except ValueError:
         return None, []
+    evidence = _parse_evidence_items(item)
+    if not evidence and candidates and status != ClaimStatus.UNSUPPORTED:
         evidence.append(_candidate_to_evidence(candidates[0]))
     return status, evidence
     data = _parse_verify_json(raw)
     checked_claims = []
+    for item in ensure_list(data.get("checked_claims", [])):
         if not isinstance(item, dict):
             continue
         status_raw = str(item.get("status", "unsupported")).lower()
         except ValueError:
             status = ClaimStatus.UNSUPPORTED
+        evidence = _parse_evidence_items(item)
         checked_claims.append(
             CheckedClaim(
     )
def _parse_evidence_items(raw_item: dict) -> list[Evidence]:
    """Build ``Evidence`` records from one verifier claim payload.

    Two payload shapes are accepted, tried in this order:

    1. Nested references under ``"evidence"`` or ``"evidence_refs"``. Each
       dict entry supplies its own doc/chunk ids (``doc_id``,
       ``evidence_doc_id`` or ``doc_ids``; ``chunk_id``,
       ``evidence_chunk_id`` or ``chunk_ids``) plus optional ``where`` and
       ``quote``. Entries that are not dicts are ignored.
    2. Flat ids on the claim itself (``evidence_doc_id(s)`` and
       ``evidence_chunk_id(s)``), with ``where``/``quote`` read from the claim.

    The flat shape is consulted only when the nested shape produced nothing.
    Quotes are cut to at most 200 characters.

    Args:
        raw_item: One decoded claim dict from the verifier response.

    Returns:
        The evidence records found, possibly an empty list.
    """

    def _records(source: dict, doc_value, chunk_value) -> list[Evidence]:
        # One record per (doc, chunk) pair; where/quote are shared by every
        # pair that comes from the same source dict.
        return [
            Evidence(
                doc_id=doc_id,
                chunk_id=chunk_id,
                where=coerce_string(source.get("where", ""), default=""),
                quote=coerce_string(source.get("quote", ""), default="")[:200],
            )
            for doc_id, chunk_id in pair_string_lists(doc_value, chunk_value)
        ]

    nested: list[Evidence] = []
    for ref in ensure_list(raw_item.get("evidence") or raw_item.get("evidence_refs") or []):
        if isinstance(ref, dict):
            nested.extend(
                _records(
                    ref,
                    ref.get("doc_id") or ref.get("evidence_doc_id") or ref.get("doc_ids"),
                    ref.get("chunk_id") or ref.get("evidence_chunk_id") or ref.get("chunk_ids"),
                )
            )
    if nested:
        return nested

    # No usable nested refs: fall back to the flat id fields on the claim.
    return _records(
        raw_item,
        raw_item.get("evidence_doc_id") or raw_item.get("evidence_doc_ids"),
        raw_item.get("evidence_chunk_id") or raw_item.get("evidence_chunk_ids"),
    )
 def _should_generate_followups(checked_results: list[CheckedClaim]) -> bool:
     unsupported_count = sum(1 for item in checked_results if item.status == ClaimStatus.UNSUPPORTED)
     if unsupported_count == 0:

mp1/pluto/utils.py CHANGED Viewed

@@ -4,7 +4,25 @@ pluto/utils.py — Shared utilities for response parsing.
 from __future__ import annotations
 import re
 def strip_think_block(text: str) -> str:
@@ -28,3 +46,81 @@ def extract_json_from_response(raw: str) -> str:
         return brace_match.group(0).strip()
     return cleaned.strip()

 from __future__ import annotations
+import json
 import re
+from itertools import zip_longest
+_PREFERRED_TEXT_KEYS = (
+    "chunk_id",
+    "doc_id",
+    "value",
+    "text",
+    "title",
+    "label",
+    "name",
+    "id",
+    "where",
+    "quote",
+    "claim",
+    "section",
+)
 def strip_think_block(text: str) -> str:
         return brace_match.group(0).strip()
     return cleaned.strip()
def ensure_list(value):
    """Wrap *value* in a list unless it already is one.

    * a ``list`` is handed back untouched (the same object, not a copy);
    * ``None`` becomes an empty list;
    * a ``tuple`` or ``set`` is copied into a new list;
    * anything else (strings and dicts included) becomes a one-element list.
    """
    if isinstance(value, list):
        return value
    if value is None:
        return []
    return list(value) if isinstance(value, (tuple, set)) else [value]
def flatten_string_values(value) -> list[str]:
    """Flatten nested scalars/collections into a list of non-empty strings.

    Traversal rules, applied recursively:

    * ``None`` contributes nothing.
    * A ``dict`` contributes the value of the first key from
      ``_PREFERRED_TEXT_KEYS`` that is present and is neither ``None`` nor
      ``""``; its other keys are then ignored. When no preferred key
      qualifies, the whole dict is serialised as JSON with sorted keys. If it
      cannot be serialised (non-JSON values, mixed-type keys, circular
      references) its ``str()`` form is used instead of raising.
    * A ``list``, ``tuple`` or ``set`` contributes each element in iteration
      order (for sets that order is unspecified).
    * Any other value contributes ``str(value).strip()`` when non-empty.

    Args:
        value: Arbitrary, possibly nested, input.

    Returns:
        The collected strings in traversal order; duplicates are kept.
    """
    values: list[str] = []

    def _walk(item) -> None:
        if item is None:
            return
        if isinstance(item, dict):
            for key in _PREFERRED_TEXT_KEYS:
                if key in item and item[key] not in (None, ""):
                    _walk(item[key])
                    return
            try:
                dumped = json.dumps(item, ensure_ascii=False, sort_keys=True).strip()
            except (TypeError, ValueError):
                # json.dumps rejects non-serialisable values, mixed-type keys
                # under sort_keys, and circular references. This helper is a
                # best-effort normaliser, so degrade to str() rather than crash.
                dumped = str(item).strip()
            if dumped:
                values.append(dumped)
            return
        if isinstance(item, (list, tuple, set)):
            for part in item:
                _walk(part)
            return
        text = str(item).strip()
        if text:
            values.append(text)

    _walk(value)
    return values
def coerce_string(value, default: str = "") -> str:
    """Collapse *value* into a single display string.

    The input is flattened with ``flatten_string_values`` and the pieces are
    joined with ``", "``. *default* is returned when flattening yields nothing.
    """
    pieces = flatten_string_values(value)
    if not pieces:
        return default
    return ", ".join(pieces)
def coerce_string_list(value) -> list[str]:
    """Flatten *value* into strings and drop repeats, keeping first-seen order."""
    # dict keys remember insertion order, so fromkeys() removes duplicates
    # without disturbing the order in which strings first appeared.
    return list(dict.fromkeys(flatten_string_values(value)))
def pair_string_lists(left, right) -> list[tuple[str, str]]:
    """Broadcast or zip mixed scalar/list inputs into string pairs.

    Both inputs are flattened with ``flatten_string_values`` first.

    * Both sides empty: returns ``[]``.
    * One side empty: it is treated as the single placeholder ``""``.
    * One side holds a single distinct value: that value is broadcast across
      the distinct values of the other side.
    * Both sides have the same number of entries: they are paired by position
      *without* de-duplicating either side first, so a repeated id (for
      example the same document cited for several chunks) stays aligned with
      its partner.
    * Otherwise: the de-duplicated sides are zipped, padding the shorter one
      with ``""``.

    Args:
        left: Scalar or nested collection for the first element of each pair.
        right: Scalar or nested collection for the second element.

    Returns:
        Pairs in first-seen order with no repeated pair.
    """
    left_items = flatten_string_values(left)
    right_items = flatten_string_values(right)
    if not left_items and not right_items:
        return []
    left_items = left_items or [""]
    right_items = right_items or [""]

    unique_left = list(dict.fromkeys(left_items))
    unique_right = list(dict.fromkeys(right_items))

    # A side with one distinct value is broadcast over the other side.
    if len(unique_left) == 1 or len(unique_right) == 1:
        return [(lhs, rhs) for lhs in unique_left for rhs in unique_right]

    if len(left_items) == len(right_items):
        # Equal lengths signal positional correspondence. De-duplicating each
        # side independently here would shift entries and pair ids that never
        # belonged together, so pair first and de-duplicate the pairs instead.
        pairs = zip(left_items, right_items)
    else:
        pairs = zip_longest(unique_left, unique_right, fillvalue="")
    return list(dict.fromkeys(pairs))

mp1/test_merge.py CHANGED Viewed

@@ -9,7 +9,7 @@ from pluto.models import (
     Synthesis,
 )
 from pluto.stages import merge as merge_stage
-from pluto.stages.merge import run_merge
 from pluto.tracer import Tracer
@@ -78,3 +78,117 @@ def test_merge_synthesizes_outline_when_model_returns_only_key_claims(monkeypatc
         for section in result.synthesis.answer_outline
         for point in section.points
     )

     Synthesis,
 )
 from pluto.stages import merge as merge_stage
+from pluto.stages.merge import _parse_merge, run_merge
 from pluto.tracer import Tracer
         for section in result.synthesis.answer_outline
         for point in section.points
     )
def test_parse_merge_normalizes_scalar_doc_and_multi_chunk_evidence():
    """A scalar doc id paired with a nested chunk-id list yields one ref per chunk."""
    payload = """
    {
      "answer_outline": [
        {
          "section": "Overview",
          "points": "The method uses evidence from multiple chunks."
        }
      ],
      "key_claims": [
        {
          "claim": "The method is supported across several chunks.",
          "support": "supported",
          "evidence_doc_ids": "paper_a",
          "evidence_chunk_ids": [["C18", "C46", "C81"]]
        }
      ],
      "open_gaps": []
    }
    """

    synthesis = _parse_merge(payload).synthesis

    # "points" was given as a bare string; it must come back as a list.
    first_section = synthesis.answer_outline[0]
    assert first_section.points == ["The method uses evidence from multiple chunks."]

    claim_refs = synthesis.key_claims[0].evidence_refs
    assert len(claim_refs) == 3
    doc_ids = [ref.doc_id for ref in claim_refs]
    assert doc_ids == ["paper_a", "paper_a", "paper_a"]
    chunk_ids = [ref.chunk_id for ref in claim_refs]
    assert chunk_ids == ["C18", "C46", "C81"]
def test_merge_detailed_mode_produces_richer_answer_structure(monkeypatch):
    """With identical model output, a detailed merge must carry more outline points than a standard one."""
    canned_response = """
    {
      "answer_outline": [
        {
          "section": "Overview",
          "points": [
            "The paper introduces a multi-agent defense coordinator.",
            "The system reports strong defended-scenario performance."
          ]
        }
      ],
      "key_claims": [
        {
          "claim": "The paper introduces a multi-agent defense coordinator for prompt-injection mitigation.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C1"]
        },
        {
          "claim": "The evaluation reports 0% ASR across defended scenarios.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C2"]
        },
        {
          "claim": "The method routes adversarial prompts through a defense worker.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C3"]
        },
        {
          "claim": "The architecture includes a recovery worker for post-attack repair.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C4"]
        },
        {
          "claim": "The paper discusses limitations and future work for the coordinator pipeline.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C5"]
        },
        {
          "claim": "The benchmark comparison highlights gains over baselines.",
          "support": "supported",
          "evidence_doc_ids": ["multi_agent"],
          "evidence_chunk_ids": ["C6"]
        }
      ],
      "open_gaps": []
    }
    """

    def _fake_dispatch(*args, **kwargs):
        # Stand-in for the model call: every merge sees the same raw JSON.
        return canned_response

    monkeypatch.setattr(merge_stage, "dispatch", _fake_dispatch)

    overview_claim = Claim(
        claim_id="cl1",
        text="The paper introduces a multi-agent defense coordinator for prompt-injection mitigation.",
        importance=Importance.HIGH,
        evidence=Evidence(
            doc_id="multi_agent",
            chunk_id="C1",
            where="overview",
            quote="multi-agent defense coordinator",
        ),
    )
    extraction = ExtractOutput(
        doc_id="multi_agent",
        chunk_id="C1",
        chunk_type=ChunkType.TEXT,
        mode_used=ModeName.MODE_REASONING,
        extracted=ExtractedContent(
            claims=[overview_claim],
            chunk_summary="Coordinator overview and results.",
        ),
    )

    def _point_count(result) -> int:
        # Total bullet points across all outline sections of a merge result.
        return sum(len(section.points) for section in result.synthesis.answer_outline)

    question = "Summarize the paper."
    standard = run_merge(question, [extraction], Tracer(), detail_level="standard")
    detailed = run_merge(question, [extraction], Tracer(), detail_level="detailed")

    standard_total = _point_count(standard)
    detailed_total = _point_count(detailed)
    assert len(detailed.synthesis.answer_outline) >= len(standard.synthesis.answer_outline)
    assert detailed_total > standard_total

mp1/test_schema.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from pluto.models import Evidence, FinalEvidence, SectionPoint, Verification
def test_schema_coerces_mixed_scalar_and_list_inputs():
    """Evidence models are expected to collapse list/dict field inputs into plain strings."""
    evidence = Evidence(
        doc_id=["paper_a"],
        chunk_id=["C1", "C2"],
        where={"text": "results"},
        quote=["alpha", "beta"],
    )
    expected_evidence = {
        "doc_id": "paper_a",
        "chunk_id": "C1, C2",
        "where": "results",
        "quote": "alpha, beta",
    }
    for field_name, wanted in expected_evidence.items():
        assert getattr(evidence, field_name) == wanted

    final_evidence = FinalEvidence(
        doc_id="paper_a",
        chunk_id=["C4", "C5"],
        where=["method"],
        supports=["Main claim"],
        quote=["quoted", "support"],
    )
    expected_final = {
        "chunk_id": "C4, C5",
        "where": "method",
        "supports": "Main claim",
        "quote": "quoted, support",
    }
    for field_name, wanted in expected_final.items():
        assert getattr(final_evidence, field_name) == wanted
def test_schema_coerces_outline_and_followup_lists():
    """Scalar or dict inputs to list-typed schema fields are expected to become one-element lists."""
    outline_entry = SectionPoint(section=["Overview"], points="Single normalized point")
    verdict = Verification(
        unsupported_claims="Missing metric support",
        required_followups={"text": "Where is the metric reported?"},
    )

    # A one-element list given for the section title collapses to a string.
    assert outline_entry.section == "Overview"
    assert outline_entry.points == ["Single normalized point"]

    assert verdict.unsupported_claims == ["Missing metric support"]
    assert verdict.required_followups == ["Where is the metric reported?"]

mp1/test_server.py CHANGED Viewed

@@ -189,3 +189,27 @@ def test_stream_progress_serializes_pydantic_payloads(monkeypatch):
     payload = json.loads(body.removeprefix("data: ").strip())
     assert payload["payload"]["plan"][0]["doc_id"] == "paper"
     assert payload["payload"]["plan"][0]["chunk_type"] == "text"

     payload = json.loads(body.removeprefix("data: ").strip())
     assert payload["payload"]["plan"][0]["doc_id"] == "paper"
     assert payload["payload"]["plan"][0]["chunk_type"] == "text"
def test_server_cache_stats_route_returns_json(monkeypatch):
    """GET /api/cache/stats is expected to answer 200 with the cache's stats() dict as JSON."""
    canned_stats = {"hits": 7, "misses": 3, "entries": 10}

    class FakeCache:
        def stats(self):
            # Hand out a fresh dict on every call.
            return dict(canned_stats)

    monkeypatch.setattr(server, "_extraction_cache", FakeCache())

    reply = TestClient(server.app).get("/api/cache/stats")

    assert reply.status_code == 200
    assert reply.json() == canned_stats
def test_server_result_route_returns_404_when_empty(monkeypatch):
    """GET /api/result is expected to answer 404 with an error message when no result is stored."""
    monkeypatch.setattr(server, "_latest_result", None)

    reply = TestClient(server.app).get("/api/result")

    assert reply.status_code == 404
    error_message = reply.json()["error"]
    assert error_message == "No result yet"

mp1/test_verify.py CHANGED Viewed

@@ -49,6 +49,31 @@ def test_parse_verify_dump():
     assert out.verification.required_followups == ["Upload the appendix for dataset details."]
 def test_verify_directly_supports_matching_claim_without_dispatch(monkeypatch):
     def fail_dispatch(*args, **kwargs):
         raise AssertionError("dispatch should not be called for an obvious direct evidence match")

     assert out.verification.required_followups == ["Upload the appendix for dataset details."]
def test_parse_verify_handles_multi_chunk_evidence_ids():
    """One doc id with a list of chunk ids is expected to expand to one evidence entry per chunk."""
    payload = """
    {
      "checked_claims": [
        {
          "claim": "The results are supported across multiple chunks.",
          "status": "supported",
          "evidence_doc_id": "paper_a",
          "evidence_chunk_id": ["C18", "C46", "C81"],
          "quote": "results are supported"
        }
      ],
      "unsupported_claims": [],
      "required_followups": []
    }
    """

    first_claim = _parse_verify(payload).verification.checked_claims[0]
    found = first_claim.evidence

    assert len(found) == 3
    doc_ids = [entry.doc_id for entry in found]
    assert doc_ids == ["paper_a", "paper_a", "paper_a"]
    chunk_ids = [entry.chunk_id for entry in found]
    assert chunk_ids == ["C18", "C46", "C81"]
 def test_verify_directly_supports_matching_claim_without_dispatch(monkeypatch):
     def fail_dispatch(*args, **kwargs):
         raise AssertionError("dispatch should not be called for an obvious direct evidence match")