# 3v324v23's picture
# Add application file
# 66ad25b
from pluto.stages.route import run_route
from pluto.tracer import Tracer
class _FakeTools:
    """In-memory stand-in for the tools object passed to ``run_route``.

    Holds six short text chunks for each of two documents, ``doc_a`` and
    ``doc_b``, and serves them through ``list_docs``, ``search`` and
    ``get_all_chunks``.
    """

    def __init__(self):
        """Create the fake with its fixed two-document chunk fixture."""
        self.doc_index = None
        self._chunks = {
            "doc_a": [
                "Introduction to the system architecture and design.",
                "Method details for the proposed coordinator pipeline.",
                "Results show 0% ASR across defended scenarios.",
                "Additional background information with little relevance.",
                "Conclusion and limitations of the coordinator pipeline.",
                "Appendix references and citations.",
            ],
            "doc_b": [
                "Dataset description and benchmark setup.",
                "Performance comparison against baselines.",
                "More benchmark discussion and analysis.",
                "Implementation details and ablations.",
                "Future work and limitations.",
                "References section.",
            ],
        }

    def list_docs(self):
        """Return one ``{"doc_id", "filename"}`` dict per fixture document.

        The filename is the document id with a ``.md`` suffix.
        """
        return [{"doc_id": doc_id, "filename": f"{doc_id}.md"} for doc_id in self._chunks]

    def search(self, query, filters=None):
        """Return canned search hits, optionally restricted to given doc ids.

        Args:
            query: Ignored; the same hits are produced for every query.
            filters: Optional mapping. When it carries a non-empty
                ``"doc_ids"`` entry, only hits for those ids are returned.

        Returns:
            A fresh list of ``{"doc_id", "score"}`` dicts, ``doc_a`` (0.9)
            before ``doc_b`` (0.7).
        """
        del query  # results are fixed, so the query text is never consulted
        results = [{"doc_id": "doc_a", "score": 0.9}, {"doc_id": "doc_b", "score": 0.7}]
        # A missing or empty "doc_ids" entry means "no restriction".
        if filters and filters.get("doc_ids"):
            allowed = set(filters["doc_ids"])
            results = [item for item in results if item["doc_id"] in allowed]
        return results

    def get_all_chunks(self, doc_id):
        """Return the chunk texts stored for ``doc_id``.

        A copy is returned so callers that mutate the result cannot alter
        the fixture that tests later inspect through ``_chunks``.

        Raises:
            KeyError: If ``doc_id`` is not one of the fixture documents.
        """
        return list(self._chunks[doc_id])
def test_route_limits_extractions_to_relevant_subset():
    """Check that routing plans a bounded subset of chunks covering both docs.

    The chunk plan must be smaller than the total number of fixture chunks,
    within ``budgets.max_extractions``, at most 12 entries long, and contain
    at least one chunk from each of ``doc_a`` and ``doc_b``.
    """
    fake_tools = _FakeTools()
    route_out = run_route("coordinator pipeline architecture results", fake_tools, Tracer())

    available = sum(map(len, fake_tools._chunks.values()))
    planned = len(route_out.chunk_plan)

    assert planned < available
    assert planned <= route_out.budgets.max_extractions
    assert planned <= 12
    # Both fixture documents must be represented in the plan.
    for expected_id in ("doc_a", "doc_b"):
        assert any(entry.doc_id == expected_id for entry in route_out.chunk_plan)
def test_route_prefers_directly_named_document():
    """Check that a query naming ``doc_a`` yields a doc scope of only ``doc_a``."""
    query = "Summarize doc_a and its architecture contributions"
    fake_tools = _FakeTools()
    route_out = run_route(query, fake_tools, Tracer())

    scoped_ids = {entry.doc_id for entry in route_out.doc_scope}
    assert scoped_ids == {"doc_a"}
def test_route_respects_selected_doc_scope():
    """Check that ``selected_doc_ids=["doc_b"]`` confines scope and plan to ``doc_b``."""
    fake_tools = _FakeTools()
    route_out = run_route(
        "Compare methodology and results",
        fake_tools,
        Tracer(),
        selected_doc_ids=["doc_b"],
    )

    scoped_ids = [entry.doc_id for entry in route_out.doc_scope]
    assert scoped_ids == ["doc_b"]

    # Collect any planned chunk that escaped the selected document.
    strays = [entry for entry in route_out.chunk_plan if not entry.doc_id == "doc_b"]
    assert not strays
def test_route_detailed_mode_expands_chunk_budget():
    """Check that ``detail_level="detailed"`` plans at least as many chunks as the default.

    Both routes use the same query, tools, tracer and selected documents;
    only the ``detail_level`` argument differs.
    """
    query = "Compare methodology and results"
    both_docs = ("doc_a", "doc_b")
    fake_tools = _FakeTools()
    shared_tracer = Tracer()

    # Each call receives its own list, as in separate list literals.
    standard = run_route(query, fake_tools, shared_tracer, selected_doc_ids=list(both_docs))
    detailed = run_route(
        query,
        fake_tools,
        shared_tracer,
        selected_doc_ids=list(both_docs),
        detail_level="detailed",
    )

    standard_size = len(standard.chunk_plan)
    assert len(detailed.chunk_plan) >= standard_size