from pluto.tools import strip_non_extractable_context


def test_strip_non_extractable_context_removes_extraction_metadata_prefixes():
    """Check that leading bracketed metadata lines are stripped from a chunk.

    The input chunk starts with a ``[Context | ...]`` line and a
    ``[Document context: ...]`` line, followed by a blank line and one
    sentence of body text. The function under test is expected to return
    exactly that sentence.
    """
    # Arrange: the body sentence is defined once so the input and the
    # expectation cannot drift apart.
    body_text = (
        "The Transformer is a model architecture based on attention mechanisms."
    )
    annotated_chunk = (
        "[Context | doc:attention | chunk:C0 | section:introduction]\n"
        "[Document context: Attention Is All You Need | Domain: ML]\n\n"
    ) + body_text

    # Act
    stripped = strip_non_extractable_context(annotated_chunk)

    # Assert: nothing but the body sentence survives.
    assert stripped == body_text