File size: 523 Bytes
0a62245
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from pluto.tools import strip_non_extractable_context


def test_strip_non_extractable_context_removes_extraction_metadata_prefixes():
    """Check that leading bracketed metadata lines are stripped from a chunk.

    The input consists of two bracketed header lines, a blank line, and one
    sentence of body text; only the body sentence is expected back.
    """
    body = "The Transformer is a model architecture based on attention mechanisms."
    metadata_lines = [
        "[Context | doc:attention | chunk:C0 | section:introduction]",
        "[Document context: Attention Is All You Need | Domain: ML]",
    ]
    # Header lines are newline-separated; a blank line sits between the last
    # header and the body text.
    chunk = "\n".join(metadata_lines) + "\n\n" + body

    assert strip_non_extractable_context(chunk) == body