File size: 2,809 Bytes
63c75d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
from spooler.processor import _extract_text_from_content, _process_entry
from spooler.redaction import redact
from spooler.noise_filter import is_noise
from spooler.store import _derive_display_title

def test_extract_text_from_content():
    """Exercise ``_extract_text_from_content`` on a string and on a block list.

    A plain string is expected to come back unchanged. For a list of typed
    blocks, the text, thinking and resource payloads must show up in the
    result, while the image block's ``source`` value must not.
    """
    # Case 1: plain string content.
    assert _extract_text_from_content("simple text") == "simple text"

    # Case 2: heterogeneous list of content blocks.
    mixed_blocks = [
        {"type": "text", "text": "block 1"},
        {"type": "thinking", "thinking": "thinking text"},
        {"type": "resource", "resource": "res data"},
        {"type": "image", "source": "ignore this"},
    ]
    flattened = _extract_text_from_content(mixed_blocks)

    for expected_fragment in ("block 1", "thinking text", "res data"):
        assert expected_fragment in flattened
    assert "ignore this" not in flattened

def test_process_entry(sample_transcript_lines):
    """Check the rows ``_process_entry`` builds for a user and a tool entry.

    ``sample_transcript_lines`` is indexed as a sequence of JSON strings;
    presumably it is a pytest fixture defined elsewhere (e.g. conftest) in
    which index 1 holds a user message and index 3 a tool result.
    """
    session_id, agent_id = "sess_1", "agent_1"

    # User message entry.
    user_idx = 1
    user_row = _process_entry(
        json.loads(sample_transcript_lines[user_idx]),
        session_id,
        agent_id,
        user_idx,
    )
    assert user_row is not None
    assert user_row["role"] == "user"
    assert user_row["clean_text"] == "Hello, can you help me?"
    assert user_row["entry_idx"] == 1

    # Tool result entry.
    tool_idx = 3
    tool_row = _process_entry(
        json.loads(sample_transcript_lines[tool_idx]),
        session_id,
        agent_id,
        tool_idx,
    )
    assert tool_row is not None
    assert tool_row["role"] == "toolResult"
    assert tool_row["tool_name"] == "read_file"
    assert "file contents" in tool_row["clean_text"]

def test_redaction():
    """Check that ``redact`` removes an inline API key and leaves a marker."""
    raw_message = "Here is my key: api_key='sk-1234567890abcdef' inside a sentence."
    scrubbed = redact(raw_message)

    # The secret itself must be gone, and the placeholder must be present.
    assert "sk-1234567890abcdef" not in scrubbed
    assert "[REDACTED]" in scrubbed
    
def test_noise_filter():
    """Check which tool outputs ``is_noise`` classifies as noise.

    Error banners, "no output" markers, exit-code lines, very short text and
    the empty string are expected to be flagged; an ordinary sentence of tool
    output is not.

    The assertions rely on truthiness instead of comparing against ``True``
    or ``False`` with ``==``, which PEP 8 discourages (pycodestyle E712).
    """
    assert is_noise("ENOENT: no such file")
    assert is_noise("no output")
    assert is_noise("command exited with code 1")
    assert not is_noise("Valid output from a tool")
    assert is_noise("x\n")  # Too short
    assert is_noise("")


def test_derive_display_title_prefers_conversation_text():
    """Expect the title to be the user's message, not the system entry.

    The activity list holds a system entry followed by a user entry; the
    derived title must equal the user entry's ``clean_text``.
    """
    session_id = "548670c7-e187-4960-a714-1f8e70957060"
    expected_title = "Can you inspect the Hermes agent update and dashboard timeout?"
    activity = [
        {"role": "system", "clean_text": "ignore system scaffolding"},
        {"role": "user", "clean_text": "Can you inspect the Hermes agent update and dashboard timeout?"},
    ]

    derived_title = _derive_display_title(session_id, activity)

    assert derived_title == expected_title


def test_derive_display_title_skips_scaffold_context():
    """Expect the title to skip a metadata-envelope user entry.

    The first entry is a user message beginning with "Conversation info
    (untrusted metadata):"; the derived title must instead equal the
    following assistant entry's ``clean_text``.
    """
    session_id = "548670c7-e187-4960-a714-1f8e70957060"
    expected_title = "Dashboard harvest is complete."
    activity = [
        {"role": "user", "clean_text": "Conversation info (untrusted metadata): long envelope"},
        {"role": "assistant", "clean_text": "Dashboard harvest is complete."},
    ]

    derived_title = _derive_display_title(session_id, activity)

    assert derived_title == expected_title