| import json |
| from spooler.processor import _extract_text_from_content, _process_entry |
| from spooler.redaction import redact |
| from spooler.noise_filter import is_noise |
| from spooler.store import _derive_display_title |
|
|
def test_extract_text_from_content():
    """Exercise ``_extract_text_from_content`` on a string and on a block list.

    A bare string is expected back unchanged.  For a list of typed blocks,
    the payloads of the ``text``, ``thinking`` and ``resource`` blocks must
    show up in the result, while the ``image`` block's ``source`` must not.
    """
    # Plain string content.
    assert _extract_text_from_content("simple text") == "simple text"

    # List of typed content blocks.
    content_blocks = [
        {"type": "text", "text": "block 1"},
        {"type": "thinking", "thinking": "thinking text"},
        {"type": "resource", "resource": "res data"},
        {"type": "image", "source": "ignore this"},
    ]
    combined = _extract_text_from_content(content_blocks)

    for fragment in ("block 1", "thinking text", "res data"):
        assert fragment in combined
    assert "ignore this" not in combined
|
|
def test_process_entry(sample_transcript_lines):
    """Run ``_process_entry`` on a user entry and on a tool-result entry.

    Args:
        sample_transcript_lines: indexable collection of JSON-encoded
            transcript lines (presumably a pytest fixture defined outside
            this file -- confirm in conftest).  This test relies on index 1
            being a user message and index 3 being a ``read_file`` tool
            result.
    """
    # User message: role, cleaned text and entry index are carried through.
    parsed_user = json.loads(sample_transcript_lines[1])
    user_row = _process_entry(parsed_user, "sess_1", "agent_1", 1)
    assert user_row is not None
    assert user_row["role"] == "user"
    assert user_row["clean_text"] == "Hello, can you help me?"
    assert user_row["entry_idx"] == 1

    # Tool result: the tool name is exposed and the output text is kept.
    parsed_tool = json.loads(sample_transcript_lines[3])
    tool_row = _process_entry(parsed_tool, "sess_1", "agent_1", 3)
    assert tool_row is not None
    assert tool_row["role"] == "toolResult"
    assert tool_row["tool_name"] == "read_file"
    assert "file contents" in tool_row["clean_text"]
|
|
def test_redaction():
    """Check that ``redact`` removes an inline API key and leaves a marker."""
    secret = "sk-1234567890abcdef"
    scrubbed = redact(
        "Here is my key: api_key='sk-1234567890abcdef' inside a sentence."
    )

    # The raw key must be gone and the placeholder must be present.
    assert secret not in scrubbed
    assert "[REDACTED]" in scrubbed
| |
def test_noise_filter():
    """Check which strings ``is_noise`` classifies as noise.

    The error/placeholder strings, a one-character line and the empty string
    are expected to be flagged; an ordinary sentence of tool output is not.

    Assertions use plain truthiness rather than ``== True`` / ``== False``
    (PEP 8), and each carries a message naming the sample that failed.
    """
    noisy_samples = (
        "ENOENT: no such file",
        "no output",
        "command exited with code 1",
        "x\n",
        "",
    )
    for sample in noisy_samples:
        assert is_noise(sample), f"expected noise: {sample!r}"

    meaningful = "Valid output from a tool"
    assert not is_noise(meaningful), f"expected non-noise: {meaningful!r}"
|
|
|
|
def test_derive_display_title_prefers_conversation_text():
    """The title should come from the user's text, not the system row.

    The first argument is a UUID-shaped identifier (presumably the session
    id -- confirm against ``_derive_display_title``); the expected title is
    the ``clean_text`` of the user row that follows a system row.
    """
    ident = "548670c7-e187-4960-a714-1f8e70957060"
    expected_title = "Can you inspect the Hermes agent update and dashboard timeout?"
    rows = [
        {"role": "system", "clean_text": "ignore system scaffolding"},
        {"role": "user", "clean_text": "Can you inspect the Hermes agent update and dashboard timeout?"},
    ]

    assert _derive_display_title(ident, rows) == expected_title
|
|
|
|
def test_derive_display_title_skips_scaffold_context():
    """A metadata-envelope user row should not become the title.

    The user row here starts with "Conversation info (untrusted metadata):";
    the expected title is the assistant row's ``clean_text`` instead.
    """
    ident = "548670c7-e187-4960-a714-1f8e70957060"
    expected_title = "Dashboard harvest is complete."
    rows = [
        {"role": "user", "clean_text": "Conversation info (untrusted metadata): long envelope"},
        {"role": "assistant", "clean_text": "Dashboard harvest is complete."},
    ]

    assert _derive_display_title(ident, rows) == expected_title
|
|