"""Tests for spooler's processor, redaction, noise-filter and store helpers."""

import json

from spooler.noise_filter import is_noise
from spooler.processor import _extract_text_from_content, _process_entry
from spooler.redaction import redact
from spooler.store import _derive_display_title


def test_extract_text_from_content():
    """Check text extraction from a plain string and from a list of blocks."""
    # String content is expected to come back unchanged.
    assert _extract_text_from_content("simple text") == "simple text"

    # List of blocks: text, thinking and resource payloads must appear in the
    # output, while the image block's payload must not.
    blocks = [
        {"type": "text", "text": "block 1"},
        {"type": "thinking", "thinking": "thinking text"},
        {"type": "resource", "resource": "res data"},
        {"type": "image", "source": "ignore this"},
    ]
    extracted = _extract_text_from_content(blocks)
    assert "block 1" in extracted
    assert "thinking text" in extracted
    assert "res data" in extracted
    assert "ignore this" not in extracted


def test_process_entry(sample_transcript_lines):
    """Check the rows built from a user message and from a tool result.

    ``sample_transcript_lines`` is supplied by the test harness (presumably a
    pytest fixture defined outside this file); each element is parsed here as
    one JSON document.
    """
    # Parse the user message
    user_entry = json.loads(sample_transcript_lines[1])
    row = _process_entry(user_entry, "sess_1", "agent_1", 1)
    assert row is not None
    assert row["role"] == "user"
    assert row["clean_text"] == "Hello, can you help me?"
    assert row["entry_idx"] == 1

    # Parse tool result
    tool_entry = json.loads(sample_transcript_lines[3])
    row2 = _process_entry(tool_entry, "sess_1", "agent_1", 3)
    assert row2 is not None
    assert row2["role"] == "toolResult"
    assert row2["tool_name"] == "read_file"
    assert "file contents" in row2["clean_text"]


def test_redaction():
    """Check that an inline API key is replaced by the redaction marker."""
    text = "Here is my key: api_key='sk-1234567890abcdef' inside a sentence."
    redacted = redact(text)
    assert "sk-1234567890abcdef" not in redacted
    assert "[REDACTED]" in redacted


def test_noise_filter():
    """Check which tool outputs are classified as noise."""
    # Plain truthiness asserts instead of ``== True`` / ``== False`` (PEP 8).
    assert is_noise("ENOENT: no such file")
    assert is_noise("no output")
    assert is_noise("command exited with code 1")
    assert not is_noise("Valid output from a tool")
    assert is_noise("x\n")  # Too short
    assert is_noise("")


def test_derive_display_title_prefers_conversation_text():
    """Check that the user's message, not the system entry, becomes the title."""
    activity = [
        {"role": "system", "clean_text": "ignore system scaffolding"},
        {
            "role": "user",
            "clean_text": (
                "Can you inspect the Hermes agent update and dashboard timeout?"
            ),
        },
    ]
    title = _derive_display_title(
        "548670c7-e187-4960-a714-1f8e70957060", activity
    )
    assert title == (
        "Can you inspect the Hermes agent update and dashboard timeout?"
    )


def test_derive_display_title_skips_scaffold_context():
    """Check that a scaffold-style user entry is skipped for the next entry."""
    activity = [
        {
            "role": "user",
            "clean_text": "Conversation info (untrusted metadata): long envelope",
        },
        {"role": "assistant", "clean_text": "Dashboard harvest is complete."},
    ]
    title = _derive_display_title(
        "548670c7-e187-4960-a714-1f8e70957060", activity
    )
    assert title == "Dashboard harvest is complete."