Spaces:
Running
Running
| """Tests for the parser module. | |
| Tests all parsing and analysis functions: | |
| - compute_hash: Content hashing | |
| - detect_waste_signals: Waste signal detection | |
| - is_rag_content: RAG content detection | |
| - parse_message_to_blocks: Single message parsing | |
| - parse_messages: Multi-message parsing | |
| - find_tool_units: Tool call/response pairing | |
| - get_message_content_text: Content extraction | |
| """ | |
| from unittest.mock import Mock | |
| import pytest | |
| from headroom.parser import ( | |
| compute_hash, | |
| detect_waste_signals, | |
| find_tool_units, | |
| get_message_content_text, | |
| is_rag_content, | |
| parse_message_to_blocks, | |
| parse_messages, | |
| ) | |
| # --- Fixtures --- | |
| def mock_tokenizer(): | |
| """Mock tokenizer that returns predictable token counts.""" | |
| tokenizer = Mock() | |
| # Simple mock: 1 token per 4 characters | |
| tokenizer.count_text = Mock(side_effect=lambda text: len(text) // 4 + 1) | |
| return tokenizer | |
| def system_message(): | |
| """Basic system message.""" | |
| return {"role": "system", "content": "You are a helpful assistant."} | |
| def user_message(): | |
| """Basic user message.""" | |
| return {"role": "user", "content": "Hello, how are you?"} | |
| def assistant_message(): | |
| """Basic assistant message.""" | |
| return {"role": "assistant", "content": "I'm doing well, thank you!"} | |
| def tool_call_message(): | |
| """Assistant message with tool calls.""" | |
| return { | |
| "role": "assistant", | |
| "content": None, | |
| "tool_calls": [ | |
| { | |
| "id": "call_abc123", | |
| "type": "function", | |
| "function": {"name": "search_user", "arguments": '{"user_id": "12345"}'}, | |
| } | |
| ], | |
| } | |
| def tool_result_message(): | |
| """Tool result message.""" | |
| return { | |
| "role": "tool", | |
| "tool_call_id": "call_abc123", | |
| "content": '{"id": "12345", "name": "Alice", "email": "alice@example.com"}', | |
| } | |
| def multimodal_message(): | |
| """User message with multimodal content (list format).""" | |
| return { | |
| "role": "user", | |
| "content": [ | |
| {"type": "text", "text": "Analyze this image:"}, | |
| {"type": "image", "source": {"type": "base64", "data": "..."}}, | |
| {"type": "text", "text": "What do you see?"}, | |
| ], | |
| } | |
| def rag_user_message(): | |
| """User message containing RAG content markers.""" | |
| return { | |
| "role": "user", | |
| "content": "[Document 1] Here is the relevant context from our knowledge base. [Source: docs/manual.md]", | |
| } | |
| def html_waste_text(): | |
| """Text containing HTML noise.""" | |
| return "<div class='container'><p>Hello</p><!-- comment --></div>" | |
| def base64_waste_text(): | |
| """Text containing base64 encoded data.""" | |
| return "Data: " + "A" * 60 + "==" | |
| def whitespace_waste_text(): | |
| """Text with excessive whitespace.""" | |
| return "Line 1\n\n\n\nLine 2 extra spaces" | |
| def json_bloat_text(): | |
| """Text containing large JSON block (>500 chars). | |
| Uses spaces and punctuation to avoid base64 pattern matching. | |
| """ | |
| # Use content that won't match base64 pattern (needs non-base64 chars) | |
| content = "This is a long text value. " * 25 # ~675 chars | |
| return '{"data": "' + content + '"}' | |
| # --- TestComputeHash --- | |
| class TestComputeHash: | |
| """Tests for compute_hash function.""" | |
| def test_consistent_hash(self): | |
| """Same text produces same hash.""" | |
| text = "Hello, world!" | |
| hash1 = compute_hash(text) | |
| hash2 = compute_hash(text) | |
| assert hash1 == hash2 | |
| def test_different_texts_different_hashes(self): | |
| """Different texts produce different hashes.""" | |
| hash1 = compute_hash("Hello") | |
| hash2 = compute_hash("World") | |
| assert hash1 != hash2 | |
| def test_hash_length_16(self): | |
| """Hash is truncated to 16 characters.""" | |
| text = "Any text content" | |
| hash_result = compute_hash(text) | |
| assert len(hash_result) == 16 | |
| def test_empty_string_hash(self): | |
| """Empty string produces valid hash.""" | |
| hash_result = compute_hash("") | |
| assert len(hash_result) == 16 | |
| assert hash_result.isalnum() | |
| def test_unicode_text_hash(self): | |
| """Unicode text produces valid hash.""" | |
| hash_result = compute_hash("Hello \\u4e16\\u754c") | |
| assert len(hash_result) == 16 | |
| # --- TestDetectWasteSignals --- | |
| class TestDetectWasteSignals: | |
| """Tests for detect_waste_signals function.""" | |
| def test_detect_html_tags(self, mock_tokenizer, html_waste_text): | |
| """Detects HTML tags as waste.""" | |
| signals = detect_waste_signals(html_waste_text, mock_tokenizer) | |
| assert signals.html_noise_tokens > 0 | |
| def test_detect_html_comments(self, mock_tokenizer): | |
| """Detects HTML comments as waste.""" | |
| text = "Some text <!-- this is a comment --> more text" | |
| signals = detect_waste_signals(text, mock_tokenizer) | |
| assert signals.html_noise_tokens > 0 | |
| def test_detect_base64(self, mock_tokenizer, base64_waste_text): | |
| """Detects base64 encoded content as waste.""" | |
| signals = detect_waste_signals(base64_waste_text, mock_tokenizer) | |
| assert signals.base64_tokens > 0 | |
| def test_detect_excessive_whitespace(self, mock_tokenizer, whitespace_waste_text): | |
| """Detects excessive whitespace as waste.""" | |
| signals = detect_waste_signals(whitespace_waste_text, mock_tokenizer) | |
| assert signals.whitespace_tokens >= 0 # May be 0 if normalized tokens <= matches | |
| def test_detect_json_bloat(self, mock_tokenizer, json_bloat_text): | |
| """Detects large JSON blocks as bloat.""" | |
| # Need to ensure the mock returns >500 tokens for JSON bloat | |
| # The JSON pattern requires the matched block to have >500 tokens | |
| mock_tokenizer.count_text = Mock(side_effect=lambda text: len(text)) | |
| signals = detect_waste_signals(json_bloat_text, mock_tokenizer) | |
| assert signals.json_bloat_tokens > 0 | |
| def test_empty_text_no_waste(self, mock_tokenizer): | |
| """Empty text returns zero waste signals.""" | |
| signals = detect_waste_signals("", mock_tokenizer) | |
| assert signals.total() == 0 | |
| def test_combined_waste_signals(self, mock_tokenizer): | |
| """Multiple waste types are detected together.""" | |
| text = "<div>Hello</div> " + "B" * 60 + "== and <!-- comment -->" | |
| signals = detect_waste_signals(text, mock_tokenizer) | |
| assert signals.html_noise_tokens > 0 | |
| assert signals.base64_tokens > 0 | |
| def test_clean_text_no_waste(self, mock_tokenizer): | |
| """Clean text produces minimal waste signals.""" | |
| text = "This is a normal sentence without any waste." | |
| signals = detect_waste_signals(text, mock_tokenizer) | |
| assert signals.html_noise_tokens == 0 | |
| assert signals.base64_tokens == 0 | |
| assert signals.json_bloat_tokens == 0 | |
| # --- TestIsRagContent --- | |
| class TestIsRagContent: | |
| """Tests for is_rag_content function.""" | |
| def test_document_markers(self): | |
| """Detects [Document N] markers.""" | |
| text = "[Document 1] This is the first document. [Document 2] Second document." | |
| assert is_rag_content(text) is True | |
| def test_source_markers(self): | |
| """Detects [Source: ...] markers.""" | |
| text = "[Source: knowledge_base/docs.md] Here is the information." | |
| assert is_rag_content(text) is True | |
| def test_context_tags(self): | |
| """Detects <context> and <document> tags.""" | |
| assert is_rag_content("<context>Retrieved content here</context>") is True | |
| assert is_rag_content("<document>Document content</document>") is True | |
| def test_retrieved_from_marker(self): | |
| """Detects 'Retrieved from:' marker.""" | |
| text = "Retrieved from: https://example.com/docs\nHere is the content." | |
| assert is_rag_content(text) is True | |
| def test_knowledge_base_marker(self): | |
| """Detects 'From the knowledge base:' marker.""" | |
| text = "From the knowledge base: This is relevant information." | |
| assert is_rag_content(text) is True | |
| def test_not_rag_content(self): | |
| """Regular text is not detected as RAG content.""" | |
| text = "Hello, how can I help you today?" | |
| assert is_rag_content(text) is False | |
| def test_case_insensitive(self): | |
| """RAG detection is case insensitive.""" | |
| assert is_rag_content("[DOCUMENT 1] Content") is True | |
| assert is_rag_content("retrieved FROM: somewhere") is True | |
| # --- TestParseMessageToBlocks --- | |
| class TestParseMessageToBlocks: | |
| """Tests for parse_message_to_blocks function.""" | |
| def test_system_message_block(self, mock_tokenizer, system_message): | |
| """System message creates system block.""" | |
| blocks = parse_message_to_blocks(system_message, 0, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "system" | |
| assert blocks[0].text == "You are a helpful assistant." | |
| assert blocks[0].source_index == 0 | |
| def test_user_message_block(self, mock_tokenizer, user_message): | |
| """User message creates user block.""" | |
| blocks = parse_message_to_blocks(user_message, 1, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "user" | |
| assert blocks[0].text == "Hello, how are you?" | |
| assert blocks[0].source_index == 1 | |
| def test_assistant_message_block(self, mock_tokenizer, assistant_message): | |
| """Assistant message creates assistant block.""" | |
| blocks = parse_message_to_blocks(assistant_message, 2, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "assistant" | |
| assert blocks[0].text == "I'm doing well, thank you!" | |
| def test_tool_result_block(self, mock_tokenizer, tool_result_message): | |
| """Tool result creates tool_result block with tool_call_id.""" | |
| blocks = parse_message_to_blocks(tool_result_message, 3, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "tool_result" | |
| assert blocks[0].flags.get("tool_call_id") == "call_abc123" | |
| def test_rag_detection_in_user_message(self, mock_tokenizer, rag_user_message): | |
| """User message with RAG markers creates rag block.""" | |
| blocks = parse_message_to_blocks(rag_user_message, 0, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "rag" | |
| def test_multimodal_content(self, mock_tokenizer, multimodal_message): | |
| """Multimodal content (list with text parts) is extracted.""" | |
| blocks = parse_message_to_blocks(multimodal_message, 0, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert "Analyze this image:" in blocks[0].text | |
| assert "What do you see?" in blocks[0].text | |
| def test_tool_calls_create_separate_blocks(self, mock_tokenizer, tool_call_message): | |
| """Tool calls create separate tool_call blocks.""" | |
| blocks = parse_message_to_blocks(tool_call_message, 0, mock_tokenizer) | |
| # Should have tool_call blocks (no content block since content is None) | |
| tool_call_blocks = [b for b in blocks if b.kind == "tool_call"] | |
| assert len(tool_call_blocks) == 1 | |
| assert tool_call_blocks[0].flags.get("tool_call_id") == "call_abc123" | |
| assert tool_call_blocks[0].flags.get("function_name") == "search_user" | |
| assert "search_user" in tool_call_blocks[0].text | |
| def test_empty_message_creates_block(self, mock_tokenizer): | |
| """Empty message (no content or tool_calls) creates minimal block.""" | |
| empty_msg = {"role": "assistant"} | |
| blocks = parse_message_to_blocks(empty_msg, 0, mock_tokenizer) | |
| assert len(blocks) == 1 | |
| assert blocks[0].kind == "unknown" | |
| assert blocks[0].text == "" | |
| def test_message_with_content_and_tool_calls(self, mock_tokenizer): | |
| """Message with both content and tool_calls creates multiple blocks.""" | |
| msg = { | |
| "role": "assistant", | |
| "content": "Let me search for that.", | |
| "tool_calls": [{"id": "call_xyz", "function": {"name": "search", "arguments": "{}"}}], | |
| } | |
| blocks = parse_message_to_blocks(msg, 0, mock_tokenizer) | |
| kinds = [b.kind for b in blocks] | |
| assert "assistant" in kinds | |
| assert "tool_call" in kinds | |
| def test_waste_signals_in_flags(self, mock_tokenizer, html_waste_text): | |
| """Waste signals are added to block flags.""" | |
| msg = {"role": "user", "content": html_waste_text} | |
| blocks = parse_message_to_blocks(msg, 0, mock_tokenizer) | |
| assert "waste_signals" in blocks[0].flags | |
| assert blocks[0].flags["waste_signals"]["html_noise"] > 0 | |
| def test_content_hash_generated(self, mock_tokenizer, user_message): | |
| """Content hash is generated for blocks.""" | |
| blocks = parse_message_to_blocks(user_message, 0, mock_tokenizer) | |
| assert len(blocks[0].content_hash) == 16 | |
| def test_tokens_estimated(self, mock_tokenizer, user_message): | |
| """Token count is estimated.""" | |
| blocks = parse_message_to_blocks(user_message, 0, mock_tokenizer) | |
| assert blocks[0].tokens_est > 0 | |
| # --- TestParseMessages --- | |
| class TestParseMessages: | |
| """Tests for parse_messages function.""" | |
| def test_parse_all_messages(self, mock_tokenizer, sample_messages): | |
| """All messages are parsed into blocks.""" | |
| blocks, breakdown, waste = parse_messages(sample_messages, mock_tokenizer) | |
| assert len(blocks) >= len(sample_messages) | |
| def test_block_breakdown(self, mock_tokenizer, sample_messages): | |
| """Block breakdown counts tokens per kind.""" | |
| blocks, breakdown, waste = parse_messages(sample_messages, mock_tokenizer) | |
| assert "system" in breakdown | |
| assert "user" in breakdown | |
| assert "assistant" in breakdown | |
| assert all(v > 0 for v in breakdown.values()) | |
| def test_waste_signals_accumulated(self, mock_tokenizer): | |
| """Waste signals are accumulated across messages.""" | |
| messages = [ | |
| {"role": "user", "content": "<div>HTML here</div>"}, | |
| {"role": "assistant", "content": "More <span>HTML</span>"}, | |
| ] | |
| blocks, breakdown, waste = parse_messages(messages, mock_tokenizer) | |
| assert waste.html_noise_tokens > 0 | |
| def test_empty_messages(self, mock_tokenizer): | |
| """Empty message list returns empty results.""" | |
| blocks, breakdown, waste = parse_messages([], mock_tokenizer) | |
| assert blocks == [] | |
| assert breakdown == {} | |
| assert waste.total() == 0 | |
| def test_multiple_tool_calls_parsed(self, mock_tokenizer, sample_messages_with_tools): | |
| """Messages with tool calls are parsed correctly.""" | |
| blocks, breakdown, waste = parse_messages(sample_messages_with_tools, mock_tokenizer) | |
| tool_call_blocks = [b for b in blocks if b.kind == "tool_call"] | |
| tool_result_blocks = [b for b in blocks if b.kind == "tool_result"] | |
| assert len(tool_call_blocks) >= 1 | |
| assert len(tool_result_blocks) >= 1 | |
| # --- TestFindToolUnits --- | |
| class TestFindToolUnits: | |
| """Tests for find_tool_units function.""" | |
| def test_finds_tool_call_and_responses(self, sample_messages_with_tools): | |
| """Finds matching tool call and response pairs.""" | |
| units = find_tool_units(sample_messages_with_tools) | |
| assert len(units) >= 1 | |
| # Each unit is (assistant_index, [tool_response_indices]) | |
| assistant_idx, response_indices = units[0] | |
| assert response_indices # Should have at least one response | |
| def test_multiple_tool_calls_same_assistant(self): | |
| """Multiple tool calls from same assistant are grouped.""" | |
| messages = [ | |
| {"role": "system", "content": "You are helpful."}, | |
| {"role": "user", "content": "Search both"}, | |
| { | |
| "role": "assistant", | |
| "content": None, | |
| "tool_calls": [ | |
| {"id": "call_1", "function": {"name": "search", "arguments": "{}"}}, | |
| {"id": "call_2", "function": {"name": "fetch", "arguments": "{}"}}, | |
| ], | |
| }, | |
| {"role": "tool", "tool_call_id": "call_1", "content": "result 1"}, | |
| {"role": "tool", "tool_call_id": "call_2", "content": "result 2"}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert len(units) == 1 | |
| assistant_idx, response_indices = units[0] | |
| assert len(response_indices) == 2 | |
| def test_no_tool_units(self): | |
| """Returns empty list when no tool calls present.""" | |
| messages = [ | |
| {"role": "system", "content": "Hello"}, | |
| {"role": "user", "content": "Hi"}, | |
| {"role": "assistant", "content": "Hello!"}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert units == [] | |
| def test_orphaned_tool_response(self): | |
| """Tool response without matching assistant is not included.""" | |
| messages = [ | |
| {"role": "system", "content": "Hello"}, | |
| {"role": "user", "content": "Hi"}, | |
| # Orphaned tool response - no assistant with tool_calls | |
| {"role": "tool", "tool_call_id": "orphan_call", "content": "orphaned"}, | |
| {"role": "assistant", "content": "I don't have tools."}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert units == [] | |
| def test_tool_response_order_sorted(self): | |
| """Tool response indices are sorted.""" | |
| messages = [ | |
| {"role": "user", "content": "Do two things"}, | |
| { | |
| "role": "assistant", | |
| "tool_calls": [ | |
| {"id": "call_a", "function": {"name": "first", "arguments": "{}"}}, | |
| {"id": "call_b", "function": {"name": "second", "arguments": "{}"}}, | |
| ], | |
| }, | |
| {"role": "tool", "tool_call_id": "call_b", "content": "second result"}, | |
| {"role": "tool", "tool_call_id": "call_a", "content": "first result"}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert len(units) == 1 | |
| _, response_indices = units[0] | |
| assert response_indices == sorted(response_indices) | |
| def test_anthropic_format_tool_use_and_result(self): | |
| """Finds Anthropic format tool_use/tool_result pairs in content blocks.""" | |
| messages = [ | |
| {"role": "system", "content": "You are helpful."}, | |
| {"role": "user", "content": "Take a screenshot"}, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| {"type": "text", "text": "Let me take a screenshot."}, | |
| { | |
| "type": "tool_use", | |
| "id": "toolu_123", | |
| "name": "browser_screenshot", | |
| "input": {}, | |
| }, | |
| ], | |
| }, | |
| { | |
| "role": "user", | |
| "content": [ | |
| { | |
| "type": "tool_result", | |
| "tool_use_id": "toolu_123", | |
| "content": "Screenshot taken successfully", | |
| } | |
| ], | |
| }, | |
| {"role": "user", "content": "Thanks!"}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert len(units) == 1 | |
| assistant_idx, response_indices = units[0] | |
| assert assistant_idx == 2 | |
| assert response_indices == [3] | |
| def test_anthropic_format_multiple_tool_uses(self): | |
| """Finds multiple Anthropic format tool_use blocks from same assistant.""" | |
| messages = [ | |
| {"role": "user", "content": "Do two things"}, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| {"type": "tool_use", "id": "toolu_a", "name": "first", "input": {}}, | |
| {"type": "tool_use", "id": "toolu_b", "name": "second", "input": {}}, | |
| ], | |
| }, | |
| { | |
| "role": "user", | |
| "content": [ | |
| {"type": "tool_result", "tool_use_id": "toolu_a", "content": "first done"}, | |
| {"type": "tool_result", "tool_use_id": "toolu_b", "content": "second done"}, | |
| ], | |
| }, | |
| ] | |
| units = find_tool_units(messages) | |
| assert len(units) == 1 | |
| assistant_idx, response_indices = units[0] | |
| assert assistant_idx == 1 | |
| assert response_indices == [2] | |
| def test_anthropic_format_orphaned_tool_result(self): | |
| """Anthropic tool_result without matching tool_use is not included.""" | |
| messages = [ | |
| {"role": "user", "content": "Hi"}, | |
| { | |
| "role": "user", | |
| "content": [ | |
| { | |
| "type": "tool_result", | |
| "tool_use_id": "orphan_toolu", | |
| "content": "orphaned result", | |
| } | |
| ], | |
| }, | |
| {"role": "assistant", "content": "Hello!"}, | |
| ] | |
| units = find_tool_units(messages) | |
| assert units == [] | |
| def test_mixed_openai_and_anthropic_formats(self): | |
| """Both OpenAI and Anthropic formats can coexist (edge case).""" | |
| messages = [ | |
| {"role": "user", "content": "Do things"}, | |
| # OpenAI format | |
| { | |
| "role": "assistant", | |
| "tool_calls": [ | |
| {"id": "call_1", "function": {"name": "openai_tool", "arguments": "{}"}} | |
| ], | |
| }, | |
| {"role": "tool", "tool_call_id": "call_1", "content": "openai result"}, | |
| # Anthropic format | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| {"type": "tool_use", "id": "toolu_2", "name": "anthropic_tool", "input": {}} | |
| ], | |
| }, | |
| { | |
| "role": "user", | |
| "content": [ | |
| {"type": "tool_result", "tool_use_id": "toolu_2", "content": "anthropic result"} | |
| ], | |
| }, | |
| ] | |
| units = find_tool_units(messages) | |
| assert len(units) == 2 | |
| # First unit: OpenAI format (assistant at 1, tool response at 2) | |
| assert units[0] == (1, [2]) | |
| # Second unit: Anthropic format (assistant at 3, user with tool_result at 4) | |
| assert units[1] == (3, [4]) | |
| # --- TestGetMessageContentText --- | |
| class TestGetMessageContentText: | |
| """Tests for get_message_content_text function.""" | |
| def test_string_content(self): | |
| """Extracts string content directly.""" | |
| msg = {"role": "user", "content": "Hello, world!"} | |
| text = get_message_content_text(msg) | |
| assert text == "Hello, world!" | |
| def test_list_content(self): | |
| """Extracts text from list content (multimodal).""" | |
| msg = { | |
| "role": "user", | |
| "content": [ | |
| {"type": "text", "text": "First part"}, | |
| {"type": "image", "source": {}}, | |
| {"type": "text", "text": "Second part"}, | |
| ], | |
| } | |
| text = get_message_content_text(msg) | |
| assert "First part" in text | |
| assert "Second part" in text | |
| def test_none_content(self): | |
| """Returns empty string for None content.""" | |
| msg = {"role": "assistant", "content": None} | |
| text = get_message_content_text(msg) | |
| assert text == "" | |
| def test_mixed_content_list(self): | |
| """Handles list with both dict and string items.""" | |
| msg = { | |
| "role": "user", | |
| "content": [ | |
| {"type": "text", "text": "Dict text"}, | |
| "Plain string", | |
| ], | |
| } | |
| text = get_message_content_text(msg) | |
| assert "Dict text" in text | |
| assert "Plain string" in text | |
| def test_missing_content_key(self): | |
| """Returns empty string when content key is missing.""" | |
| msg = {"role": "user"} | |
| text = get_message_content_text(msg) | |
| assert text == "" | |
| def test_non_text_type_skipped(self): | |
| """Non-text types in list are skipped.""" | |
| msg = { | |
| "role": "user", | |
| "content": [ | |
| {"type": "image", "data": "..."}, | |
| {"type": "text", "text": "Only this"}, | |
| ], | |
| } | |
| text = get_message_content_text(msg) | |
| assert text == "Only this" | |
| def test_empty_list_content(self): | |
| """Empty list content returns empty string.""" | |
| msg = {"role": "user", "content": []} | |
| text = get_message_content_text(msg) | |
| assert text == "" | |
| # --- Additional fixtures for complex tests --- | |
| def sample_messages(): | |
| """Basic conversation messages.""" | |
| return [ | |
| {"role": "system", "content": "You are a helpful assistant."}, | |
| {"role": "user", "content": "Hello, how are you?"}, | |
| {"role": "assistant", "content": "I'm doing well, thank you!"}, | |
| ] | |
| def sample_messages_with_tools(): | |
| """Conversation with tool calls and responses.""" | |
| return [ | |
| {"role": "system", "content": "You are a helpful assistant with tools."}, | |
| {"role": "user", "content": "Search for user 12345"}, | |
| { | |
| "role": "assistant", | |
| "content": None, | |
| "tool_calls": [ | |
| { | |
| "id": "call_123", | |
| "type": "function", | |
| "function": {"name": "search_user", "arguments": '{"user_id": "12345"}'}, | |
| } | |
| ], | |
| }, | |
| { | |
| "role": "tool", | |
| "tool_call_id": "call_123", | |
| "content": '{"id": "12345", "name": "Alice", "email": "alice@example.com"}', | |
| }, | |
| {"role": "assistant", "content": "I found user Alice with ID 12345."}, | |
| ] | |