File size: 3,821 Bytes
f9416a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
"""Basic usage example for Harmony Inspector.

This example demonstrates how to use the HarmonyInspector class to inspect
harmony-formatted conversations.
"""

from harmony_inspector import HarmonyInspector, SpanType


def main():
    """Walk through six HarmonyInspector demonstrations, printing to stdout.

    The demos run in order: a simple user/assistant exchange, the spans of
    that same result, a function-call message, a message with no closing
    token, inspection starting from token IDs, and the per-token parser
    states of a minimal message.
    """

    def banner(title):
        # Every demo opens with the same 60-character ruled heading.
        rule = "=" * 60
        print(rule)
        print(title)
        print(rule)

    # One inspector is shared by all of the demos below.
    harmony = HarmonyInspector()

    # Example 1: Inspect a simple conversation
    banner("Example 1: Simple user-assistant conversation")

    conversation = "<|start|>user<|message|>Hello<|end|><|start|>assistant<|channel|>final<|message|>Hi there!<|end|>"
    first = harmony.inspect_text(conversation)

    print(f"Total tokens: {len(first.tokens)}")
    print(f"Messages found: {len(first.messages)}")
    print(f"Is complete: {first.is_complete}")
    print(f"Errors: {len(first.errors)}")
    print()

    for number, message in enumerate(first.messages, start=1):
        print(f"Message {number}:")
        print(f"  Role: {message.role.value}")
        print(f"  Content: {message.content!r}")
        if message.channel:
            print(f"  Channel: {message.channel}")
        print()

    # Example 2: Inspect with spans (reuses the result from Example 1)
    banner("Example 2: Examining spans")

    for piece in first.spans:
        kind = piece.span_type
        # Only three span types are reported; any other type is skipped.
        if kind == SpanType.SPECIAL_TOKEN:
            print(f"Special token: {piece.text} (ID: {piece.metadata.get('token_id')})")
        elif kind == SpanType.ROLE:
            print(f"Role span: {piece.text!r}")
        elif kind == SpanType.MESSAGE_CONTENT:
            print(f"Content span: {piece.text!r}")
    print()

    # Example 3: Inspect with function call
    banner("Example 3: Function call example")

    call_parts = (
        "<|start|>user<|message|>What is the weather?<|end|>",
        "<|start|>assistant to=functions.get_weather<|constrain|>json<|message|>",
        '{"location": "San Francisco"}<|call|>',
    )
    with_call = harmony.inspect_text("".join(call_parts))

    print(f"Messages found: {len(with_call.messages)}")
    for number, message in enumerate(with_call.messages, start=1):
        print(f"Message {number}:")
        print(f"  Role: {message.role.value}")
        print(f"  Content: {message.content!r}")
        if message.recipient:
            print(f"  Recipient: {message.recipient}")
        if message.content_type:
            print(f"  Content-Type: {message.content_type}")
        print()

    # Example 4: Incomplete message detection (the text has no closing token)
    banner("Example 4: Incomplete message")

    unfinished = harmony.inspect_text(
        "<|start|>assistant<|channel|>analysis<|message|>Let me think..."
    )

    print(f"Is complete: {unfinished.is_complete}")
    print(f"Errors/warnings: {len(unfinished.errors)}")
    for problem in unfinished.errors:
        print(f"  [{problem.code.value}] {problem.message}")
    print()

    # Example 5: Working with token IDs directly
    banner("Example 5: Inspect token IDs")

    # Feed the tokens produced in Example 1 back in through the token entry point.
    from_tokens = harmony.inspect_tokens(first.tokens)
    leading_ids = from_tokens.tokens[:10]

    print(f"Token IDs: {leading_ids}... (total: {len(from_tokens.tokens)})")
    print(f"Decoded text: {from_tokens.text[:50]}...")
    print()

    # Example 6: Parser state debugging
    banner("Example 6: Parser state transitions")

    traced = harmony.inspect_text("<|start|>user<|message|>Hi<|end|>")

    print("Token-by-token parser states:")
    for step in traced.parser_states:
        # A falsy current_role is shown as "-" so the column never renders empty.
        role_label = step.current_role or '-'
        row = (
            f"  [{step.token_index}] {step.token_text!r:15} "
            f"state={step.state.value:12} "
            f"role={role_label:9} "
            f"msgs={step.message_count}"
        )
        print(row)


# Run the demos only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()