Delete claude_parser.py
Browse files- claude_parser.py +0 -222
claude_parser.py
DELETED
|
@@ -1,222 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import struct
|
| 3 |
-
import logging
|
| 4 |
-
from typing import Optional, Dict, Any, AsyncIterator
|
| 5 |
-
|
| 6 |
-
logger = logging.getLogger(__name__)
|
| 7 |
-
|
| 8 |
-
class EventStreamParser:
|
| 9 |
-
"""AWS Event Stream binary format parser (v2 style)."""
|
| 10 |
-
|
| 11 |
-
@staticmethod
|
| 12 |
-
def parse_headers(headers_data: bytes) -> Dict[str, str]:
|
| 13 |
-
"""Parse event stream headers."""
|
| 14 |
-
headers = {}
|
| 15 |
-
offset = 0
|
| 16 |
-
|
| 17 |
-
while offset < len(headers_data):
|
| 18 |
-
if offset >= len(headers_data):
|
| 19 |
-
break
|
| 20 |
-
name_length = headers_data[offset]
|
| 21 |
-
offset += 1
|
| 22 |
-
|
| 23 |
-
if offset + name_length > len(headers_data):
|
| 24 |
-
break
|
| 25 |
-
name = headers_data[offset:offset + name_length].decode('utf-8')
|
| 26 |
-
offset += name_length
|
| 27 |
-
|
| 28 |
-
if offset >= len(headers_data):
|
| 29 |
-
break
|
| 30 |
-
value_type = headers_data[offset]
|
| 31 |
-
offset += 1
|
| 32 |
-
|
| 33 |
-
if offset + 2 > len(headers_data):
|
| 34 |
-
break
|
| 35 |
-
value_length = struct.unpack('>H', headers_data[offset:offset + 2])[0]
|
| 36 |
-
offset += 2
|
| 37 |
-
|
| 38 |
-
if offset + value_length > len(headers_data):
|
| 39 |
-
break
|
| 40 |
-
|
| 41 |
-
if value_type == 7:
|
| 42 |
-
value = headers_data[offset:offset + value_length].decode('utf-8')
|
| 43 |
-
else:
|
| 44 |
-
value = headers_data[offset:offset + value_length]
|
| 45 |
-
|
| 46 |
-
offset += value_length
|
| 47 |
-
headers[name] = value
|
| 48 |
-
|
| 49 |
-
return headers
|
| 50 |
-
|
| 51 |
-
@staticmethod
|
| 52 |
-
def parse_message(data: bytes) -> Optional[Dict[str, Any]]:
|
| 53 |
-
"""Parse single Event Stream message."""
|
| 54 |
-
try:
|
| 55 |
-
if len(data) < 16:
|
| 56 |
-
return None
|
| 57 |
-
|
| 58 |
-
total_length = struct.unpack('>I', data[0:4])[0]
|
| 59 |
-
headers_length = struct.unpack('>I', data[4:8])[0]
|
| 60 |
-
|
| 61 |
-
if len(data) < total_length:
|
| 62 |
-
logger.warning(f"Incomplete message: expected {total_length} bytes, got {len(data)}")
|
| 63 |
-
return None
|
| 64 |
-
|
| 65 |
-
headers_data = data[12:12 + headers_length]
|
| 66 |
-
headers = EventStreamParser.parse_headers(headers_data)
|
| 67 |
-
|
| 68 |
-
payload_start = 12 + headers_length
|
| 69 |
-
payload_end = total_length - 4
|
| 70 |
-
payload_data = data[payload_start:payload_end]
|
| 71 |
-
|
| 72 |
-
payload = None
|
| 73 |
-
if payload_data:
|
| 74 |
-
try:
|
| 75 |
-
payload = json.loads(payload_data.decode('utf-8'))
|
| 76 |
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
| 77 |
-
payload = payload_data
|
| 78 |
-
|
| 79 |
-
return {
|
| 80 |
-
'headers': headers,
|
| 81 |
-
'payload': payload,
|
| 82 |
-
'total_length': total_length
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
except Exception as e:
|
| 86 |
-
logger.error(f"Failed to parse message: {e}", exc_info=True)
|
| 87 |
-
return None
|
| 88 |
-
|
| 89 |
-
@staticmethod
|
| 90 |
-
async def parse_stream(byte_stream: AsyncIterator[bytes]) -> AsyncIterator[Dict[str, Any]]:
|
| 91 |
-
"""Parse byte stream and extract events."""
|
| 92 |
-
buffer = bytearray()
|
| 93 |
-
|
| 94 |
-
async for chunk in byte_stream:
|
| 95 |
-
buffer.extend(chunk)
|
| 96 |
-
|
| 97 |
-
while len(buffer) >= 12:
|
| 98 |
-
try:
|
| 99 |
-
total_length = struct.unpack('>I', buffer[0:4])[0]
|
| 100 |
-
except struct.error:
|
| 101 |
-
break
|
| 102 |
-
|
| 103 |
-
if len(buffer) < total_length:
|
| 104 |
-
break
|
| 105 |
-
|
| 106 |
-
message_data = bytes(buffer[:total_length])
|
| 107 |
-
buffer = buffer[total_length:]
|
| 108 |
-
|
| 109 |
-
message = EventStreamParser.parse_message(message_data)
|
| 110 |
-
if message:
|
| 111 |
-
yield message
|
| 112 |
-
|
| 113 |
-
def extract_event_info(message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
| 114 |
-
"""Extract event information from parsed message."""
|
| 115 |
-
headers = message.get('headers', {})
|
| 116 |
-
payload = message.get('payload')
|
| 117 |
-
|
| 118 |
-
event_type = headers.get(':event-type') or headers.get('event-type')
|
| 119 |
-
content_type = headers.get(':content-type') or headers.get('content-type')
|
| 120 |
-
message_type = headers.get(':message-type') or headers.get('message-type')
|
| 121 |
-
|
| 122 |
-
return {
|
| 123 |
-
'event_type': event_type,
|
| 124 |
-
'content_type': content_type,
|
| 125 |
-
'message_type': message_type,
|
| 126 |
-
'payload': payload
|
| 127 |
-
}
|
| 128 |
-
|
| 129 |
-
def _sse_format(event_type: str, data: Dict[str, Any]) -> str:
|
| 130 |
-
"""Format SSE event."""
|
| 131 |
-
json_data = json.dumps(data, ensure_ascii=False)
|
| 132 |
-
return f"event: {event_type}\ndata: {json_data}\n\n"
|
| 133 |
-
|
| 134 |
-
def build_message_start(conversation_id: str, model: str = "claude-sonnet-4.5", input_tokens: int = 0) -> str:
|
| 135 |
-
"""Build message_start SSE event."""
|
| 136 |
-
data = {
|
| 137 |
-
"type": "message_start",
|
| 138 |
-
"message": {
|
| 139 |
-
"id": conversation_id,
|
| 140 |
-
"type": "message",
|
| 141 |
-
"role": "assistant",
|
| 142 |
-
"content": [],
|
| 143 |
-
"model": model,
|
| 144 |
-
"stop_reason": None,
|
| 145 |
-
"stop_sequence": None,
|
| 146 |
-
"usage": {"input_tokens": input_tokens, "output_tokens": 0}
|
| 147 |
-
}
|
| 148 |
-
}
|
| 149 |
-
return _sse_format("message_start", data)
|
| 150 |
-
|
| 151 |
-
def build_content_block_start(index: int, block_type: str = "text") -> str:
|
| 152 |
-
"""Build content_block_start SSE event."""
|
| 153 |
-
data = {
|
| 154 |
-
"type": "content_block_start",
|
| 155 |
-
"index": index,
|
| 156 |
-
"content_block": {"type": block_type, "text": ""} if block_type == "text" else {"type": block_type}
|
| 157 |
-
}
|
| 158 |
-
return _sse_format("content_block_start", data)
|
| 159 |
-
|
| 160 |
-
def build_content_block_delta(index: int, text: str) -> str:
|
| 161 |
-
"""Build content_block_delta SSE event (text)."""
|
| 162 |
-
data = {
|
| 163 |
-
"type": "content_block_delta",
|
| 164 |
-
"index": index,
|
| 165 |
-
"delta": {"type": "text_delta", "text": text}
|
| 166 |
-
}
|
| 167 |
-
return _sse_format("content_block_delta", data)
|
| 168 |
-
|
| 169 |
-
def build_content_block_stop(index: int) -> str:
|
| 170 |
-
"""Build content_block_stop SSE event."""
|
| 171 |
-
data = {
|
| 172 |
-
"type": "content_block_stop",
|
| 173 |
-
"index": index
|
| 174 |
-
}
|
| 175 |
-
return _sse_format("content_block_stop", data)
|
| 176 |
-
|
| 177 |
-
def build_ping() -> str:
|
| 178 |
-
"""Build ping SSE event."""
|
| 179 |
-
data = {"type": "ping"}
|
| 180 |
-
return _sse_format("ping", data)
|
| 181 |
-
|
| 182 |
-
def build_message_stop(input_tokens: int, output_tokens: int, stop_reason: Optional[str] = None) -> str:
|
| 183 |
-
"""Build message_delta and message_stop SSE events."""
|
| 184 |
-
delta_data = {
|
| 185 |
-
"type": "message_delta",
|
| 186 |
-
"delta": {"stop_reason": stop_reason or "end_turn", "stop_sequence": None},
|
| 187 |
-
"usage": {"output_tokens": output_tokens}
|
| 188 |
-
}
|
| 189 |
-
delta_event = _sse_format("message_delta", delta_data)
|
| 190 |
-
|
| 191 |
-
stop_data = {
|
| 192 |
-
"type": "message_stop"
|
| 193 |
-
}
|
| 194 |
-
stop_event = _sse_format("message_stop", stop_data)
|
| 195 |
-
|
| 196 |
-
return delta_event + stop_event
|
| 197 |
-
|
| 198 |
-
def build_tool_use_start(index: int, tool_use_id: str, tool_name: str) -> str:
|
| 199 |
-
"""Build tool_use content_block_start SSE event."""
|
| 200 |
-
data = {
|
| 201 |
-
"type": "content_block_start",
|
| 202 |
-
"index": index,
|
| 203 |
-
"content_block": {
|
| 204 |
-
"type": "tool_use",
|
| 205 |
-
"id": tool_use_id,
|
| 206 |
-
"name": tool_name,
|
| 207 |
-
"input": {}
|
| 208 |
-
}
|
| 209 |
-
}
|
| 210 |
-
return _sse_format("content_block_start", data)
|
| 211 |
-
|
| 212 |
-
def build_tool_use_input_delta(index: int, input_json_delta: str) -> str:
|
| 213 |
-
"""Build tool_use input_json_delta SSE event."""
|
| 214 |
-
data = {
|
| 215 |
-
"type": "content_block_delta",
|
| 216 |
-
"index": index,
|
| 217 |
-
"delta": {
|
| 218 |
-
"type": "input_json_delta",
|
| 219 |
-
"partial_json": input_json_delta
|
| 220 |
-
}
|
| 221 |
-
}
|
| 222 |
-
return _sse_format("content_block_delta", data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|