Update app.py
app.py
CHANGED
@@ -37,10 +37,11 @@ class ContentPart(BaseModel):
     type: str = "text"
     text: str
 
+
 class ChatMessage(BaseModel):
     role: str
     content: Union[str, List[ContentPart]]
-
+
     def get_content_text(self) -> str:
         """Extract text content from either string or content parts"""
         if isinstance(self.content, str):
@@ -57,15 +58,18 @@ class ChatMessage(BaseModel):
             return "".join(text_parts)
         return str(self.content)
 
+
 # Anthropic Claude format models
 class AnthropicContentBlock(BaseModel):
     type: str = "text"
     text: str
 
+
 class AnthropicMessage(BaseModel):
     role: str  # "user" or "assistant"
     content: Union[str, List[AnthropicContentBlock]]
 
+
 class AnthropicMessagesRequest(BaseModel):
     model: str
     max_tokens: int
@@ -74,6 +78,7 @@ class AnthropicMessagesRequest(BaseModel):
     temperature: Optional[float] = 0.7
     stream: Optional[bool] = False
 
+
 class AnthropicMessagesResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"msg_{uuid.uuid4()}")
     type: str = "message"
@@ -84,6 +89,7 @@ class AnthropicMessagesResponse(BaseModel):
     stop_sequence: Optional[str] = None
     usage: Dict[str, int]
 
+
 class AnthropicStreamResponse(BaseModel):
     type: str
     index: Optional[int] = None
@@ -92,6 +98,7 @@ class AnthropicStreamResponse(BaseModel):
     message: Optional[Dict[str, Any]] = None
     usage: Optional[Dict[str, int]] = None
 
+
 class ChatCompletionRequest(BaseModel):
     model: str
     messages: List[ChatMessage]
@@ -99,6 +106,7 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = 4000
     stream: Optional[bool] = False
 
+
 class ChatCompletionResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
     object: str = "chat.completion"
@@ -107,6 +115,7 @@ class ChatCompletionResponse(BaseModel):
     choices: List[Dict[str, Any]]
     usage: Dict[str, int]
 
+
 class ChatCompletionStreamResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
     object: str = "chat.completion.chunk"
@@ -114,6 +123,7 @@ class ChatCompletionStreamResponse(BaseModel):
     model: str
     choices: List[Dict[str, Any]]
 
+
 # Token management
 class TokenManager:
     def __init__(self):
@@ -124,7 +134,7 @@ class TokenManager:
     async def refresh_tokens(self):
         if not self.refresh_token:
             return None
-
+
         try:
             async with httpx.AsyncClient() as client:
                 response = await client.post(
@@ -133,7 +143,7 @@ class TokenManager:
                     timeout=30
                 )
                 response.raise_for_status()
-
+
                 data = response.json()
                 self.access_token = data.get("accessToken")
                 return self.access_token
@@ -144,63 +154,78 @@ class TokenManager:
     def get_token(self):
         return self.access_token
 
+
 token_manager = TokenManager()
 
-
-
+
+# Build CodeWhisperer request
+def build_codewhisperer_request(messages: List[ChatMessage]):
+    conversation_id = str(uuid.uuid4())
+
     # Extract system prompt and user messages
     system_prompt = ""
-
-
+    user_messages = []
+
     for msg in messages:
         if msg.role == "system":
             system_prompt = msg.get_content_text()
         else:
-
-
-
-            })
-
-    if not chat_messages:
+            user_messages.append(msg)
+
+    if not user_messages:
         raise HTTPException(status_code=400, detail="No user messages found")
-
-    # Build
-
-
-
-
-
-
+
+    # Build history
+    history = []
+    for i in range(0, len(user_messages) - 1, 2):
+        if i + 1 < len(user_messages):
+            history.append({
+                "userInputMessage": {
+                    "content": user_messages[i].get_content_text(),
+                    "modelId": CODEWHISPERER_MODEL,
+                    "origin": "AI_EDITOR"
+                }
+            })
+            history.append({
+                "assistantResponseMessage": {
+                    "content": user_messages[i + 1].get_content_text(),
+                    "toolUses": []
+                }
+            })
+
+    # Build current message
+    current_message = user_messages[-1]
+    content = current_message.get_content_text()
     if system_prompt:
-
-
-    # Wrap in Kiro's expected format
+        content = f"{system_prompt}\n\n{content}"
+
     return {
         "profileArn": PROFILE_ARN,
         "conversationState": {
             "chatTriggerType": "MANUAL",
-            "conversationId":
+            "conversationId": conversation_id,
             "currentMessage": {
                 "userInputMessage": {
-                    "content":
+                    "content": content,
                     "modelId": CODEWHISPERER_MODEL,
                     "origin": "AI_EDITOR",
                     "userInputMessageContext": {}
                 }
            },
-            "history":
+            "history": history
        }
    }
 
+
 # Convert Anthropic messages to internal ChatMessage format
 def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
     """Convert Anthropic messages format to internal ChatMessage format"""
     chat_messages = []
-
+
     # Add system message if present
     if anthropic_request.system:
         chat_messages.append(ChatMessage(role="system", content=anthropic_request.system))
-
+
     # Convert Anthropic messages
     for msg in anthropic_request.messages:
         if isinstance(msg.content, str):
@@ -212,11 +237,12 @@ def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
                 if block.type == "text":
                     text_parts.append(block.text)
             content = "".join(text_parts)
-
+
             chat_messages.append(ChatMessage(role=msg.role, content=content))
-
+
     return chat_messages
 
+
 # AWS Event Stream Parser
 class AWSStreamParser:
     @staticmethod
@@ -233,12 +259,12 @@ class AWSStreamParser:
                 raw_str = raw_data.decode('utf-8', errors='ignore')
             else:
                 raw_str = str(raw_data)
-
+
             # Look for JSON content in the response
             # AWS event stream contains binary headers followed by JSON payloads
             json_pattern = r'\{[^{}]*"content"[^{}]*\}'
             matches = re.findall(json_pattern, raw_str, re.DOTALL)
-
+
             if matches:
                 content_parts = []
                 for match in matches:
@@ -250,7 +276,7 @@ class AWSStreamParser:
                         continue
                 if content_parts:
                     return {"content": ''.join(content_parts)}
-
+
             # Try to extract from AWS event stream format
             # Look for :content-type and extract JSON after headers
             content_type_pattern = r':content-type[^:]*:[^:]*:[^:]*:(\{.*\})'
@@ -263,7 +289,7 @@ class AWSStreamParser:
                         return {"content": data['content']}
                 except:
                     continue
-
+
             # Try to extract any JSON objects
             json_objects = re.findall(r'\{[^{}]*\}', raw_str)
             for obj in json_objects:
@@ -273,37 +299,37 @@ class AWSStreamParser:
                         return {"content": data['content']}
                 except:
                     continue
-
+
             # Final fallback: extract readable text
             readable_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', raw_str)
             readable_text = re.sub(r':event-type[^:]*:[^:]*:[^:]*:', '', readable_text)
-
+
             # Look for Chinese characters or meaningful content
             chinese_pattern = r'[\u4e00-\u9fff]+'
             chinese_matches = re.findall(chinese_pattern, raw_str)
             if chinese_matches:
                 return {"content": ''.join(chinese_matches)}
-
+
             return {"content": readable_text.strip() or "No content found in response"}
-
+
         except Exception as e:
             return {"content": f"Error parsing response: {str(e)}"}
 
 
-# Make API call to Kiro
-async def call_kiro_api(messages: List[ChatMessage],
+# Make API call to Kiro/CodeWhisperer
+async def call_kiro_api(messages: List[ChatMessage], stream: bool = False):
     token = token_manager.get_token()
     if not token:
         raise HTTPException(status_code=401, detail="No access token available")
-
-    request_data =
-
+
+    request_data = build_codewhisperer_request(messages)
+
     headers = {
         "Authorization": f"Bearer {token}",
         "Content-Type": "application/json",
         "Accept": "text/event-stream" if stream else "application/json"
     }
-
+
     try:
         async with httpx.AsyncClient() as client:
             response = await client.post(
@@ -312,7 +338,7 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
                 json=request_data,
                 timeout=120
             )
-
+
             if response.status_code == 403:
                 # Try to refresh token
                 new_token = await token_manager.refresh_tokens()
@@ -324,15 +350,16 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
                     json=request_data,
                     timeout=120
                 )
-
+
             response.raise_for_status()
             return response
-
+
     except Exception as e:
         import traceback
-        print(f"
+        print(f"API call failed: {str(e)}")
         print(traceback.format_exc())
-        raise HTTPException(status_code=503, detail=f"
+        raise HTTPException(status_code=503, detail=f"API call failed: {str(e)}")
+
 
 # API endpoints
 @app.get("/v1/models")
@@ -349,125 +376,92 @@ async def list_models():
         ]
     }
 
+
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest):
     if request.model != MODEL_NAME:
         raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
-
+
     if request.stream:
         return await create_streaming_response(request)
     else:
         return await create_non_streaming_response(request)
 
+
 async def create_non_streaming_response(request: ChatCompletionRequest):
-    response = await call_kiro_api(request.messages,
-    return await
+    response = await call_kiro_api(request.messages, stream=False)
+    return await create_conversion_response(response)
 
-
-async def
-    """
+
+async def create_conversion_response(response):
+    """Convert AWS event stream to OpenAI format"""
     try:
         print(f"Response status: {response.status_code}")
-
-
-
-
-
-
+        print(f"Response headers: {dict(response.headers)}")
+
+        # Get response content as bytes to handle binary data
+        response_bytes = response.content
+        print(f"Response content type: {type(response_bytes)}")
+        print(f"Response content length: {len(response_bytes)}")
+
+        # Try to parse as JSON first
         try:
             response_data = response.json()
             print(f"Successfully parsed JSON response")
-
-            # Check if response contains Bedrock-style content
             if isinstance(response_data, dict) and 'content' in response_data:
-
-
-                # If content is a list of content blocks (Bedrock style)
-                if isinstance(content, list) and len(content) > 0:
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "text":
-                            response_text += block.get("text", "")
-                elif isinstance(content, str):
-                    # Try to parse as JSON if it's a string
-                    try:
-                        bedrock_data = json.loads(content)
-                        if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
-                            bedrock_content = bedrock_data['content']
-                            if isinstance(bedrock_content, list):
-                                for block in bedrock_content:
-                                    if isinstance(block, dict) and block.get("type") == "text":
-                                        response_text += block.get("text", "")
-                            else:
-                                response_text = str(bedrock_content)
-
-                        # Extract usage if available
-                        if 'usage' in bedrock_data:
-                            usage = bedrock_data['usage']
-                        else:
-                            response_text = content
-                    except:
-                        response_text = content
-                else:
-                    response_text = str(content)
+                response_text = response_data['content']
             else:
                 response_text = str(response_data)
-
         except Exception as e:
             print(f"JSON parsing failed: {e}")
-            #
-
-
+            # Handle event stream format using AWS parser
+            parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
+            response_text = parsed_data.get('content', "")
+            print(f"Parsed content length: {len(response_text)}")
+
+            if not response_text or response_text == "No content found in response":
+                # Last resort: try to decode as text
+                try:
+                    response_text = response_bytes.decode('utf-8', errors='ignore')
+                    print(f"Fallback text decode length: {len(response_text)}")
+                except Exception as decode_error:
+                    response_text = f"Unable to decode response: {str(decode_error)}"
+
         print(f"Final response text: {response_text[:200]}...")
-
-        return ChatCompletionResponse(
-            model=MODEL_NAME,
-            choices=[{
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": response_text
-                },
-                "finish_reason": "stop"
-            }],
-            usage={
-                "prompt_tokens": usage.get("input_tokens", 0),
-                "completion_tokens": usage.get("output_tokens", 0),
-                "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
-            }
-        )
-
+
     except Exception as e:
-        print(f"Error in
+        print(f"Error in conversion: {e}")
         import traceback
         traceback.print_exc()
         response_text = f"Error processing response: {str(e)}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    return ChatCompletionResponse(
+        model=MODEL_NAME,
+        choices=[{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": response_text
+            },
+            "finish_reason": "stop"
+        }],
+        usage={
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0
+        }
+    )
 
 
 async def create_streaming_response(request: ChatCompletionRequest):
-    response = await call_kiro_api(request.messages,
-    return await
+    response = await call_kiro_api(request.messages, stream=True)
+    return await create_streaming_conversion_response(response)
+
+
+async def create_streaming_conversion_response(response):
+    """Convert AWS event stream to OpenAI streaming format"""
+    print(f"Starting streaming response, status: {response.status_code}")
 
-async def create_kiro_streaming_response(response):
-    """Parse Kiro streaming response with Bedrock format"""
-    print(f"Starting Kiro streaming response")
-
     async def generate():
         # Send initial response
         initial_chunk = {
@@ -481,46 +475,41 @@ async def create_streaming_conversion_response(response):
                 'finish_reason': None
             }]
        }
+        print(f"Sending initial chunk: {initial_chunk}")
         yield f"data: {json.dumps(initial_chunk)}\n\n"
-
+
+        # Read response and stream content
+        content = ""
+        chunk_count = 0
+
+        # Read the entire response as bytes first
+        response_bytes = response.content
+        print(f"Streaming response bytes length: {len(response_bytes)}")
+
+        # Parse the AWS event stream
         try:
-            #
-
-
-
-
-
-
-
-
-
+            # Convert bytes to string
+            if isinstance(response_bytes, bytes):
+                response_str = response_bytes.decode('utf-8', errors='ignore')
+            else:
+                response_str = str(response_bytes)
+
+            # Look for content in the AWS event stream
+            # AWS uses a specific format with binary headers and JSON payloads
+
+            # Method 1: Look for JSON objects with content
+            json_pattern = r'\{[^{}]*"content"[^{}]*\}'
+            json_matches = re.findall(json_pattern, response_str, re.DOTALL)
+
+            if json_matches:
+                for match in json_matches:
                     try:
-
-
-
-                        if chunk_data.get('type') == 'content_block_delta':
-                            if 'delta' in chunk_data and 'text' in chunk_data['delta']:
-                                chunk_text = chunk_data['delta']['text']
-                                content += chunk_text
-
-                                chunk = {
-                                    'id': f'chatcmpl-{uuid.uuid4()}',
-                                    'object': 'chat.completion.chunk',
-                                    'created': int(time.time()),
-                                    'model': MODEL_NAME,
-                                    'choices': [{
-                                        'index': 0,
-                                        'delta': {'content': chunk_text},
-                                        'finish_reason': None
-                                    }]
-                                }
-                                yield f"data: {json.dumps(chunk)}\n\n"
-
-                        # Handle other streaming formats
-                        elif 'content' in chunk_data:
-                            chunk_text = chunk_data['content']
+                        data = json.loads(match)
+                        if 'content' in data and data['content']:
+                            chunk_text = data['content']
                             content += chunk_text
-
+                            chunk_count += 1
+
                             chunk = {
                                 'id': f'chatcmpl-{uuid.uuid4()}',
                                 'object': 'chat.completion.chunk',
@@ -532,17 +521,72 @@ async def create_streaming_conversion_response(response):
                                 'finish_reason': None
                                 }]
                             }
+                            print(f"Streaming JSON chunk {chunk_count}: {chunk_text[:50]}...")
                             yield f"data: {json.dumps(chunk)}\n\n"
-
-
-
+
+                            # Small delay to simulate streaming
+                            import asyncio
+                            await asyncio.sleep(0.01)
+                    except Exception as e:
+                        print(f"Error streaming JSON chunk: {e}")
                         continue
-
+            else:
+                # Method 2: Try to extract readable text
+                readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
+
+                # Look for Chinese text specifically
+                chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
+                chinese_matches = re.findall(chinese_pattern, response_str)
+
+                if chinese_matches:
+                    combined_text = ''.join(chinese_matches)
+                    # Split into chunks for streaming
+                    chunk_size = max(1, len(combined_text) // 10)
+                    for i in range(0, len(combined_text), chunk_size):
+                        chunk_text = combined_text[i:i + chunk_size]
+                        content += chunk_text
+                        chunk_count += 1
+
+                        chunk = {
+                            'id': f'chatcmpl-{uuid.uuid4()}',
+                            'object': 'chat.completion.chunk',
+                            'created': int(time.time()),
+                            'model': MODEL_NAME,
+                            'choices': [{
+                                'index': 0,
+                                'delta': {'content': chunk_text},
+                                'finish_reason': None
+                            }]
+                        }
+                        print(f"Streaming Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
+                        yield f"data: {json.dumps(chunk)}\n\n"
+
+                        import asyncio
+                        await asyncio.sleep(0.05)
+                else:
+                    # Method 3: Use the entire readable text
+                    if readable_text.strip():
+                        chunk = {
+                            'id': f'chatcmpl-{uuid.uuid4()}',
+                            'object': 'chat.completion.chunk',
+                            'created': int(time.time()),
+                            'model': MODEL_NAME,
+                            'choices': [{
+                                'index': 0,
+                                'delta': {'content': readable_text.strip()},
+                                'finish_reason': None
+                            }]
+                        }
+                        print(f"Streaming fallback text: {readable_text[:100]}...")
+                        yield f"data: {json.dumps(chunk)}\n\n"
+                        content = readable_text.strip()
+
         except Exception as e:
-            print(f"Error in
+            print(f"Error in streaming generation: {e}")
             import traceback
             traceback.print_exc()
-
+
+            # Send error as content
             error_chunk = {
                 'id': f'chatcmpl-{uuid.uuid4()}',
                 'object': 'chat.completion.chunk',
@@ -555,7 +599,9 @@ async def create_streaming_conversion_response(response):
                 }]
             }
             yield f"data: {json.dumps(error_chunk)}\n\n"
-
+
+        print(f"Streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
+
         # Send final response
         final_chunk = {
             'id': f'chatcmpl-{uuid.uuid4()}',
@@ -569,22 +615,24 @@ async def create_streaming_conversion_response(response):
             }]
         }
         yield f"data: {json.dumps(final_chunk)}\n\n"
+
         yield "data: [DONE]\n\n"
-
+
    return StreamingResponse(generate(), media_type="text/event-stream")
 
+
 # Anthropic response conversion functions
 async def create_anthropic_response(response, model: str):
     """Convert AWS event stream to Anthropic Messages format"""
     try:
         print(f"Response status: {response.status_code}")
         print(f"Response headers: {dict(response.headers)}")
-
+
         # Get response content as bytes to handle binary data
         response_bytes = response.content
         print(f"Response content type: {type(response_bytes)}")
         print(f"Response content length: {len(response_bytes)}")
-
+
         # Try to parse as JSON first
         try:
             response_data = response.json()
@@ -599,7 +647,7 @@ async def create_anthropic_response(response, model: str):
             parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
             response_text = parsed_data.get('content', "")
             print(f"Parsed content length: {len(response_text)}")
-
+
             if not response_text or response_text == "No content found in response":
                 # Last resort: try to decode as text
                 try:
@@ -607,15 +655,15 @@ async def create_anthropic_response(response, model: str):
                     print(f"Fallback text decode length: {len(response_text)}")
                 except Exception as decode_error:
                     response_text = f"Unable to decode response: {str(decode_error)}"
-
+
        print(f"Final response text: {response_text[:200]}...")
-
+
    except Exception as e:
        print(f"Error in conversion: {e}")
        import traceback
        traceback.print_exc()
        response_text = f"Error processing response: {str(e)}"
-
+
    return AnthropicMessagesResponse(
        model=model,
        content=[AnthropicContentBlock(type="text", text=response_text)],
@@ -625,10 +673,11 @@ async def create_anthropic_response(response, model: str):
        }
    )
 
+
 async def create_anthropic_streaming_response(response, model: str):
     """Convert AWS event stream to Anthropic streaming format"""
     print(f"Starting Anthropic streaming response, status: {response.status_code}")
-
+
     async def generate():
         # Send message_start event
         message_start = {
@@ -646,7 +695,7 @@ async def create_anthropic_streaming_response(response, model: str):
         }
         print(f"Sending message_start: {message_start}")
         yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
-
+
         # Send content_block_start event
         content_block_start = {
             "type": "content_block_start",
@@ -657,15 +706,15 @@ async def create_anthropic_streaming_response(response, model: str):
             }
         }
         yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
-
+
         # Read response and stream content
         content = ""
         chunk_count = 0
-
+
         # Read the entire response as bytes first
         response_bytes = response.content
         print(f"Anthropic streaming response bytes length: {len(response_bytes)}")
-
+
         # Parse the AWS event stream
         try:
             # Convert bytes to string
@@ -673,12 +722,12 @@ async def create_anthropic_streaming_response(response, model: str):
                 response_str = response_bytes.decode('utf-8', errors='ignore')
             else:
                 response_str = str(response_bytes)
-
+
             # Look for content in the AWS event stream
             # Method 1: Look for JSON objects with content
             json_pattern = r'\{[^{}]*"content"[^{}]*\}'
             json_matches = re.findall(json_pattern, response_str, re.DOTALL)
-
+
             if json_matches:
                 for match in json_matches:
                     try:
@@ -687,7 +736,7 @@ async def create_anthropic_streaming_response(response, model: str):
                             chunk_text = data['content']
                             content += chunk_text
                             chunk_count += 1
-
+
                             # Send content_block_delta event
                             content_block_delta = {
                                 "type": "content_block_delta",
@@ -699,7 +748,7 @@ async def create_anthropic_streaming_response(response, model: str):
                             }
                             print(f"Streaming Anthropic JSON chunk {chunk_count}: {chunk_text[:50]}...")
                             yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
+
                             # Small delay to simulate streaming
                             import asyncio
                             await asyncio.sleep(0.01)
@@ -709,20 +758,20 @@ async def create_anthropic_streaming_response(response, model: str):
             else:
                 # Method 2: Try to extract readable text
                 readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
-
+
                 # Look for Chinese text specifically
                 chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
                 chinese_matches = re.findall(chinese_pattern, response_str)
-
+
                 if chinese_matches:
                     combined_text = ''.join(chinese_matches)
                     # Split into chunks for streaming
                     chunk_size = max(1, len(combined_text) // 10)
                     for i in range(0, len(combined_text), chunk_size):
-                        chunk_text = combined_text[i:i+chunk_size]
+                        chunk_text = combined_text[i:i + chunk_size]
                         content += chunk_text
                         chunk_count += 1
-
+
                         # Send content_block_delta event
                         content_block_delta = {
                             "type": "content_block_delta",
@@ -734,7 +783,7 @@ async def create_anthropic_streaming_response(response, model: str):
                         }
                         print(f"Streaming Anthropic Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
                         yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
+
                         import asyncio
                         await asyncio.sleep(0.05)
                 else:
@@ -751,12 +800,12 @@ async def create_anthropic_streaming_response(response, model: str):
                         print(f"Streaming Anthropic fallback text: {readable_text[:100]}...")
                         yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
                         content = readable_text.strip()
-
+
         except Exception as e:
             print(f"Error in Anthropic streaming generation: {e}")
             import traceback
             traceback.print_exc()
-
+
             # Send error as content
             error_delta = {
                 "type": "content_block_delta",
@@ -767,254 +816,41 @@ async def create_anthropic_streaming_response(response, model: str):
                 }
             }
             yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
-
-        print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
-
-        # Send content_block_stop event
-        content_block_stop = {
-            "type": "content_block_stop",
-            "index": 0
-        }
-        yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
-
-        # Send message_stop event
-        message_stop = {
-            "type": "message_stop"
-        }
-        yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
-
-    return StreamingResponse(generate(), media_type="text/event-stream")
 
+        print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
-
-async def create_anthropic_kiro_response(response, model: str):
-    """Convert Kiro response to Anthropic Messages format"""
-    try:
-        print(f"Response status: {response.status_code}")
-
-        # Initialize variables
-        response_text = ""
-        usage = {"input_tokens": 0, "output_tokens": 0}
-
-        # Try to parse the response as JSON first
-        try:
-            response_data = response.json()
-            print(f"Successfully parsed JSON response")
-
-            # Check if response contains Bedrock-style content
-            if isinstance(response_data, dict) and 'content' in response_data:
-                content = response_data['content']
-
-                # If content is a list of content blocks (Bedrock style)
-                if isinstance(content, list) and len(content) > 0:
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "text":
-                            response_text += block.get("text", "")
-                elif isinstance(content, str):
-                    # Try to parse as JSON if it's a string
-                    try:
-                        bedrock_data = json.loads(content)
-                        if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
-                            bedrock_content = bedrock_data['content']
-                            if isinstance(bedrock_content, list):
-                                for block in bedrock_content:
-                                    if isinstance(block, dict) and block.get("type") == "text":
-                                        response_text += block.get("text", "")
-                            else:
-                                response_text = str(bedrock_content)
-
-                            # Extract usage if available
-                            if 'usage' in bedrock_data:
-                                usage = bedrock_data['usage']
-                        else:
-                            response_text = content
-                    except:
-                        response_text = content
-                else:
-                    response_text = str(content)
-            else:
-                response_text = str(response_data)
-
-        except Exception as e:
-            print(f"JSON parsing failed: {e}")
-            # Fallback to text content
-            response_text = response.text
-
-        print(f"Final response text: {response_text[:200]}...")
-
-        return AnthropicMessagesResponse(
-            model=model,
-            content=[AnthropicContentBlock(type="text", text=response_text)],
-            usage={
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": usage.get("output_tokens", 0)
-            }
-        )
-
-    except Exception as e:
-        print(f"Error in Anthropic Kiro response conversion: {e}")
-        import traceback
-        traceback.print_exc()
-        response_text = f"Error processing response: {str(e)}"
-
-        return AnthropicMessagesResponse(
-            model=model,
-            content=[AnthropicContentBlock(type="text", text=response_text)],
-            usage={
-                "input_tokens": 0,
-                "output_tokens": 0
-            }
-        )
 
-async def create_anthropic_kiro_streaming_response(response, model: str):
-    """Convert Kiro streaming response to Anthropic streaming format"""
-    print(f"Starting Anthropic Kiro streaming response")
-
-    async def generate():
-        # Send message_start event
-        message_start = {
-            "type": "message_start",
-            "message": {
-                "id": f"msg_{uuid.uuid4()}",
-                "type": "message",
-                "role": "assistant",
-                "content": [],
-                "model": model,
-                "stop_reason": None,
-                "stop_sequence": None,
-                "usage": {"input_tokens": 0, "output_tokens": 0}
-            }
-        }
-        yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
-
-        # Send content_block_start event
-        content_block_start = {
-            "type": "content_block_start",
-            "index": 0,
-            "content_block": {
-                "type": "text",
-                "text": ""
-            }
-        }
-        yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
-
-        try:
-            # Read response content
-            async for line in response.aiter_lines():
-                if line.startswith('data: '):
-                    data_str = line[6:]  # Remove 'data: ' prefix
-
-                    if data_str == '[DONE]':
-                        break
-
-                    try:
-                        chunk_data = json.loads(data_str)
-
-                        # Check for Bedrock-style content_block_delta
-                        if chunk_data.get('type') == 'content_block_delta':
-                            if 'delta' in chunk_data and 'text' in chunk_data['delta']:
-                                chunk_text = chunk_data['delta']['text']
-
-                                # Send content_block_delta event
-                                content_block_delta = {
-                                    "type": "content_block_delta",
-                                    "index": 0,
-                                    "delta": {
-                                        "type": "text_delta",
-                                        "text": chunk_text
-                                    }
-                                }
-                                yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
-                        # Handle other streaming formats
-                        elif 'content' in chunk_data:
-                            chunk_text = chunk_data['content']
-
-                            # Send content_block_delta event
-                            content_block_delta = {
-                                "type": "content_block_delta",
-                                "index": 0,
-                                "delta": {
-                                    "type": "text_delta",
-                                    "text": chunk_text
-                                }
-                            }
-                            yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
-                    except json.JSONDecodeError:
-                        # Handle non-JSON lines
-                        continue
-
-        except Exception as e:
-            print(f"Error in Anthropic Kiro streaming: {e}")
-            import traceback
-            traceback.print_exc()
-
-            # Send error as content
-            error_delta = {
-                "type": "content_block_delta",
-                "index": 0,
-                "delta": {
-                    "type": "text_delta",
-                    "text": f"Error: {str(e)}"
-                }
-            }
-            yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
-
         # Send content_block_stop event
         content_block_stop = {
             "type": "content_block_stop",
             "index": 0
         }
         yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
-
+
         # Send message_stop event
         message_stop = {
             "type": "message_stop"
         }
         yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
-
+
     return StreamingResponse(generate(), media_type="text/event-stream")
 
+
 # API endpoints
 @app.post("/v1/messages")
-async def create_messages(request: AnthropicMessagesRequest
-
-
-
-    api_key_header = http_request.headers.get("x-api-key")
-
-    if auth_header and auth_header.startswith("Bearer "):
-        auth_token = auth_header[7:]
-    elif api_key_header:
-        auth_token = api_key_header
-
-    if not auth_token or auth_token != API_KEY:
-        raise HTTPException(status_code=401, detail="Invalid API key")
-
-    # Support both claude-sonnet-4-20250514 and claude-opus-4-20250514 for compatibility
-    supported_models = [MODEL_NAME, "claude-opus-4-20250514"]
-    if request.model not in supported_models:
-        raise HTTPException(status_code=400, detail=f"Only {', '.join(supported_models)} are supported")
-
-    # Log headers for debugging
-    anthropic_version = http_request.headers.get("anthropic-version")
-    if anthropic_version:
-        print(f"Anthropic version header: {anthropic_version}")
-
-    # Log URL query parameters for debugging (but ignore them in processing)
-    if http_request.query_params:
-        print(f"Ignoring query parameters: {dict(http_request.query_params)}")
-
+async def create_messages(request: AnthropicMessagesRequest):
+    if request.model != MODEL_NAME:
+        raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
+
     # Convert Anthropic format to internal ChatMessage format
     chat_messages = anthropic_to_chat_messages(request)
-
+
     # Call the Kiro API
-    response = await call_kiro_api(chat_messages,
-
+    response = await call_kiro_api(chat_messages, stream=request.stream)
+
     if request.stream:
-        return await
+        return await create_anthropic_streaming_response(response, request.model)
     else:
-        return await
+        return await create_anthropic_response(response, request.model)
 
 # Health check
@@ -1022,6 +858,8 @@ async def create_messages(request: AnthropicMessagesRequest, http_request: Reque
 async def health_check():
     return {"status": "ok", "service": "ki2api"}
 
+
 if __name__ == "__main__":
     import uvicorn
+
     uvicorn.run(app, host="0.0.0.0", port=7860)
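
For reference, the payload that the new build_codewhisperer_request produces for a two-turn conversation has roughly this shape. This is a sketch assembled from the function above; the string values are illustrative placeholders, and PROFILE_ARN / CODEWHISPERER_MODEL are module-level constants not shown in this diff:

# Illustrative payload shape only; all quoted values are placeholders.
{
    "profileArn": PROFILE_ARN,
    "conversationState": {
        "chatTriggerType": "MANUAL",
        "conversationId": "<uuid4>",
        "currentMessage": {
            "userInputMessage": {
                # The system prompt, if any, is prepended to the latest user turn
                "content": "<system prompt>\n\n<latest user message>",
                "modelId": CODEWHISPERER_MODEL,
                "origin": "AI_EDITOR",
                "userInputMessageContext": {}
            }
        },
        "history": [
            {"userInputMessage": {"content": "<earlier user turn>",
                                  "modelId": CODEWHISPERER_MODEL,
                                  "origin": "AI_EDITOR"}},
            {"assistantResponseMessage": {"content": "<earlier assistant turn>",
                                          "toolUses": []}}
        ]
    }
}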
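
A minimal way to exercise the endpoints after this change. This is a smoke-test sketch, assuming the server is running locally on port 7860 as configured in uvicorn.run, and that MODEL_NAME resolves to "claude-sonnet-4-20250514" (as the supported_models comment removed above suggests); the file name and helper are hypothetical, not part of app.py:

# smoke_test.py -- hypothetical helper, not part of app.py
import httpx

def chat(prompt: str) -> str:
    # Non-streaming OpenAI-compatible call; the response shape follows
    # ChatCompletionResponse as defined in app.py.
    resp = httpx.post(
        "http://localhost:7860/v1/chat/completions",
        json={
            "model": "claude-sonnet-4-20250514",
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

if __name__ == "__main__":
    print(chat("Hello"))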