jardan commited on
Commit
118abb7
·
verified ·
1 Parent(s): 6d10c5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +269 -431
app.py CHANGED
@@ -37,10 +37,11 @@ class ContentPart(BaseModel):
37
  type: str = "text"
38
  text: str
39
 
 
40
  class ChatMessage(BaseModel):
41
  role: str
42
  content: Union[str, List[ContentPart]]
43
-
44
  def get_content_text(self) -> str:
45
  """Extract text content from either string or content parts"""
46
  if isinstance(self.content, str):
@@ -57,15 +58,18 @@ class ChatMessage(BaseModel):
57
  return "".join(text_parts)
58
  return str(self.content)
59
 
 
60
  # Anthropic Claude format models
61
  class AnthropicContentBlock(BaseModel):
62
  type: str = "text"
63
  text: str
64
 
 
65
  class AnthropicMessage(BaseModel):
66
  role: str # "user" or "assistant"
67
  content: Union[str, List[AnthropicContentBlock]]
68
 
 
69
  class AnthropicMessagesRequest(BaseModel):
70
  model: str
71
  max_tokens: int
@@ -74,6 +78,7 @@ class AnthropicMessagesRequest(BaseModel):
74
  temperature: Optional[float] = 0.7
75
  stream: Optional[bool] = False
76
 
 
77
  class AnthropicMessagesResponse(BaseModel):
78
  id: str = Field(default_factory=lambda: f"msg_{uuid.uuid4()}")
79
  type: str = "message"
@@ -84,6 +89,7 @@ class AnthropicMessagesResponse(BaseModel):
84
  stop_sequence: Optional[str] = None
85
  usage: Dict[str, int]
86
 
 
87
  class AnthropicStreamResponse(BaseModel):
88
  type: str
89
  index: Optional[int] = None
@@ -92,6 +98,7 @@ class AnthropicStreamResponse(BaseModel):
92
  message: Optional[Dict[str, Any]] = None
93
  usage: Optional[Dict[str, int]] = None
94
 
 
95
  class ChatCompletionRequest(BaseModel):
96
  model: str
97
  messages: List[ChatMessage]
@@ -99,6 +106,7 @@ class ChatCompletionRequest(BaseModel):
99
  max_tokens: Optional[int] = 4000
100
  stream: Optional[bool] = False
101
 
 
102
  class ChatCompletionResponse(BaseModel):
103
  id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
104
  object: str = "chat.completion"
@@ -107,6 +115,7 @@ class ChatCompletionResponse(BaseModel):
107
  choices: List[Dict[str, Any]]
108
  usage: Dict[str, int]
109
 
 
110
  class ChatCompletionStreamResponse(BaseModel):
111
  id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
112
  object: str = "chat.completion.chunk"
@@ -114,6 +123,7 @@ class ChatCompletionStreamResponse(BaseModel):
114
  model: str
115
  choices: List[Dict[str, Any]]
116
 
 
117
  # Token management
118
  class TokenManager:
119
  def __init__(self):
@@ -124,7 +134,7 @@ class TokenManager:
124
  async def refresh_tokens(self):
125
  if not self.refresh_token:
126
  return None
127
-
128
  try:
129
  async with httpx.AsyncClient() as client:
130
  response = await client.post(
@@ -133,7 +143,7 @@ class TokenManager:
133
  timeout=30
134
  )
135
  response.raise_for_status()
136
-
137
  data = response.json()
138
  self.access_token = data.get("accessToken")
139
  return self.access_token
@@ -144,63 +154,78 @@ class TokenManager:
144
  def get_token(self):
145
  return self.access_token
146
 
 
147
  token_manager = TokenManager()
148
 
149
- # Build Bedrock-style request for Kiro API
150
- def build_kiro_bedrock_request(messages: List[ChatMessage], max_tokens: int = 4000):
 
 
 
151
  # Extract system prompt and user messages
152
  system_prompt = ""
153
- chat_messages = []
154
-
155
  for msg in messages:
156
  if msg.role == "system":
157
  system_prompt = msg.get_content_text()
158
  else:
159
- chat_messages.append({
160
- "role": msg.role,
161
- "content": msg.get_content_text()
162
- })
163
-
164
- if not chat_messages:
165
  raise HTTPException(status_code=400, detail="No user messages found")
166
-
167
- # Build Bedrock-style request body
168
- body = {
169
- "max_tokens": max_tokens,
170
- "messages": chat_messages,
171
- "anthropic_version": "bedrock-2023-05-31"
172
- }
173
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  if system_prompt:
175
- body["system"] = system_prompt
176
-
177
- # Wrap in Kiro's expected format
178
  return {
179
  "profileArn": PROFILE_ARN,
180
  "conversationState": {
181
  "chatTriggerType": "MANUAL",
182
- "conversationId": str(uuid.uuid4()),
183
  "currentMessage": {
184
  "userInputMessage": {
185
- "content": json.dumps(body), # Send Bedrock format as content
186
  "modelId": CODEWHISPERER_MODEL,
187
  "origin": "AI_EDITOR",
188
  "userInputMessageContext": {}
189
  }
190
  },
191
- "history": []
192
  }
193
  }
194
 
 
195
  # Convert Anthropic messages to internal ChatMessage format
196
  def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
197
  """Convert Anthropic messages format to internal ChatMessage format"""
198
  chat_messages = []
199
-
200
  # Add system message if present
201
  if anthropic_request.system:
202
  chat_messages.append(ChatMessage(role="system", content=anthropic_request.system))
203
-
204
  # Convert Anthropic messages
205
  for msg in anthropic_request.messages:
206
  if isinstance(msg.content, str):
@@ -212,11 +237,12 @@ def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> L
212
  if block.type == "text":
213
  text_parts.append(block.text)
214
  content = "".join(text_parts)
215
-
216
  chat_messages.append(ChatMessage(role=msg.role, content=content))
217
-
218
  return chat_messages
219
 
 
220
  # AWS Event Stream Parser
221
  class AWSStreamParser:
222
  @staticmethod
@@ -233,12 +259,12 @@ class AWSStreamParser:
233
  raw_str = raw_data.decode('utf-8', errors='ignore')
234
  else:
235
  raw_str = str(raw_data)
236
-
237
  # Look for JSON content in the response
238
  # AWS event stream contains binary headers followed by JSON payloads
239
  json_pattern = r'\{[^{}]*"content"[^{}]*\}'
240
  matches = re.findall(json_pattern, raw_str, re.DOTALL)
241
-
242
  if matches:
243
  content_parts = []
244
  for match in matches:
@@ -250,7 +276,7 @@ class AWSStreamParser:
250
  continue
251
  if content_parts:
252
  return {"content": ''.join(content_parts)}
253
-
254
  # Try to extract from AWS event stream format
255
  # Look for :content-type and extract JSON after headers
256
  content_type_pattern = r':content-type[^:]*:[^:]*:[^:]*:(\{.*\})'
@@ -263,7 +289,7 @@ class AWSStreamParser:
263
  return {"content": data['content']}
264
  except:
265
  continue
266
-
267
  # Try to extract any JSON objects
268
  json_objects = re.findall(r'\{[^{}]*\}', raw_str)
269
  for obj in json_objects:
@@ -273,37 +299,37 @@ class AWSStreamParser:
273
  return {"content": data['content']}
274
  except:
275
  continue
276
-
277
  # Final fallback: extract readable text
278
  readable_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', raw_str)
279
  readable_text = re.sub(r':event-type[^:]*:[^:]*:[^:]*:', '', readable_text)
280
-
281
  # Look for Chinese characters or meaningful content
282
  chinese_pattern = r'[\u4e00-\u9fff]+'
283
  chinese_matches = re.findall(chinese_pattern, raw_str)
284
  if chinese_matches:
285
  return {"content": ''.join(chinese_matches)}
286
-
287
  return {"content": readable_text.strip() or "No content found in response"}
288
-
289
  except Exception as e:
290
  return {"content": f"Error parsing response: {str(e)}"}
291
 
292
 
293
- # Make API call to Kiro with Bedrock-style format
294
- async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, stream: bool = False):
295
  token = token_manager.get_token()
296
  if not token:
297
  raise HTTPException(status_code=401, detail="No access token available")
298
-
299
- request_data = build_kiro_bedrock_request(messages, max_tokens)
300
-
301
  headers = {
302
  "Authorization": f"Bearer {token}",
303
  "Content-Type": "application/json",
304
  "Accept": "text/event-stream" if stream else "application/json"
305
  }
306
-
307
  try:
308
  async with httpx.AsyncClient() as client:
309
  response = await client.post(
@@ -312,7 +338,7 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
312
  json=request_data,
313
  timeout=120
314
  )
315
-
316
  if response.status_code == 403:
317
  # Try to refresh token
318
  new_token = await token_manager.refresh_tokens()
@@ -324,15 +350,16 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
324
  json=request_data,
325
  timeout=120
326
  )
327
-
328
  response.raise_for_status()
329
  return response
330
-
331
  except Exception as e:
332
  import traceback
333
- print(f"Kiro API call failed: {str(e)}")
334
  print(traceback.format_exc())
335
- raise HTTPException(status_code=503, detail=f"Kiro API call failed: {str(e)}")
 
336
 
337
  # API endpoints
338
  @app.get("/v1/models")
@@ -349,125 +376,92 @@ async def list_models():
349
  ]
350
  }
351
 
 
352
  @app.post("/v1/chat/completions")
353
  async def create_chat_completion(request: ChatCompletionRequest):
354
  if request.model != MODEL_NAME:
355
  raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
356
-
357
  if request.stream:
358
  return await create_streaming_response(request)
359
  else:
360
  return await create_non_streaming_response(request)
361
 
 
362
  async def create_non_streaming_response(request: ChatCompletionRequest):
363
- response = await call_kiro_api(request.messages, request.max_tokens or 4000, stream=False)
364
- return await create_kiro_response(response)
365
 
366
- # Parse Kiro response with Bedrock format
367
- async def create_kiro_response(response):
368
- """Parse Kiro response containing Bedrock-style JSON"""
369
  try:
370
  print(f"Response status: {response.status_code}")
371
-
372
- # Initialize variables
373
- response_text = ""
374
- usage = {"input_tokens": 0, "output_tokens": 0}
375
-
376
- # Try to parse the response as JSON first
 
 
377
  try:
378
  response_data = response.json()
379
  print(f"Successfully parsed JSON response")
380
-
381
- # Check if response contains Bedrock-style content
382
  if isinstance(response_data, dict) and 'content' in response_data:
383
- content = response_data['content']
384
-
385
- # If content is a list of content blocks (Bedrock style)
386
- if isinstance(content, list) and len(content) > 0:
387
- for block in content:
388
- if isinstance(block, dict) and block.get("type") == "text":
389
- response_text += block.get("text", "")
390
- elif isinstance(content, str):
391
- # Try to parse as JSON if it's a string
392
- try:
393
- bedrock_data = json.loads(content)
394
- if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
395
- bedrock_content = bedrock_data['content']
396
- if isinstance(bedrock_content, list):
397
- for block in bedrock_content:
398
- if isinstance(block, dict) and block.get("type") == "text":
399
- response_text += block.get("text", "")
400
- else:
401
- response_text = str(bedrock_content)
402
-
403
- # Extract usage if available
404
- if 'usage' in bedrock_data:
405
- usage = bedrock_data['usage']
406
- else:
407
- response_text = content
408
- except:
409
- response_text = content
410
- else:
411
- response_text = str(content)
412
  else:
413
  response_text = str(response_data)
414
-
415
  except Exception as e:
416
  print(f"JSON parsing failed: {e}")
417
- # Fallback to text content
418
- response_text = response.text
419
-
 
 
 
 
 
 
 
 
 
 
420
  print(f"Final response text: {response_text[:200]}...")
421
-
422
- return ChatCompletionResponse(
423
- model=MODEL_NAME,
424
- choices=[{
425
- "index": 0,
426
- "message": {
427
- "role": "assistant",
428
- "content": response_text
429
- },
430
- "finish_reason": "stop"
431
- }],
432
- usage={
433
- "prompt_tokens": usage.get("input_tokens", 0),
434
- "completion_tokens": usage.get("output_tokens", 0),
435
- "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
436
- }
437
- )
438
-
439
  except Exception as e:
440
- print(f"Error in Kiro response conversion: {e}")
441
  import traceback
442
  traceback.print_exc()
443
  response_text = f"Error processing response: {str(e)}"
444
-
445
- return ChatCompletionResponse(
446
- model=MODEL_NAME,
447
- choices=[{
448
- "index": 0,
449
- "message": {
450
- "role": "assistant",
451
- "content": response_text
452
- },
453
- "finish_reason": "stop"
454
- }],
455
- usage={
456
- "prompt_tokens": 0,
457
- "completion_tokens": 0,
458
- "total_tokens": 0
459
- }
460
- )
461
 
462
 
463
  async def create_streaming_response(request: ChatCompletionRequest):
464
- response = await call_kiro_api(request.messages, request.max_tokens or 4000, stream=True)
465
- return await create_kiro_streaming_response(response)
 
 
 
 
 
466
 
467
- async def create_kiro_streaming_response(response):
468
- """Parse Kiro streaming response with Bedrock format"""
469
- print(f"Starting Kiro streaming response")
470
-
471
  async def generate():
472
  # Send initial response
473
  initial_chunk = {
@@ -481,46 +475,41 @@ async def create_kiro_streaming_response(response):
481
  'finish_reason': None
482
  }]
483
  }
 
484
  yield f"data: {json.dumps(initial_chunk)}\n\n"
485
-
 
 
 
 
 
 
 
 
 
486
  try:
487
- # Read response content
488
- content = ""
489
-
490
- async for line in response.aiter_lines():
491
- if line.startswith('data: '):
492
- data_str = line[6:] # Remove 'data: ' prefix
493
-
494
- if data_str == '[DONE]':
495
- break
496
-
 
 
 
 
 
497
  try:
498
- chunk_data = json.loads(data_str)
499
-
500
- # Check for Bedrock-style content_block_delta
501
- if chunk_data.get('type') == 'content_block_delta':
502
- if 'delta' in chunk_data and 'text' in chunk_data['delta']:
503
- chunk_text = chunk_data['delta']['text']
504
- content += chunk_text
505
-
506
- chunk = {
507
- 'id': f'chatcmpl-{uuid.uuid4()}',
508
- 'object': 'chat.completion.chunk',
509
- 'created': int(time.time()),
510
- 'model': MODEL_NAME,
511
- 'choices': [{
512
- 'index': 0,
513
- 'delta': {'content': chunk_text},
514
- 'finish_reason': None
515
- }]
516
- }
517
- yield f"data: {json.dumps(chunk)}\n\n"
518
-
519
- # Handle other streaming formats
520
- elif 'content' in chunk_data:
521
- chunk_text = chunk_data['content']
522
  content += chunk_text
523
-
 
524
  chunk = {
525
  'id': f'chatcmpl-{uuid.uuid4()}',
526
  'object': 'chat.completion.chunk',
@@ -532,17 +521,72 @@ async def create_kiro_streaming_response(response):
532
  'finish_reason': None
533
  }]
534
  }
 
535
  yield f"data: {json.dumps(chunk)}\n\n"
536
-
537
- except json.JSONDecodeError:
538
- # Handle non-JSON lines
 
 
 
539
  continue
540
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  except Exception as e:
542
- print(f"Error in Kiro streaming: {e}")
543
  import traceback
544
  traceback.print_exc()
545
-
 
546
  error_chunk = {
547
  'id': f'chatcmpl-{uuid.uuid4()}',
548
  'object': 'chat.completion.chunk',
@@ -555,7 +599,9 @@ async def create_kiro_streaming_response(response):
555
  }]
556
  }
557
  yield f"data: {json.dumps(error_chunk)}\n\n"
558
-
 
 
559
  # Send final response
560
  final_chunk = {
561
  'id': f'chatcmpl-{uuid.uuid4()}',
@@ -569,22 +615,24 @@ async def create_kiro_streaming_response(response):
569
  }]
570
  }
571
  yield f"data: {json.dumps(final_chunk)}\n\n"
 
572
  yield "data: [DONE]\n\n"
573
-
574
  return StreamingResponse(generate(), media_type="text/event-stream")
575
 
 
576
  # Anthropic response conversion functions
577
  async def create_anthropic_response(response, model: str):
578
  """Convert AWS event stream to Anthropic Messages format"""
579
  try:
580
  print(f"Response status: {response.status_code}")
581
  print(f"Response headers: {dict(response.headers)}")
582
-
583
  # Get response content as bytes to handle binary data
584
  response_bytes = response.content
585
  print(f"Response content type: {type(response_bytes)}")
586
  print(f"Response content length: {len(response_bytes)}")
587
-
588
  # Try to parse as JSON first
589
  try:
590
  response_data = response.json()
@@ -599,7 +647,7 @@ async def create_anthropic_response(response, model: str):
599
  parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
600
  response_text = parsed_data.get('content', "")
601
  print(f"Parsed content length: {len(response_text)}")
602
-
603
  if not response_text or response_text == "No content found in response":
604
  # Last resort: try to decode as text
605
  try:
@@ -607,15 +655,15 @@ async def create_anthropic_response(response, model: str):
607
  print(f"Fallback text decode length: {len(response_text)}")
608
  except Exception as decode_error:
609
  response_text = f"Unable to decode response: {str(decode_error)}"
610
-
611
  print(f"Final response text: {response_text[:200]}...")
612
-
613
  except Exception as e:
614
  print(f"Error in conversion: {e}")
615
  import traceback
616
  traceback.print_exc()
617
  response_text = f"Error processing response: {str(e)}"
618
-
619
  return AnthropicMessagesResponse(
620
  model=model,
621
  content=[AnthropicContentBlock(type="text", text=response_text)],
@@ -625,10 +673,11 @@ async def create_anthropic_response(response, model: str):
625
  }
626
  )
627
 
 
628
  async def create_anthropic_streaming_response(response, model: str):
629
  """Convert AWS event stream to Anthropic streaming format"""
630
  print(f"Starting Anthropic streaming response, status: {response.status_code}")
631
-
632
  async def generate():
633
  # Send message_start event
634
  message_start = {
@@ -646,7 +695,7 @@ async def create_anthropic_streaming_response(response, model: str):
646
  }
647
  print(f"Sending message_start: {message_start}")
648
  yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
649
-
650
  # Send content_block_start event
651
  content_block_start = {
652
  "type": "content_block_start",
@@ -657,15 +706,15 @@ async def create_anthropic_streaming_response(response, model: str):
657
  }
658
  }
659
  yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
660
-
661
  # Read response and stream content
662
  content = ""
663
  chunk_count = 0
664
-
665
  # Read the entire response as bytes first
666
  response_bytes = response.content
667
  print(f"Anthropic streaming response bytes length: {len(response_bytes)}")
668
-
669
  # Parse the AWS event stream
670
  try:
671
  # Convert bytes to string
@@ -673,12 +722,12 @@ async def create_anthropic_streaming_response(response, model: str):
673
  response_str = response_bytes.decode('utf-8', errors='ignore')
674
  else:
675
  response_str = str(response_bytes)
676
-
677
  # Look for content in the AWS event stream
678
  # Method 1: Look for JSON objects with content
679
  json_pattern = r'\{[^{}]*"content"[^{}]*\}'
680
  json_matches = re.findall(json_pattern, response_str, re.DOTALL)
681
-
682
  if json_matches:
683
  for match in json_matches:
684
  try:
@@ -687,7 +736,7 @@ async def create_anthropic_streaming_response(response, model: str):
687
  chunk_text = data['content']
688
  content += chunk_text
689
  chunk_count += 1
690
-
691
  # Send content_block_delta event
692
  content_block_delta = {
693
  "type": "content_block_delta",
@@ -699,7 +748,7 @@ async def create_anthropic_streaming_response(response, model: str):
699
  }
700
  print(f"Streaming Anthropic JSON chunk {chunk_count}: {chunk_text[:50]}...")
701
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
702
-
703
  # Small delay to simulate streaming
704
  import asyncio
705
  await asyncio.sleep(0.01)
@@ -709,20 +758,20 @@ async def create_anthropic_streaming_response(response, model: str):
709
  else:
710
  # Method 2: Try to extract readable text
711
  readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
712
-
713
  # Look for Chinese text specifically
714
  chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
715
  chinese_matches = re.findall(chinese_pattern, response_str)
716
-
717
  if chinese_matches:
718
  combined_text = ''.join(chinese_matches)
719
  # Split into chunks for streaming
720
  chunk_size = max(1, len(combined_text) // 10)
721
  for i in range(0, len(combined_text), chunk_size):
722
- chunk_text = combined_text[i:i+chunk_size]
723
  content += chunk_text
724
  chunk_count += 1
725
-
726
  # Send content_block_delta event
727
  content_block_delta = {
728
  "type": "content_block_delta",
@@ -734,7 +783,7 @@ async def create_anthropic_streaming_response(response, model: str):
734
  }
735
  print(f"Streaming Anthropic Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
736
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
737
-
738
  import asyncio
739
  await asyncio.sleep(0.05)
740
  else:
@@ -751,12 +800,12 @@ async def create_anthropic_streaming_response(response, model: str):
751
  print(f"Streaming Anthropic fallback text: {readable_text[:100]}...")
752
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
753
  content = readable_text.strip()
754
-
755
  except Exception as e:
756
  print(f"Error in Anthropic streaming generation: {e}")
757
  import traceback
758
  traceback.print_exc()
759
-
760
  # Send error as content
761
  error_delta = {
762
  "type": "content_block_delta",
@@ -767,254 +816,41 @@ async def create_anthropic_streaming_response(response, model: str):
767
  }
768
  }
769
  yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
770
-
771
- print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
772
-
773
- # Send content_block_stop event
774
- content_block_stop = {
775
- "type": "content_block_stop",
776
- "index": 0
777
- }
778
- yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
779
-
780
- # Send message_stop event
781
- message_stop = {
782
- "type": "message_stop"
783
- }
784
- yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
785
-
786
- return StreamingResponse(generate(), media_type="text/event-stream")
787
 
788
- # Anthropic response conversion functions
789
- async def create_anthropic_kiro_response(response, model: str):
790
- """Convert Kiro response to Anthropic Messages format"""
791
- try:
792
- print(f"Response status: {response.status_code}")
793
-
794
- # Initialize variables
795
- response_text = ""
796
- usage = {"input_tokens": 0, "output_tokens": 0}
797
-
798
- # Try to parse the response as JSON first
799
- try:
800
- response_data = response.json()
801
- print(f"Successfully parsed JSON response")
802
-
803
- # Check if response contains Bedrock-style content
804
- if isinstance(response_data, dict) and 'content' in response_data:
805
- content = response_data['content']
806
-
807
- # If content is a list of content blocks (Bedrock style)
808
- if isinstance(content, list) and len(content) > 0:
809
- for block in content:
810
- if isinstance(block, dict) and block.get("type") == "text":
811
- response_text += block.get("text", "")
812
- elif isinstance(content, str):
813
- # Try to parse as JSON if it's a string
814
- try:
815
- bedrock_data = json.loads(content)
816
- if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
817
- bedrock_content = bedrock_data['content']
818
- if isinstance(bedrock_content, list):
819
- for block in bedrock_content:
820
- if isinstance(block, dict) and block.get("type") == "text":
821
- response_text += block.get("text", "")
822
- else:
823
- response_text = str(bedrock_content)
824
-
825
- # Extract usage if available
826
- if 'usage' in bedrock_data:
827
- usage = bedrock_data['usage']
828
- else:
829
- response_text = content
830
- except:
831
- response_text = content
832
- else:
833
- response_text = str(content)
834
- else:
835
- response_text = str(response_data)
836
-
837
- except Exception as e:
838
- print(f"JSON parsing failed: {e}")
839
- # Fallback to text content
840
- response_text = response.text
841
-
842
- print(f"Final response text: {response_text[:200]}...")
843
-
844
- return AnthropicMessagesResponse(
845
- model=model,
846
- content=[AnthropicContentBlock(type="text", text=response_text)],
847
- usage={
848
- "input_tokens": usage.get("input_tokens", 0),
849
- "output_tokens": usage.get("output_tokens", 0)
850
- }
851
- )
852
-
853
- except Exception as e:
854
- print(f"Error in Anthropic Kiro response conversion: {e}")
855
- import traceback
856
- traceback.print_exc()
857
- response_text = f"Error processing response: {str(e)}"
858
-
859
- return AnthropicMessagesResponse(
860
- model=model,
861
- content=[AnthropicContentBlock(type="text", text=response_text)],
862
- usage={
863
- "input_tokens": 0,
864
- "output_tokens": 0
865
- }
866
- )
867
 
868
- async def create_anthropic_kiro_streaming_response(response, model: str):
869
- """Convert Kiro streaming response to Anthropic streaming format"""
870
- print(f"Starting Anthropic Kiro streaming response")
871
-
872
- async def generate():
873
- # Send message_start event
874
- message_start = {
875
- "type": "message_start",
876
- "message": {
877
- "id": f"msg_{uuid.uuid4()}",
878
- "type": "message",
879
- "role": "assistant",
880
- "content": [],
881
- "model": model,
882
- "stop_reason": None,
883
- "stop_sequence": None,
884
- "usage": {"input_tokens": 0, "output_tokens": 0}
885
- }
886
- }
887
- yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
888
-
889
- # Send content_block_start event
890
- content_block_start = {
891
- "type": "content_block_start",
892
- "index": 0,
893
- "content_block": {
894
- "type": "text",
895
- "text": ""
896
- }
897
- }
898
- yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
899
-
900
- try:
901
- # Read response content
902
- async for line in response.aiter_lines():
903
- if line.startswith('data: '):
904
- data_str = line[6:] # Remove 'data: ' prefix
905
-
906
- if data_str == '[DONE]':
907
- break
908
-
909
- try:
910
- chunk_data = json.loads(data_str)
911
-
912
- # Check for Bedrock-style content_block_delta
913
- if chunk_data.get('type') == 'content_block_delta':
914
- if 'delta' in chunk_data and 'text' in chunk_data['delta']:
915
- chunk_text = chunk_data['delta']['text']
916
-
917
- # Send content_block_delta event
918
- content_block_delta = {
919
- "type": "content_block_delta",
920
- "index": 0,
921
- "delta": {
922
- "type": "text_delta",
923
- "text": chunk_text
924
- }
925
- }
926
- yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
927
-
928
- # Handle other streaming formats
929
- elif 'content' in chunk_data:
930
- chunk_text = chunk_data['content']
931
-
932
- # Send content_block_delta event
933
- content_block_delta = {
934
- "type": "content_block_delta",
935
- "index": 0,
936
- "delta": {
937
- "type": "text_delta",
938
- "text": chunk_text
939
- }
940
- }
941
- yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
942
-
943
- except json.JSONDecodeError:
944
- # Handle non-JSON lines
945
- continue
946
-
947
- except Exception as e:
948
- print(f"Error in Anthropic Kiro streaming: {e}")
949
- import traceback
950
- traceback.print_exc()
951
-
952
- # Send error as content
953
- error_delta = {
954
- "type": "content_block_delta",
955
- "index": 0,
956
- "delta": {
957
- "type": "text_delta",
958
- "text": f"Error: {str(e)}"
959
- }
960
- }
961
- yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
962
-
963
  # Send content_block_stop event
964
  content_block_stop = {
965
  "type": "content_block_stop",
966
  "index": 0
967
  }
968
  yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
969
-
970
  # Send message_stop event
971
  message_stop = {
972
  "type": "message_stop"
973
  }
974
  yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
975
-
976
  return StreamingResponse(generate(), media_type="text/event-stream")
977
 
 
978
  # API endpoints
979
  @app.post("/v1/messages")
980
- async def create_messages(request: AnthropicMessagesRequest, http_request: Request):
981
- # Check authentication - support both Authorization Bearer and x-api-key headers
982
- auth_token = None
983
- auth_header = http_request.headers.get("Authorization")
984
- api_key_header = http_request.headers.get("x-api-key")
985
-
986
- if auth_header and auth_header.startswith("Bearer "):
987
- auth_token = auth_header[7:]
988
- elif api_key_header:
989
- auth_token = api_key_header
990
-
991
- if not auth_token or auth_token != API_KEY:
992
- raise HTTPException(status_code=401, detail="Invalid API key")
993
-
994
- # Support both claude-sonnet-4-20250514 and claude-opus-4-20250514 for compatibility
995
- supported_models = [MODEL_NAME, "claude-opus-4-20250514"]
996
- if request.model not in supported_models:
997
- raise HTTPException(status_code=400, detail=f"Only {', '.join(supported_models)} are supported")
998
-
999
- # Log headers for debugging
1000
- anthropic_version = http_request.headers.get("anthropic-version")
1001
- if anthropic_version:
1002
- print(f"Anthropic version header: {anthropic_version}")
1003
-
1004
- # Log URL query parameters for debugging (but ignore them in processing)
1005
- if http_request.query_params:
1006
- print(f"Ignoring query parameters: {dict(http_request.query_params)}")
1007
-
1008
  # Convert Anthropic format to internal ChatMessage format
1009
  chat_messages = anthropic_to_chat_messages(request)
1010
-
1011
  # Call the Kiro API
1012
- response = await call_kiro_api(chat_messages, request.max_tokens, stream=request.stream)
1013
-
1014
  if request.stream:
1015
- return await create_anthropic_kiro_streaming_response(response, request.model)
1016
  else:
1017
- return await create_anthropic_kiro_response(response, request.model)
1018
 
1019
 
1020
  # Health check
@@ -1022,6 +858,8 @@ async def create_messages(request: AnthropicMessagesRequest, http_request: Reque
1022
  async def health_check():
1023
  return {"status": "ok", "service": "ki2api"}
1024
 
 
1025
  if __name__ == "__main__":
1026
  import uvicorn
 
1027
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
37
  type: str = "text"
38
  text: str
39
 
40
+
41
  class ChatMessage(BaseModel):
42
  role: str
43
  content: Union[str, List[ContentPart]]
44
+
45
  def get_content_text(self) -> str:
46
  """Extract text content from either string or content parts"""
47
  if isinstance(self.content, str):
 
58
  return "".join(text_parts)
59
  return str(self.content)
60
 
61
+
62
  # Anthropic Claude format models
63
  class AnthropicContentBlock(BaseModel):
64
  type: str = "text"
65
  text: str
66
 
67
+
68
  class AnthropicMessage(BaseModel):
69
  role: str # "user" or "assistant"
70
  content: Union[str, List[AnthropicContentBlock]]
71
 
72
+
73
  class AnthropicMessagesRequest(BaseModel):
74
  model: str
75
  max_tokens: int
 
78
  temperature: Optional[float] = 0.7
79
  stream: Optional[bool] = False
80
 
81
+
82
  class AnthropicMessagesResponse(BaseModel):
83
  id: str = Field(default_factory=lambda: f"msg_{uuid.uuid4()}")
84
  type: str = "message"
 
89
  stop_sequence: Optional[str] = None
90
  usage: Dict[str, int]
91
 
92
+
93
  class AnthropicStreamResponse(BaseModel):
94
  type: str
95
  index: Optional[int] = None
 
98
  message: Optional[Dict[str, Any]] = None
99
  usage: Optional[Dict[str, int]] = None
100
 
101
+
102
  class ChatCompletionRequest(BaseModel):
103
  model: str
104
  messages: List[ChatMessage]
 
106
  max_tokens: Optional[int] = 4000
107
  stream: Optional[bool] = False
108
 
109
+
110
  class ChatCompletionResponse(BaseModel):
111
  id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
112
  object: str = "chat.completion"
 
115
  choices: List[Dict[str, Any]]
116
  usage: Dict[str, int]
117
 
118
+
119
  class ChatCompletionStreamResponse(BaseModel):
120
  id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
121
  object: str = "chat.completion.chunk"
 
123
  model: str
124
  choices: List[Dict[str, Any]]
125
 
126
+
127
  # Token management
128
  class TokenManager:
129
  def __init__(self):
 
134
  async def refresh_tokens(self):
135
  if not self.refresh_token:
136
  return None
137
+
138
  try:
139
  async with httpx.AsyncClient() as client:
140
  response = await client.post(
 
143
  timeout=30
144
  )
145
  response.raise_for_status()
146
+
147
  data = response.json()
148
  self.access_token = data.get("accessToken")
149
  return self.access_token
 
154
  def get_token(self):
155
  return self.access_token
156
 
157
+
158
  token_manager = TokenManager()
159
 
160
+
161
# Build CodeWhisperer request
def build_codewhisperer_request(messages: List[ChatMessage]):
    """Translate chat messages into a CodeWhisperer conversationState payload.

    The system prompt (if any) is prepended to the latest non-system message;
    earlier messages are paired off into alternating user/assistant history
    turns.

    Args:
        messages: Full conversation, possibly starting with a system message.

    Returns:
        The request body dict expected by the CodeWhisperer API.

    Raises:
        HTTPException: 400 when no non-system messages are present.
    """
    conversation_id = str(uuid.uuid4())

    # Separate the system prompt from the conversational messages.
    system_prompt = ""
    user_messages = []

    for msg in messages:
        if msg.role == "system":
            system_prompt = msg.get_content_text()
        else:
            user_messages.append(msg)

    if not user_messages:
        raise HTTPException(status_code=400, detail="No user messages found")

    # Build history from everything BEFORE the last message. Previously the
    # loop ran over all messages, so an even-length conversation duplicated
    # the final message as both a history turn and the currentMessage.
    prior_messages = user_messages[:-1]
    history = []
    for i in range(0, len(prior_messages) - 1, 2):
        history.append({
            "userInputMessage": {
                "content": prior_messages[i].get_content_text(),
                "modelId": CODEWHISPERER_MODEL,
                "origin": "AI_EDITOR"
            }
        })
        history.append({
            "assistantResponseMessage": {
                "content": prior_messages[i + 1].get_content_text(),
                "toolUses": []
            }
        })

    # Build current message, folding in the system prompt when present.
    current_message = user_messages[-1]
    content = current_message.get_content_text()
    if system_prompt:
        content = f"{system_prompt}\n\n{content}"

    return {
        "profileArn": PROFILE_ARN,
        "conversationState": {
            "chatTriggerType": "MANUAL",
            "conversationId": conversation_id,
            "currentMessage": {
                "userInputMessage": {
                    "content": content,
                    "modelId": CODEWHISPERER_MODEL,
                    "origin": "AI_EDITOR",
                    "userInputMessageContext": {}
                }
            },
            "history": history
        }
    }
218
 
219
+
220
  # Convert Anthropic messages to internal ChatMessage format
221
  def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
222
  """Convert Anthropic messages format to internal ChatMessage format"""
223
  chat_messages = []
224
+
225
  # Add system message if present
226
  if anthropic_request.system:
227
  chat_messages.append(ChatMessage(role="system", content=anthropic_request.system))
228
+
229
  # Convert Anthropic messages
230
  for msg in anthropic_request.messages:
231
  if isinstance(msg.content, str):
 
237
  if block.type == "text":
238
  text_parts.append(block.text)
239
  content = "".join(text_parts)
240
+
241
  chat_messages.append(ChatMessage(role=msg.role, content=content))
242
+
243
  return chat_messages
244
 
245
+
246
  # AWS Event Stream Parser
247
  class AWSStreamParser:
248
  @staticmethod
 
259
  raw_str = raw_data.decode('utf-8', errors='ignore')
260
  else:
261
  raw_str = str(raw_data)
262
+
263
  # Look for JSON content in the response
264
  # AWS event stream contains binary headers followed by JSON payloads
265
  json_pattern = r'\{[^{}]*"content"[^{}]*\}'
266
  matches = re.findall(json_pattern, raw_str, re.DOTALL)
267
+
268
  if matches:
269
  content_parts = []
270
  for match in matches:
 
276
  continue
277
  if content_parts:
278
  return {"content": ''.join(content_parts)}
279
+
280
  # Try to extract from AWS event stream format
281
  # Look for :content-type and extract JSON after headers
282
  content_type_pattern = r':content-type[^:]*:[^:]*:[^:]*:(\{.*\})'
 
289
  return {"content": data['content']}
290
  except:
291
  continue
292
+
293
  # Try to extract any JSON objects
294
  json_objects = re.findall(r'\{[^{}]*\}', raw_str)
295
  for obj in json_objects:
 
299
  return {"content": data['content']}
300
  except:
301
  continue
302
+
303
  # Final fallback: extract readable text
304
  readable_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', raw_str)
305
  readable_text = re.sub(r':event-type[^:]*:[^:]*:[^:]*:', '', readable_text)
306
+
307
  # Look for Chinese characters or meaningful content
308
  chinese_pattern = r'[\u4e00-\u9fff]+'
309
  chinese_matches = re.findall(chinese_pattern, raw_str)
310
  if chinese_matches:
311
  return {"content": ''.join(chinese_matches)}
312
+
313
  return {"content": readable_text.strip() or "No content found in response"}
314
+
315
  except Exception as e:
316
  return {"content": f"Error parsing response: {str(e)}"}
317
 
318
 
319
+ # Make API call to Kiro/CodeWhisperer
320
+ async def call_kiro_api(messages: List[ChatMessage], stream: bool = False):
321
  token = token_manager.get_token()
322
  if not token:
323
  raise HTTPException(status_code=401, detail="No access token available")
324
+
325
+ request_data = build_codewhisperer_request(messages)
326
+
327
  headers = {
328
  "Authorization": f"Bearer {token}",
329
  "Content-Type": "application/json",
330
  "Accept": "text/event-stream" if stream else "application/json"
331
  }
332
+
333
  try:
334
  async with httpx.AsyncClient() as client:
335
  response = await client.post(
 
338
  json=request_data,
339
  timeout=120
340
  )
341
+
342
  if response.status_code == 403:
343
  # Try to refresh token
344
  new_token = await token_manager.refresh_tokens()
 
350
  json=request_data,
351
  timeout=120
352
  )
353
+
354
  response.raise_for_status()
355
  return response
356
+
357
  except Exception as e:
358
  import traceback
359
+ print(f"API call failed: {str(e)}")
360
  print(traceback.format_exc())
361
+ raise HTTPException(status_code=503, detail=f"API call failed: {str(e)}")
362
+
363
 
364
  # API endpoints
365
  @app.get("/v1/models")
 
376
  ]
377
  }
378
 
379
+
380
  @app.post("/v1/chat/completions")
381
  async def create_chat_completion(request: ChatCompletionRequest):
382
  if request.model != MODEL_NAME:
383
  raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
384
+
385
  if request.stream:
386
  return await create_streaming_response(request)
387
  else:
388
  return await create_non_streaming_response(request)
389
 
390
+
391
async def create_non_streaming_response(request: ChatCompletionRequest):
    """Fetch the complete upstream reply and convert it to OpenAI format."""
    upstream = await call_kiro_api(request.messages, stream=False)
    return await create_conversion_response(upstream)
394
 
395
+
396
async def create_conversion_response(response):
    """Convert a Kiro/CodeWhisperer reply into an OpenAI chat.completion.

    Extraction is best-effort via _extract_response_text; any unexpected
    failure is reported inside the assistant message rather than raised,
    so the endpoint always returns a well-formed completion.
    """
    try:
        print(f"Response status: {response.status_code}")
        print(f"Response headers: {dict(response.headers)}")
        response_text = _extract_response_text(response)
        print(f"Final response text: {response_text[:200]}...")
    except Exception as e:
        print(f"Error in conversion: {e}")
        import traceback
        traceback.print_exc()
        response_text = f"Error processing response: {str(e)}"

    # Token counts are not reported by the upstream API, so they are zeroed.
    return ChatCompletionResponse(
        model=MODEL_NAME,
        choices=[{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": response_text
            },
            "finish_reason": "stop"
        }],
        usage={
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0
        }
    )


def _extract_response_text(response) -> str:
    """Best-effort extraction of assistant text from an upstream response.

    Tries, in order: plain JSON with a "content" field, the AWS
    event-stream parser, and finally a lossy utf-8 decode of the raw bytes.
    """
    # Get response content as bytes to handle binary data.
    response_bytes = response.content
    print(f"Response content type: {type(response_bytes)}")
    print(f"Response content length: {len(response_bytes)}")

    # First attempt: the body is plain JSON.
    try:
        response_data = response.json()
        print(f"Successfully parsed JSON response")
        if isinstance(response_data, dict) and 'content' in response_data:
            return response_data['content']
        return str(response_data)
    except Exception as e:
        print(f"JSON parsing failed: {e}")

    # Second attempt: AWS event-stream format.
    parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
    response_text = parsed_data.get('content', "")
    print(f"Parsed content length: {len(response_text)}")

    if not response_text or response_text == "No content found in response":
        # Last resort: try to decode as text.
        try:
            response_text = response_bytes.decode('utf-8', errors='ignore')
            print(f"Fallback text decode length: {len(response_text)}")
        except Exception as decode_error:
            response_text = f"Unable to decode response: {str(decode_error)}"

    return response_text
454
 
455
 
456
async def create_streaming_response(request: ChatCompletionRequest):
    """Call the upstream API and relay its reply as an OpenAI SSE stream."""
    upstream = await call_kiro_api(request.messages, stream=True)
    return await create_streaming_conversion_response(upstream)
459
+
460
+
461
+ async def create_streaming_conversion_response(response):
462
+ """Convert AWS event stream to OpenAI streaming format"""
463
+ print(f"Starting streaming response, status: {response.status_code}")
464
 
 
 
 
 
465
  async def generate():
466
  # Send initial response
467
  initial_chunk = {
 
475
  'finish_reason': None
476
  }]
477
  }
478
+ print(f"Sending initial chunk: {initial_chunk}")
479
  yield f"data: {json.dumps(initial_chunk)}\n\n"
480
+
481
+ # Read response and stream content
482
+ content = ""
483
+ chunk_count = 0
484
+
485
+ # Read the entire response as bytes first
486
+ response_bytes = response.content
487
+ print(f"Streaming response bytes length: {len(response_bytes)}")
488
+
489
+ # Parse the AWS event stream
490
  try:
491
+ # Convert bytes to string
492
+ if isinstance(response_bytes, bytes):
493
+ response_str = response_bytes.decode('utf-8', errors='ignore')
494
+ else:
495
+ response_str = str(response_bytes)
496
+
497
+ # Look for content in the AWS event stream
498
+ # AWS uses a specific format with binary headers and JSON payloads
499
+
500
+ # Method 1: Look for JSON objects with content
501
+ json_pattern = r'\{[^{}]*"content"[^{}]*\}'
502
+ json_matches = re.findall(json_pattern, response_str, re.DOTALL)
503
+
504
+ if json_matches:
505
+ for match in json_matches:
506
  try:
507
+ data = json.loads(match)
508
+ if 'content' in data and data['content']:
509
+ chunk_text = data['content']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  content += chunk_text
511
+ chunk_count += 1
512
+
513
  chunk = {
514
  'id': f'chatcmpl-{uuid.uuid4()}',
515
  'object': 'chat.completion.chunk',
 
521
  'finish_reason': None
522
  }]
523
  }
524
+ print(f"Streaming JSON chunk {chunk_count}: {chunk_text[:50]}...")
525
  yield f"data: {json.dumps(chunk)}\n\n"
526
+
527
+ # Small delay to simulate streaming
528
+ import asyncio
529
+ await asyncio.sleep(0.01)
530
+ except Exception as e:
531
+ print(f"Error streaming JSON chunk: {e}")
532
  continue
533
+ else:
534
+ # Method 2: Try to extract readable text
535
+ readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
536
+
537
+ # Look for Chinese text specifically
538
+ chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
539
+ chinese_matches = re.findall(chinese_pattern, response_str)
540
+
541
+ if chinese_matches:
542
+ combined_text = ''.join(chinese_matches)
543
+ # Split into chunks for streaming
544
+ chunk_size = max(1, len(combined_text) // 10)
545
+ for i in range(0, len(combined_text), chunk_size):
546
+ chunk_text = combined_text[i:i + chunk_size]
547
+ content += chunk_text
548
+ chunk_count += 1
549
+
550
+ chunk = {
551
+ 'id': f'chatcmpl-{uuid.uuid4()}',
552
+ 'object': 'chat.completion.chunk',
553
+ 'created': int(time.time()),
554
+ 'model': MODEL_NAME,
555
+ 'choices': [{
556
+ 'index': 0,
557
+ 'delta': {'content': chunk_text},
558
+ 'finish_reason': None
559
+ }]
560
+ }
561
+ print(f"Streaming Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
562
+ yield f"data: {json.dumps(chunk)}\n\n"
563
+
564
+ import asyncio
565
+ await asyncio.sleep(0.05)
566
+ else:
567
+ # Method 3: Use the entire readable text
568
+ if readable_text.strip():
569
+ chunk = {
570
+ 'id': f'chatcmpl-{uuid.uuid4()}',
571
+ 'object': 'chat.completion.chunk',
572
+ 'created': int(time.time()),
573
+ 'model': MODEL_NAME,
574
+ 'choices': [{
575
+ 'index': 0,
576
+ 'delta': {'content': readable_text.strip()},
577
+ 'finish_reason': None
578
+ }]
579
+ }
580
+ print(f"Streaming fallback text: {readable_text[:100]}...")
581
+ yield f"data: {json.dumps(chunk)}\n\n"
582
+ content = readable_text.strip()
583
+
584
  except Exception as e:
585
+ print(f"Error in streaming generation: {e}")
586
  import traceback
587
  traceback.print_exc()
588
+
589
+ # Send error as content
590
  error_chunk = {
591
  'id': f'chatcmpl-{uuid.uuid4()}',
592
  'object': 'chat.completion.chunk',
 
599
  }]
600
  }
601
  yield f"data: {json.dumps(error_chunk)}\n\n"
602
+
603
+ print(f"Streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
604
+
605
  # Send final response
606
  final_chunk = {
607
  'id': f'chatcmpl-{uuid.uuid4()}',
 
615
  }]
616
  }
617
  yield f"data: {json.dumps(final_chunk)}\n\n"
618
+
619
  yield "data: [DONE]\n\n"
620
+
621
  return StreamingResponse(generate(), media_type="text/event-stream")
622
 
623
+
624
  # Anthropic response conversion functions
625
  async def create_anthropic_response(response, model: str):
626
  """Convert AWS event stream to Anthropic Messages format"""
627
  try:
628
  print(f"Response status: {response.status_code}")
629
  print(f"Response headers: {dict(response.headers)}")
630
+
631
  # Get response content as bytes to handle binary data
632
  response_bytes = response.content
633
  print(f"Response content type: {type(response_bytes)}")
634
  print(f"Response content length: {len(response_bytes)}")
635
+
636
  # Try to parse as JSON first
637
  try:
638
  response_data = response.json()
 
647
  parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
648
  response_text = parsed_data.get('content', "")
649
  print(f"Parsed content length: {len(response_text)}")
650
+
651
  if not response_text or response_text == "No content found in response":
652
  # Last resort: try to decode as text
653
  try:
 
655
  print(f"Fallback text decode length: {len(response_text)}")
656
  except Exception as decode_error:
657
  response_text = f"Unable to decode response: {str(decode_error)}"
658
+
659
  print(f"Final response text: {response_text[:200]}...")
660
+
661
  except Exception as e:
662
  print(f"Error in conversion: {e}")
663
  import traceback
664
  traceback.print_exc()
665
  response_text = f"Error processing response: {str(e)}"
666
+
667
  return AnthropicMessagesResponse(
668
  model=model,
669
  content=[AnthropicContentBlock(type="text", text=response_text)],
 
673
  }
674
  )
675
 
676
+
677
  async def create_anthropic_streaming_response(response, model: str):
678
  """Convert AWS event stream to Anthropic streaming format"""
679
  print(f"Starting Anthropic streaming response, status: {response.status_code}")
680
+
681
  async def generate():
682
  # Send message_start event
683
  message_start = {
 
695
  }
696
  print(f"Sending message_start: {message_start}")
697
  yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
698
+
699
  # Send content_block_start event
700
  content_block_start = {
701
  "type": "content_block_start",
 
706
  }
707
  }
708
  yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
709
+
710
  # Read response and stream content
711
  content = ""
712
  chunk_count = 0
713
+
714
  # Read the entire response as bytes first
715
  response_bytes = response.content
716
  print(f"Anthropic streaming response bytes length: {len(response_bytes)}")
717
+
718
  # Parse the AWS event stream
719
  try:
720
  # Convert bytes to string
 
722
  response_str = response_bytes.decode('utf-8', errors='ignore')
723
  else:
724
  response_str = str(response_bytes)
725
+
726
  # Look for content in the AWS event stream
727
  # Method 1: Look for JSON objects with content
728
  json_pattern = r'\{[^{}]*"content"[^{}]*\}'
729
  json_matches = re.findall(json_pattern, response_str, re.DOTALL)
730
+
731
  if json_matches:
732
  for match in json_matches:
733
  try:
 
736
  chunk_text = data['content']
737
  content += chunk_text
738
  chunk_count += 1
739
+
740
  # Send content_block_delta event
741
  content_block_delta = {
742
  "type": "content_block_delta",
 
748
  }
749
  print(f"Streaming Anthropic JSON chunk {chunk_count}: {chunk_text[:50]}...")
750
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
751
+
752
  # Small delay to simulate streaming
753
  import asyncio
754
  await asyncio.sleep(0.01)
 
758
  else:
759
  # Method 2: Try to extract readable text
760
  readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
761
+
762
  # Look for Chinese text specifically
763
  chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
764
  chinese_matches = re.findall(chinese_pattern, response_str)
765
+
766
  if chinese_matches:
767
  combined_text = ''.join(chinese_matches)
768
  # Split into chunks for streaming
769
  chunk_size = max(1, len(combined_text) // 10)
770
  for i in range(0, len(combined_text), chunk_size):
771
+ chunk_text = combined_text[i:i + chunk_size]
772
  content += chunk_text
773
  chunk_count += 1
774
+
775
  # Send content_block_delta event
776
  content_block_delta = {
777
  "type": "content_block_delta",
 
783
  }
784
  print(f"Streaming Anthropic Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
785
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
786
+
787
  import asyncio
788
  await asyncio.sleep(0.05)
789
  else:
 
800
  print(f"Streaming Anthropic fallback text: {readable_text[:100]}...")
801
  yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
802
  content = readable_text.strip()
803
+
804
  except Exception as e:
805
  print(f"Error in Anthropic streaming generation: {e}")
806
  import traceback
807
  traceback.print_exc()
808
+
809
  # Send error as content
810
  error_delta = {
811
  "type": "content_block_delta",
 
816
  }
817
  }
818
  yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819
 
820
+ print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  # Send content_block_stop event
823
  content_block_stop = {
824
  "type": "content_block_stop",
825
  "index": 0
826
  }
827
  yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
828
+
829
  # Send message_stop event
830
  message_stop = {
831
  "type": "message_stop"
832
  }
833
  yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
834
+
835
  return StreamingResponse(generate(), media_type="text/event-stream")
836
 
837
+
838
  # API endpoints
839
  @app.post("/v1/messages")
840
+ async def create_messages(request: AnthropicMessagesRequest):
841
+ if request.model != MODEL_NAME:
842
+ raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
843
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844
  # Convert Anthropic format to internal ChatMessage format
845
  chat_messages = anthropic_to_chat_messages(request)
846
+
847
  # Call the Kiro API
848
+ response = await call_kiro_api(chat_messages, stream=request.stream)
849
+
850
  if request.stream:
851
+ return await create_anthropic_streaming_response(response, request.model)
852
  else:
853
+ return await create_anthropic_response(response, request.model)
854
 
855
 
856
  # Health check
 
858
async def health_check():
    """Liveness probe: report that the service is up."""
    payload = {"status": "ok", "service": "ki2api"}
    return payload
860
 
861
+
862
  if __name__ == "__main__":
863
  import uvicorn
864
+
865
  uvicorn.run(app, host="0.0.0.0", port=7860)