Update app.py
app.py
CHANGED
@@ -37,10 +37,11 @@ class ContentPart(BaseModel):
     type: str = "text"
     text: str
 
+
 class ChatMessage(BaseModel):
     role: str
     content: Union[str, List[ContentPart]]
-
+
     def get_content_text(self) -> str:
         """Extract text content from either string or content parts"""
         if isinstance(self.content, str):
@@ -57,15 +58,18 @@ class ChatMessage(BaseModel):
             return "".join(text_parts)
         return str(self.content)
 
+
 # Anthropic Claude format models
 class AnthropicContentBlock(BaseModel):
     type: str = "text"
     text: str
 
+
 class AnthropicMessage(BaseModel):
     role: str  # "user" or "assistant"
     content: Union[str, List[AnthropicContentBlock]]
 
+
 class AnthropicMessagesRequest(BaseModel):
     model: str
     max_tokens: int
@@ -74,6 +78,7 @@ class AnthropicMessagesRequest(BaseModel):
     temperature: Optional[float] = 0.7
     stream: Optional[bool] = False
 
+
 class AnthropicMessagesResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"msg_{uuid.uuid4()}")
     type: str = "message"
@@ -84,6 +89,7 @@ class AnthropicMessagesResponse(BaseModel):
     stop_sequence: Optional[str] = None
     usage: Dict[str, int]
 
+
 class AnthropicStreamResponse(BaseModel):
     type: str
     index: Optional[int] = None
@@ -92,6 +98,7 @@ class AnthropicStreamResponse(BaseModel):
     message: Optional[Dict[str, Any]] = None
     usage: Optional[Dict[str, int]] = None
 
+
 class ChatCompletionRequest(BaseModel):
     model: str
     messages: List[ChatMessage]
@@ -99,6 +106,7 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = 4000
     stream: Optional[bool] = False
 
+
 class ChatCompletionResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
     object: str = "chat.completion"
@@ -107,6 +115,7 @@ class ChatCompletionResponse(BaseModel):
     choices: List[Dict[str, Any]]
     usage: Dict[str, int]
 
+
 class ChatCompletionStreamResponse(BaseModel):
     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
     object: str = "chat.completion.chunk"
@@ -114,6 +123,7 @@ class ChatCompletionStreamResponse(BaseModel):
     model: str
     choices: List[Dict[str, Any]]
 
+
 # Token management
 class TokenManager:
     def __init__(self):
@@ -124,7 +134,7 @@ class TokenManager:
     async def refresh_tokens(self):
         if not self.refresh_token:
             return None
-
+
         try:
             async with httpx.AsyncClient() as client:
                 response = await client.post(
@@ -133,7 +143,7 @@ class TokenManager:
                     timeout=30
                 )
                 response.raise_for_status()
-
+
                 data = response.json()
                 self.access_token = data.get("accessToken")
                 return self.access_token
@@ -144,63 +154,78 @@ class TokenManager:
     def get_token(self):
         return self.access_token
 
+
 token_manager = TokenManager()
 
-
-
+
+# Build CodeWhisperer request
+def build_codewhisperer_request(messages: List[ChatMessage]):
+    conversation_id = str(uuid.uuid4())
+
     # Extract system prompt and user messages
     system_prompt = ""
-
-
+    user_messages = []
+
     for msg in messages:
         if msg.role == "system":
             system_prompt = msg.get_content_text()
         else:
-
-
-
-            })
-
-    if not chat_messages:
+            user_messages.append(msg)
+
+    if not user_messages:
         raise HTTPException(status_code=400, detail="No user messages found")
-
-    # Build
-
-
-
-
-
-
+
+    # Build history
+    history = []
+    for i in range(0, len(user_messages) - 1, 2):
+        if i + 1 < len(user_messages):
+            history.append({
+                "userInputMessage": {
+                    "content": user_messages[i].get_content_text(),
+                    "modelId": CODEWHISPERER_MODEL,
+                    "origin": "AI_EDITOR"
+                }
+            })
+            history.append({
+                "assistantResponseMessage": {
+                    "content": user_messages[i + 1].get_content_text(),
+                    "toolUses": []
+                }
+            })
+
+    # Build current message
+    current_message = user_messages[-1]
+    content = current_message.get_content_text()
     if system_prompt:
-
-
-    # Wrap in Kiro's expected format
+        content = f"{system_prompt}\n\n{content}"
+
     return {
         "profileArn": PROFILE_ARN,
         "conversationState": {
             "chatTriggerType": "MANUAL",
-            "conversationId":
+            "conversationId": conversation_id,
             "currentMessage": {
                 "userInputMessage": {
-                    "content":
+                    "content": content,
                     "modelId": CODEWHISPERER_MODEL,
                     "origin": "AI_EDITOR",
                     "userInputMessageContext": {}
                 }
            },
-            "history":
+            "history": history
        }
    }
 
+
 # Convert Anthropic messages to internal ChatMessage format
 def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
     """Convert Anthropic messages format to internal ChatMessage format"""
     chat_messages = []
-
+
     # Add system message if present
     if anthropic_request.system:
         chat_messages.append(ChatMessage(role="system", content=anthropic_request.system))
-
+
     # Convert Anthropic messages
     for msg in anthropic_request.messages:
         if isinstance(msg.content, str):
@@ -212,11 +237,12 @@ def anthropic_to_chat_messages(anthropic_request: AnthropicMessagesRequest) -> List[ChatMessage]:
                 if block.type == "text":
                     text_parts.append(block.text)
             content = "".join(text_parts)
-
+
             chat_messages.append(ChatMessage(role=msg.role, content=content))
-
+
     return chat_messages
 
+
 # AWS Event Stream Parser
 class AWSStreamParser:
     @staticmethod
@@ -233,12 +259,12 @@ class AWSStreamParser:
                 raw_str = raw_data.decode('utf-8', errors='ignore')
             else:
                 raw_str = str(raw_data)
-
+
             # Look for JSON content in the response
             # AWS event stream contains binary headers followed by JSON payloads
             json_pattern = r'\{[^{}]*"content"[^{}]*\}'
             matches = re.findall(json_pattern, raw_str, re.DOTALL)
-
+
             if matches:
                 content_parts = []
                 for match in matches:
@@ -250,7 +276,7 @@ class AWSStreamParser:
                         continue
                 if content_parts:
                     return {"content": ''.join(content_parts)}
-
+
             # Try to extract from AWS event stream format
             # Look for :content-type and extract JSON after headers
             content_type_pattern = r':content-type[^:]*:[^:]*:[^:]*:(\{.*\})'
@@ -263,7 +289,7 @@ class AWSStreamParser:
                         return {"content": data['content']}
                 except:
                     continue
-
+
             # Try to extract any JSON objects
             json_objects = re.findall(r'\{[^{}]*\}', raw_str)
             for obj in json_objects:
@@ -273,37 +299,37 @@ class AWSStreamParser:
                         return {"content": data['content']}
                 except:
                     continue
-
+
             # Final fallback: extract readable text
             readable_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', raw_str)
             readable_text = re.sub(r':event-type[^:]*:[^:]*:[^:]*:', '', readable_text)
-
+
             # Look for Chinese characters or meaningful content
             chinese_pattern = r'[\u4e00-\u9fff]+'
             chinese_matches = re.findall(chinese_pattern, raw_str)
             if chinese_matches:
                 return {"content": ''.join(chinese_matches)}
-
+
             return {"content": readable_text.strip() or "No content found in response"}
-
+
         except Exception as e:
             return {"content": f"Error parsing response: {str(e)}"}
 
 
-# Make API call to Kiro
-async def call_kiro_api(messages: List[ChatMessage],
+# Make API call to Kiro/CodeWhisperer
+async def call_kiro_api(messages: List[ChatMessage], stream: bool = False):
     token = token_manager.get_token()
     if not token:
         raise HTTPException(status_code=401, detail="No access token available")
-
-    request_data =
-
+
+    request_data = build_codewhisperer_request(messages)
+
     headers = {
         "Authorization": f"Bearer {token}",
         "Content-Type": "application/json",
         "Accept": "text/event-stream" if stream else "application/json"
     }
-
+
     try:
         async with httpx.AsyncClient() as client:
             response = await client.post(
@@ -312,7 +338,7 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
                 json=request_data,
                 timeout=120
             )
-
+
             if response.status_code == 403:
                 # Try to refresh token
                 new_token = await token_manager.refresh_tokens()
@@ -324,15 +350,16 @@ async def call_kiro_api(messages: List[ChatMessage], max_tokens: int = 4000, str
                     json=request_data,
                     timeout=120
                 )
-
+
             response.raise_for_status()
             return response
-
+
     except Exception as e:
         import traceback
-        print(f"
+        print(f"API call failed: {str(e)}")
         print(traceback.format_exc())
-        raise HTTPException(status_code=503, detail=f"
+        raise HTTPException(status_code=503, detail=f"API call failed: {str(e)}")
+
 
 # API endpoints
 @app.get("/v1/models")
@@ -349,125 +376,92 @@ async def list_models():
         ]
     }
 
+
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest):
     if request.model != MODEL_NAME:
         raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
-
+
     if request.stream:
         return await create_streaming_response(request)
     else:
         return await create_non_streaming_response(request)
 
+
 async def create_non_streaming_response(request: ChatCompletionRequest):
-    response = await call_kiro_api(request.messages,
-    return await
+    response = await call_kiro_api(request.messages, stream=False)
+    return await create_conversion_response(response)
 
-
-async def
-    """
+
+async def create_conversion_response(response):
+    """Convert AWS event stream to OpenAI format"""
     try:
         print(f"Response status: {response.status_code}")
-
-
-
-
-
-
+        print(f"Response headers: {dict(response.headers)}")
+
+        # Get response content as bytes to handle binary data
+        response_bytes = response.content
+        print(f"Response content type: {type(response_bytes)}")
+        print(f"Response content length: {len(response_bytes)}")
+
+        # Try to parse as JSON first
         try:
             response_data = response.json()
             print(f"Successfully parsed JSON response")
-
-            # Check if response contains Bedrock-style content
             if isinstance(response_data, dict) and 'content' in response_data:
-
-
-                # If content is a list of content blocks (Bedrock style)
-                if isinstance(content, list) and len(content) > 0:
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "text":
-                            response_text += block.get("text", "")
-                elif isinstance(content, str):
-                    # Try to parse as JSON if it's a string
-                    try:
-                        bedrock_data = json.loads(content)
-                        if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
-                            bedrock_content = bedrock_data['content']
-                            if isinstance(bedrock_content, list):
-                                for block in bedrock_content:
-                                    if isinstance(block, dict) and block.get("type") == "text":
-                                        response_text += block.get("text", "")
-                            else:
-                                response_text = str(bedrock_content)
-
-                        # Extract usage if available
-                        if 'usage' in bedrock_data:
-                            usage = bedrock_data['usage']
-                        else:
-                            response_text = content
-                    except:
-                        response_text = content
-                else:
-                    response_text = str(content)
+                response_text = response_data['content']
             else:
                 response_text = str(response_data)
-
         except Exception as e:
             print(f"JSON parsing failed: {e}")
-            #
-
-
+            # Handle event stream format using AWS parser
+            parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
+            response_text = parsed_data.get('content', "")
+            print(f"Parsed content length: {len(response_text)}")
+
+            if not response_text or response_text == "No content found in response":
+                # Last resort: try to decode as text
+                try:
+                    response_text = response_bytes.decode('utf-8', errors='ignore')
+                    print(f"Fallback text decode length: {len(response_text)}")
+                except Exception as decode_error:
+                    response_text = f"Unable to decode response: {str(decode_error)}"
+
         print(f"Final response text: {response_text[:200]}...")
-
-        return ChatCompletionResponse(
-            model=MODEL_NAME,
-            choices=[{
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": response_text
-                },
-                "finish_reason": "stop"
-            }],
-            usage={
-                "prompt_tokens": usage.get("input_tokens", 0),
-                "completion_tokens": usage.get("output_tokens", 0),
-                "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
-            }
-        )
-
+
     except Exception as e:
-        print(f"Error in
+        print(f"Error in conversion: {e}")
         import traceback
         traceback.print_exc()
         response_text = f"Error processing response: {str(e)}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    return ChatCompletionResponse(
+        model=MODEL_NAME,
+        choices=[{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": response_text
+            },
+            "finish_reason": "stop"
+        }],
+        usage={
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0
+        }
+    )
 
 
 async def create_streaming_response(request: ChatCompletionRequest):
-    response = await call_kiro_api(request.messages,
-    return await
+    response = await call_kiro_api(request.messages, stream=True)
+    return await create_streaming_conversion_response(response)
+
+
+async def create_streaming_conversion_response(response):
+    """Convert AWS event stream to OpenAI streaming format"""
+    print(f"Starting streaming response, status: {response.status_code}")
 
-async def create_kiro_streaming_response(response):
-    """Parse Kiro streaming response with Bedrock format"""
-    print(f"Starting Kiro streaming response")
-
     async def generate():
         # Send initial response
         initial_chunk = {
@@ -481,46 +475,41 @@ async def create_streaming_conversion_response(response):
                 'finish_reason': None
             }]
        }
+        print(f"Sending initial chunk: {initial_chunk}")
         yield f"data: {json.dumps(initial_chunk)}\n\n"
-
+
+        # Read response and stream content
+        content = ""
+        chunk_count = 0
+
+        # Read the entire response as bytes first
+        response_bytes = response.content
+        print(f"Streaming response bytes length: {len(response_bytes)}")
+
+        # Parse the AWS event stream
         try:
-            #
-
-
-
-
-
-
-
-
-
+            # Convert bytes to string
+            if isinstance(response_bytes, bytes):
+                response_str = response_bytes.decode('utf-8', errors='ignore')
+            else:
+                response_str = str(response_bytes)
+
+            # Look for content in the AWS event stream
+            # AWS uses a specific format with binary headers and JSON payloads
+
+            # Method 1: Look for JSON objects with content
+            json_pattern = r'\{[^{}]*"content"[^{}]*\}'
+            json_matches = re.findall(json_pattern, response_str, re.DOTALL)
+
+            if json_matches:
+                for match in json_matches:
                     try:
-
-
-
-                        if chunk_data.get('type') == 'content_block_delta':
-                            if 'delta' in chunk_data and 'text' in chunk_data['delta']:
-                                chunk_text = chunk_data['delta']['text']
-                                content += chunk_text
-
-                                chunk = {
-                                    'id': f'chatcmpl-{uuid.uuid4()}',
-                                    'object': 'chat.completion.chunk',
-                                    'created': int(time.time()),
-                                    'model': MODEL_NAME,
-                                    'choices': [{
-                                        'index': 0,
-                                        'delta': {'content': chunk_text},
-                                        'finish_reason': None
-                                    }]
-                                }
-                                yield f"data: {json.dumps(chunk)}\n\n"
-
-                        # Handle other streaming formats
-                        elif 'content' in chunk_data:
-                            chunk_text = chunk_data['content']
+                        data = json.loads(match)
+                        if 'content' in data and data['content']:
+                            chunk_text = data['content']
                             content += chunk_text
-
+                            chunk_count += 1
+
                             chunk = {
                                 'id': f'chatcmpl-{uuid.uuid4()}',
                                 'object': 'chat.completion.chunk',
@@ -532,17 +521,72 @@ async def create_streaming_conversion_response(response):
                                 'finish_reason': None
                                 }]
                             }
+                            print(f"Streaming JSON chunk {chunk_count}: {chunk_text[:50]}...")
                             yield f"data: {json.dumps(chunk)}\n\n"
-
-
-
+
+                            # Small delay to simulate streaming
+                            import asyncio
+                            await asyncio.sleep(0.01)
+                    except Exception as e:
+                        print(f"Error streaming JSON chunk: {e}")
                         continue
-
+            else:
+                # Method 2: Try to extract readable text
+                readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
+
+                # Look for Chinese text specifically
+                chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
+                chinese_matches = re.findall(chinese_pattern, response_str)
+
+                if chinese_matches:
+                    combined_text = ''.join(chinese_matches)
+                    # Split into chunks for streaming
+                    chunk_size = max(1, len(combined_text) // 10)
+                    for i in range(0, len(combined_text), chunk_size):
+                        chunk_text = combined_text[i:i + chunk_size]
+                        content += chunk_text
+                        chunk_count += 1
+
+                        chunk = {
+                            'id': f'chatcmpl-{uuid.uuid4()}',
+                            'object': 'chat.completion.chunk',
+                            'created': int(time.time()),
+                            'model': MODEL_NAME,
+                            'choices': [{
+                                'index': 0,
+                                'delta': {'content': chunk_text},
+                                'finish_reason': None
+                            }]
+                        }
+                        print(f"Streaming Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
+                        yield f"data: {json.dumps(chunk)}\n\n"
+
+                        import asyncio
+                        await asyncio.sleep(0.05)
+                else:
+                    # Method 3: Use the entire readable text
+                    if readable_text.strip():
+                        chunk = {
+                            'id': f'chatcmpl-{uuid.uuid4()}',
+                            'object': 'chat.completion.chunk',
+                            'created': int(time.time()),
+                            'model': MODEL_NAME,
+                            'choices': [{
+                                'index': 0,
+                                'delta': {'content': readable_text.strip()},
+                                'finish_reason': None
+                            }]
+                        }
+                        print(f"Streaming fallback text: {readable_text[:100]}...")
+                        yield f"data: {json.dumps(chunk)}\n\n"
+                        content = readable_text.strip()
+
         except Exception as e:
-            print(f"Error in
+            print(f"Error in streaming generation: {e}")
             import traceback
             traceback.print_exc()
-
+
+            # Send error as content
             error_chunk = {
                 'id': f'chatcmpl-{uuid.uuid4()}',
                 'object': 'chat.completion.chunk',
@@ -555,7 +599,9 @@ async def create_streaming_conversion_response(response):
                 }]
             }
             yield f"data: {json.dumps(error_chunk)}\n\n"
-
+
+        print(f"Streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
+
         # Send final response
         final_chunk = {
             'id': f'chatcmpl-{uuid.uuid4()}',
@@ -569,22 +615,24 @@ async def create_streaming_conversion_response(response):
             }]
         }
         yield f"data: {json.dumps(final_chunk)}\n\n"
+
         yield "data: [DONE]\n\n"
-
+
    return StreamingResponse(generate(), media_type="text/event-stream")
 
+
 # Anthropic response conversion functions
 async def create_anthropic_response(response, model: str):
     """Convert AWS event stream to Anthropic Messages format"""
     try:
         print(f"Response status: {response.status_code}")
         print(f"Response headers: {dict(response.headers)}")
-
+
         # Get response content as bytes to handle binary data
         response_bytes = response.content
         print(f"Response content type: {type(response_bytes)}")
         print(f"Response content length: {len(response_bytes)}")
-
+
         # Try to parse as JSON first
         try:
             response_data = response.json()
@@ -599,7 +647,7 @@ async def create_anthropic_response(response, model: str):
             parsed_data = AWSStreamParser.parse_event_stream_to_json(response_bytes)
             response_text = parsed_data.get('content', "")
             print(f"Parsed content length: {len(response_text)}")
-
+
             if not response_text or response_text == "No content found in response":
                 # Last resort: try to decode as text
                 try:
@@ -607,15 +655,15 @@ async def create_anthropic_response(response, model: str):
                     print(f"Fallback text decode length: {len(response_text)}")
                 except Exception as decode_error:
                     response_text = f"Unable to decode response: {str(decode_error)}"
-
+
        print(f"Final response text: {response_text[:200]}...")
-
+
    except Exception as e:
        print(f"Error in conversion: {e}")
        import traceback
        traceback.print_exc()
        response_text = f"Error processing response: {str(e)}"
-
+
    return AnthropicMessagesResponse(
        model=model,
        content=[AnthropicContentBlock(type="text", text=response_text)],
@@ -625,10 +673,11 @@ async def create_anthropic_response(response, model: str):
        }
    )
 
+
 async def create_anthropic_streaming_response(response, model: str):
     """Convert AWS event stream to Anthropic streaming format"""
     print(f"Starting Anthropic streaming response, status: {response.status_code}")
-
+
     async def generate():
         # Send message_start event
         message_start = {
@@ -646,7 +695,7 @@ async def create_anthropic_streaming_response(response, model: str):
         }
         print(f"Sending message_start: {message_start}")
         yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
-
+
         # Send content_block_start event
         content_block_start = {
             "type": "content_block_start",
@@ -657,15 +706,15 @@ async def create_anthropic_streaming_response(response, model: str):
             }
         }
         yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
-
+
         # Read response and stream content
         content = ""
         chunk_count = 0
-
+
         # Read the entire response as bytes first
         response_bytes = response.content
         print(f"Anthropic streaming response bytes length: {len(response_bytes)}")
-
+
         # Parse the AWS event stream
         try:
             # Convert bytes to string
@@ -673,12 +722,12 @@ async def create_anthropic_streaming_response(response, model: str):
                 response_str = response_bytes.decode('utf-8', errors='ignore')
             else:
                 response_str = str(response_bytes)
-
+
             # Look for content in the AWS event stream
             # Method 1: Look for JSON objects with content
             json_pattern = r'\{[^{}]*"content"[^{}]*\}'
             json_matches = re.findall(json_pattern, response_str, re.DOTALL)
-
+
             if json_matches:
                 for match in json_matches:
                     try:
@@ -687,7 +736,7 @@ async def create_anthropic_streaming_response(response, model: str):
                             chunk_text = data['content']
                             content += chunk_text
                             chunk_count += 1
-
+
                             # Send content_block_delta event
                             content_block_delta = {
                                 "type": "content_block_delta",
@@ -699,7 +748,7 @@ async def create_anthropic_streaming_response(response, model: str):
                             }
                             print(f"Streaming Anthropic JSON chunk {chunk_count}: {chunk_text[:50]}...")
                             yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
+
                             # Small delay to simulate streaming
                             import asyncio
                             await asyncio.sleep(0.01)
@@ -709,20 +758,20 @@ async def create_anthropic_streaming_response(response, model: str):
             else:
                 # Method 2: Try to extract readable text
                 readable_text = re.sub(r'[^\x20-\x7E\n\r\t\u4e00-\u9fff]', '', response_str)
-
+
                 # Look for Chinese text specifically
                 chinese_pattern = r'[\u4e00-\u9fff][\u4e00-\u9fff\s\.,!?]*[\u4e00-\u9fff]'
                 chinese_matches = re.findall(chinese_pattern, response_str)
-
+
                 if chinese_matches:
                     combined_text = ''.join(chinese_matches)
                     # Split into chunks for streaming
                     chunk_size = max(1, len(combined_text) // 10)
                     for i in range(0, len(combined_text), chunk_size):
-                        chunk_text = combined_text[i:i+chunk_size]
+                        chunk_text = combined_text[i:i + chunk_size]
                         content += chunk_text
                         chunk_count += 1
-
+
                         # Send content_block_delta event
                         content_block_delta = {
                             "type": "content_block_delta",
@@ -734,7 +783,7 @@ async def create_anthropic_streaming_response(response, model: str):
                         }
                         print(f"Streaming Anthropic Chinese text chunk {chunk_count}: {chunk_text[:50]}...")
                         yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
+
                         import asyncio
                         await asyncio.sleep(0.05)
                 else:
@@ -751,12 +800,12 @@ async def create_anthropic_streaming_response(response, model: str):
                         print(f"Streaming Anthropic fallback text: {readable_text[:100]}...")
                         yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
                         content = readable_text.strip()
-
+
         except Exception as e:
             print(f"Error in Anthropic streaming generation: {e}")
             import traceback
             traceback.print_exc()
-
+
             # Send error as content
             error_delta = {
                 "type": "content_block_delta",
@@ -767,254 +816,41 @@ async def create_anthropic_streaming_response(response, model: str):
                 }
             }
             yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
-
-        print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
-
-        # Send content_block_stop event
-        content_block_stop = {
-            "type": "content_block_stop",
-            "index": 0
-        }
-        yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
-
-        # Send message_stop event
-        message_stop = {
-            "type": "message_stop"
-        }
-        yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
-
-    return StreamingResponse(generate(), media_type="text/event-stream")
 
+        print(f"Anthropic streaming complete, total chunks: {chunk_count}, content length: {len(content)}")
-
-async def create_anthropic_kiro_response(response, model: str):
-    """Convert Kiro response to Anthropic Messages format"""
-    try:
-        print(f"Response status: {response.status_code}")
-
-        # Initialize variables
-        response_text = ""
-        usage = {"input_tokens": 0, "output_tokens": 0}
-
-        # Try to parse the response as JSON first
-        try:
-            response_data = response.json()
-            print(f"Successfully parsed JSON response")
-
-            # Check if response contains Bedrock-style content
-            if isinstance(response_data, dict) and 'content' in response_data:
-                content = response_data['content']
-
-                # If content is a list of content blocks (Bedrock style)
-                if isinstance(content, list) and len(content) > 0:
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "text":
-                            response_text += block.get("text", "")
-                elif isinstance(content, str):
-                    # Try to parse as JSON if it's a string
-                    try:
-                        bedrock_data = json.loads(content)
-                        if isinstance(bedrock_data, dict) and 'content' in bedrock_data:
-                            bedrock_content = bedrock_data['content']
-                            if isinstance(bedrock_content, list):
-                                for block in bedrock_content:
-                                    if isinstance(block, dict) and block.get("type") == "text":
-                                        response_text += block.get("text", "")
-                            else:
-                                response_text = str(bedrock_content)
-
-                            # Extract usage if available
-                            if 'usage' in bedrock_data:
-                                usage = bedrock_data['usage']
-                        else:
-                            response_text = content
-                    except:
-                        response_text = content
-                else:
-                    response_text = str(content)
-            else:
-                response_text = str(response_data)
-
-        except Exception as e:
-            print(f"JSON parsing failed: {e}")
-            # Fallback to text content
-            response_text = response.text
-
-        print(f"Final response text: {response_text[:200]}...")
-
-        return AnthropicMessagesResponse(
-            model=model,
-            content=[AnthropicContentBlock(type="text", text=response_text)],
-            usage={
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": usage.get("output_tokens", 0)
-            }
-        )
-
-    except Exception as e:
-        print(f"Error in Anthropic Kiro response conversion: {e}")
-        import traceback
-        traceback.print_exc()
-        response_text = f"Error processing response: {str(e)}"
-
-        return AnthropicMessagesResponse(
-            model=model,
-            content=[AnthropicContentBlock(type="text", text=response_text)],
-            usage={
-                "input_tokens": 0,
-                "output_tokens": 0
-            }
-        )
 
-async def create_anthropic_kiro_streaming_response(response, model: str):
-    """Convert Kiro streaming response to Anthropic streaming format"""
-    print(f"Starting Anthropic Kiro streaming response")
-
-    async def generate():
-        # Send message_start event
-        message_start = {
-            "type": "message_start",
-            "message": {
-                "id": f"msg_{uuid.uuid4()}",
-                "type": "message",
-                "role": "assistant",
-                "content": [],
-                "model": model,
-                "stop_reason": None,
-                "stop_sequence": None,
-                "usage": {"input_tokens": 0, "output_tokens": 0}
-            }
-        }
-        yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
-
-        # Send content_block_start event
-        content_block_start = {
-            "type": "content_block_start",
-            "index": 0,
-            "content_block": {
-                "type": "text",
-                "text": ""
-            }
-        }
-        yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
-
-        try:
-            # Read response content
-            async for line in response.aiter_lines():
-                if line.startswith('data: '):
-                    data_str = line[6:]  # Remove 'data: ' prefix
-
-                    if data_str == '[DONE]':
-                        break
-
-                    try:
-                        chunk_data = json.loads(data_str)
-
-                        # Check for Bedrock-style content_block_delta
-                        if chunk_data.get('type') == 'content_block_delta':
-                            if 'delta' in chunk_data and 'text' in chunk_data['delta']:
-                                chunk_text = chunk_data['delta']['text']
-
-                                # Send content_block_delta event
-                                content_block_delta = {
-                                    "type": "content_block_delta",
-                                    "index": 0,
-                                    "delta": {
-                                        "type": "text_delta",
-                                        "text": chunk_text
-                                    }
-                                }
-                                yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
-                        # Handle other streaming formats
-                        elif 'content' in chunk_data:
-                            chunk_text = chunk_data['content']
-
-                            # Send content_block_delta event
-                            content_block_delta = {
-                                "type": "content_block_delta",
-                                "index": 0,
-                                "delta": {
-                                    "type": "text_delta",
-                                    "text": chunk_text
-                                }
-                            }
-                            yield f"event: content_block_delta\ndata: {json.dumps(content_block_delta)}\n\n"
-
-                    except json.JSONDecodeError:
-                        # Handle non-JSON lines
-                        continue
-
-        except Exception as e:
-            print(f"Error in Anthropic Kiro streaming: {e}")
-            import traceback
-            traceback.print_exc()
-
-            # Send error as content
-            error_delta = {
-                "type": "content_block_delta",
-                "index": 0,
-                "delta": {
-                    "type": "text_delta",
-                    "text": f"Error: {str(e)}"
-                }
-            }
-            yield f"event: content_block_delta\ndata: {json.dumps(error_delta)}\n\n"
-
         # Send content_block_stop event
         content_block_stop = {
             "type": "content_block_stop",
             "index": 0
         }
         yield f"event: content_block_stop\ndata: {json.dumps(content_block_stop)}\n\n"
-
+
         # Send message_stop event
         message_stop = {
             "type": "message_stop"
         }
         yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
-
+
     return StreamingResponse(generate(), media_type="text/event-stream")
 
+
 # API endpoints
 @app.post("/v1/messages")
-async def create_messages(request: AnthropicMessagesRequest
-
-
-
-    api_key_header = http_request.headers.get("x-api-key")
-
-    if auth_header and auth_header.startswith("Bearer "):
-        auth_token = auth_header[7:]
-    elif api_key_header:
-        auth_token = api_key_header
-
-    if not auth_token or auth_token != API_KEY:
-        raise HTTPException(status_code=401, detail="Invalid API key")
-
-    # Support both claude-sonnet-4-20250514 and claude-opus-4-20250514 for compatibility
-    supported_models = [MODEL_NAME, "claude-opus-4-20250514"]
-    if request.model not in supported_models:
-        raise HTTPException(status_code=400, detail=f"Only {', '.join(supported_models)} are supported")
-
-    # Log headers for debugging
-    anthropic_version = http_request.headers.get("anthropic-version")
-    if anthropic_version:
-        print(f"Anthropic version header: {anthropic_version}")
-
-    # Log URL query parameters for debugging (but ignore them in processing)
-    if http_request.query_params:
-        print(f"Ignoring query parameters: {dict(http_request.query_params)}")
-
+async def create_messages(request: AnthropicMessagesRequest):
+    if request.model != MODEL_NAME:
+        raise HTTPException(status_code=400, detail=f"Only {MODEL_NAME} is supported")
+
     # Convert Anthropic format to internal ChatMessage format
     chat_messages = anthropic_to_chat_messages(request)
-
+
     # Call the Kiro API
-    response = await call_kiro_api(chat_messages,
-
+    response = await call_kiro_api(chat_messages, stream=request.stream)
+
     if request.stream:
-        return await
+        return await create_anthropic_streaming_response(response, request.model)
     else:
-        return await
+        return await create_anthropic_response(response, request.model)
 
 # Health check
@@ -1022,6 +858,8 @@ async def create_messages(request: AnthropicMessagesRequest, http_request: Reque
 async def health_check():
     return {"status": "ok", "service": "ki2api"}
 
+
 if __name__ == "__main__":
     import uvicorn
+
     uvicorn.run(app, host="0.0.0.0", port=7860)
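
For reference, the payload that the new build_codewhisperer_request produces for a two-turn conversation has roughly this shape. This is a sketch assembled from the function above; the string values are illustrative placeholders, and PROFILE_ARN / CODEWHISPERER_MODEL are module-level constants not shown in this diff:

# Illustrative payload shape only; all quoted values are placeholders.
{
    "profileArn": PROFILE_ARN,
    "conversationState": {
        "chatTriggerType": "MANUAL",
        "conversationId": "<uuid4>",
        "currentMessage": {
            "userInputMessage": {
                # The system prompt, if any, is prepended to the latest user turn
                "content": "<system prompt>\n\n<latest user message>",
                "modelId": CODEWHISPERER_MODEL,
                "origin": "AI_EDITOR",
                "userInputMessageContext": {}
            }
        },
        "history": [
            {"userInputMessage": {"content": "<earlier user turn>",
                                  "modelId": CODEWHISPERER_MODEL,
                                  "origin": "AI_EDITOR"}},
            {"assistantResponseMessage": {"content": "<earlier assistant turn>",
                                          "toolUses": []}}
        ]
    }
}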
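
A minimal way to exercise the endpoints after this change. This is a smoke-test sketch, assuming the server is running locally on port 7860 as configured in uvicorn.run, and that MODEL_NAME resolves to "claude-sonnet-4-20250514" (as the supported_models comment removed above suggests); the file name and helper are hypothetical, not part of app.py:

# smoke_test.py -- hypothetical helper, not part of app.py
import httpx

def chat(prompt: str) -> str:
    # Non-streaming OpenAI-compatible call; the response shape follows
    # ChatCompletionResponse as defined in app.py.
    resp = httpx.post(
        "http://localhost:7860/v1/chat/completions",
        json={
            "model": "claude-sonnet-4-20250514",
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

if __name__ == "__main__":
    print(chat("Hello"))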