import os
import time
import json
import asyncio
from typing import List, Optional, Dict, Any, Literal
from pydantic import BaseModel, Field
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse
from duckai import DuckAI


# Models DuckAI exposes through this adapter
SUPPORTED_MODELS = [
    "gpt-4o-mini",
    "llama-3.3-70b",
    "claude-3-haiku",
    "o3-mini",
    "mistral-small-3",
]


app = FastAPI(title="DuckAI OpenAI Adapter API")


# Allow cross-origin requests so browser-based clients can call the API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class Message(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False


class ChatCompletionResponse(BaseModel):
    """Shape of a non-streaming completion response. Kept for reference;
    the endpoint below builds the response dict directly."""

    id: str = Field(default_factory=lambda: f"chatcmpl-{os.urandom(12).hex()}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]
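# For reference, a minimal valid request body for /v1/chat/completions
# (the field names mirror the OpenAI chat completions schema):
# {
#   "model": "gpt-4o-mini",
#   "messages": [{"role": "user", "content": "Hello"}],
#   "stream": false
# }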


def format_chat_history(messages: List[Message]) -> str:
    """
    Convert a list of messages from the OpenAI format into the format
    DuckAI can handle: a string of "user: " and "assistant: " lines.
    """
    formatted_history = ""

    for message in messages:
        if message.role == "system":
            # DuckAI has no system role, so pass system prompts as tagged user turns
            formatted_history += f"user: [SYSTEM] {message.content}\n"
        else:
            formatted_history += f"{message.role}: {message.content}\n"

    return formatted_history.strip()
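# For example, a system prompt "Be brief." followed by a user message "Hi"
# is flattened to:
#   "user: [SYSTEM] Be brief.\nuser: Hi"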


async def stream_response_character_by_character(content: str, request: Request, response_id: str, model: str):
    """
    Generator that streams the response in small chunks at a readable pace.
    """
    if await request.is_disconnected():
        return

    # The first chunk carries only the assistant role, matching OpenAI's stream format
    data = {
        "id": response_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "delta": {
                    "role": "assistant",
                },
                "finish_reason": None
            }
        ]
    }
    yield json.dumps(data)
    await asyncio.sleep(0.001)

    # Accumulate characters and flush small chunks to simulate token-by-token output
    buffer = ""
    for char in content:
        if await request.is_disconnected():
            break

        buffer += char

        # Flush once the buffer holds a few characters or hits a natural break
        if len(buffer) >= 3 or char in [' ', '\n', '.', '!', '?', ',']:
            data = {
                "id": response_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "delta": {
                            "content": buffer
                        },
                        "finish_reason": None
                    }
                ]
            }
            yield json.dumps(data)
            buffer = ""

            # Pause slightly longer after line breaks so the pacing feels natural
            delay = 0.05 if char == '\n' else 0.01
            await asyncio.sleep(delay)

    # Flush anything left in the buffer
    if buffer:
        data = {
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": {
                        "content": buffer
                    },
                    "finish_reason": None
                }
            ]
        }
        yield json.dumps(data)

    # Final chunk: empty delta with finish_reason "stop"
    data = {
        "id": response_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "delta": {},
                "finish_reason": "stop"
            }
        ]
    }
    yield json.dumps(data)

    # Sentinel that tells OpenAI-compatible clients the stream is finished
    yield "[DONE]"


@app.post("/v1/chat/completions")
async def create_chat_completion(request: Request):
    """
    Create a chat completion, with optional streaming.
    """
    try:
        body = await request.json()

        # Validate the body against the OpenAI-style schema
        completion_request = ChatCompletionRequest(**body)

        if completion_request.model not in SUPPORTED_MODELS:
            supported_models_str = ", ".join(SUPPORTED_MODELS)
            raise HTTPException(
                status_code=400,
                detail=f"Model '{completion_request.model}' is not supported. Supported models: {supported_models_str}"
            )

        chat_history = format_chat_history(completion_request.messages)

        response_id = f"chatcmpl-{os.urandom(12).hex()}"

        # DuckAI's chat call is synchronous; run it in a worker thread so it
        # does not block the event loop
        duck_response = await asyncio.to_thread(
            DuckAI().chat, chat_history, model=completion_request.model
        )
        duck_response = duck_response.strip()

        if completion_request.stream:
            return EventSourceResponse(
                stream_response_character_by_character(
                    duck_response,
                    request,
                    response_id,
                    completion_request.model
                ),
                media_type="text/event-stream"
            )

        response_data = {
            "id": response_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": completion_request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": duck_response
                    },
                    "finish_reason": "stop"
                }
            ],
            # DuckAI does not report token counts; estimate ~4 characters per token
            "usage": {
                "prompt_tokens": len(chat_history) // 4,
                "completion_tokens": len(duck_response) // 4,
                "total_tokens": (len(chat_history) + len(duck_response)) // 4
            }
        }

        return response_data

    except HTTPException:
        # Preserve intentional HTTP errors (e.g. the 400 for unsupported models)
        # instead of masking them as 500s
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
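# Example request against a local instance (port 7860, as configured below):
#   curl http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Hello"}]}'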


@app.get("/v1/models")
async def list_models():
    """List the supported models."""
    models_data = []

    for model_id in SUPPORTED_MODELS:
        models_data.append({
            "id": model_id,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "duckai"
        })

    return {
        "object": "list",
        "data": models_data
    }


@app.get("/")
async def root():
    return {
        "message": "DuckAI OpenAI Adapter API is running. Send requests to /v1/chat/completions",
        "supported_models": SUPPORTED_MODELS
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
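# Run with `python main.py` (the "main:app" import string assumes this file is
# saved as main.py), or equivalently `uvicorn main:app --host 0.0.0.0 --port 7860`.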