| from fastapi import FastAPI, Request |
| from fastapi.responses import JSONResponse, StreamingResponse |
| import uvicorn |
| import json |
| import time |
| import random |
| import string |
| import httpx |
| import re |
|
|
app = FastAPI()

# Model id advertised via /v1/models and forwarded upstream as selectedModel.
MODEL = "llama3.1-8B"
# TCP port the server binds to when run as a script (see __main__ guard).
PORT = 3000
|
|
|
|
def generate_id():
    """Return a pseudo-random OpenAI-style chat completion id (``chatcmpl-`` + 13 chars)."""
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(13))
    return f"chatcmpl-{suffix}"
|
|
|
|
def get_timestamp():
    """Return the current Unix time, truncated to whole seconds."""
    now = time.time()
    return int(now)
|
|
|
|
@app.get("/")
async def root():
    """Trivial liveness endpoint."""
    greeting = "hello world"
    return greeting
|
|
|
|
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing; exposes the single proxied model."""
    model_entry = {"id": MODEL, "object": "model"}
    return {"object": "list", "data": [model_entry]}
|
|
|
|
@app.get("/models")
async def list_models_alt():
    """Unversioned alias of /v1/models for clients that omit the prefix."""
    model_entry = {"id": MODEL, "object": "model"}
    return {"object": "list", "data": [model_entry]}
|
|
|
|
async def stream_chat_completion(messages, system_prompt, top_k):
    """Proxy one upstream chat request and replay it as an OpenAI-style SSE stream.

    Yields ``chat.completion.chunk`` events in order: a role delta, a single
    content delta carrying the full upstream reply, a stop chunk, and the
    final ``data: [DONE]`` marker.

    Args:
        messages: Chat history (system messages already stripped by the caller).
        system_prompt: System prompt, forwarded via chatOptions.systemPrompt.
        top_k: Sampling parameter, forwarded via chatOptions.topK.
    """
    chat_id = generate_id()
    created = get_timestamp()

    def sse_chunk(delta, finish_reason=None):
        # Build one OpenAI-compatible SSE data line; all three chunks share
        # this shape, so construct it in one place instead of three.
        payload = {
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": MODEL,
            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
        }
        return f"data: {json.dumps(payload)}\n\n"

    # Announce the assistant role before any content, as OpenAI streams do.
    yield sse_chunk({"role": "assistant"})

    # httpx's default 5-second timeout is far too short for LLM generation;
    # give the upstream a generous window instead of failing mid-stream.
    async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
        response = await client.post(
            "https://chatjimmy.ai/api/chat",
            json={
                "messages": messages,
                "chatOptions": {"selectedModel": MODEL, "systemPrompt": system_prompt, "topK": top_k},
                "attachment": None,
            },
        )

    # Upstream appends a <|stats|>...<|/stats|> diagnostics block; strip it.
    # NOTE(review): a non-2xx upstream response is still forwarded as content
    # here — consider surfacing it as an error event instead.
    content = re.sub(r"<\|stats\|>[\s\S]*?<\|/stats\|>", "", response.text).strip()

    yield sse_chunk({"content": content})
    yield sse_chunk({}, finish_reason="stop")
    yield "data: [DONE]\n\n"
|
|
|
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat completions endpoint backed by chatjimmy.ai.

    Splits the system prompt out of the message history, forwards the rest
    upstream, and returns either a complete ``chat.completion`` object or an
    SSE stream when ``stream`` is truthy. Returns 400 when ``messages`` is
    missing/empty, 502 on an upstream error, and 500 on unexpected failures.
    """
    try:
        body = await request.json()
        messages = body.get("messages", [])
        stream = body.get("stream", False)
        # Accept both the upstream "topK" spelling and snake_case "top_k".
        top_k = body.get("topK", body.get("top_k", 8))

        if not messages:
            return JSONResponse(status_code=400, content={"error": {"message": "messages required"}})

        # The upstream API takes the system prompt as a separate chatOptions
        # field, so pull it out of the history; the last system message wins.
        system_prompt = ""
        user_messages = []
        for m in messages:
            if m.get("role") == "system":
                system_prompt = m.get("content", "")
            else:
                user_messages.append(m)

        if stream:
            return StreamingResponse(
                stream_chat_completion(user_messages, system_prompt, top_k),
                media_type="text/event-stream",
            )

        # httpx's default 5-second timeout is too short for LLM generation.
        async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
            response = await client.post(
                "https://chatjimmy.ai/api/chat",
                json={
                    "messages": user_messages,
                    "chatOptions": {"selectedModel": MODEL, "systemPrompt": system_prompt, "topK": top_k},
                    "attachment": None,
                },
            )

        # Don't present an upstream error body as assistant content.
        if response.status_code >= 400:
            return JSONResponse(
                status_code=502,
                content={"error": {"message": f"upstream error {response.status_code}"}},
            )

        # Strip the upstream <|stats|>...<|/stats|> diagnostics block.
        content = re.sub(r"<\|stats\|>[\s\S]*?<\|/stats\|>", "", response.text).strip()

        return {
            "id": generate_id(),
            "object": "chat.completion",
            "created": get_timestamp(),
            "model": MODEL,
            "choices": [
                {"index": 0, "message": {"role": "assistant", "content": content}, "finish_reason": "stop"}
            ],
        }
    except Exception as e:
        # Top-level boundary: surface any unexpected failure as a JSON 500.
        return JSONResponse(status_code=500, content={"error": {"message": str(e)}})
|
|
|
|
@app.post("/chat/completions")
async def chat_completions_alt(request: Request):
    """Unversioned alias that delegates to the /v1/chat/completions handler."""
    result = await chat_completions(request)
    return result
|
|
|
|
if __name__ == "__main__":
    # Serve on all interfaces at the configured port when run as a script.
    uvicorn.run(app, host="0.0.0.0", port=PORT)
|
|