cotcotquedec committed on
Commit
2a473f0
·
1 Parent(s): 8131ac3

feat(app): introduce FastAPI application with Docker support

Browse files

This commit introduces a new FastAPI application setup with Docker support. It includes the following changes:

- **Dockerfile**: A new Dockerfile is added to facilitate containerization of the application. It sets up a Python 3.12-slim environment, installs necessary system and Python dependencies, and configures the application to run using Uvicorn on port 9099.

- **app.py**: A new FastAPI application is created with endpoints to check server status (`/ping`), retrieve available Anthropic models (`/models`), and generate chat completions (`/v1/chat/completions`). The application leverages the Anthropic API for generating chat completions and supports both streaming and non-streaming responses.

- **requirements.txt**: A new requirements file is added listing the necessary Python packages: FastAPI, Uvicorn, Pydantic, and Anthropic.

These changes lay the foundation for deploying a scalable and containerized chat completion service using FastAPI and Docker.

Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +153 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the image small; the apt cache is removed
# in the same layer so it never lands in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached independently
# of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . .

# Expose the port the app runs on
EXPOSE 9099

# Command to run the application.
# BUG FIX: the FastAPI instance is defined in app.py, so the import string
# must be "app:app" — the original "main:app" made the container fail at
# startup with "Could not import module 'main'".
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "9099"]
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import time
from typing import List, Optional

from anthropic import Anthropic
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

app = FastAPI()

# Anthropic client. The key comes from the ANTHROPIC_API_KEY environment
# variable; if it is unset the client is built with api_key=None and every
# API call will fail at request time rather than at import time.
client = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

# Model identifiers advertised by the /models endpoint.
AVAILABLE_MODELS = [
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-5-sonnet-20241022",
]
+
23
class Message(BaseModel):
    """One chat turn: who is speaking and what was said."""

    # Speaker role, e.g. "user" or "assistant" (not validated here).
    role: str
    # Plain-text body of the turn.
    content: str
26
+
27
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible)."""

    # Anthropic model identifier the request is routed to.
    model: str
    # Conversation history, oldest message first.
    messages: List[Message]
    # True -> the reply is streamed as server-sent events.
    stream: bool = False
    # Upper bound on generated tokens; defaults to 1024.
    max_tokens: Optional[int] = 1024
32
+
33
+
34
+
35
+ @app.get("/ping")
36
+ def pong():
37
+ return "Pong"
38
+
39
+ @app.get("/models")
40
+ async def get_models():
41
+ """Get available Anthropic models."""
42
+ models = [
43
+ {
44
+ "id": model_id,
45
+ "object": "model",
46
+ "name": f"🤖 {model_id}",
47
+ "created": int(time.time()),
48
+ "owned_by": "anthropic",
49
+ "pipeline": {"type": "custom", "valves": False}
50
+ }
51
+ for model_id in AVAILABLE_MODELS
52
+ ]
53
+
54
+ return JSONResponse(
55
+ content={
56
+ "data": models,
57
+ "object": "list",
58
+ "pipelines": True,
59
+ }
60
+ )
61
+
62
+ return {"data": models, "object": "list"}
63
+
64
+ @app.post("/v1/chat/completions")
65
+ async def create_chat_completion(request: ChatCompletionRequest):
66
+ """Generate chat completions using Anthropic models."""
67
+ try:
68
+ if request.stream:
69
+ return StreamingResponse(
70
+ stream_response(request),
71
+ media_type="text/event-stream"
72
+ )
73
+ else:
74
+ return await generate_completion(request)
75
+ except Exception as e:
76
+ raise HTTPException(status_code=500, detail=str(e))
77
+
78
+
79
+
80
async def generate_completion(request: ChatCompletionRequest):
    """Produce a single (non-streaming) completion in OpenAI response format.

    Calls the Anthropic Messages API and reshapes the result into an
    OpenAI-style ``chat.completion`` payload.

    NOTE(review): ``client.messages.create`` is a blocking call inside an
    async function, so it occupies the event loop for the duration of the
    upstream request; ``anthropic.AsyncAnthropic`` would avoid that —
    left unchanged here to keep the module-level client as-is.
    """
    messages = [{"role": m.role, "content": m.content} for m in request.messages]

    response = client.messages.create(
        model=request.model,
        max_tokens=request.max_tokens,
        messages=messages,
    )

    # BUG FIX: report "length" when Anthropic stopped at the token limit
    # (stop_reason == "max_tokens"); the original always claimed "stop",
    # hiding truncation from OpenAI-compatible clients.
    finish_reason = "length" if response.stop_reason == "max_tokens" else "stop"

    return {
        "id": response.id,
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                # Anthropic returns a list of content blocks; empty list -> "".
                "content": response.content[0].text if response.content else "",
            },
            "finish_reason": finish_reason,
        }],
        "usage": {
            "prompt_tokens": response.usage.input_tokens,
            "completion_tokens": response.usage.output_tokens,
            "total_tokens": response.usage.input_tokens + response.usage.output_tokens,
        },
    }
109
+
110
async def stream_response(request: ChatCompletionRequest):
    """Stream the completion as OpenAI-style SSE ``chat.completion.chunk`` events.

    Yields ``data: <json>\\n\\n`` frames for each text delta, a final frame
    with ``finish_reason: "stop"``, then the ``data: [DONE]`` terminator.

    NOTE(review): the underlying ``client.messages.create(..., stream=True)``
    call is synchronous; each chunk is pulled on the event loop thread.
    """
    messages = [{"role": m.role, "content": m.content} for m in request.messages]

    response = client.messages.create(
        model=request.model,
        max_tokens=request.max_tokens,
        messages=messages,
        stream=True
    )

    # BUG FIX: the message id is only available on the "message_start" event
    # (as chunk.message.id). The original read chunk.message.id on
    # "content_block_delta"/"content_block_stop" events, which have no
    # .message attribute, raising AttributeError on the first delta.
    message_id = ""

    for chunk in response:
        if chunk.type == "message_start":
            message_id = chunk.message.id
            continue

        if chunk.type == "content_block_delta":
            data = {
                "id": message_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": request.model,
                "choices": [{
                    "index": 0,
                    "delta": {"content": chunk.delta.text if hasattr(chunk.delta, "text") else ""},
                    "finish_reason": None
                }]
            }
            yield f"data: {json.dumps(data)}\n\n"

        elif chunk.type == "content_block_stop":
            data = {
                "id": message_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": request.model,
                "choices": [{
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop"
                }]
            }
            yield f"data: {json.dumps(data)}\n\n"

    yield "data: [DONE]\n\n"
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ anthropic