Hivra commited on
Commit
db7889b
·
verified ·
1 Parent(s): 60e2d8b

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -0
  2. app.py +133 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the OpenAI-compatible FastAPI proxy.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# FIX: without this, Python buffers stdout/stderr and container logs can be
# delayed or lost on crash.
ENV PYTHONUNBUFFERED=1

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Entry point file for Hugging Face Spaces - OpenAI Compatible
2
+ import uvicorn
3
+ from fastapi import FastAPI, HTTPException, Request
4
+ import requests
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List, Dict, Any, Literal
7
+
8
# ASGI application object; FastAPI generates the OpenAPI docs from this metadata.
app = FastAPI(
    title="OpenAI-Compatible Chat API",
    description="A FastAPI application that provides an OpenAI-compatible interface",
)
10
+
11
+ # Models for OpenAI compatibility
12
class Message(BaseModel):
    """A single chat message in the OpenAI wire format."""

    role: str                   # e.g. "system" / "user" / "assistant" — not validated here
    content: str                # the message text
    name: Optional[str] = None  # optional participant name, per the OpenAI schema
16
+
17
class ChatCompletionRequest(BaseModel):
    """Request body for the OpenAI-compatible chat completions endpoint."""

    model: str = "granite-3-2-8b-instruct"  # default upstream model
    messages: List[Message]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
    max_tokens: Optional[int] = 2048
    # NOTE(review): accepted for OpenAI API compatibility, but streaming does
    # not appear to be implemented by the handler that consumes this model.
    stream: Optional[bool] = False
24
+
25
class ChatCompletionChoice(BaseModel):
    """One completion choice inside a chat.completion response."""

    index: int
    message: Message
    finish_reason: str = "stop"  # always "stop" — no other reasons produced here
29
+
30
class Usage(BaseModel):
    """Token accounting reported with each completion (estimated, not exact)."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
34
+
35
class ChatCompletionResponse(BaseModel):
    """Top-level OpenAI-compatible chat.completion payload."""

    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Usage
42
+
43
# Custom endpoints for graniteAI
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completion(request: ChatCompletionRequest):
    """Proxy an OpenAI-style chat completion to the upstream granite API.

    Forwards the messages and sampling parameters, then re-shapes the
    upstream reply into an OpenAI-compatible ``ChatCompletionResponse``.
    Token counts are rough whitespace word counts, not tokenizer counts.

    Raises:
        HTTPException: 500 if the upstream call or response parsing fails.
    """
    url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
    # SECURITY(review): the bearer token is hard-coded in source; it should be
    # loaded from an environment variable / secret store instead.
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
    }

    # Re-shape the request into the upstream (granite) payload.
    granite_data = {
        "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
        "model": request.model,
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "top_p": request.top_p
    }
    # NOTE: request.stream is currently ignored; replies are always non-streaming.

    try:
        # FIX: the original call had no timeout, so a stalled upstream would
        # hang this handler — and, since requests.post is a blocking call in
        # an async handler, the whole event loop — indefinitely.
        # NOTE(review): consider a plain `def` endpoint (FastAPI threadpool)
        # or an async HTTP client to avoid blocking the loop entirely.
        response = requests.post(url, headers=headers, json=granite_data, timeout=60)
        response_json = response.json()

        # Extract the assistant message; fall back to the stringified payload
        # when the upstream response does not match the expected shape.
        if "choices" in response_json and len(response_json["choices"]) > 0:
            assistant_message = response_json["choices"][0]["message"]["content"]
        else:
            assistant_message = str(response_json)

        # Rough token estimate: whitespace word count, not a real tokenizer.
        prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
        completion_tokens = len(assistant_message.split())

        return ChatCompletionResponse(
            id=f"chatcmpl-{response_json.get('id', 'unknown')}",
            created=response_json.get("created", 0),
            model=request.model,
            choices=[
                ChatCompletionChoice(
                    index=0,
                    message=Message(role="assistant", content=assistant_message),
                )
            ],
            usage=Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )
    except Exception as e:
        # Boundary catch: surface any upstream/parse failure as a JSON 500
        # instead of a bare traceback.
        raise HTTPException(status_code=500, detail=str(e))
103
+
104
# Alternative version of the endpoint that directly passes through the raw granite API response
@app.post("/raw/chat/completions")
async def raw_chat_completion(request: Request):
    """Forward the incoming JSON body verbatim to the granite API and return its raw reply.

    Raises:
        HTTPException: 500 if the upstream call or JSON decoding fails.
    """
    data = await request.json()

    url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
    # SECURITY(review): hard-coded bearer token duplicated from the other
    # endpoint; move to an environment variable / secret store.
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
    }

    try:
        # FIX: added a timeout — the original blocking call could hang the
        # event loop indefinitely on a stalled upstream.
        response = requests.post(url, headers=headers, json=data, timeout=60)
        return response.json()
    except Exception as e:
        # Boundary catch: report upstream failures as a JSON 500.
        raise HTTPException(status_code=500, detail=str(e))
121
+
122
@app.get("/")
async def root():
    """Describe the service and list its available endpoints."""
    endpoint_docs = {
        "/v1/chat/completions": "OpenAI-compatible chat completions endpoint",
        "/raw/chat/completions": "Direct passthrough to the granite API",
    }
    return {
        "message": "Welcome to the OpenAI-Compatible Chat API",
        "endpoints": endpoint_docs,
    }
131
+
132
# Local/container entry point: serve on all interfaces; port 7860 is the
# port this app's Dockerfile exposes (the Hugging Face Spaces convention).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Web framework and ASGI server
fastapi==0.104.1
uvicorn==0.23.2
# Outbound HTTP to the upstream granite API
requests==2.31.0
# Request/response models used by the FastAPI endpoints
pydantic==2.4.2
# NOTE(review): python-dotenv is not imported by app.py as committed —
# presumably intended for loading secrets from a .env file; confirm before removing.
python-dotenv==1.0.0