Hivra committed on
Commit d33d331 · verified · 1 Parent(s): db7889b

Update app.py

Files changed (1):
  1. app.py +121 -54

app.py CHANGED
@@ -4,6 +4,18 @@ from fastapi import FastAPI, HTTPException, Request
 import requests
 from pydantic import BaseModel, Field
 from typing import Optional, List, Dict, Any, Literal
+import json
+import time
+import logging
+import sys
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
 
 app = FastAPI(title="OpenAI-Compatible Chat API",
               description="A FastAPI application that provides an OpenAI-compatible interface")
@@ -40,32 +52,62 @@ class ChatCompletionResponse(BaseModel):
     choices: List[ChatCompletionChoice]
     usage: Usage
 
+# Simple API endpoint for debugging
+@app.get("/health")
+async def health_check():
+    return {"status": "ok", "timestamp": time.time()}
+
 # Custom endpoints for graniteAI
-@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
-async def chat_completion(request: ChatCompletionRequest):
-    # Forward to granite API
-    url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
-    }
-
-    # Convert to GraniteAI format if needed
-    granite_data = {
-        "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
-        "model": request.model,
-        "max_tokens": request.max_tokens,
-        "temperature": request.temperature,
-        "top_p": request.top_p
-    }
-
+@app.post("/v1/chat/completions")
+async def chat_completion(request: Request):
     try:
+        # Get raw request data
+        data = await request.json()
+        logger.info(f"Received request: {data}")
+
+        # Extract messages
+        messages = data.get("messages", [])
+        model = data.get("model", "granite-3-2-8b-instruct")
+        temperature = data.get("temperature", 0.7)
+        top_p = data.get("top_p", 0.9)
+        max_tokens = data.get("max_tokens", 2048)
+
+        # Forward to granite API
+        url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
+        }
+
+        # Format request for granite API
+        granite_data = {
+            "messages": messages,
+            "model": model,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p
+        }
+
+        logger.info(f"Sending request to granite API: {granite_data}")
         response = requests.post(url, headers=headers, json=granite_data)
-        response_json = response.json()
+        logger.info(f"Granite API response status: {response.status_code}")
 
-        # Format into OpenAI-compatible response
-        # This assumes the granite API returns something we can parse
-        # You may need to adjust based on actual granite response
+        if response.status_code != 200:
+            logger.error(f"Error from granite API: {response.text}")
+            return {
+                "error": {
+                    "message": f"Error from upstream API: {response.text}",
+                    "type": "api_error",
+                    "status": response.status_code
+                }
+            }
+
+        try:
+            response_json = response.json()
+            logger.info(f"Granite API response: {response_json}")
+        except json.JSONDecodeError:
+            logger.error(f"Failed to parse JSON response: {response.text}")
+            response_json = {"error": "Failed to parse response"}
 
         # Extract the assistant message
         assistant_message = ""
@@ -76,58 +118,83 @@ async def chat_completion(request: ChatCompletionRequest):
             assistant_message = str(response_json)
 
         # Estimate token counts (very rough estimation)
-        prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
+        prompt_tokens = sum(len(msg.get("content", "").split()) for msg in messages)
         completion_tokens = len(assistant_message.split())
 
-        return ChatCompletionResponse(
-            id=f"chatcmpl-{response_json.get('id', 'unknown')}",
-            created=response_json.get("created", 0),
-            model=request.model,
-            choices=[
-                ChatCompletionChoice(
-                    index=0,
-                    message=Message(
-                        role="assistant",
-                        content=assistant_message
-                    )
-                )
+        # Format the response to match OpenAI's format
+        openai_response = {
+            "id": f"chatcmpl-{int(time.time())}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": assistant_message
+                    },
+                    "finish_reason": "stop"
+                }
             ],
-            usage=Usage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=prompt_tokens + completion_tokens
-            )
-        )
+            "usage": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+        }
+
+        logger.info("Returning OpenAI-compatible response")
+        return openai_response
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.exception(f"Exception in chat_completion: {str(e)}")
+        return {
+            "error": {
+                "message": f"Internal server error: {str(e)}",
+                "type": "server_error",
+                "status": 500
+            }
+        }
 
 # Alternative version of the endpoint that directly passes through the raw granite API response
 @app.post("/raw/chat/completions")
 async def raw_chat_completion(request: Request):
-    data = await request.json()
-
-    # Forward to granite API
-    url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
-    }
-
     try:
+        data = await request.json()
+        logger.info(f"Received raw request: {data}")
+
+        # Forward to granite API
+        url = "https://d18n68ssusgr7r.cloudfront.net/v1/chat/completions"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": "Bearer 89de4a8b-9dc6-4617-86a0-28690278b651"
+        }
+
         response = requests.post(url, headers=headers, json=data)
-        return response.json()
+        logger.info(f"Raw API response status: {response.status_code}")
+
+        try:
+            result = response.json()
+            return result
+        except json.JSONDecodeError:
+            logger.error(f"Failed to parse raw JSON response: {response.text}")
+            return {"error": "Failed to parse response", "raw_response": response.text}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.exception(f"Exception in raw_chat_completion: {str(e)}")
+        return {"error": str(e)}
 
 @app.get("/")
 async def root():
     return {
         "message": "Welcome to the OpenAI-Compatible Chat API",
+        "status": "running",
         "endpoints": {
             "/v1/chat/completions": "OpenAI-compatible chat completions endpoint",
-            "/raw/chat/completions": "Direct passthrough to the granite API"
+            "/raw/chat/completions": "Direct passthrough to the granite API",
+            "/health": "Health check endpoint"
         }
     }
 
 if __name__ == "__main__":
+    logger.info("Starting application on port 7860")
     uvicorn.run(app, host="0.0.0.0", port=7860)
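For reference, a quick way to exercise the updated endpoints end to end. This is a minimal sketch, not part of the commit: it assumes app.py is running locally on port 7860 (as in the __main__ block above); the paths, request fields, defaults, and error shape all come from the diff.

# smoke_test.py — illustrative only, not part of this commit.
# Assumes app.py is serving locally: python app.py (port 7860).
import requests

BASE = "http://localhost:7860"

# New /health endpoint added in this commit
print(requests.get(f"{BASE}/health").json())

# OpenAI-style chat completion request; model, temperature, top_p and
# max_tokens are optional — chat_completion() falls back to its defaults
payload = {
    "model": "granite-3-2-8b-instruct",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 64,
}
body = requests.post(f"{BASE}/v1/chat/completions", json=payload).json()

if "error" in body:
    # After this commit, failures come back as a JSON error body
    # (with HTTP 200) instead of a raised HTTPException
    print("error:", body["error"])
else:
    print(body["choices"][0]["message"]["content"])
    print(body["usage"])  # rough whitespace-based token estimates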
 
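Because the handler now returns a plain dict shaped like OpenAI's chat.completion object (id, object, created, model, choices, usage), an off-the-shelf OpenAI client pointed at this server should parse the happy path. A sketch, assuming the openai v1 Python SDK is installed; the api_key value is an arbitrary placeholder, since the proxy never checks incoming auth and injects its own upstream token:

# client_sketch.py — illustrative only; assumes openai>=1.0 and that
# app.py is serving on localhost:7860.
from openai import OpenAI

# The api_key is a dummy: this proxy adds its own Authorization header
# when forwarding to the granite API.
client = OpenAI(base_url="http://localhost:7860/v1", api_key="not-checked")

completion = client.chat.completions.create(
    model="granite-3-2-8b-instruct",
    messages=[{"role": "user", "content": "One-line summary of FastAPI?"}],
)
print(completion.choices[0].message.content)
print(completion.usage)  # the rough estimate computed in chat_completion()

One trade-off visible in the diff: error cases now return a JSON {"error": ...} body with HTTP status 200 instead of raising HTTPException, so a strict SDK client will hit a parse failure rather than a typed API error; returning the same body via fastapi.responses.JSONResponse with an explicit status_code would keep the new logging while preserving client compatibility.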