sdsdsdghfgf committed on
Commit
b9a0449
·
verified ·
1 Parent(s): a30a065

Update openai_ondemand_adapter.py

Browse files
Files changed (1) hide show
  1. openai_ondemand_adapter.py +470 -297
openai_ondemand_adapter.py CHANGED
@@ -1,318 +1,491 @@
1
- from flask import Flask, request, Response, jsonify
2
  import requests
3
- import uuid
4
- import time
5
  import json
6
- import threading
7
- import logging
 
8
  import os
 
 
9
 
10
- # ====== 读取 Huggingface Secret 配置的私有key =======
11
- PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
12
- PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
13
- SAFE_HEADERS = ["Authorization", "X-API-KEY"]
14
-
15
- # 全局接口访问权限检查
16
- def check_private_key():
17
- # 可以在这里放宽部分接口,比如首页等
18
- if request.path in ["/", "/favicon.ico"]:
19
- return
20
- key = None
21
- for header in SAFE_HEADERS:
22
- key = request.headers.get(header)
23
- if key:
24
- if header == "Authorization" and key.startswith("Bearer "):
25
- key = key[len("Bearer "):].strip()
26
- break
27
- if not key or key != PRIVATE_KEY:
28
- return jsonify({"error": "Unauthorized, must provide correct Authorization or X-API-KEY"}), 401
29
-
30
- # 应用所有API鉴权
31
  app = Flask(__name__)
32
- app.before_request(check_private_key)
33
-
34
- # ========== KEY池(每行一个)==========
35
- ONDEMAND_APIKEYS = os.environ.get("ONDEMAND_APIKEYS", "").split(",") if os.environ.get("ONDEMAND_APIKEYS") else []
36
- BAD_KEY_RETRY_INTERVAL = 600 #
37
- SESSION_TIMEOUT = 600 # 对话超时时间(10分钟)
38
-
39
- # ========== OnDemand模型映射 ==========
40
- MODEL_MAP = {
41
- "o3-mini": "predefined-openai-gpto3-mini",
42
- "gpt-4o": "predefined-openai-gpt4o",
43
- "gpt-4.1": "predefined-openai-gpt4.1",
44
- "gpt-4.1-mini": "predefined-openai-gpt4.1-mini",
45
- "gpt-4.1-nano": "predefined-openai-gpt4.1-nano",
46
- "gpt-4o-mini": "predefined-openai-gpt4o-mini",
47
- "deepseek-v3": "predefined-deepseek-v3",
48
- "deepseek-r1": "predefined-deepseek-r1",
49
- "claude-3-7-sonnet": "predefined-claude-3.7-sonnet",
50
- "gemini-2.0-flash": "predefined-gemini-2.0-flash",
51
- }
52
- DEFAULT_ONDEMAND_MODEL = "predefined-openai-gpt4o"
53
- # ==========================================
54
-
55
- class KeyManager:
56
- def __init__(self, key_list):
57
- self.key_list = list(key_list)
58
- self.lock = threading.Lock()
59
- self.key_status = {k: {"bad": False, "bad_ts": None} for k in self.key_list}
60
- self.idx = 0
61
- # 新增:当前正在使用的key和session
62
- self.current_key = None
63
- self.current_session = None
64
- self.last_used_time = None
65
-
66
- def display_key(self, key):
67
- return f"{key[:6]}...{key[-4:]}"
68
-
69
- def get(self):
70
- with self.lock:
71
- now = time.time()
72
- # 检查对话是否超时
73
- if self.current_key and self.last_used_time and (now - self.last_used_time > SESSION_TIMEOUT):
74
- print(f"【对话超时】上次使用时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.last_used_time))}")
75
- print(f"【对话超时】当前时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now))}")
76
- print(f"【对话超时】超时{SESSION_TIMEOUT//60}分钟,切换新会话")
77
- self.current_key = None
78
- self.current_session = None
79
-
80
- # 如果已有正在使用的key,继续使用
81
- if self.current_key:
82
- if not self.key_status[self.current_key]["bad"]:
83
- print(f"【对话请求】【继续使用API KEY: {self.display_key(self.current_key)}】【状态:正常】")
84
- self.last_used_time = now
85
- return self.current_key
86
- else:
87
- # 当前key已标记为异常,需要切换
88
- self.current_key = None
89
- self.current_session = None
90
-
91
- # 如果没有当前key或当前key无效,选择新的key
92
- total = len(self.key_list)
93
- for _ in range(total):
94
- key = self.key_list[self.idx]
95
- self.idx = (self.idx + 1) % total
96
- s = self.key_status[key]
97
- if not s["bad"]:
98
- print(f"【对话请求】【使用新API KEY: {self.display_key(key)}】【状态:正常】")
99
- self.current_key = key
100
- self.current_session = None # 强制创建新会话
101
- self.last_used_time = now
102
- return key
103
- if s["bad"] and s["bad_ts"]:
104
- ago = now - s["bad_ts"]
105
- if ago >= BAD_KEY_RETRY_INTERVAL:
106
- print(f"【KEY自动尝试恢复】API KEY: {self.display_key(key)} 满足重试周期,标记为正常")
107
- self.key_status[key]["bad"] = False
108
- self.key_status[key]["bad_ts"] = None
109
- self.current_key = key
110
- self.current_session = None # 强制创建新会话
111
- self.last_used_time = now
112
- return key
113
-
114
- print("【警告】全部KEY已被禁用,强制选用第一个KEY继续尝试:", self.display_key(self.key_list[0]))
115
- for k in self.key_list:
116
- self.key_status[k]["bad"] = False
117
- self.key_status[k]["bad_ts"] = None
118
- self.idx = 0
119
- self.current_key = self.key_list[0]
120
- self.current_session = None # 强制创建新会话
121
- self.last_used_time = now
122
- print(f"【对话请求】【使用API KEY: {self.display_key(self.current_key)}】【状态:强制尝试(全部异常)】")
123
- return self.current_key
124
-
125
- def mark_bad(self, key):
126
- with self.lock:
127
- if key in self.key_status and not self.key_status[key]["bad"]:
128
- print(f"【禁用KEY】API KEY: {self.display_key(key)},接口返回无效(将在{BAD_KEY_RETRY_INTERVAL//60}分钟后自动重试)")
129
- self.key_status[key]["bad"] = True
130
- self.key_status[key]["bad_ts"] = time.time()
131
- if self.current_key == key:
132
- self.current_key = None
133
- self.current_session = None
134
-
135
- def get_session(self, apikey):
136
- with self.lock:
137
- if not self.current_session:
138
- try:
139
- self.current_session = create_session(apikey)
140
- print(f"【创建新会话】SESSION ID: {self.current_session}")
141
- except Exception as e:
142
- print(f"【创建会话失败】错误: {str(e)}")
143
- raise
144
- self.last_used_time = time.time()
145
- return self.current_session
146
-
147
- keymgr = KeyManager(ONDEMAND_APIKEYS)
148
-
149
- ONDEMAND_API_BASE = "https://api.on-demand.io/chat/v1"
150
-
151
- def get_endpoint_id(openai_model):
152
- m = str(openai_model or "").lower().replace(" ", "")
153
- return MODEL_MAP.get(m, DEFAULT_ONDEMAND_MODEL)
154
-
155
- def create_session(apikey, external_user_id=None, plugin_ids=None):
156
- url = f"{ONDEMAND_API_BASE}/sessions"
157
- payload = {"externalUserId": external_user_id or str(uuid.uuid4())}
158
- if plugin_ids is not None:
159
- payload["pluginIds"] = plugin_ids
160
- headers = {"apikey": apikey, "Content-Type": "application/json"}
161
- resp = requests.post(url, json=payload, headers=headers, timeout=20)
162
- resp.raise_for_status()
163
- return resp.json()["data"]["id"]
164
-
165
- def format_openai_sse_delta(chunk_str):
166
- return f"data: {json.dumps(chunk_str, ensure_ascii=False)}\n\n"
167
-
168
- @app.route("/v1/chat/completions", methods=["POST"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  def chat_completions():
170
- keymgr.current_session = None
171
- data = request.json
172
- if not data or "messages" not in data:
173
- return jsonify({"error": "请求缺少messages字段"}), 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- messages = data["messages"]
176
- openai_model = data.get("model", "gpt-4o")
177
- endpoint_id = get_endpoint_id(openai_model)
178
- is_stream = bool(data.get("stream", False))
179
 
180
- user_msg = None
 
 
 
 
181
  for msg in reversed(messages):
182
- if msg.get("role") == "user":
183
- user_msg = msg.get("content")
184
  break
185
- if user_msg is None:
186
- return jsonify({"error": "未找到用户消息"}), 400
187
-
188
- def with_valid_key(func):
189
- bad_cnt = 0
190
- max_retry = len(keymgr.key_list)*2
191
- while bad_cnt < max_retry:
192
- key = keymgr.get()
193
- try:
194
- return func(key)
195
- except Exception as e:
196
- if hasattr(e, 'response'):
197
- r = e.response
198
- if r.status_code in (401, 403, 429, 500):
199
- keymgr.mark_bad(key)
200
- bad_cnt += 1
201
- continue
202
- raise
203
- return jsonify({"error": "���有可用API KEY,请补充新KEY或联系技术支持"}), 500
204
-
205
- if is_stream:
206
- def generate():
207
- def do_once(apikey):
208
- # 使用KeyManager获取或创建session
209
- sid = keymgr.get_session(apikey)
210
- url = f"{ONDEMAND_API_BASE}/sessions/{sid}/query"
211
- payload = {
212
- "query": user_msg,
213
- "endpointId": endpoint_id,
214
- "pluginIds": [],
215
- "responseMode": "stream"
216
- }
217
- headers = {"apikey": apikey, "Content-Type": "application/json", "Accept": "text/event-stream"}
218
- with requests.post(url, json=payload, headers=headers, stream=True, timeout=120) as resp:
219
- if resp.status_code != 200:
220
- raise requests.HTTPError(response=resp)
221
- answer_acc = ""
222
- first_chunk = True
223
- for line in resp.iter_lines():
224
- if not line:
225
- continue
226
- line = line.decode("utf-8")
227
- if line.startswith("data:"):
228
- datapart = line[5:].strip()
229
- if datapart == "[DONE]":
230
- yield "data: [DONE]\n\n"
231
- break
232
- elif datapart.startswith("[ERROR]:"):
233
- err_json = datapart[len("[ERROR]:"):].strip()
234
- yield format_openai_sse_delta({"error": err_json})
235
- break
236
- else:
237
- try:
238
- js = json.loads(datapart)
239
- except Exception:
240
- continue
241
- if js.get("eventType") == "fulfillment":
242
- delta = js.get("answer", "")
243
- answer_acc += delta
244
- chunk = {
245
- "id": "chatcmpl-" + str(uuid.uuid4())[:8],
246
- "object": "chat.completion.chunk",
247
- "created": int(time.time()),
248
- "model": openai_model,
249
- "choices": [{
250
- "delta": {
251
- "role": "assistant",
252
- "content": delta
253
- } if first_chunk else {
254
- "content": delta
255
- },
256
  "index": 0,
257
  "finish_reason": None
258
- }]
259
- }
260
- yield format_openai_sse_delta(chunk)
261
- first_chunk = False
262
- yield "data: [DONE]\n\n"
263
- yield from with_valid_key(do_once)
264
- return Response(generate(), content_type='text/event-stream')
265
-
266
- def nonstream(apikey):
267
- # 使用KeyManager获取或创建session
268
- sid = keymgr.get_session(apikey)
269
- url = f"{ONDEMAND_API_BASE}/sessions/{sid}/query"
270
- payload = {
271
- "query": user_msg,
272
- "endpointId": endpoint_id,
273
- "pluginIds": [],
274
- "responseMode": "sync"
275
- }
276
- headers = {"apikey": apikey, "Content-Type": "application/json"}
277
- resp = requests.post(url, json=payload, headers=headers, timeout=120)
278
- if resp.status_code != 200:
279
- raise requests.HTTPError(response=resp)
280
- ai_response = resp.json()["data"]["answer"]
281
- resp_obj = {
282
- "id": "chatcmpl-" + str(uuid.uuid4())[:8],
283
  "object": "chat.completion",
284
  "created": int(time.time()),
285
- "model": openai_model,
286
  "choices": [
287
  {
288
- "index": 0,
289
- "message": {"role": "assistant", "content": ai_response},
290
- "finish_reason": "stop"
 
 
 
291
  }
292
  ],
293
- "usage": {}
 
 
 
 
294
  }
295
- return jsonify(resp_obj)
296
-
297
- return with_valid_key(nonstream)
298
-
299
- @app.route("/v1/models", methods=["GET"])
300
- def models():
301
- model_objs = []
302
- for mdl in MODEL_MAP.keys():
303
- model_objs.append({
304
- "id": mdl,
305
- "object": "model",
306
- "owned_by": "ondemand-proxy"
307
- })
308
- uniq = {m["id"]: m for m in model_objs}.values()
309
- return jsonify({
310
- "object": "list",
311
- "data": list(uniq)
312
- })
313
 
314
  if __name__ == "__main__":
315
- log_fmt = '[%(asctime)s] %(levelname)s: %(message)s'
316
- logging.basicConfig(level=logging.INFO, format=log_fmt)
317
- print("======== OnDemand KEY池数量:", len(ONDEMAND_APIKEYS), "========")
318
- app.run(host="0.0.0.0", port=7860, debug=False)
 
 
 
import requests
import json
import base64
from typing import Dict, Optional
from flask import Flask, request, Response, stream_with_context
import os
import time
from datetime import datetime, timedelta

# Initialize Flask app
app = Flask(__name__)

# Account credentials come from config.json next to this file when present,
# otherwise from the ONDEMAND_ACCOUNTS environment variable, which holds a
# JSON document of the same shape:
#   {"accounts": [{"email": "...", "password": "..."}, ...]}
CONFIG_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json')
if os.path.exists(CONFIG_FILE):
    with open(CONFIG_FILE, 'r') as f:
        config = json.load(f)
    ACCOUNTS = config.get('accounts', [])
else:
    ACCOUNTS = []
    accounts_env = os.getenv("ONDEMAND_ACCOUNTS", "")
    if accounts_env:
        try:
            ACCOUNTS = json.loads(accounts_env).get('accounts', [])
        except json.JSONDecodeError:
            print("Error decoding ONDEMAND_ACCOUNTS environment variable. Using empty accounts list.")

# Fail fast at import time: the proxy cannot serve anything without accounts.
if not ACCOUNTS:
    raise ValueError("No accounts found in config.json or environment variable ONDEMAND_ACCOUNTS.")

# Current account index (cursor for round-robin selection in get_next_client).
current_account_index = 0

# In-memory storage for session and last interaction time per client.
# Format: {client_id: {"session_id": str, "last_time": datetime,
#                      "user_id": str, "company_id": str, "token": str}}
CLIENT_SESSIONS = {}
class OnDemandAPIClient:
    """Client for the on-demand.io gateway (auth) and chat APIs for one account.

    Lifecycle: ``sign_in()`` -> ``create_session()`` -> ``send_query()``.
    A 401 from the chat API is retried once after ``refresh_token_if_needed()``.
    All tokens/ids are kept as instance state; failures are reported via
    return values (bool/None/error dict), never raised to the caller.
    """

    # ``requests`` has NO default timeout — without one a stalled upstream
    # connection would hang the proxy thread forever.
    AUTH_TIMEOUT = 30     # seconds for sign-in / refresh / session calls
    QUERY_TIMEOUT = 120   # seconds for (potentially slow) chat queries

    def __init__(self, email: str, password: str):
        self.email = email
        self.password = password
        self.token = ""           # short-lived bearer token
        self.refresh_token = ""   # used to mint a new bearer token on 401
        self.user_id = ""
        self.company_id = ""
        self.session_id = ""      # chat session id; carries conversation context
        self.base_url = "https://gateway.on-demand.io/v1"
        self.chat_base_url = "https://api.on-demand.io/chat/v1/client"

    def get_authorization(self) -> str:
        """Return the Basic Authorization credential (base64 of email:password)."""
        text = f"{self.email}:{self.password}"
        encoded = base64.b64encode(text.encode("utf-8")).decode("utf-8")
        return encoded

    def sign_in(self) -> bool:
        """Login to get token, refreshToken, userId, and companyId.

        Returns True only when all required fields were extracted.
        """
        url = f"{self.base_url}/auth/user/signin"
        payload = {
            "accountType": "default"
        }
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
            'Accept': "application/json, text/plain, */*",
            'Accept-Encoding': "gzip, deflate, br, zstd",
            'Content-Type': "application/json",
            'Authorization': f"Basic {self.get_authorization()}",
            'Referer': "https://app.on-demand.io/"
        }

        try:
            response = requests.post(url, data=json.dumps(payload), headers=headers,
                                     timeout=self.AUTH_TIMEOUT)
            response.raise_for_status()
            data = response.json()
            print("Raw response from sign_in:", json.dumps(data, indent=2))
            self.token = data.get('data', {}).get('tokenData', {}).get('token', '')
            self.refresh_token = data.get('data', {}).get('tokenData', {}).get('refreshToken', '')
            self.user_id = data.get('data', {}).get('user', {}).get('userId', '')
            self.company_id = data.get('data', {}).get('user', {}).get('default_company_id', '')
            print(f"Extracted Token: {self.token[:10]}... (truncated for security)")
            print(f"Extracted Refresh Token: {self.refresh_token[:10]}... (truncated for security)")
            print(f"Extracted User ID: {self.user_id}")
            print(f"Extracted Company ID: {self.company_id}")
            if self.token and self.user_id and self.company_id:
                print(f"Login successful for {self.email}. Token and user info retrieved.")
                return True
            else:
                print("Login successful but failed to extract required fields.")
                return False
        except requests.exceptions.RequestException as e:
            # Timeouts are RequestException subclasses, so a hung upstream now
            # surfaces here instead of blocking forever.
            print(f"Login failed for {self.email}: {e}")
            return False

    def refresh_token_if_needed(self) -> bool:
        """Exchange the refresh token for a new bearer token.

        Returns False when no tokens are held yet or the refresh call fails.
        """
        if not self.token or not self.refresh_token:
            print("No token or refresh token available. Please log in first.")
            return False

        url = f"{self.base_url}/auth/user/refresh_token"
        payload = {
            "data": {
                "token": self.token,
                "refreshToken": self.refresh_token
            }
        }
        headers = {
            'Content-Type': "application/json"
        }

        try:
            response = requests.post(url, data=json.dumps(payload), headers=headers,
                                     timeout=self.AUTH_TIMEOUT)
            response.raise_for_status()
            data = response.json()
            print("Raw response from refresh_token:", json.dumps(data, indent=2))
            self.token = data.get('data', {}).get('token', '')
            self.refresh_token = data.get('data', {}).get('refreshToken', '')
            print(f"New Token: {self.token[:10]}... (truncated for security)")
            print("Token refreshed successfully.")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Token refresh failed: {e}")
            return False

    def create_session(self, external_user_id: str = "user-app-12345") -> Optional[str]:
        """Create a new chat session; returns the session id or None on failure."""
        if not self.token or not self.user_id or not self.company_id:
            print("No token or user info available. Please log in or refresh token.")
            return None

        url = f"{self.chat_base_url}/sessions"
        payload = {
            "externalUserId": external_user_id,
            "pluginIds": []
        }
        headers = {
            'Content-Type': "application/json",
            'Authorization': f"Bearer {self.token}",
            'x-company-id': self.company_id,
            'x-user-id': self.user_id
        }
        print(f"Creating session with company_id: {self.company_id}, user_id: {self.user_id}")

        try:
            response = requests.post(url, data=json.dumps(payload), headers=headers,
                                     timeout=self.AUTH_TIMEOUT)
            if response.status_code == 401:
                # Single retry with a refreshed bearer token; if the refresh
                # fails, raise_for_status below reports the original 401.
                print("Token expired, refreshing...")
                if self.refresh_token_if_needed():
                    headers['Authorization'] = f"Bearer {self.token}"
                    response = requests.post(url, data=json.dumps(payload), headers=headers,
                                             timeout=self.AUTH_TIMEOUT)
            response.raise_for_status()
            data = response.json()
            print("Raw response from create_session:", json.dumps(data, indent=2))
            self.session_id = data.get('data', {}).get('id', '')
            print(f"Session created successfully. Session ID: {self.session_id}")
            return self.session_id
        except requests.exceptions.RequestException as e:
            print(f"Session creation failed: {e}")
            return None

    def send_query(self, query: str, endpoint_id: str = "predefined-claude-3.7-sonnet", stream: bool = False) -> Dict:
        """Send a query to the current chat session.

        Returns ``{"stream": True, "response": <requests.Response>}`` for
        streaming calls, ``{"stream": False, "content": <str>}`` for sync
        calls, or ``{"error": <str>}`` on any failure.
        """
        if not self.session_id or not self.token:
            print("No session ID or token available. Please create a session first.")
            return {"error": "No session or token available"}

        url = f"{self.chat_base_url}/sessions/{self.session_id}/query"
        payload = {
            "endpointId": endpoint_id,
            "query": query,
            "pluginIds": [],
            "reasoningMode": "high",
            "responseMode": "stream" if stream else "sync",
            "debugMode": "on",
            "modelConfigs": {
                "fulfillmentPrompt": "",
                "stopTokens": [],
                "maxTokens": 0,
                "temperature": 0,
                "presencePenalty": 0,
                "frequencyPenalty": 0,
                "topP": 1
            },
            "fulfillmentOnly": False
        }
        headers = {
            'Content-Type': "application/json",
            'Authorization': f"Bearer {self.token}",
            'x-company-id': self.company_id
        }

        try:
            if stream:
                response = requests.post(url, data=json.dumps(payload), headers=headers, stream=True,
                                         timeout=self.QUERY_TIMEOUT)
                if response.status_code == 401:
                    print("Token expired, refreshing...")
                    if self.refresh_token_if_needed():
                        headers['Authorization'] = f"Bearer {self.token}"
                        response = requests.post(url, data=json.dumps(payload), headers=headers, stream=True,
                                                 timeout=self.QUERY_TIMEOUT)
                response.raise_for_status()
                # Caller is responsible for iterating (and thereby closing)
                # the streamed response.
                return {"stream": True, "response": response}
            else:
                response = requests.post(url, data=json.dumps(payload), headers=headers,
                                         timeout=self.QUERY_TIMEOUT)
                if response.status_code == 401:
                    print("Token expired, refreshing...")
                    if self.refresh_token_if_needed():
                        headers['Authorization'] = f"Bearer {self.token}"
                        response = requests.post(url, data=json.dumps(payload), headers=headers,
                                                 timeout=self.QUERY_TIMEOUT)
                response.raise_for_status()
                # NOTE(review): responseMode is "sync" here yet the body is
                # parsed as SSE framing; the upstream appears to reply with
                # "data:" lines either way — confirm against the API docs.
                full_answer = ""
                for line in response.iter_lines():
                    if line:
                        decoded_line = line.decode('utf-8')
                        if decoded_line.startswith("data:"):
                            # FIX: SSE lines are "data: <payload>" — the raw
                            # slice kept the leading space, so "[DONE]" never
                            # matched (json.loads then failed harmlessly and
                            # the sentinel was skipped). strip() restores the
                            # intended comparison without affecting JSON parse.
                            json_str = decoded_line[len("data:"):].strip()
                            if json_str == "[DONE]":
                                break
                            try:
                                event_data = json.loads(json_str)
                                if event_data.get("eventType", "") == "fulfillment":
                                    full_answer += event_data.get("answer", "")
                            except json.JSONDecodeError:
                                continue
                return {"stream": False, "content": full_answer}
        except requests.exceptions.RequestException as e:
            print(f"Query failed: {e}")
            return {"error": str(e)}
# Account rotation: each call hands out a client for the next account.
def get_next_client():
    """Build an OnDemandAPIClient for the next account in round-robin order."""
    global current_account_index
    chosen = ACCOUNTS[current_account_index]
    email = chosen.get('email')
    password = chosen.get('password')
    print(f"Using account: {email}")
    # Advance the cursor, wrapping around the account list.
    current_account_index = (current_account_index + 1) % len(ACCOUNTS)
    return OnDemandAPIClient(email, password)
# The active client; replaced whenever we rotate to another account.
# NOTE(review): get_next_client() only constructs the client — the actual
# sign-in happens lazily in initialize_client() on the first request.
current_client = get_next_client()

# One-shot flag: set True once the current client has signed in and
# created a chat session (see initialize_client).
initialized = False
@app.before_request
def initialize_client():
    """Lazily sign in and create a chat session before the first request.

    Runs on every request but is a no-op once ``initialized`` is set.

    FIX: the original recursed into itself on sign-in failure, so a pool in
    which every account fails produced unbounded recursion (RecursionError
    inside a request). This version tries each configured account at most
    once per pass using a bounded loop.
    """
    global initialized, current_client
    if initialized:
        return
    for _ in range(len(ACCOUNTS)):
        if current_client.sign_in():
            current_client.create_session()
            initialized = True
            return
        print("Initialization failed. Switching to next account.")
        current_client = get_next_client()
@app.route('/v1/models', methods=['GET'])
def get_models():
    """Return the available models in OpenAI ``/v1/models`` list format.

    The original hand-wrote the same four-field dict ten times; building the
    list from the id table removes the duplication while producing identical
    JSON (one shared ``created`` timestamp instead of ten near-identical ones).
    """
    model_ids = [
        "gpto3-mini",
        "gpt-4o",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-4.1-nano",
        "gpt-4o-mini",
        "deepseek-v3",
        "deepseek-r1",
        "claude-3.7-sonnet",
        "gemini-2.0-flash",
    ]
    created = int(time.time())  # hoisted: one timestamp for the whole listing
    return {
        "object": "list",
        "data": [
            {
                "id": model_id,
                "object": "model",
                "created": created,
                "owned_by": "on-demand.io",
            }
            for model_id in model_ids
        ],
    }
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-compatible chat endpoint proxied onto the on-demand.io chat API.

    Per-client (keyed by remote IP) session bookkeeping lives in
    CLIENT_SESSIONS; sessions idle for >10 minutes are replaced, falling back
    to rotating the account when session creation fails. Only the latest user
    message is forwarded — conversational context is carried by the upstream
    session id, not by replaying history.
    """
    global current_client
    data = request.get_json()
    print("Received OpenAI request:", json.dumps(data, indent=2))

    # Extract client ID (use IP address as a simple identifier for different clients)
    # NOTE(review): behind a reverse proxy all callers share remote_addr and
    # therefore share one session — confirm deployment topology.
    client_id = request.remote_addr  # Alternatively, use a unique ID from request if provided by Cherry Studio

    # Check last interaction time for this client
    current_time = datetime.now()
    if client_id in CLIENT_SESSIONS:
        last_time = CLIENT_SESSIONS[client_id].get("last_time")
        if last_time and current_time - last_time > timedelta(minutes=10):
            print(f"Client {client_id} inactive for over 10 minutes. Switching session or account.")
            # Option 1: Create new session with current account
            new_session = current_client.create_session()
            if new_session:
                CLIENT_SESSIONS[client_id]["session_id"] = new_session
                print(f"New session created for client {client_id}: {new_session}")
            else:
                # Option 2: If session creation fails, switch account
                print("Failed to create new session. Switching to next account.")
                current_client = get_next_client()
                if current_client.sign_in():
                    new_session = current_client.create_session()
                    if new_session:
                        CLIENT_SESSIONS[client_id] = {
                            "session_id": new_session,
                            "last_time": current_time,
                            "user_id": current_client.user_id,
                            "company_id": current_client.company_id,
                            "token": current_client.token
                        }
                        print(f"Switched account and created new session for client {client_id}: {new_session}")
                    else:
                        return {"error": "Failed to create session with new account"}, 500
                else:
                    return {"error": "Failed to login with new account"}, 500
    else:
        # New client, use current session or create one
        if not current_client.session_id:
            if not current_client.sign_in() or not current_client.create_session():
                return {"error": "Failed to initialize client session"}, 500
        CLIENT_SESSIONS[client_id] = {
            "session_id": current_client.session_id,
            "last_time": current_time,
            "user_id": current_client.user_id,
            "company_id": current_client.company_id,
            "token": current_client.token
        }

    # Update last interaction time
    CLIENT_SESSIONS[client_id]["last_time"] = current_time

    # Extract parameters from OpenAI request
    messages = data.get('messages', [])
    stream = data.get('stream', False)
    model = data.get('model', 'claude-3.7-sonnet')

    if not messages:
        return {"error": "No messages found in request"}, 400

    # Extract only the latest user message as the query (rely on session_id for context)
    latest_user_query = ""
    for msg in reversed(messages):
        if msg.get('role', '') == 'user':
            latest_user_query = msg.get('content', '')
            break
    if not latest_user_query:
        return {"error": "No user message found in request"}, 400

    # Prefix a fixed instruction ("think in English, answer in Chinese,
    # don't mention context or reasoning") onto every forwarded query.
    query = f"请用英文思考,用中文回答以下问题,不要提及上下文或推理过程:{latest_user_query}"
    print(f"Constructed Query for on-demand.io (relying on session_id for context, with Chinese instruction): {query}")

    # Map the OpenAI-style model ID onto the on-demand.io endpoint ID.
    model_mapping = {
        "gpto3-mini": "predefined-openai-gpto3-mini",
        "gpt-4o": "predefined-openai-gpt4o",
        "gpt-4.1": "predefined-openai-gpt4.1",
        "gpt-4.1-mini": "predefined-openai-gpt4.1-mini",
        "gpt-4.1-nano": "predefined-openai-gpt4.1-nano",
        "gpt-4o-mini": "predefined-openai-gpt4o-mini",
        "deepseek-v3": "predefined-deepseek-v3",
        "deepseek-r1": "predefined-deepseek-r1",
        "claude-3.7-sonnet": "predefined-claude-3.7-sonnet",
        "gemini-2.0-flash": "predefined-gemini-2.0-flash"
    }
    endpoint_id = model_mapping.get(model, "predefined-claude-3.7-sonnet")  # Default to Claude if model not found

    # Send query to OnDemand API
    result = current_client.send_query(query, endpoint_id=endpoint_id, stream=stream)

    if "error" in result:
        return {"error": result["error"]}, 500

    if stream:
        # Translate the upstream SSE stream into OpenAI chat.completion.chunk
        # events on the fly.
        def generate_stream():
            for line in result["response"].iter_lines():
                if line:
                    decoded_line = line.decode('utf-8')
                    if decoded_line.startswith("data:"):
                        # NOTE(review): the slice keeps any leading space after
                        # "data:", so for "data: [DONE]" this comparison never
                        # matches and the stream ends via iter_lines instead —
                        # consider .strip() here; verify against the API framing.
                        json_str = decoded_line[len("data:"):]
                        if json_str == "[DONE]":
                            yield "data: [DONE]\n\n"
                            break
                        try:
                            event_data = json.loads(json_str)
                            if event_data.get("eventType", "") == "fulfillment":
                                content = event_data.get("answer", "")
                                stream_response = {
                                    "id": f"chatcmpl-{int(time.time())}",
                                    "object": "chat.completion.chunk",
                                    "created": int(time.time()),
                                    "model": model,
                                    "choices": [
                                        {
                                            "delta": {"content": content},
                                            "index": 0,
                                            "finish_reason": None
                                        }
                                    ]
                                }
                                yield f"data: {json.dumps(stream_response)}\n\n"
                        except json.JSONDecodeError:
                            continue

        return Response(stream_with_context(generate_stream()), content_type='text/event-stream')
    else:
        # Non-streaming: wrap the accumulated answer in an OpenAI
        # chat.completion envelope.
        response = {
            "id": f"chatcmpl-{int(time.time())}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": result["content"]
                    },
                    "finish_reason": "stop",
                    "index": 0
                }
            ],
            "usage": {
                "prompt_tokens": 0,  # Placeholder, can be updated if metrics are available
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }
        return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
 
if __name__ == "__main__":
    # Hugging Face Spaces injects the listen port through the PORT env var.
    listen_port = int(os.environ.get("PORT", "7860"))
    print(f"Starting Flask app on port {listen_port}")
    # Bind to all interfaces so the server is reachable from inside Docker.
    app.run(host='0.0.0.0', port=listen_port, debug=False)