wynai committed on
Commit
8e8bc41
·
verified ·
1 Parent(s): 1f185a0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +78 -88
main.py CHANGED
@@ -3,7 +3,6 @@ import json
3
  import uuid
4
  import base64
5
  import time
6
- import asyncio
7
  from fastapi import FastAPI, Request
8
  from fastapi.responses import StreamingResponse
9
  from typing import AsyncGenerator, Dict, Any
@@ -14,23 +13,18 @@ GPT_OSS_URL = "https://api.gpt-oss.com/chatkit"
14
  AVAILABLE_MODELS = ["gpt-oss-20b", "gpt-oss-120b"]
15
 
16
  def get_cookie():
17
- # Tạo user_id ngẫu nhiên mỗi lần gọi
18
  user_id = str(uuid.uuid4())
19
-
20
- # Sinh session "giả lập" ngẫu nhiên, encode dạng base64 urlsafe
21
  session_payload = {
22
  "hf_access_token": f"hf_oauth_{uuid.uuid4().hex}",
23
- "hf_exp": time.time() + 3600, # hết hạn sau 1 giờ
24
- "hf_username": f"user_{uuid.uuid4().hex[:8]}"
25
  }
26
  session_json = json.dumps(session_payload, separators=(",", ":"))
27
  session_b64 = base64.urlsafe_b64encode(session_json.encode()).decode()
28
-
29
- # Trả về cookie theo định dạng cũ nhưng là dữ liệu random
30
  return f"user_id={user_id}; session={session_b64}"
31
 
32
  def get_headers(model: str):
33
- headers = {
34
  "authority": "api.gpt-oss.com",
35
  "accept": "text/event-stream",
36
  "accept-language": "vi-VN,vi;q=0.9,fr-FR;q=0.8,fr;q=0.7,en-US;q=0.6,en;q=0.5",
@@ -47,9 +41,8 @@ def get_headers(model: str):
47
  "user-agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Mobile Safari/537.36",
48
  "x-reasoning-effort": "high",
49
  "x-selected-model": model,
50
- "x-show-reasoning": "true"
51
  }
52
- return headers
53
 
54
  def build_prompt(messages: list):
55
  prompt = ""
@@ -65,18 +58,44 @@ def build_prompt(messages: list):
65
  return prompt.strip()
66
 
67
  def build_payload(prompt: str):
68
- payload = {
69
  "op": "threads.create",
70
  "params": {
71
  "input": {
72
  "text": prompt,
73
  "content": [{"type": "input_text", "text": prompt}],
74
  "quoted_text": "",
75
- "attachments": []
76
  }
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
79
- return payload
 
 
 
 
 
 
80
 
81
  async def stream_gpt_oss_response(request_data: Dict[str, Any]) -> AsyncGenerator[str, None]:
82
  model = request_data["model"]
@@ -85,10 +104,9 @@ async def stream_gpt_oss_response(request_data: Dict[str, Any]) -> AsyncGenerato
85
  payload = build_payload(prompt)
86
 
87
  thoughts = []
88
- text_buffer = "" # Buffer tích lũy toàn bộ text
89
- words_sent = 0 # Số từ đã gửi
90
  in_assistant = False
91
- think_sent = False
 
92
 
93
  try:
94
  with requests.post(GPT_OSS_URL, headers=headers, data=json.dumps(payload), stream=True, timeout=120) as r:
@@ -100,76 +118,46 @@ async def stream_gpt_oss_response(request_data: Dict[str, Any]) -> AsyncGenerato
100
  data = json.loads(line[5:].strip())
101
  event_type = data.get("type")
102
 
103
- # Thu thập CoT (thought/recap) như cũ
104
- if event_type == "thread.item_updated" and "update" in data and data["update"].get("type") == "cot.entry_added":
105
  entry = data["update"]["entry"]
106
  if entry["type"] in ["thought", "recap"]:
107
  thoughts.append(entry["content"])
108
 
109
- # Khi CoT hoàn tất, gửi một lần
110
  elif event_type == "thread.item_done" and data["item"].get("type") == "cot":
111
  think_str = " ".join(thoughts).strip()
112
- if think_str and not think_sent:
113
- chunk = {"choices": [{"delta": {"content": f"<think>{think_str}</think> "}, "index": 0, "finish_reason": None}]}
114
- yield f"data: {json.dumps(chunk)}\n\n"
115
- think_sent = True
116
 
117
- # Bắt đầu phase assistant message
118
  elif event_type == "thread.item_added" and data["item"].get("type") == "assistant_message":
119
  in_assistant = True
120
-
121
- # Tích lũy text và gửi từng từ
122
- elif event_type == "thread.item_updated" and "update" in data and data["update"].get("type") == "assistant_message.content_part.text_delta":
 
 
 
 
 
 
 
 
123
  if in_assistant:
124
  delta = data["update"]["delta"]
125
- text_buffer += delta
126
-
127
- # Tách thành các từ (bao gồm khoảng trắng)
128
- import re
129
- words = re.findall(r'\S+|\s+', text_buffer)
130
-
131
- # Gửi các từ mới (chưa được gửi)
132
- while words_sent < len(words):
133
- word = words[words_sent]
134
- # Chỉ gửi từ thực sự (không phải khoảng trắng đơn thuần)
135
- # hoặc nếu là từ cuối cùng trong list hiện tại
136
- if word.strip() or words_sent == len(words) - 1:
137
- # Nếu là từ thực sự, gửi kèm khoảng trắng tiếp theo nếu có
138
- if word.strip() and words_sent + 1 < len(words) and words[words_sent + 1].isspace():
139
- word_to_send = word + words[words_sent + 1]
140
- words_sent += 2
141
- else:
142
- word_to_send = word
143
- words_sent += 1
144
-
145
- chunk = {"choices": [{"delta": {"content": word_to_send}, "index": 0, "finish_reason": None}]}
146
- yield f"data: {json.dumps(chunk)}\n\n"
147
- # Delay giữa các từ
148
- await asyncio.sleep(0.1)
149
- else:
150
- words_sent += 1
151
-
152
- # Kết thúc assistant message
153
  elif event_type == "thread.item_done" and data["item"].get("type") == "assistant_message":
154
- # Gửi bất kỳ từ cuối cùng nào còn lại
155
- import re
156
- words = re.findall(r'\S+|\s+', text_buffer)
157
- while words_sent < len(words):
158
- word = words[words_sent]
159
- if word.strip(): # Chỉ gửi từ có nội dung
160
- chunk = {"choices": [{"delta": {"content": word}, "index": 0, "finish_reason": None}]}
161
- yield f"data: {json.dumps(chunk)}\n\n"
162
- words_sent += 1
163
-
164
  yield "data: [DONE]\n\n"
165
  break
166
-
167
- except Exception as e:
168
  continue
169
  except Exception as e:
170
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
171
-
172
- # Xóa function không cần thiết
173
 
174
  @app.post("/v1/chat/completions")
175
  async def chat_completions(request: Request):
@@ -179,17 +167,24 @@ async def chat_completions(request: Request):
179
  return {"error": f"Model must be one of {AVAILABLE_MODELS}"}, 400
180
 
181
  stream = request_data.get("stream", False)
182
-
183
  if stream:
184
- return StreamingResponse(stream_gpt_oss_response(request_data), media_type="text/event-stream")
 
 
 
 
 
 
 
 
 
185
  else:
186
- # Non-stream: thu gom toàn bộ response
187
  headers = get_headers(model)
188
  prompt = build_prompt(request_data["messages"])
189
  payload = build_payload(prompt)
190
-
191
- thoughts = []
192
- content = ""
193
 
194
  try:
195
  with requests.post(GPT_OSS_URL, headers=headers, data=json.dumps(payload), stream=True, timeout=120) as r:
@@ -200,16 +195,12 @@ async def chat_completions(request: Request):
200
  try:
201
  data = json.loads(line[5:].strip())
202
  event_type = data.get("type")
203
-
204
- if event_type == "thread.item_updated" and "update" in data and data["update"].get("type") == "cot.entry_added":
205
  entry = data["update"]["entry"]
206
  if entry["type"] in ["thought", "recap"]:
207
  thoughts.append(entry["content"])
208
-
209
- elif event_type == "thread.item_updated" and "update" in data and data["update"].get("type") == "assistant_message.content_part.text_delta":
210
- delta = data["update"]["delta"]
211
- content += delta
212
-
213
  elif event_type == "thread.item_done" and data["item"].get("type") == "assistant_message":
214
  break
215
  except Exception:
@@ -218,16 +209,15 @@ async def chat_completions(request: Request):
218
  return {"error": str(e)}
219
 
220
  think_str = " ".join(thoughts).strip()
221
- full_content = f"<think>{think_str}</think> {content}".strip() if think_str else content.strip()
222
 
223
- response = {
224
  "choices": [{
225
  "message": {"content": full_content},
226
  "index": 0,
227
  "finish_reason": "stop"
228
  }]
229
  }
230
- return response
231
 
232
  if __name__ == "__main__":
233
  import uvicorn
 
3
  import uuid
4
  import base64
5
  import time
 
6
  from fastapi import FastAPI, Request
7
  from fastapi.responses import StreamingResponse
8
  from typing import AsyncGenerator, Dict, Any
 
13
  AVAILABLE_MODELS = ["gpt-oss-20b", "gpt-oss-120b"]
14
 
15
def get_cookie():
    """Build a randomized ``Cookie`` header value for each upstream request.

    A fresh user_id and a fake base64url-encoded session blob are generated
    on every call, so no two requests share an identity.
    NOTE(review): the session fields mimic a HF OAuth session payload —
    confirm the upstream endpoint accepts arbitrary values here.
    """
    visitor_id = str(uuid.uuid4())
    # Fabricated session: random token, 1-hour expiry, random username.
    fake_session = {
        "hf_access_token": f"hf_oauth_{uuid.uuid4().hex}",
        "hf_exp": time.time() + 3600,
        "hf_username": f"user_{uuid.uuid4().hex[:8]}",
    }
    encoded = base64.urlsafe_b64encode(
        json.dumps(fake_session, separators=(",", ":")).encode()
    ).decode()
    return f"user_id={visitor_id}; session={encoded}"
25
 
26
  def get_headers(model: str):
27
+ return {
28
  "authority": "api.gpt-oss.com",
29
  "accept": "text/event-stream",
30
  "accept-language": "vi-VN,vi;q=0.9,fr-FR;q=0.8,fr;q=0.7,en-US;q=0.6,en;q=0.5",
 
41
  "user-agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Mobile Safari/537.36",
42
  "x-reasoning-effort": "high",
43
  "x-selected-model": model,
44
+ "x-show-reasoning": "true",
45
  }
 
46
 
47
  def build_prompt(messages: list):
48
  prompt = ""
 
58
  return prompt.strip()
59
 
60
def build_payload(prompt: str):
    """Wrap *prompt* into a ChatKit ``threads.create`` request body."""
    input_block = {
        "text": prompt,
        "content": [{"type": "input_text", "text": prompt}],
        "quoted_text": "",
        "attachments": [],
    }
    return {"op": "threads.create", "params": {"input": input_block}}
72
+
73
def _openai_chunk(model: str, delta_content: str = "", role: str = None, finish: str = None):
    """Serialize one OpenAI-style SSE frame (``chat.completion.chunk``).

    - role: include only on the first chunk (``"assistant"``); omitted otherwise.
    - delta_content: text fragment for this chunk; omitted when empty.
    - finish: ``"stop"`` on the terminal chunk, otherwise the chunk carries
      ``finish_reason: null``.
    Returns the complete ``data: ...`` SSE line terminated by a blank line.
    """
    delta = {}
    if role is not None:
        delta["role"] = role
    if delta_content:
        delta["content"] = delta_content
    chunk = {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "delta": delta,
                "finish_reason": finish,
            }
        ],
    }
    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
99
 
100
  async def stream_gpt_oss_response(request_data: Dict[str, Any]) -> AsyncGenerator[str, None]:
101
  model = request_data["model"]
 
104
  payload = build_payload(prompt)
105
 
106
  thoughts = []
 
 
107
  in_assistant = False
108
+ role_sent = False
109
+ think_buffer = None # giữ CoT để gửi ngay trước content
110
 
111
  try:
112
  with requests.post(GPT_OSS_URL, headers=headers, data=json.dumps(payload), stream=True, timeout=120) as r:
 
118
  data = json.loads(line[5:].strip())
119
  event_type = data.get("type")
120
 
121
+ # Thu thập CoT
122
+ if event_type == "thread.item_updated" and data.get("update", {}).get("type") == "cot.entry_added":
123
  entry = data["update"]["entry"]
124
  if entry["type"] in ["thought", "recap"]:
125
  thoughts.append(entry["content"])
126
 
 
127
  elif event_type == "thread.item_done" and data["item"].get("type") == "cot":
128
  think_str = " ".join(thoughts).strip()
129
+ if think_str:
130
+ think_buffer = f"<think>{think_str}</think> "
 
 
131
 
 
132
  elif event_type == "thread.item_added" and data["item"].get("type") == "assistant_message":
133
  in_assistant = True
134
+ # gửi role 1 lần
135
+ if not role_sent:
136
+ yield _openai_chunk(model, role="assistant")
137
+ role_sent = True
138
+ # nếu có CoT, đẩy ra trước khi stream chữ
139
+ if think_buffer:
140
+ for ch in think_buffer:
141
+ yield _openai_chunk(model, delta_content=ch)
142
+ think_buffer = None
143
+
144
+ elif event_type == "thread.item_updated" and data.get("update", {}).get("type") == "assistant_message.content_part.text_delta":
145
  if in_assistant:
146
  delta = data["update"]["delta"]
147
+ # stream từng KÝ TỰ theo chuẩn OpenAI
148
+ for ch in delta:
149
+ yield _openai_chunk(model, delta_content=ch)
150
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  elif event_type == "thread.item_done" and data["item"].get("type") == "assistant_message":
152
+ # chunk kết thúc
153
+ yield _openai_chunk(model, finish="stop")
 
 
 
 
 
 
 
 
154
  yield "data: [DONE]\n\n"
155
  break
156
+ except Exception:
 
157
  continue
158
  except Exception as e:
159
+ err = {"error": str(e)}
160
+ yield f"data: {json.dumps(err, ensure_ascii=False)}\n\n"
 
161
 
162
  @app.post("/v1/chat/completions")
163
  async def chat_completions(request: Request):
 
167
  return {"error": f"Model must be one of {AVAILABLE_MODELS}"}, 400
168
 
169
  stream = request_data.get("stream", False)
170
+
171
  if stream:
172
+ # Thêm header giúp proxy không buffer
173
+ return StreamingResponse(
174
+ stream_gpt_oss_response(request_data),
175
+ media_type="text/event-stream",
176
+ headers={
177
+ "Cache-Control": "no-cache",
178
+ "X-Accel-Buffering": "no",
179
+ "Connection": "keep-alive",
180
+ },
181
+ )
182
  else:
183
+ # Non-stream (giữ nguyên)
184
  headers = get_headers(model)
185
  prompt = build_prompt(request_data["messages"])
186
  payload = build_payload(prompt)
187
+ thoughts, content = [], ""
 
 
188
 
189
  try:
190
  with requests.post(GPT_OSS_URL, headers=headers, data=json.dumps(payload), stream=True, timeout=120) as r:
 
195
  try:
196
  data = json.loads(line[5:].strip())
197
  event_type = data.get("type")
198
+ if event_type == "thread.item_updated" and data.get("update", {}).get("type") == "cot.entry_added":
 
199
  entry = data["update"]["entry"]
200
  if entry["type"] in ["thought", "recap"]:
201
  thoughts.append(entry["content"])
202
+ elif event_type == "thread.item_updated" and data.get("update", {}).get("type") == "assistant_message.content_part.text_delta":
203
+ content += data["update"]["delta"]
 
 
 
204
  elif event_type == "thread.item_done" and data["item"].get("type") == "assistant_message":
205
  break
206
  except Exception:
 
209
  return {"error": str(e)}
210
 
211
  think_str = " ".join(thoughts).strip()
212
+ full_content = (f"<think>{think_str}</think> " if think_str else "") + content.strip()
213
 
214
+ return {
215
  "choices": [{
216
  "message": {"content": full_content},
217
  "index": 0,
218
  "finish_reason": "stop"
219
  }]
220
  }
 
221
 
222
  if __name__ == "__main__":
223
  import uvicorn