adowu committed on
Commit
8e47a66
·
verified ·
1 Parent(s): d6def15

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -13
main.py CHANGED
@@ -124,17 +124,13 @@ def _condense_messages(messages: list[Message], max_tokens: int) -> str:
124
  system_msgs = [m for m in messages if m.role == "system"]
125
  user_assistant = [m for m in messages if m.role in ("user", "assistant")]
126
 
127
- # Budujemy prompt w kolejności: system + user/assistant
128
  condensed_parts = []
129
 
130
- # system zawsze pełny
131
  for m in system_msgs:
132
  condensed_parts.append(_content_str(m))
133
 
134
- # dynamiczne skracanie starszych user/assistant
135
  tokens_so_far = sum(_token_count(part) for part in condensed_parts)
136
 
137
- # jeśli wchodzimy w limity
138
  for m in user_assistant:
139
  text = _content_str(m)
140
  tcount = _token_count(text)
@@ -144,7 +140,7 @@ def _condense_messages(messages: list[Message], max_tokens: int) -> str:
144
  if remaining_tokens <= 0:
145
  continue
146
  approx_chars = remaining_tokens * AVG_CHARS_PER_TOKEN
147
- text = text[-approx_chars:] # zachowujemy ostatnią część
148
  tcount = _token_count(text)
149
 
150
  condensed_parts.append(text)
@@ -228,17 +224,18 @@ async def _call_falcon_once(prompt: str, req: ChatCompletionRequest) -> str:
228
  "top_p": req.top_p,
229
  }
230
 
231
- # inicjalizacja nowego chatu z promptem
232
  await asyncio.to_thread(
233
  client.predict,
234
- input_value=prompt,
235
  settings_form_value=settings,
236
  api_name="/new_chat",
237
  )
238
 
 
239
  result = await asyncio.to_thread(
240
  client.predict,
241
- input_value=prompt,
242
  settings_form_value=settings,
243
  api_name="/add_message",
244
  )
@@ -254,12 +251,22 @@ async def _stream_sse(text: str, req: ChatCompletionRequest) -> AsyncGenerator[s
254
  cid = f"chatcmpl-{uuid.uuid4().hex}"
255
  created = int(time.time())
256
  for i in range(0, len(text), 8):
257
- chunk = {"id": cid, "object": "chat.completion.chunk", "created": created,
258
- "model": req.model, "choices": [{"index": 0, "delta": {"content": text[i:i+8]}, "finish_reason": None}]}
 
 
 
 
 
259
  yield f"data: {json.dumps(chunk)}\n\n"
260
  await asyncio.sleep(0.01)
261
- final_chunk = {"id": cid, "object": "chat.completion.chunk", "created": created,
262
- "model": req.model, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
 
 
 
 
 
263
  yield f"data: {json.dumps(final_chunk)}\n\n"
264
  yield "data: [DONE]\n\n"
265
 
@@ -291,7 +298,15 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
291
 
292
  @app.get("/")
293
  async def root():
294
- return {"service": "FOC API", "version": "5.0.0", "endpoints": {"health": "/health", "models": "/v1/models", "chat": "/v1/chat/completions"}}
 
 
 
 
 
 
 
 
295
 
296
 
297
  @app.get("/health")
 
124
  system_msgs = [m for m in messages if m.role == "system"]
125
  user_assistant = [m for m in messages if m.role in ("user", "assistant")]
126
 
 
127
  condensed_parts = []
128
 
 
129
  for m in system_msgs:
130
  condensed_parts.append(_content_str(m))
131
 
 
132
  tokens_so_far = sum(_token_count(part) for part in condensed_parts)
133
 
 
134
  for m in user_assistant:
135
  text = _content_str(m)
136
  tcount = _token_count(text)
 
140
  if remaining_tokens <= 0:
141
  continue
142
  approx_chars = remaining_tokens * AVG_CHARS_PER_TOKEN
143
+ text = text[-approx_chars:]
144
  tcount = _token_count(text)
145
 
146
  condensed_parts.append(text)
 
224
  "top_p": req.top_p,
225
  }
226
 
227
+ # initialize a new chat with the prompt (if the endpoint requires it)
228
  await asyncio.to_thread(
229
  client.predict,
230
+ prompt, # first argument passed positionally
231
  settings_form_value=settings,
232
  api_name="/new_chat",
233
  )
234
 
235
+ # add the message to the chat
236
  result = await asyncio.to_thread(
237
  client.predict,
238
+ prompt, # first argument passed positionally
239
  settings_form_value=settings,
240
  api_name="/add_message",
241
  )
 
251
  cid = f"chatcmpl-{uuid.uuid4().hex}"
252
  created = int(time.time())
253
  for i in range(0, len(text), 8):
254
+ chunk = {
255
+ "id": cid,
256
+ "object": "chat.completion.chunk",
257
+ "created": created,
258
+ "model": req.model,
259
+ "choices": [{"index": 0, "delta": {"content": text[i:i+8]}, "finish_reason": None}]
260
+ }
261
  yield f"data: {json.dumps(chunk)}\n\n"
262
  await asyncio.sleep(0.01)
263
+ final_chunk = {
264
+ "id": cid,
265
+ "object": "chat.completion.chunk",
266
+ "created": created,
267
+ "model": req.model,
268
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
269
+ }
270
  yield f"data: {json.dumps(final_chunk)}\n\n"
271
  yield "data: [DONE]\n\n"
272
 
 
298
 
299
@app.get("/")
async def root():
    """Service metadata endpoint.

    Returns:
        dict: the service name, its version string, and a map of the
        available endpoint paths so clients can discover the API surface.
    """
    return {
        "service": "FOC API",
        "version": "5.0.0",
        "endpoints": {
            "health": "/health",
            "models": "/v1/models",
            "chat": "/v1/chat/completions",
        },
    }
310
 
311
 
312
  @app.get("/health")