AnatoliiG committed on
Commit
afbbaeb
·
1 Parent(s): c65ca6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -16
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import json
2
 
3
  import uvicorn
@@ -11,7 +12,8 @@ from model import engine
11
  from ui import create_ui
12
  from utils import get_clean_text
13
 
14
- # --- FastAPI Setup ---
 
15
  app = FastAPI()
16
  app.add_middleware(
17
  CORSMiddleware,
@@ -22,9 +24,11 @@ app.add_middleware(
22
  )
23
 
24
 
25
- # --- API Endpoints ---
26
  @app.post("/v1/chat/completions")
27
  async def chat_completions(request: Request):
 
 
 
28
  if not engine.llm:
29
  return JSONResponse(content={"error": "Model not loaded"}, status_code=500)
30
 
@@ -45,22 +49,30 @@ async def chat_completions(request: Request):
45
  temperature = data.get("temperature", config.DEFAULT_TEMP)
46
  max_tokens = data.get("max_tokens", config.DEFAULT_MAX_TOKENS)
47
 
48
- output = engine.generate(
49
- messages=messages,
50
- max_tokens=max_tokens,
51
- temperature=temperature,
52
- stream=stream,
53
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  if stream:
56
-
57
- def iter_content():
58
- for chunk in output:
59
- yield f"data: {json.dumps(chunk)}\n\n"
60
- yield "data: [DONE]\n\n"
61
-
62
  return StreamingResponse(
63
- iter_content(),
64
  media_type="text/event-stream",
65
  headers={
66
  "Cache-Control": "no-cache",
@@ -68,9 +80,20 @@ async def chat_completions(request: Request):
68
  "X-Accel-Buffering": "no",
69
  },
70
  )
 
 
 
 
 
 
 
 
 
71
 
72
- return JSONResponse(content=output)
73
  except Exception as e:
 
 
 
74
  return JSONResponse(content={"error": str(e)}, status_code=500)
75
 
76
 
 
1
+ import asyncio # <--- Added import
2
  import json
3
 
4
  import uvicorn
 
12
  from ui import create_ui
13
  from utils import get_clean_text
14
 
15
+ model_lock = asyncio.Lock()
16
+
17
  app = FastAPI()
18
  app.add_middleware(
19
  CORSMiddleware,
 
24
  )
25
 
26
 
 
27
  @app.post("/v1/chat/completions")
28
  async def chat_completions(request: Request):
29
+ if model_lock.locked():
30
+ pass
31
+
32
  if not engine.llm:
33
  return JSONResponse(content={"error": "Model not loaded"}, status_code=500)
34
 
 
49
  temperature = data.get("temperature", config.DEFAULT_TEMP)
50
  max_tokens = data.get("max_tokens", config.DEFAULT_MAX_TOKENS)
51
 
52
+ async def iter_content_locked():
53
+ async with model_lock:
54
+ try:
55
+ output = engine.generate(
56
+ messages=messages,
57
+ max_tokens=max_tokens,
58
+ temperature=temperature,
59
+ stream=True,
60
+ )
61
+
62
+ for chunk in output:
63
+ if "model" not in chunk:
64
+ chunk["model"] = config.REPO_ID
65
+ yield f"data: {json.dumps(chunk)}\n\n"
66
+ await asyncio.sleep(0)
67
+
68
+ yield "data: [DONE]\n\n"
69
+ except Exception as e:
70
+ print(f"Streaming error: {e}")
71
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
72
 
73
  if stream:
 
 
 
 
 
 
74
  return StreamingResponse(
75
+ iter_content_locked(),
76
  media_type="text/event-stream",
77
  headers={
78
  "Cache-Control": "no-cache",
 
80
  "X-Accel-Buffering": "no",
81
  },
82
  )
83
+ else:
84
+ async with model_lock:
85
+ output = engine.generate(
86
+ messages=messages,
87
+ max_tokens=max_tokens,
88
+ temperature=temperature,
89
+ stream=False,
90
+ )
91
+ return JSONResponse(content=output)
92
 
 
93
  except Exception as e:
94
+ import traceback
95
+
96
+ traceback.print_exc()
97
  return JSONResponse(content={"error": str(e)}, status_code=500)
98
 
99