dedlepexa committed on
Commit
ebaea68
·
verified ·
1 Parent(s): 6e368de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -16,18 +16,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
16
  model = AutoModelForCausalLM.from_pretrained(model_name)
17
  model.eval()
18
 
19
- # 🔹 настройки
20
  MAX_HISTORY = 40
21
  NUM_WORKERS = 3
22
 
23
- db = OrderedDict() # message -> {status, reply}
24
  queue = []
25
 
26
  class Message(BaseModel):
27
  message: str
28
 
29
 
30
- # 🔥 STREAMING GENERATION
31
  def generate_ai_stream(message: str):
32
  prompt = f"User: {message}\nAssistant: Answer clearly and fully:\n"
33
 
@@ -51,24 +49,22 @@ def generate_ai_stream(message: str):
51
 
52
  partial = ""
53
 
 
54
  for text in streamer:
55
  partial += text
56
 
57
- # 🔥 обновляем ответ в реальном времени
58
  if message in db:
59
  db[message]["reply"] = partial
60
 
61
-
62
-
63
- final_text = partial.strip() + " full generated"
64
 
65
  if message in db:
66
  db[message]["reply"] = final_text
67
- db[message]["status"] = "done"
68
 
69
  return final_text
70
 
71
- # 🔥 WORKER
72
  def worker():
73
  while True:
74
  if queue:
@@ -86,7 +82,6 @@ def worker():
86
  time.sleep(0.01)
87
 
88
 
89
- # 🔥 запускаем 3 воркера (ускорение x2-x3)
90
  for _ in range(NUM_WORKERS):
91
  threading.Thread(target=worker, daemon=True).start()
92
 
@@ -96,19 +91,14 @@ async def root():
96
  return PlainTextResponse("AI server работает")
97
 
98
 
99
- # 🔹 ASK
100
  @app.get("/ask")
101
  async def ask(message: str):
102
 
103
- # кеш
104
  if message in db and db[message]["status"] == "done":
105
  return PlainTextResponse("cached")
106
 
107
  if message not in db:
108
- db[message] = {
109
- "status": "pending",
110
- "reply": ""
111
- }
112
  queue.append(message)
113
 
114
  if len(db) > MAX_HISTORY:
@@ -117,7 +107,6 @@ async def ask(message: str):
117
  return PlainTextResponse("accepted")
118
 
119
 
120
- # 🔹 GET (визуальный стриминг)
121
  @app.get("/get")
122
  async def get(message: str):
123
 
 
16
  model = AutoModelForCausalLM.from_pretrained(model_name)
17
  model.eval()
18
 
 
19
  MAX_HISTORY = 40
20
  NUM_WORKERS = 3
21
 
22
+ db = OrderedDict()
23
  queue = []
24
 
25
  class Message(BaseModel):
26
  message: str
27
 
28
 
 
29
  def generate_ai_stream(message: str):
30
  prompt = f"User: {message}\nAssistant: Answer clearly and fully:\n"
31
 
 
49
 
50
  partial = ""
51
 
52
+ # 🔥 STREAMING
53
  for text in streamer:
54
  partial += text
55
 
 
56
  if message in db:
57
  db[message]["reply"] = partial
58
 
59
+ # 🔥 КОНЕЦ ГЕНЕРАЦИИ (ВОТ ТУТ ДЕТЕКТ)
60
+ final_text = partial.strip() + " full generated"
 
61
 
62
  if message in db:
63
  db[message]["reply"] = final_text
 
64
 
65
  return final_text
66
 
67
+
68
  def worker():
69
  while True:
70
  if queue:
 
82
  time.sleep(0.01)
83
 
84
 
 
85
  for _ in range(NUM_WORKERS):
86
  threading.Thread(target=worker, daemon=True).start()
87
 
 
91
  return PlainTextResponse("AI server работает")
92
 
93
 
 
94
  @app.get("/ask")
95
  async def ask(message: str):
96
 
 
97
  if message in db and db[message]["status"] == "done":
98
  return PlainTextResponse("cached")
99
 
100
  if message not in db:
101
+ db[message] = {"status": "pending", "reply": ""}
 
 
 
102
  queue.append(message)
103
 
104
  if len(db) > MAX_HISTORY:
 
107
  return PlainTextResponse("accepted")
108
 
109
 
 
110
  @app.get("/get")
111
  async def get(message: str):
112