Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,27 +28,27 @@ class Message(BaseModel):
|
|
| 28 |
|
| 29 |
|
| 30 |
def generate_ai(message: str):
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
inputs = tokenizer(prompt, return_tensors="pt")
|
| 33 |
|
| 34 |
with torch.no_grad():
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
)
|
| 44 |
-
|
|
|
|
| 45 |
input_length = inputs.input_ids.shape[1]
|
| 46 |
-
generated_tokens = outputs[0][input_length:]
|
| 47 |
-
|
| 48 |
-
reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
|
| 49 |
|
| 50 |
-
|
| 51 |
-
reply = reply.split("Assistant:")[-1].strip()
|
| 52 |
|
| 53 |
return reply
|
| 54 |
|
|
@@ -59,15 +59,13 @@ def worker():
|
|
| 59 |
if queue:
|
| 60 |
message = queue.pop(0)
|
| 61 |
|
| 62 |
-
# Генерация
|
| 63 |
reply = generate_ai(message)
|
| 64 |
|
| 65 |
-
# Сохраняем результат
|
| 66 |
if message in db:
|
| 67 |
db[message]["status"] = "done"
|
| 68 |
db[message]["reply"] = reply
|
| 69 |
|
| 70 |
-
time.sleep(0.
|
| 71 |
|
| 72 |
|
| 73 |
# запускаем поток
|
|
@@ -83,14 +81,13 @@ async def root():
|
|
| 83 |
@app.get("/ask")
|
| 84 |
async def ask(message: str):
|
| 85 |
|
| 86 |
-
# if the message is already tracked, do not add it again
|
| 87 |
if message not in db:
|
| 88 |
db[message] = {"status": "pending", "reply": ""}
|
| 89 |
queue.append(message)
|
| 90 |
|
| 91 |
# cap the history size (original note says 40; MAX_HISTORY is defined outside this view)
|
| 92 |
if len(db) > MAX_HISTORY:
|
| 93 |
-
db.popitem(last=False)
|
| 94 |
|
| 95 |
return PlainTextResponse("accepted")
|
| 96 |
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def generate_ai(message: str):
    """Generate a reply to ``message`` and return it as stripped text.

    The message is wrapped in a User/Assistant prompt, tokenized with the
    ``tokenizer`` object and passed to ``model.generate`` with sampling
    enabled (both objects are defined elsewhere in the file). Only the
    tokens produced after the prompt are decoded, so the returned string
    does not repeat the prompt.
    """
    prompt = f"User: {message}\nAssistant: Answer clearly and fully:\n"
    inputs = tokenizer(prompt, return_tensors="pt")

    # Sampling settings; min_new_tokens keeps the reply from ending too early.
    generation_kwargs = dict(
        max_new_tokens=60,
        min_new_tokens=20,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Important: decode only the NEW tokens, i.e. everything after the prompt.
    prompt_length = inputs.input_ids.shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
|
| 54 |
|
|
|
|
| 59 |
if queue:
|
| 60 |
message = queue.pop(0)
|
| 61 |
|
|
|
|
| 62 |
reply = generate_ai(message)
|
| 63 |
|
|
|
|
| 64 |
if message in db:
|
| 65 |
db[message]["status"] = "done"
|
| 66 |
db[message]["reply"] = reply
|
| 67 |
|
| 68 |
+
time.sleep(0.05) # чуть быстрее
|
| 69 |
|
| 70 |
|
| 71 |
# запускаем поток
|
|
|
|
| 81 |
@app.get("/ask")
async def ask(message: str):
    """Register ``message`` for generation and acknowledge the request.

    A message that is not yet a key of ``db`` gets a pending entry and is
    appended to ``queue``; a message that is already present is left
    untouched. Afterwards, if ``db`` holds more than ``MAX_HISTORY``
    entries, one entry is removed with ``popitem(last=False)``.

    Always responds with the plain text ``accepted``.
    """
    already_tracked = message in db
    if not already_tracked:
        db[message] = {"status": "pending", "reply": ""}
        queue.append(message)

    # Keep the history bounded by MAX_HISTORY.
    # NOTE(review): popitem(last=False) presumably means db is an OrderedDict,
    # so the first-inserted entry is the one dropped; confirm at its definition.
    over_limit = len(db) > MAX_HISTORY
    if over_limit:
        db.popitem(last=False)

    return PlainTextResponse("accepted")
|
| 93 |
|