Hugging Face Space — status: Running.
Commit: "Update app.py" (Browse files).
File changed: app.py
|
@@ -9,7 +9,7 @@ llm = Llama(
|
|
| 9 |
n_ctx=2048,
|
| 10 |
n_threads=2,
|
| 11 |
n_batch=128,
|
| 12 |
-
verbose=False
|
| 13 |
)
|
| 14 |
|
| 15 |
class Message(BaseModel):
|
|
@@ -21,21 +21,21 @@ class ChatRequest(BaseModel):
|
|
| 21 |
|
| 22 |
@app.post("/v1/chat")
|
| 23 |
def chat(req: ChatRequest):
|
| 24 |
-
prompt = ""
|
| 25 |
for m in req.messages:
|
| 26 |
-
prompt += f"{m.role
|
| 27 |
-
prompt += "
|
| 28 |
|
| 29 |
output = llm(
|
| 30 |
prompt,
|
| 31 |
max_tokens=256,
|
| 32 |
temperature=0.7,
|
| 33 |
top_p=0.9,
|
| 34 |
-
stop=["
|
| 35 |
)
|
| 36 |
|
| 37 |
return {
|
| 38 |
-
"model": "
|
| 39 |
"text": output["choices"][0]["text"].strip(),
|
| 40 |
"tokens": output["usage"]["total_tokens"],
|
| 41 |
}
|
|
|
|
| 9 |
n_ctx=2048,
|
| 10 |
n_threads=2,
|
| 11 |
n_batch=128,
|
| 12 |
+
verbose=False
|
| 13 |
)
|
| 14 |
|
| 15 |
class Message(BaseModel):
|
|
|
|
@app.post("/v1/chat")
def chat(req: ChatRequest):
    """Generate one chat completion for the conversation in *req*.

    Builds a tag-delimited prompt (<|system|>/<|user|>/<|assistant|> markers)
    from the request's messages, runs the local llama.cpp model, and returns
    the generated text together with the total token count reported by the
    model.
    """
    # Fixed system preamble, then one tagged segment per incoming message,
    # and finally an open assistant tag so the model writes the reply.
    segments = ["<|system|>\nYou are a helpful, concise chatbot.\n"]
    segments.extend(f"<|{m.role}|>\n{m.content}\n" for m in req.messages)
    segments.append("<|assistant|>\n")
    prompt = "".join(segments)

    completion = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        # Cut generation off before the model starts inventing a new turn.
        stop=["<|user|>", "<|system|>"]
    )

    return {
        "model": "edyx-convo",
        "text": completion["choices"][0]["text"].strip(),
        "tokens": completion["usage"]["total_tokens"],
    }