Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from fastapi.responses import HTMLResponse
|
|
| 6 |
from llama_cpp import Llama
|
| 7 |
from pydantic import BaseModel
|
| 8 |
import uvicorn
|
|
|
|
| 9 |
|
| 10 |
# Configuration
|
| 11 |
MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
|
|
@@ -132,17 +133,25 @@ class ChatCompletionResponse(BaseModel):
|
|
| 132 |
choices: list[dict]
|
| 133 |
usage: dict
|
| 134 |
|
| 135 |
-
@app.
|
| 136 |
-
async def chat_completion(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
try:
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
prompt += "\nassistant:"
|
| 140 |
|
| 141 |
response = llm(
|
| 142 |
prompt=prompt,
|
| 143 |
-
max_tokens=
|
| 144 |
-
temperature=
|
| 145 |
-
top_p=
|
| 146 |
stop=["</s>"]
|
| 147 |
)
|
| 148 |
|
|
|
|
| 6 |
from llama_cpp import Llama
|
| 7 |
from pydantic import BaseModel
|
| 8 |
import uvicorn
|
| 9 |
+
import json
|
| 10 |
|
| 11 |
# Configuration
|
| 12 |
MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
|
|
|
|
| 133 |
choices: list[dict]
|
| 134 |
usage: dict
|
| 135 |
|
| 136 |
+
@app.get("/v1/chat/completions")
|
| 137 |
+
async def chat_completion(
|
| 138 |
+
messages: str,
|
| 139 |
+
max_tokens: int = 128,
|
| 140 |
+
temperature: float = 0.7,
|
| 141 |
+
top_p: float = 0.9,
|
| 142 |
+
stream: bool = False
|
| 143 |
+
):
|
| 144 |
try:
|
| 145 |
+
messages_list = json.loads(messages)
|
| 146 |
+
|
| 147 |
+
prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages_list])
|
| 148 |
prompt += "\nassistant:"
|
| 149 |
|
| 150 |
response = llm(
|
| 151 |
prompt=prompt,
|
| 152 |
+
max_tokens=max_tokens,
|
| 153 |
+
temperature=temperature,
|
| 154 |
+
top_p=top_p,
|
| 155 |
stop=["</s>"]
|
| 156 |
)
|
| 157 |
|