Adi362 committed on
Commit
f97ce08
·
verified ·
1 Parent(s): 389c652

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama

# ASGI application exposing the /v1/chat endpoint below.
app = FastAPI()

# Single module-level llama.cpp model, loaded once at import/startup time
# and shared by every request.
# NOTE(review): model_path is hard-coded — assumes the GGUF file is mounted
# at /models/model.gguf in the container; confirm against the deployment.
llm = Llama(
    model_path="/models/model.gguf",
    n_ctx=2048,     # context window size in tokens
    n_threads=2,    # CPU threads used for inference
    n_batch=128,    # prompt-evaluation batch size
    verbose=False,  # suppress llama.cpp's internal logging
)
14
+
15
class Message(BaseModel):
    """One chat turn: a role label and its text content."""

    # Free-form role string (presumably "user"/"assistant" — chat()
    # upper-cases it when building the prompt; verify against callers).
    role: str
    # The text of this turn.
    content: str
18
+
19
class ChatRequest(BaseModel):
    """Request body for POST /v1/chat: the conversation so far."""

    # Ordered list of prior turns; chat() concatenates them into the prompt.
    messages: list[Message]
21
+
22
@app.post("/v1/chat")
def chat(req: ChatRequest):
    """Run one chat completion over the supplied conversation.

    Builds a plain-text prompt with one "ROLE: content" line per message,
    appends an "ASSISTANT:" cue, and returns the model's continuation.

    Parameters:
        req: the conversation so far (ordered role/content turns).
            An empty message list yields a bare "ASSISTANT:" prompt.

    Returns:
        dict with the model id, the generated text (whitespace-stripped),
        and the total token count reported by llama.cpp.
    """
    # str.join is linear in total length; the original repeated string
    # `+=` in a loop is potentially quadratic and less idiomatic.
    prompt = "".join(
        f"{m.role.upper()}: {m.content}\n" for m in req.messages
    ) + "ASSISTANT:"

    output = llm(
        prompt,
        max_tokens=256,   # cap on generated tokens
        temperature=0.7,
        top_p=0.9,
        stop=["USER:"],   # halt when the model starts the next user turn
    )

    return {
        "model": "tinyllama-1.1b-chat-q4_k_m",
        "text": output["choices"][0]["text"].strip(),
        "tokens": output["usage"]["total_tokens"],
    }