Jadyro committed
Commit ced62fa · verified · 1 Parent(s): 0b12103

Create app.py

Files changed (1)
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from typing import List
+ from transformers import AutoTokenizer, pipeline
+
+ MODEL_ID = "Equall/Saul-7B-Instruct-v1"
+
+ # Load the tokenizer and a text-generation pipeline for the model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+ pipe = pipeline(
+     "text-generation",
+     model=MODEL_ID,
+     tokenizer=tokenizer,
+     device_map="auto",  # GPU if available (requires the accelerate package), else CPU
+ )
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ class ChatRequest(BaseModel):
+     messages: List[Message]
+
+ class ChatResponse(BaseModel):
+     reply: str
+
+ app = FastAPI()
+
+ @app.get("/")
+ def root():
+     return {"status": "ok", "model": MODEL_ID}
+
+ @app.post("/chat", response_model=ChatResponse)
+ def chat(req: ChatRequest):
+     # Convert Pydantic objects into plain dicts for the tokenizer
+     messages = [m.model_dump() for m in req.messages]  # use m.dict() on Pydantic v1
+
+     # Use the model's chat template as recommended on the model card
+     prompt = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,
+     )
+
+     outputs = pipe(
+         prompt,
+         max_new_tokens=512,
+         do_sample=False,  # greedy decoding; sampling params (temperature, top_p) are ignored
+     )
+
+     full = outputs[0]["generated_text"]
+     # The pipeline returns prompt + completion; strip the prompt prefix
+     reply = full[len(prompt):].strip()
+     return ChatResponse(reply=reply)
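
For reference, a minimal client for the /chat endpoint might look like the sketch below. It assumes the app is served locally with "uvicorn app:app" on the default port 8000 and that the requests package is installed; neither is part of this commit.

# Client sketch: POST a chat message to the running server.
# Assumes `uvicorn app:app` is listening on http://localhost:8000 (not part of this commit).
import requests

payload = {
    "messages": [
        {"role": "user", "content": "What is consideration in contract law?"}
    ]
}
resp = requests.post("http://localhost:8000/chat", json=payload)
resp.raise_for_status()
print(resp.json()["reply"])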