aryo100 committed on
Commit
28de333
·
1 Parent(s): 4ec3486

update app

Browse files
Files changed (1) hide show
  1. app.py +30 -31
app.py CHANGED
@@ -1,49 +1,48 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import torch, os, uvicorn
5
 
app = FastAPI()

model_name = "Qwen/Qwen-1_8B-Chat"

# The Qwen repository ships its own tokenizer/model classes, so remote code
# must be trusted for both the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Install a manual template only when the tokenizer does not provide one.
# The template follows the ChatML layout used by Qwen chat models
# (<|im_start|>role\ncontent<|im_end|>\n) and appends the assistant header
# only when the caller asks for a generation prompt.
if tokenizer.chat_template is None:
    tokenizer.chat_template = (
        "{% for message in messages %}"
        "{{ '<|im_start|>' + message['role'] + '\\n' }}"
        "{{ message['content'] + '<|im_end|>\\n' }}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )

# Half precision and automatic device placement only when a GPU is present;
# otherwise load full precision on the CPU.
_cuda_available = torch.cuda.is_available()
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
    device_map="auto" if _cuda_available else "cpu",
)
27
 
 
class ChatRequest(BaseModel):
    """JSON body accepted by the ``/chat`` endpoint."""

    # Text of the user's message; sent to the model as the "user" turn.
    prompt: str
    # Maximum number of new tokens to generate for the reply.
    max_new_tokens: int = 128
 
@app.post("/chat")
def chat(req: ChatRequest):
    """Answer a single user prompt with the chat model.

    The prompt is wrapped in a fixed system message plus one user turn,
    rendered through the tokenizer's chat template, and passed to
    ``model.generate``.

    Args:
        req: Request body carrying ``prompt`` and ``max_new_tokens``.

    Returns:
        A dict ``{"reply": <generated text>}`` containing only the newly
        generated text, not the rendered prompt.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": req.prompt},
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)

    # The generated sequence begins with the prompt tokens; drop them so the
    # reply does not echo the system and user messages back to the caller.
    prompt_length = inputs["input_ids"].shape[1]
    reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    return {"reply": reply}
46
 
if __name__ == "__main__":
    # Serve on all interfaces; the PORT environment variable overrides the
    # default port 7860.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
 
app = FastAPI()

# Load the model and tokenizer once at startup.
MODEL_NAME = "Qwen/Qwen-1_8B-Chat"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# The /chat handler relies on apply_chat_template; if the tokenizer ships
# without a template, install one in the ChatML layout used by Qwen chat
# models (<|im_start|>role\ncontent<|im_end|>\n).
if getattr(tokenizer, "chat_template", None) is None:
    tokenizer.chat_template = (
        "{% for message in messages %}"
        "{{ '<|im_start|>' + message['role'] + '\\n' }}"
        "{{ message['content'] + '<|im_end|>\\n' }}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )

# Half precision and automatic device placement only when a GPU is present;
# otherwise load full precision on the CPU.
_cuda_available = torch.cuda.is_available()
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Same flag the tokenizer needs: the repository provides custom model code.
    trust_remote_code=True,
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
    device_map="auto" if _cuda_available else "cpu",
)
16
 
# Request schema
class ChatRequest(BaseModel):
    """JSON body accepted by the ``/chat`` endpoint."""

    # Conversation turns, e.g. [{"role": "user", "content": "halo"}].
    messages: list
    # Maximum number of new tokens to generate for the response.
    max_new_tokens: int = 128
21
 
@app.post("/chat")
def chat(req: ChatRequest):
    """Generate a sampled reply for the conversation in ``req.messages``.

    Args:
        req: Request body carrying ``messages`` and ``max_new_tokens``.

    Returns:
        A dict ``{"response": <generated text>}``.
    """
    # Render the conversation into one prompt string via the chat template.
    prompt = tokenizer.apply_chat_template(
        req.messages, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sampling configuration for generation.
    sampling = {"do_sample": True, "top_p": 0.9, "temperature": 0.7}
    generated = model.generate(
        **encoded,
        max_new_tokens=req.max_new_tokens,
        **sampling,
    )

    # Skip the leading positions that correspond to the input prompt so only
    # the continuation is decoded.
    prompt_len = encoded["input_ids"].shape[1]
    completion = generated[0][prompt_len:]
    return {"response": tokenizer.decode(completion, skip_special_tokens=True)}
45
 
@app.get("/")
def root():
    """Return a static message showing the service is up."""
    status = {"message": "Qwen FastAPI running 🚀"}
    return status