Valtry committed on
Commit c9b1f83 · verified · 1 Parent(s): f1419e2

Update app.py

Files changed (1)
  app.py  +27 -34
app.py CHANGED
@@ -1,8 +1,13 @@
-import gradio as gr
+from fastapi import FastAPI
+from pydantic import BaseModel
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import uvicorn
 
-MODEL_ID = "microsoft/phi-2"  # lighter → IMPORTANT
+# -----------------------
+# LOAD MODEL
+# -----------------------
+MODEL_ID = "microsoft/phi-2"
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
@@ -14,16 +19,21 @@ model = AutoModelForCausalLM.from_pretrained(
 
 torch.set_num_threads(2)
 
-def generate_reply(message, history):
-    # Convert history to prompt
-    prompt = ""
-    for msg in history:
-        if msg["role"] == "user":
-            prompt += f"User: {msg['content']}\n"
-        else:
-            prompt += f"Assistant: {msg['content']}\n"
+# -----------------------
+# FASTAPI
+# -----------------------
+app = FastAPI()
 
-    prompt += f"User: {message}\nAssistant:"
+class ChatRequest(BaseModel):
+    message: str
+
+@app.get("/")
+def home():
+    return {"status": "API running 🚀"}
+
+@app.post("/chat")
+def chat(req: ChatRequest):
+    prompt = f"User: {req.message}\nAssistant:"
 
     inputs = tokenizer(prompt, return_tensors="pt")
 
@@ -37,27 +47,10 @@ def generate_reply(message, history):
     reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
     reply = reply.split("Assistant:")[-1].strip()
 
-    return reply
-
-def chat(message, history):
-    if history is None:
-        history = []
-
-    history.append({"role": "user", "content": message})
-    reply = generate_reply(message, history)
-    history.append({"role": "assistant", "content": reply})
-
-    return history, ""  # ← also clear the textbox
-
-
-with gr.Blocks() as demo:
-    gr.Markdown("## ⚡ Fast Phi-2 Chatbot (HF Free Tier)")
-
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(placeholder="Type something...")
-    clear = gr.Button("Clear")
-
-    msg.submit(chat, [msg, chatbot], [chatbot, msg])  # ← outputs include msg to clear it
-    clear.click(lambda: ([], ""), None, [chatbot, msg])
+    return {"response": reply}
 
-demo.launch()
+# -----------------------
+# START SERVER DIRECTLY
+# -----------------------
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
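
Not part of the commit: a minimal sketch of how the new /chat endpoint could be exercised once this app.py is running, assuming the server is reachable at localhost on the port 7860 used in the diff (the URL and the example message are illustrative).

# Usage sketch (illustrative, not from the commit).
# Assumes the FastAPI app above is running locally on port 7860.
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"message": "Explain what a tokenizer does in one sentence."},
)
resp.raise_for_status()
print(resp.json()["response"])  # the model's reply text

The GET / route added in the diff can serve as a quick health check before posting to /chat.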