CooLLaMACEO committed on
Commit
0187888
·
verified ·
1 Parent(s): a30aa25

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
# Point to the local folder created in the Dockerfile
MODEL_PATH = "/app/model"

print("Loading Overflow-111.7B from Local Docker Storage...")

# Tokenizer is loaded from the same local path — no Hub download at runtime.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    # SECURITY NOTE(review): trust_remote_code executes arbitrary Python
    # shipped with the model files. Acceptable only because MODEL_PATH is
    # baked into the image (per the comment above) — do not point this at
    # untrusted weights.
    trust_remote_code=True,
    # Pin every module to CPU — this deployment has no accelerator.
    device_map={"": "cpu"},
    # bfloat16 halves memory versus float32 while keeping its exponent range.
    torch_dtype=torch.bfloat16,
    # Stream weights into place to avoid a ~2x peak RSS during loading.
    low_cpu_mem_usage=True
)
20
+
21
def respond(message, history):
    """Generate a model reply for the latest chat message.

    Parameters:
        message: The current user message (str) from the chat UI.
        history: Prior chat turns supplied by gr.ChatInterface — unused;
            the model is prompted with the current message only.

    Returns:
        The model's continuation as a string, without the echoed prompt.
    """
    inputs = tokenizer(message, return_tensors="pt")
    with torch.no_grad():
        output_tokens = model.generate(**inputs, max_new_tokens=30)
    # Fix: generate() returns prompt + continuation, so decoding the full
    # sequence would echo the user's own message at the start of every
    # reply. Decode only the newly generated tokens instead.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(output_tokens[0][prompt_len:], skip_special_tokens=True)
26
+
27
# Wire respond(message, history) into Gradio's prebuilt chat front end.
# `demo` stays at module level — presumably so the hosting platform can
# discover it by name; verify before moving it under the __main__ guard.
demo = gr.ChatInterface(respond)
if __name__ == "__main__":
    # Bind on all interfaces at port 7860 (the port the container exposes).
    demo.launch(server_name="0.0.0.0", server_port=7860)