izuemon committed on
Commit
b350db1
·
verified ·
1 Parent(s): 818a53c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, request, jsonify
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = Flask(__name__)

# Single source of truth for the checkpoint id (the original repeated the
# literal for both the model and the tokenizer).
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Model is loaded once at import/startup time, not per request.
# Fix the seed so any stochastic ops are reproducible across restarts.
torch.random.manual_seed(0)

# Fall back to CPU when no GPU is present; the original hard-coded
# device_map="cuda" and failed outright on CPU-only hosts.
_device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map=_device,
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding: with do_sample=False the temperature value is ignored by
# transformers; it is kept at 0.0 only to make the deterministic intent
# explicit to readers.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Minimal OpenAI-compatible, non-streaming chat completion endpoint.

    Expects a JSON body containing a ``messages`` list in the OpenAI chat
    format and returns a single assistant completion generated by the
    module-level ``pipe``.

    Returns:
        A JSON response shaped like an OpenAI ``chat.completion`` object,
        or a 400 error when the body is missing/malformed or ``messages``
        is empty.
    """
    # get_json(silent=True) yields None instead of raising on a missing or
    # malformed JSON body, letting us answer with a clean 400 rather than
    # the unhandled 500 the original `request.json` produced.
    data = request.get_json(silent=True) or {}

    messages = data.get("messages", [])
    if not messages:
        return jsonify({"error": "request body must contain a non-empty 'messages' list"}), 400

    # The text-generation pipeline accepts the chat-format message list
    # directly; return_full_text=False keeps only the new completion.
    result = pipe(messages, **generation_args)
    text = result[0]["generated_text"]

    response = {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text,
                },
                "finish_reason": "stop",
            }
        ],
    }

    return jsonify(response)
if __name__ == "__main__":
    # Bind to all interfaces; 7860 is the conventional Hugging Face Spaces
    # port. NOTE(review): Flask's built-in server is development-only —
    # consider gunicorn/uvicorn for production.
    app.run(host="0.0.0.0", port=7860)