# phi-3 / app.py
# izuemon's picture
# Update app.py
# a79f08d verified
from flask import Flask, request, jsonify
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
app = Flask(__name__)

# Load the model once, at import time, so every request reuses the same objects.
torch.random.manual_seed(0)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Text-generation pipeline shared by the request handlers below.
pipe = pipeline("text-generation", tokenizer=tokenizer, model=model)

# Keyword arguments forwarded to every `pipe(...)` call.
# NOTE(review): sampling is disabled (do_sample=False), so `temperature` is
# presumably ignored by the generator -- confirm against the transformers docs.
generation_args = dict(
    max_new_tokens=500,
    return_full_text=False,
    temperature=0.0,
    do_sample=False,
)
# -----------------------
# Root page (HTML)
# -----------------------
@app.route("/")
def index():
    """Serve the single-page chat UI.

    The returned HTML document keeps the whole conversation in a JavaScript
    ``messages`` array (seeded with a system prompt), POSTs that array as JSON
    to ``/v1/chat/completions`` and renders ``choices[0].message.content``
    from the response.

    The embedded ``send()`` function:
      * ignores empty / whitespace-only input and submissions made while a
        previous request is still pending;
      * on any failure (network error, non-2xx status, unexpected payload)
        removes the unanswered user turn from ``messages`` again, shows an
        ``error`` line in the chat, and puts the text back into the input box
        so it can be retried.

    Returns:
        str: the HTML page.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Local LLM Chat</title>
<style>
body{
font-family: Arial;
background:#111;
color:white;
margin:0;
}
#chat{
height:80vh;
overflow-y:auto;
padding:20px;
}
.message{
margin-bottom:12px;
}
.user{
color:#6cf;
}
.assistant{
color:#9f9;
}
.error{
color:#f66;
}
#inputArea{
position:fixed;
bottom:0;
width:100%;
background:#222;
padding:10px;
}
#input{
width:80%;
padding:10px;
font-size:16px;
}
button{
padding:10px;
font-size:16px;
}
</style>
</head>
<body>
<h2 style="padding:10px;">Local Phi-3 Chat</h2>
<div id="chat"></div>
<div id="inputArea">
<input id="input" placeholder="メッセージを入力..." />
<button onclick="send()">送信</button>
</div>
<script>
let messages = [
{role:"system",content:"You are a helpful assistant."}
]
let busy = false
function add(role,text){
const chat=document.getElementById("chat")
const div=document.createElement("div")
div.className="message "+role
div.innerText=role+": "+text
chat.appendChild(div)
chat.scrollTop=chat.scrollHeight
}
async function send(){
const input=document.getElementById("input")
const text=input.value
if(busy || !text.trim()) return
busy=true
input.value=""
add("user",text)
messages.push({
role:"user",
content:text
})
try{
const res=await fetch("/v1/chat/completions",{
method:"POST",
headers:{
"Content-Type":"application/json"
},
body:JSON.stringify({
messages:messages
})
})
if(!res.ok){
throw new Error("HTTP "+res.status)
}
const data=await res.json()
const reply=data.choices[0].message.content
add("assistant",reply)
messages.push({
role:"assistant",
content:reply
})
}catch(err){
messages.pop()
add("error",String(err))
if(!input.value){
input.value=text
}
}finally{
busy=false
}
}
document.getElementById("input").addEventListener("keypress",function(e){
if(e.key==="Enter"){
send()
}
})
</script>
</body>
</html>
"""
# -----------------------
# OpenAI-compatible API
# -----------------------
def _invalid_request(message):
    """Build a 400 response whose JSON body is ``{"error": {...}}``."""
    payload = {"error": {"message": message, "type": "invalid_request_error"}}
    return jsonify(payload), 400


def _validate_messages(messages):
    """Return a description of what is wrong with *messages*, or None if valid.

    A valid value is a non-empty list of dicts that each carry a string
    ``role`` and a string ``content``.
    """
    if not isinstance(messages, list) or not messages:
        return "'messages' must be a non-empty list."
    for position, message in enumerate(messages):
        if not isinstance(message, dict):
            return f"messages[{position}] must be an object."
        if not isinstance(message.get("role"), str):
            return f"messages[{position}].role must be a string."
        if not isinstance(message.get("content"), str):
            return f"messages[{position}].content must be a string."
    return None


@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Generate one assistant reply for the posted chat history.

    Expects a JSON object with a ``messages`` list. The list is passed to the
    module-level ``pipe`` together with ``generation_args``; the generated
    text is returned in a ``chat.completion``-shaped JSON payload.

    Returns:
        * 200 with the completion payload on success;
        * 400 with ``{"error": {...}}`` when the body is not a JSON object or
          ``messages`` is missing, empty, or malformed;
        * 500 with ``{"error": {...}}`` when generation raises.
    """
    # silent=True yields None instead of raising on a non-JSON body, so every
    # bad request goes through the same JSON error path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _invalid_request("Request body must be a JSON object.")

    messages = data.get("messages")
    problem = _validate_messages(messages)
    if problem is not None:
        return _invalid_request(problem)

    try:
        result = pipe(messages, **generation_args)
        text = result[0]["generated_text"]
    except Exception:
        # Request boundary: log the traceback and answer with JSON so API
        # clients never have to parse an HTML error page.
        app.logger.exception("Text generation failed")
        payload = {
            "error": {"message": "Text generation failed.", "type": "server_error"}
        }
        return jsonify(payload), 500

    response = {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text,
                },
                "finish_reason": "stop",
            }
        ],
    }
    return jsonify(response)
if __name__ == "__main__":
    # Executed directly as a script: start Flask's built-in server, listening
    # on all network interfaces at port 7860.
    app.run(port=7860, host="0.0.0.0")