# Spaces: izuemon / phi-3 (Running)
# File size: 3,477 Bytes

from flask import Flask, request, jsonify
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = Flask(__name__)

# Hub id of the checkpoint; shared by the model and tokenizer loads below so
# the two can never drift apart.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Load the model once, at startup (module import), rather than per request.
torch.random.manual_seed(0)

# NOTE(review): trust_remote_code=True lets transformers run Python code
# shipped with the model repository -- keep only if that repo is trusted.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cpu",
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Keyword arguments forwarded to every `pipe(...)` call.
# Decoding is greedy (`do_sample=False`). `temperature` is intentionally not
# set: it is only consulted when sampling, and passing it together with
# `do_sample=False` makes transformers emit a warning on generation.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}

# -----------------------
# Root page (HTML)
# -----------------------
@app.route("/")
def index() -> str:
    """Serve the single-page chat UI.

    Returns a static HTML document. Its inline script keeps the conversation
    history in the browser and POSTs the whole history as JSON to
    ``/v1/chat/completions`` on every turn, then renders
    ``choices[0].message.content`` from the reply.

    The script only allows one request in flight at a time, and if a request
    fails it shows the error and removes the unanswered user turn from the
    history so the next request is not sent with a dangling message.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Local LLM Chat</title>

<style>
body{
    font-family: Arial;
    background:#111;
    color:white;
    margin:0;
}

#chat{
    height:80vh;
    overflow-y:auto;
    padding:20px;
}

.message{
    margin-bottom:12px;
}

.user{
    color:#6cf;
}

.assistant{
    color:#9f9;
}

.error{
    color:#f66;
}

#inputArea{
    position:fixed;
    bottom:0;
    width:100%;
    background:#222;
    padding:10px;
}

#input{
    width:80%;
    padding:10px;
    font-size:16px;
}

button{
    padding:10px;
    font-size:16px;
}
</style>
</head>

<body>

<h2 style="padding:10px;">Local Phi-3 Chat</h2>

<div id="chat"></div>

<div id="inputArea">
<input id="input" placeholder="メッセージを入力..." />
<button onclick="send()">送信</button>
</div>

<script>

let messages = [
    {role:"system",content:"You are a helpful assistant."}
]

// True while a request is in flight; blocks overlapping submissions.
let pending = false

function add(role,text){

    const chat=document.getElementById("chat")

    const div=document.createElement("div")
    div.className="message "+role

    div.innerText=role+": "+text

    chat.appendChild(div)
    chat.scrollTop=chat.scrollHeight
}

async function send(){

    const input=document.getElementById("input")
    const text=input.value

    // Ignore empty input and anything submitted while a reply is pending.
    if(!text || pending) return

    pending=true
    input.value=""

    add("user",text)

    messages.push({
        role:"user",
        content:text
    })

    try{
        const res=await fetch("/v1/chat/completions",{
            method:"POST",
            headers:{
                "Content-Type":"application/json"
            },
            body:JSON.stringify({
                messages:messages
            })
        })

        if(!res.ok){
            throw new Error("HTTP "+res.status)
        }

        const data=await res.json()

        const reply=data.choices[0].message.content

        add("assistant",reply)

        messages.push({
            role:"assistant",
            content:reply
        })
    }catch(err){
        // Remove the unanswered user turn so the history stays consistent.
        messages.pop()
        add("error",String(err))
    }finally{
        pending=false
    }
}

document.getElementById("input").addEventListener("keypress",function(e){
    if(e.key==="Enter"){
        send()
    }
})

</script>

</body>
</html>
"""

# -----------------------
# OpenAI-compatible API
# -----------------------
def _invalid_request(message, param=None):
    """Build a 400 response carrying an OpenAI-style error object."""
    error = {
        "message": message,
        "type": "invalid_request_error",
        "param": param,
        "code": None,
    }
    return jsonify({"error": error}), 400


@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Generate one assistant reply for a chat history.

    Expects a JSON object body with a non-empty ``messages`` list; each entry
    must be an object with string ``role`` and ``content`` fields. The list
    is passed to the module-level ``pipe`` together with ``generation_args``.

    Returns:
        A JSON ``chat.completion`` object with a single choice containing the
        generated text, or a 400 response with an ``error`` object when the
        request body does not have the expected shape.
    """
    # silent=True yields None for a missing/invalid JSON body, so every
    # malformed request ends up in the same JSON 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _invalid_request("Request body must be a JSON object.")

    messages = data.get("messages")
    if not isinstance(messages, list) or not messages:
        return _invalid_request(
            "'messages' must be a non-empty list.", "messages"
        )

    well_formed = all(
        isinstance(entry, dict)
        and isinstance(entry.get("role"), str)
        and isinstance(entry.get("content"), str)
        for entry in messages
    )
    if not well_formed:
        return _invalid_request(
            "Each message must be an object with string 'role' and "
            "'content' fields.",
            "messages",
        )

    result = pipe(messages, **generation_args)
    text = result[0]["generated_text"]

    response = {
        "id": "chatcmpl-local",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text,
                },
                "finish_reason": "stop",
            }
        ],
    }

    return jsonify(response)


if __name__ == "__main__":
    # Started directly as a script: serve on every interface, port 7860.
    listen_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_options)