| | from flask import Flask, request, jsonify |
| | import torch |
| | from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
| |
|
app = Flask(__name__)

# Fixed seed so generation is reproducible across restarts.
torch.random.manual_seed(0)

# Single source of truth for the checkpoint id (was duplicated between the
# model and tokenizer loads below).
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Load the model on CPU. NOTE: trust_remote_code=True executes code shipped
# with the checkpoint -- acceptable only because MODEL_ID is pinned to a
# known Microsoft release; never point it at an untrusted repo.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cpu",
    torch_dtype="auto",  # use the dtype stored in the checkpoint
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Chat-capable text-generation pipeline; accepts a list of
# {"role": ..., "content": ...} dicts directly.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy (deterministic) decoding. With do_sample=False the temperature is
# ignored, and recent transformers versions warn (or raise) on
# temperature=0.0 -- so it is deliberately omitted; output is unchanged.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,  # return only the newly generated text
    "do_sample": False,
}
| |
|
| | |
| | |
| | |
| | @app.route("/") |
| | def index(): |
| | return """ |
| | <!DOCTYPE html> |
| | <html> |
| | <head> |
| | <meta charset="utf-8"> |
| | <title>Local LLM Chat</title> |
| | |
| | <style> |
| | body{ |
| | font-family: Arial; |
| | background:#111; |
| | color:white; |
| | margin:0; |
| | } |
| | |
| | #chat{ |
| | height:80vh; |
| | overflow-y:auto; |
| | padding:20px; |
| | } |
| | |
| | .message{ |
| | margin-bottom:12px; |
| | } |
| | |
| | .user{ |
| | color:#6cf; |
| | } |
| | |
| | .assistant{ |
| | color:#9f9; |
| | } |
| | |
| | #inputArea{ |
| | position:fixed; |
| | bottom:0; |
| | width:100%; |
| | background:#222; |
| | padding:10px; |
| | } |
| | |
| | #input{ |
| | width:80%; |
| | padding:10px; |
| | font-size:16px; |
| | } |
| | |
| | button{ |
| | padding:10px; |
| | font-size:16px; |
| | } |
| | </style> |
| | </head> |
| | |
| | <body> |
| | |
| | <h2 style="padding:10px;">Local Phi-3 Chat</h2> |
| | |
| | <div id="chat"></div> |
| | |
| | <div id="inputArea"> |
| | <input id="input" placeholder="メッセージを入力..." /> |
| | <button onclick="send()">送信</button> |
| | </div> |
| | |
| | <script> |
| | |
| | let messages = [ |
| | {role:"system",content:"You are a helpful assistant."} |
| | ] |
| | |
| | function add(role,text){ |
| | |
| | const chat=document.getElementById("chat") |
| | |
| | const div=document.createElement("div") |
| | div.className="message "+role |
| | |
| | div.innerText=role+": "+text |
| | |
| | chat.appendChild(div) |
| | chat.scrollTop=chat.scrollHeight |
| | } |
| | |
| | async function send(){ |
| | |
| | const input=document.getElementById("input") |
| | const text=input.value |
| | |
| | if(!text) return |
| | |
| | input.value="" |
| | |
| | add("user",text) |
| | |
| | messages.push({ |
| | role:"user", |
| | content:text |
| | }) |
| | |
| | const res=await fetch("/v1/chat/completions",{ |
| | method:"POST", |
| | headers:{ |
| | "Content-Type":"application/json" |
| | }, |
| | body:JSON.stringify({ |
| | messages:messages |
| | }) |
| | }) |
| | |
| | const data=await res.json() |
| | |
| | const reply=data.choices[0].message.content |
| | |
| | add("assistant",reply) |
| | |
| | messages.push({ |
| | role:"assistant", |
| | content:reply |
| | }) |
| | } |
| | |
| | document.getElementById("input").addEventListener("keypress",function(e){ |
| | if(e.key==="Enter"){ |
| | send() |
| | } |
| | }) |
| | |
| | </script> |
| | |
| | </body> |
| | </html> |
| | """ |
| |
|
| | |
| | |
| | |
| | @app.route("/v1/chat/completions", methods=["POST"]) |
| | def chat_completions(): |
| |
|
| | data = request.json |
| | messages = data.get("messages", []) |
| |
|
| | result = pipe(messages, **generation_args) |
| | text = result[0]["generated_text"] |
| |
|
| | response = { |
| | "id": "chatcmpl-local", |
| | "object": "chat.completion", |
| | "choices": [ |
| | { |
| | "index": 0, |
| | "message": { |
| | "role": "assistant", |
| | "content": text |
| | }, |
| | "finish_reason": "stop" |
| | } |
| | ] |
| | } |
| |
|
| | return jsonify(response) |
| |
|
| |
|
| | if __name__ == "__main__": |
| | app.run(host="0.0.0.0", port=7860) |