Spaces:

izuemon
/

phi-3

Running

App Files Files Community

phi-3 / app.py

izuemon

Update app.py

a79f08d verified about 13 hours ago

raw

history blame contribute delete

3.48 kB

	from flask import Flask, request, jsonify
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	app = Flask(__name__)

	# Load the model once at startup so every request reuses the same objects.
	# The torch RNG seed is fixed before any model code runs.
	torch.random.manual_seed(0)

	# Single source for the checkpoint id shared by the model and the tokenizer.
	MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

	# CPU-only load; the dtype choice is left to the library ("auto").
	# NOTE(review): trust_remote_code=True lets the model repository supply its
	# own Python code — confirm this is intended for this deployment.
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    device_map="cpu",
	    torch_dtype="auto",
	    trust_remote_code=True,
	)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

	# Shared text-generation pipeline used by the chat completion endpoint.
	pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

	# Keyword arguments forwarded to every pipe(...) call: sampling is disabled,
	# at most 500 new tokens are produced, and the prompt is not echoed back.
	generation_args = dict(
	    max_new_tokens=500,
	    return_full_text=False,
	    temperature=0.0,
	    do_sample=False,
	)

	# -----------------------
	# Root page (HTML)
	# -----------------------
	@app.route("/")
	def index():
	    """Serve the single-page chat UI.

	    The returned HTML embeds a small script that keeps the conversation in a
	    browser-side ``messages`` array (seeded with a system prompt), POSTs that
	    array as JSON to ``/v1/chat/completions`` on every send, and appends the
	    text found at ``choices[0].message.content`` of the response to the chat
	    log.

	    Returns:
	        The complete HTML document as a string.
	    """
	    page = """
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<title>Local LLM Chat</title>

	<style>
	body{
	font-family: Arial;
	background:#111;
	color:white;
	margin:0;
	}

	#chat{
	height:80vh;
	overflow-y:auto;
	padding:20px;
	}

	.message{
	margin-bottom:12px;
	}

	.user{
	color:#6cf;
	}

	.assistant{
	color:#9f9;
	}

	#inputArea{
	position:fixed;
	bottom:0;
	width:100%;
	background:#222;
	padding:10px;
	}

	#input{
	width:80%;
	padding:10px;
	font-size:16px;
	}

	button{
	padding:10px;
	font-size:16px;
	}
	</style>
	</head>

	<body>

	<h2 style="padding:10px;">Local Phi-3 Chat</h2>

	<div id="chat"></div>

	<div id="inputArea">
	<input id="input" placeholder="メッセージを入力..." />
	<button onclick="send()">送信</button>
	</div>

	<script>

	let messages = [
	{role:"system",content:"You are a helpful assistant."}
	]

	function add(role,text){

	const chat=document.getElementById("chat")

	const div=document.createElement("div")
	div.className="message "+role

	div.innerText=role+": "+text

	chat.appendChild(div)
	chat.scrollTop=chat.scrollHeight
	}

	async function send(){

	const input=document.getElementById("input")
	const text=input.value

	if(!text) return

	input.value=""

	add("user",text)

	messages.push({
	role:"user",
	content:text
	})

	const res=await fetch("/v1/chat/completions",{
	method:"POST",
	headers:{
	"Content-Type":"application/json"
	},
	body:JSON.stringify({
	messages:messages
	})
	})

	const data=await res.json()

	const reply=data.choices[0].message.content

	add("assistant",reply)

	messages.push({
	role:"assistant",
	content:reply
	})
	}

	document.getElementById("input").addEventListener("keypress",function(e){
	if(e.key==="Enter"){
	send()
	}
	})

	</script>

	</body>
	</html>
	"""
	    return page

	# -----------------------
	# OpenAI-compatible API
	# -----------------------
	def _invalid_request(message):
	    """Build an OpenAI-style JSON error response with HTTP status 400."""
	    error = {"message": message, "type": "invalid_request_error"}
	    return jsonify({"error": error}), 400


	@app.route("/v1/chat/completions", methods=["POST"])
	def chat_completions():
	    """Generate one assistant reply for an OpenAI-style chat request.

	    Expects a JSON object body whose ``messages`` entry is a non-empty list
	    of objects, each carrying ``role`` and ``content``. The list is handed to
	    the module-level ``pipe`` together with ``generation_args``.

	    Returns:
	        On success, a JSON ``chat.completion`` object with a single choice
	        holding the generated text. On a malformed request, a JSON error
	        object with HTTP status 400 instead of an unhandled server error.
	    """
	    # silent=True yields None for a missing/invalid JSON body or a wrong
	    # Content-Type, so every malformed request funnels into one check below.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return _invalid_request("Request body must be a JSON object.")

	    messages = data.get("messages")
	    if not isinstance(messages, list) or not messages:
	        return _invalid_request("'messages' must be a non-empty list.")

	    # Reject malformed entries up front rather than failing inside the model
	    # pipeline, where the error would surface as an opaque 500.
	    well_formed = all(
	        isinstance(message, dict) and "role" in message and "content" in message
	        for message in messages
	    )
	    if not well_formed:
	        return _invalid_request(
	            "Each message must be an object with 'role' and 'content'."
	        )

	    result = pipe(messages, **generation_args)
	    text = result[0]["generated_text"]

	    response = {
	        "id": "chatcmpl-local",
	        "object": "chat.completion",
	        "choices": [
	            {
	                "index": 0,
	                "message": {
	                    "role": "assistant",
	                    "content": text,
	                },
	                "finish_reason": "stop",
	            }
	        ],
	    }

	    return jsonify(response)


	if __name__ == "__main__":
	    # Start Flask's built-in server only when run as a script; listens on
	    # every network interface at port 7860.
	    listen_options = {"host": "0.0.0.0", "port": 7860}
	    app.run(**listen_options)