# phi2223 / app.py
# stevendhasoi's Hugging Face Space — "Update app.py" (commit 01d26b4, 717 bytes)
#
# NOTE(review): the lines above were file-viewer scrape residue ("raw / history /
# blame / contribute / delete"); reconstructed as comments so the file parses.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Download the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download caches the file locally and returns its filesystem path,
# so repeat launches reuse the cached copy instead of re-downloading.
model_path = hf_hub_download(
repo_id="stevendhasoi/phi_2223",
filename="model_q4_k_m.gguf"
)
# Load the GGUF model with llama.cpp.
# n_ctx=2048: context window in tokens; n_threads=4: CPU threads used for
# inference (no GPU offload is configured here — CPU-only by default).
llm = Llama(
model_path=model_path,
n_ctx=2048,
n_threads=4,
)
def chat_fn(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior conversation turns as (user, assistant) pairs.
        NOTE(review): assumes Gradio's tuple-style history, not the
        newer ``type="messages"`` dict format — confirm gradio version.

    Returns
    -------
    str
        The model's completion with surrounding whitespace stripped.
    """
    # Build the prompt with str.join instead of repeated `+=`, which is
    # quadratic in the number of turns.
    turns = [f"User: {user}\nAssistant: {bot}\n" for user, bot in history]
    turns.append(f"User: {message}\nAssistant:")
    prompt = "".join(turns)
    output = llm(
        prompt,
        max_tokens=256,
        stop=["User:"],  # cut generation before the model fabricates a user turn
        echo=False,      # return only the completion, not the prompt
    )
    return output["choices"][0]["text"].strip()
# Launch the Gradio chat UI; blocks here serving requests until stopped.
gr.ChatInterface(chat_fn).launch()