llama-cpp-python

Runtime error

App Files Files Community

llama-cpp-python / app.py

zac

Update app.py

687dd74 over 2 years ago

raw

history blame

1.75 kB

	import gradio as gr
	import time
	import ctypes #to run on C api directly
	import llama_cpp
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download #load from huggingfaces


	# Download the model file from the Hugging Face Hub and hand the result of
	# hf_hub_download to Llama as its model_path. This runs once, at import time.
	llm = Llama(
	    model_path=hf_hub_download(
	        repo_id="TheBloke/StableBeluga-7B-GGML",
	        filename="stablebeluga-7b.ggmlv3.q6_K.bin",
	    ),
	)

	def generate_text(input_text):
	    """Run a single question/answer completion through the module-level ``llm``.

	    The question is wrapped as ``"Q: <input_text> A:"`` and sent with
	    ``echo=True``, a limit of 521 tokens, and the stop sequences "Q:" and
	    newline.

	    Args:
	        input_text: The question to put into the prompt.

	    Returns:
	        The ``text`` field of the first entry in the completion's
	        ``choices`` list.
	    """
	    prompt = f"Q: {input_text} A:"
	    completion = llm(prompt, max_tokens=521, stop=["Q:", "\n"], echo=True)
	    first_choice = completion['choices'][0]
	    return first_choice['text']

	# Standalone text components. The previous `input_text = user_message` read a
	# name that only exists as a parameter of `user`, so importing the module
	# raised NameError before the UI was ever built. Both names are now plain
	# `gr.Textbox` components instead of using the legacy `gr.outputs` namespace.
	# NOTE(review): neither name is referenced by the Blocks UI visible in this
	# file — presumably left over from an earlier interface; confirm before removing.
	input_text = gr.Textbox(label="Input text")
	output_text = gr.Textbox(label="Output text")

	# Free-text blurb about the app. The wording is a runtime string and is
	# reproduced exactly; it is only split across lines for readability.
	description = (
	    "bro neil it currently dosent work two people sending it request at the "
	    "same time so going to fix that but currently running ggml models with "
	    "llama.cpp implementation in python "
	    "[https://github.com/abetlen/llama-cpp-python]"
	)

	# Sample rows, each a two-item list of [question, answer].
	examples = [
	    [question, answer]
	    for question, answer in (
	        ("What is the capital of France? ", "The capital of France is Paris."),
	        (
	            "Who wrote the novel 'Pride and Prejudice'?",
	            "The novel 'Pride and Prejudice' was written by Jane Austen.",
	        ),
	        ("What is the square root of 64?", "The square root of 64 is 8."),
	    )
	]

	# Chat UI: a chatbot pane, a textbox for the user's message and a clear button.
	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox()
	    clear = gr.Button("Clear")

	    def user(user_message, history):
	        """Record the submitted message as a new, not-yet-answered turn.

	        Args:
	            user_message: Text taken from the ``msg`` textbox.
	            history: Current chatbot value, a list of
	                ``[user_message, bot_message]`` pairs.

	        Returns:
	            A tuple of an empty string (written back to ``msg``, which
	            clears it) and a new history list with
	            ``[user_message, None]`` appended.
	        """
	        return "", history + [[user_message, None]]

	    def bot(history):
	        """Generate the reply to the latest user message and stream it.

	        The previous version iterated over a Gradio Textbox component
	        instead of model output, so the model was never called. The reply
	        now comes from ``generate_text``.

	        Args:
	            history: Chat history whose last pair holds the pending user
	                message in slot 0.

	        Yields:
	            The same ``history`` list after each character is appended to
	            the last pair's bot slot.
	        """
	        user_message = history[-1][0]
	        reply = generate_text(user_message)

	        # generate_text requests echo=True, so the completion may start with
	        # the prompt itself; show only the answer part in the chat.
	        echoed_prompt = f"Q: {user_message} A:"
	        if reply.startswith(echoed_prompt):
	            reply = reply[len(echoed_prompt):]
	        reply = reply.strip()

	        history[-1][1] = ""
	        # Yield once up front so an empty reply still updates the chat
	        # instead of leaving the turn unanswered.
	        yield history
	        for character in reply:
	            history[-1][1] += character
	            time.sleep(0.05)  # small delay to produce a typing effect
	            yield history

	    # On submit: store the message first (unqueued), then run the bot
	    # handler with the chatbot as both input and output.
	    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
	        bot, chatbot, chatbot
	    )
	    # Clearing sets the chatbot value to None.
	    clear.click(lambda: None, None, chatbot, queue=False)

	# Enable Gradio's queue on the app; this happens on import as well as on
	# direct execution.
	demo.queue()

	if __name__ == "__main__":
	    # Start the app only when this file is run as a script.
	    demo.launch()