# Hugging Face Space: a Gradio app that answers NBA questions using a
# quantized Llama-2-13B-chat GGML model served via llama-cpp-python.
# (Header reconstructed; original lines were Spaces error-page residue.)
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Model to fetch from the Hugging Face Hub (GGML, pre-quantized).
# q2_K is the lightest quantization; "llama-2-13b-chat.ggmlv3.q5_1.bin"
# is heavier but higher quality.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q2_K.bin"

# Download the weights (or reuse the local hub cache) and get a file path.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Load the model once at startup; the instance is shared by every request.
# (The original `lcpp_llm = None` pre-assignment was dead code and is removed.)
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores used for inference
    n_batch=512,      # Should be between 1 and n_ctx; consider GPU VRAM.
    n_gpu_layers=32,  # Layers offloaded to the GPU; tune to your VRAM pool.
)
def CustomChatGPT(Ask):
    """Answer an NBA question with the local Llama-2 chat model.

    Parameters
    ----------
    Ask : str
        The user's question, inserted into the chat prompt template.

    Returns
    -------
    str
        The assistant's reply (capped at 100 tokens), with the echoed
        prompt stripped off and surrounding whitespace removed.
    """
    prompt_template = f'''SYSTEM: You are an NBA expert that helps answering questions about the NBA, its teams and its players summarizing the most important information limiting to no more that one hundred tokens.
USER: {Ask}
ASSISTANT:
'''
    response = lcpp_llm(
        prompt=prompt_template,
        max_tokens=100,
        temperature=0.3,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=150,
        echo=True,  # output starts with the full prompt; split off below
    )
    text = response["choices"][0]["text"]
    # With echo=True the prompt is echoed back, so the reply is whatever
    # follows the "ASSISTANT:" marker. Guard against the marker being
    # missing — the original `.split(...)[1]` raised IndexError then.
    parts = text.split("ASSISTANT:")
    reply = parts[1] if len(parts) > 1 else text
    return reply.strip()
# Wire the chat function into a simple text-in / text-out Gradio UI
# and start the web server.
demo = gr.Interface(
    fn=CustomChatGPT,
    inputs="text",
    outputs="text",
    examples=[
        "Who is the greatest basketball player in NBA history",
        "What is the winning record in a season?",
    ],
    title="Ask the AI coach",
    description="Ask the AI coach all you want about NBA Teams and Players:",
)
demo.launch()