from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
# Quantized Llama-2-13B chat model in GGML format.
# q2_K is the lightest quantization; q5_1 is heavier but higher quality.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q2_K.bin"

# Download the weights from the Hugging Face Hub (cached after first run).
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Load the model once at startup; every request reuses this instance.
# (The original `lcpp_llm = None` pre-assignment was dead code — it was
# overwritten immediately — so it has been removed.)
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores used for inference
    n_batch=512,      # should be between 1 and n_ctx; size to available GPU VRAM
    n_gpu_layers=32,  # layers offloaded to the GPU; adjust to your VRAM pool
)
def CustomChatGPT(Ask):
    """Answer an NBA question with the local Llama-2 chat model.

    Args:
        Ask: The user's question as plain text.

    Returns:
        The assistant's completion with the echoed prompt stripped.
    """
    prompt_template = f'''SYSTEM: You are an NBA expert that helps answering questions about the NBA, its teams and its players summarizing the most important information limiting to no more that one hundred tokens.
USER: {Ask}
ASSISTANT:
'''
    response = lcpp_llm(
        prompt=prompt_template,
        max_tokens=100,
        temperature=0.3,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=150,
        echo=True,  # the returned text includes the prompt; stripped below
    )
    text = response["choices"][0]["text"]
    # With echo=True the output is prompt + completion, so keep only what
    # follows the last "ASSISTANT:" marker. rpartition (unlike the original
    # split(...)[1]) cannot raise IndexError if the marker is missing — it
    # then yields the full text, which is a safer fallback than crashing.
    _, _, reply = text.rpartition("ASSISTANT:")
    return reply.strip()
# Wire the chat function into a simple text-in / text-out Gradio UI.
# (A stray " |" scrape artifact after gui.launch() — a SyntaxError — was removed.)
gui = gr.Interface(
    fn=CustomChatGPT,
    inputs="text",
    outputs="text",
    examples=[
        "Who is the greatest basketball player in NBA history",
        "What is the winning record in a season?",
    ],
    title="Ask the AI coach",
    description="Ask the AI coach all you want about NBA Teams and Players:",
)
gui.launch()