Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import hf_hub_download
|
| 2 |
+
from llama_cpp import Llama
|
| 3 |
+
import gradio as gr
|
| 4 |
+
|
| 5 |
+
# Model repo on the Hugging Face Hub and the quantized GGML weights file to fetch.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
# q2_K is the lightest quantization; llama-2-13b-chat.ggmlv3.q5_1.bin is the
# heavier / higher-quality alternative. The weights are a single .bin file.
model_basename = "llama-2-13b-chat.ggmlv3.q2_K.bin"

# Download the weights (or reuse the local Hub cache) and get the local path.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Instantiate the llama.cpp model once at import time so every request reuses it.
# (Removed the dead `lcpp_llm = None` that was immediately overwritten here.)
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_gpu_layers=32,  # Change this value based on your model and your GPU VRAM pool.
)
def CustomChatGPT(Ask):
    """Answer an NBA question with the local Llama 2 chat model.

    Parameters
    ----------
    Ask : str
        The user's question; it is inserted into the chat prompt template.

    Returns
    -------
    str
        The assistant's generated answer (capped at 100 tokens).
    """
    prompt_template = f'''SYSTEM: You are an NBA expert that helps answering questions about the NBA, its teams and its players summarizing the most important information limiting to no more that one hundred tokens.

USER: {Ask}

ASSISTANT:
'''
    # echo=False so the completion contains only the assistant's answer.
    # With echo=True the whole SYSTEM/USER prompt template was repeated back
    # to the user and had to be stripped from the reply by hand (the old
    # commented-out splitting code existed only to undo that).
    response = lcpp_llm(
        prompt=prompt_template,
        max_tokens=100,
        temperature=0.3,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=150,
        echo=False,
    )
    return response["choices"][0]["text"]
# Minimal Gradio UI: one text box in, one text box out, plus two sample questions.
example_questions = [
    "Who is the greatest basketball player in NBA history",
    "What is the winning record in a season?",
]
gui = gr.Interface(
    fn=CustomChatGPT,
    inputs="text",
    outputs="text",
    examples=example_questions,
    title="Ask the AI coach",
    description="Ask the AI coach all you want about NBA Teams and Players:",
)

gui.launch()