# chat-llm-api / app.py
# (Hugging Face Space file by lilagi5822 — commit "Create app.py", ba466bf, 538 bytes)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Fetch the quantized GGUF weights from the Hugging Face Hub (cached locally
# after the first download), then load them into a small llama.cpp instance
# sized for CPU-only Spaces hardware (512-token context, 2 threads).
model_path = hf_hub_download(
    repo_id="mradermacher/Falcon-H1-Tiny-R-90M-GGUF",
    filename="Falcon-H1-Tiny-R-90M.Q2_K.gguf",
)
llm = Llama(model_path, n_ctx=512, n_threads=2)
def chat(message):
    """Generate one assistant reply for a single user message.

    Wraps *message* in a ``User: ... / Assistant:`` prompt, samples up to
    50 tokens (temperature 0.7) from the module-level ``llm``, stopping if
    the model starts a new "User:" turn, and returns the stripped text.
    """
    prompt = f"User: {message}\nAssistant:"
    completion = llm(
        prompt,
        max_tokens=50,
        temperature=0.7,
        stop=["User:"],
    )
    # llama.cpp returns an OpenAI-style dict; the text lives in choices[0].
    return completion['choices'][0]['text'].strip()
# Minimal Gradio UI: one text box in, one text box out, served by chat().
demo = gr.Interface(fn=chat, inputs="text", outputs="text")
demo.launch()