# Hugging Face Space app: Gradio chat UI for a quantized GGUF model
# served locally with llama.cpp. (Scraped page-status header removed.)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download caches the file locally and returns its filesystem path,
# so repeated startups do not re-download.
model_path = hf_hub_download(
    repo_id="stevendhasoi/phi_2223",
    filename="model_q4_k_m.gguf",
)

# Load the GGUF model via the llama.cpp Python bindings.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,    # context window size in tokens
    n_threads=4,   # CPU threads used for inference
)
def chat_fn(message, history):
    """Produce one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: The latest user message (str).
        history: Prior conversation turns. Accepts both the legacy
            pair format ``[(user, bot), ...]`` and the "messages"
            format ``[{"role": ..., "content": ...}, ...]`` that newer
            Gradio versions pass (the original tuple-unpacking crashed
            on dict items).

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # Build the prompt with join() instead of repeated += (avoids
    # quadratic string concatenation on long histories).
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per utterance.
            role = "User" if turn.get("role") == "user" else "Assistant"
            parts.append(f"{role}: {turn.get('content', '')}\n")
        else:
            # Legacy format: (user_message, bot_message) pairs.
            user, bot = turn
            parts.append(f"User: {user}\nAssistant: {bot}\n")
    parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(parts)

    output = llm(
        prompt,
        max_tokens=256,
        stop=["User:"],  # cut generation before the model invents the next user turn
        echo=False,      # do not repeat the prompt in the output
    )
    return output["choices"][0]["text"].strip()
# Build the chat UI around chat_fn and start the web server
# (blocks until the process is stopped).
gr.ChatInterface(chat_fn).launch()