File size: 1,115 Bytes
34b15a6 5c4dbe9 34b15a6 5c4dbe9 34b15a6 5c4dbe9 8e9202e 5c4dbe9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import gradio as gr
from llama_cpp import Llama
# 1. Load the Model
# Pulls "DeepSeek-R1-Distill-Llama-8B" (GGUF build) from the Hugging Face Hub.
# The Q4_K_M quantization is chosen so the model fits in the FREE 16GB RAM tier.
print("⏳ Downloading & Loading Model... (This takes 1-2 mins on first run)")
llm = Llama.from_pretrained(
    repo_id="bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF",
    filename="DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf",
    n_ctx=4096,     # context window: how much conversation the model can "remember"
    verbose=True,   # print llama.cpp load/inference details to the console
)
# 2. Define the Chat Function
def chat_with_deepseek(message, history):
    """Generate a reply to *message*, conditioned on the conversation so far.

    Parameters:
        message: The user's newest message (str).
        history: Prior turns supplied by gr.ChatInterface — either
            (user, assistant) pairs (default "tuples" mode) or
            {"role": ..., "content": ...} dicts ("messages" mode).

    Returns:
        The model's generated reply text (str).
    """
    # Fix: the original ignored `history`, so the bot had no memory of the
    # conversation despite the 4096-token context window configured above.
    # Rebuild the full transcript in the "User:/Assistant:" format the
    # prompt already used, then append the new message.
    lines = []
    for turn in history:
        if isinstance(turn, dict):
            # gradio "messages" format
            role = "User" if turn.get("role") == "user" else "Assistant"
            lines.append(f"{role}: {turn.get('content', '')}")
        else:
            # gradio "tuples" format: (user_msg, bot_msg)
            user_msg, bot_msg = turn
            lines.append(f"User: {user_msg}")
            if bot_msg:
                lines.append(f"Assistant: {bot_msg}")
    lines.append(f"User: {message}\nAssistant:")
    prompt = "\n".join(lines)

    # Generate response
    output = llm(
        prompt,
        max_tokens=512,          # cap on answer length
        stop=["User:", "\n\n"],  # keep the model from impersonating the user
                                 # NOTE(review): "\n\n" also truncates multi-
                                 # paragraph answers — consider dropping it.
        echo=False,              # return only the completion, not the prompt
    )
    return output['choices'][0]['text']
# 3. Launch the Chat Interface
# ChatInterface builds the full chat UI around the function automatically.
demo = gr.ChatInterface(fn=chat_with_deepseek)
demo.launch()
|