# iris / app.py — Hugging Face Space (Datangtang)
# Commit d658b72 (verified) — "continuing bug fixes, no reply yet"
# (page-chrome lines from the HF web UI converted to comments so the file parses)
# Raw file size: 3.22 kB
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
# ----------------------------------------
# Global model cache
# ----------------------------------------
# Maps a dropdown label to its loaded Llama instance so each GGUF file is
# downloaded and loaded at most once per process.
loaded_models = {}
# Label of the model most recently loaded by load_model() (None until first load).
current_model_name = None
# Dropdown label -> Hugging Face Hub location (repo + file) of the GGUF weights.
MODEL_CONFIGS = {
    "1B Model (Datangtang/GGUF1B)": {
        "repo_id": "Datangtang/GGUF1B",
        "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
    },
    "3B Model (Datangtang/GGUF3B)": {
        "repo_id": "Datangtang/GGUF3B",
        "filename": "llama-3.2-3b-instruct.Q4_K_M.gguf"
    }
}
# ----------------------------------------
# Load model function
# ----------------------------------------
def load_model(model_choice):
    """Return a cached Llama instance for *model_choice*, loading it on demand.

    Downloads the GGUF weights from the Hugging Face Hub on first use and
    caches the resulting Llama object in ``loaded_models`` so subsequent
    calls are free.

    Args:
        model_choice: A key of ``MODEL_CONFIGS`` (the dropdown label).

    Returns:
        llama_cpp.Llama: The loaded model.

    Raises:
        KeyError: If *model_choice* is not a key of ``MODEL_CONFIGS``.
    """
    global loaded_models, current_model_name
    # Fast path: model already loaded in this process.
    if model_choice in loaded_models:
        # Keep the tracker accurate on cache hits too (the original only
        # updated it on a fresh load, so it went stale after switching back).
        current_model_name = model_choice
        return loaded_models[model_choice]
    cfg = MODEL_CONFIGS[model_choice]
    model_path = hf_hub_download(
        repo_id=cfg["repo_id"],
        filename=cfg["filename"],
        local_dir="./model",
        # .get() keeps anonymous downloads of public repos working when
        # HF_TOKEN is unset (os.environ["HF_TOKEN"] raised KeyError).
        token=os.environ.get("HF_TOKEN"),
    )
    llm = Llama(
        model_path=model_path,
        n_ctx=1024,        # modest context window for CPU-only Spaces
        n_threads=6,
        n_batch=512,
        n_gpu_layers=0,    # CPU inference only
        use_mmap=True,
        use_mlock=True,
        verbose=False,
    )
    loaded_models[model_choice] = llm
    current_model_name = model_choice
    return llm
# ----------------------------------------
# Chat function (Gradio 4.x message format)
# ----------------------------------------
def chat(messages, model_choice):
    """Generate one assistant reply for the running conversation.

    Args:
        messages: Chat history in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts.
        model_choice: Key of ``MODEL_CONFIGS`` selecting which model to use.

    Returns:
        str: The model's reply text, stripped of surrounding whitespace.
    """
    llm = load_model(model_choice)
    prompt = _build_prompt(messages)
    response = llm(
        prompt,
        max_tokens=128,
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        repeat_penalty=1.1,
        # Cut generation off as soon as the model starts a new turn.
        stop=["User:", "Assistant:"]
    )
    return response["choices"][0]["text"].strip()


def _build_prompt(messages, history_window=3):
    """Flatten the last *history_window* messages into a plain-text prompt.

    Only "user" and "assistant" roles are rendered; any other role is
    silently skipped. The prompt ends with "Assistant:" to cue the model
    to produce the assistant's next turn.
    """
    lines = ["System: You are a helpful assistant."]
    for msg in messages[-history_window:]:
        role = msg["role"]
        text = msg["content"]
        if role == "user":
            lines.append(f"User: {text}")
        elif role == "assistant":
            lines.append(f"Assistant: {text}")
    return "\n".join(lines) + "\nAssistant:"
# ----------------------------------------
# Gradio UI (Gradio 4.x messages format)
# ----------------------------------------
# ----------------------------------------
# Gradio UI (Gradio 4.x "messages" format)
# ----------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 Datangtang GGUF Model Demo (Gradio 4.x Compatible)")
    model_choice = gr.Dropdown(
        label="Select Model",
        choices=list(MODEL_CONFIGS.keys()),
        value="1B Model (Datangtang/GGUF1B)",
    )
    chatbot = gr.Chatbot(label="Chat", type="messages")
    msg_box = gr.Textbox(label="Message")

    def add_user_message(user_msg, messages):
        """Append the user's message to the history and clear the textbox.

        Ignores empty/whitespace-only submissions so a stray Enter press
        does not fire a model call on a blank prompt (the original
        appended an empty user turn unconditionally).
        """
        if not user_msg or not user_msg.strip():
            return messages, ""
        return messages + [{"role": "user", "content": user_msg}], ""

    def add_bot_reply(messages, model_choice):
        """Generate the assistant reply for the latest user message."""
        # No-op when there is nothing to answer: empty history, or the
        # last turn is not a user message (e.g. a blank submission was
        # ignored by add_user_message above).
        if not messages or messages[-1]["role"] != "user":
            return messages
        reply = chat(messages, model_choice)
        return messages + [{"role": "assistant", "content": reply}]

    # Enter in the textbox: first record the user turn, then ask the model.
    msg_box.submit(
        add_user_message, [msg_box, chatbot], [chatbot, msg_box]
    ).then(
        add_bot_reply, [chatbot, model_choice], chatbot
    )

demo.launch()