# NOTE(review): the lines below are Hugging Face Spaces page metadata
# (space status, file size, commit hashes, blob line numbers) that was
# scraped into the source file; kept as comments so the file stays valid Python.
# Spaces: Sleeping / Sleeping
# File size: 3,381 Bytes
# Commits seen: b49ddd4 8c66df6 795fb06 006fe32 b1d598a 0414a9d e5a7c21 1960bbc 54fe7ee
import gradio as gr
from llama_cpp import Llama
from transformers import AutoTokenizer
# Hugging Face Hub coordinates: a GGUF quantized Llama-3.2-1B fine-tune for
# llama.cpp, plus a separate tokenizer repo that supplies the chat template
# used to serialize conversations into prompt strings.
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
TOKENIZER_ID = "chthees/lora_model_full_finetome-tokenizer"
print("Loading Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
print("Loading Model...")
# n_ctx=2048 caps the context window; n_threads=2 presumably matches a small
# CPU host (e.g. a free Space) — TODO confirm. verbose=False silences
# llama.cpp's per-token logging.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,
    n_threads=2,
    verbose=False
)
# --- SYSTEM PROMPT LOGIC ---
def get_system_prompt(style_mode):
    """Return the system-prompt text for the requested persona.

    Any unrecognized ``style_mode`` falls back to the "Normal" persona.
    """
    base_instruction = "You are a helpful and intelligent AI assistant."
    if style_mode == "Professional":
        return (
            f"{base_instruction} You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented."
        )
    if style_mode == "Shakespeare":
        return (
            f"{base_instruction} You are William Shakespeare. "
            "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
        )
    if style_mode == "Funny/Ironic":
        return (
            f"{base_instruction} You are a sarcastic comedian. "
            "Wrap your answers in dry humor, irony, and witty remarks."
        )
    # "Normal" and any unknown style share the default prompt.
    return f"{base_instruction} Answer clearly and concisely."
# --- CORE RESPONSE FUNCTION ---
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Generate one assistant reply for the Gradio chat interface.

    ``system_message_dummy`` is the hidden textbox placeholder from the UI
    and is deliberately ignored — the real system prompt comes from
    ``get_system_prompt(style_mode)``.
    """
    # Prompt order: persona system message, then recent history, then the
    # new user message. Only the last 10 history entries (i.e. 10 messages,
    # about 5 user/assistant exchanges) are kept so the rendered prompt
    # stays inside the model's 2048-token context window.
    messages = [{"role": "system", "content": get_system_prompt(style_mode)}]
    messages.extend(
        {"role": entry["role"], "content": entry["content"]}
        for entry in history[-10:]
    )
    messages.append({"role": "user", "content": message})
    # Serialize via the tokenizer's chat template into the model's native
    # prompt format, ending with the assistant-turn header.
    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Run generation; echo=False keeps the prompt out of the completion, and
    # the stop strings cut generation at the end-of-turn markers.
    output = llm(
        prompt_str,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=[tokenizer.eos_token, "<|eot_id|>"],
        echo=False
    )
    return output["choices"][0]["text"].strip()
# --- GUI SETUP ---
# ChatInterface wires `respond` to a chat UI. The additional_inputs map
# positionally onto respond()'s parameters after (message, history):
# hidden system-prompt placeholder, max tokens, temperature, top-p,
# repetition penalty, and the persona dropdown.
chatbot = gr.ChatInterface(
    respond,
    type="messages",  # history arrives as a list of {"role", "content"} dicts
    additional_inputs=[
        # Hidden placeholder bound to system_message_dummy; respond() ignores it.
        gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
        gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
        # Choices must match the keys handled by get_system_prompt().
        gr.Dropdown(
            choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
            value="Normal",
            label="Choose the Style / Tone"
        )
    ],
)
# Page layout: title, a sidebar with a login button, and the chat interface
# rendered into the main column.
with gr.Blocks() as demo:
    gr.Markdown("# Styled Chat Bot")
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()