Spaces:
Sleeping
Sleeping
File size: 3,369 Bytes
b49ddd4 83cbea2 e5a7c21 83cbea2 e5a7c21 83cbea2 0414a9d 83cbea2 b49ddd4 e5a7c21 83cbea2 b49ddd4 0414a9d b1d598a b49ddd4 83cbea2 14d11ec 83cbea2 14d11ec 83cbea2 8c66df6 83cbea2 8c66df6 b49ddd4 83cbea2 b49ddd4 83cbea2 b49ddd4 b1d598a e5a7c21 83cbea2 b49ddd4 83cbea2 b49ddd4 8c66df6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
from llama_cpp import Llama
from transformers import AutoTokenizer
# --- MODEL / TOKENIZER CONFIGURATION ---
# 4-bit quantized (Q4_K_M) GGUF build of Llama 3.2 1B, pulled from the HF Hub.
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_untrained_gguf_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
# Tokenizer supplies the chat template used to render prompts in respond().
TOKENIZER_ID = "chthees/lora_model_full_finetome-tokenizer"
print("Loading Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
print("Loading Model...")
# Downloads the GGUF file (if not cached) and loads it via llama.cpp.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,    # context window in tokens; respond() trims history to fit
    n_threads=2,   # NOTE(review): low thread count — presumably sized for a small CPU Space; confirm
    verbose=False
)
# --- SYSTEM PROMPT LOGIC ---
def get_system_prompt(style_mode):
    """Return the persona system prompt for the selected style.

    Any unrecognized *style_mode* falls back to the "Normal" prompt.
    """
    base = "You are a helpful and intelligent AI assistant."
    if style_mode == "Professional":
        return (
            f"{base} You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented."
        )
    if style_mode == "Shakespeare":
        return (
            f"{base} You are William Shakespeare. "
            "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
        )
    if style_mode == "Funny/Ironic":
        return (
            f"{base} You are a sarcastic comedian. "
            "Wrap your answers in dry humor, irony, and witty remarks."
        )
    # "Normal" and anything unknown share the default prompt.
    return f"{base} Answer clearly and concisely."
# --- CORE RESPONSE FUNCTION ---
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Generate one assistant reply for the Gradio ChatInterface.

    The signature follows ChatInterface's (message, history,
    *additional_inputs) calling convention. ``system_message_dummy`` is the
    hidden textbox and is ignored — the real system prompt is derived from
    ``style_mode``.
    """
    # Prompt order: persona system message, recent history, new user message.
    # history[-10:] keeps only the last 10 messages (5 user/assistant
    # exchanges) so the prompt stays within the 2048-token context window.
    messages = [{"role": "system", "content": get_system_prompt(style_mode)}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[-10:]
    )
    messages.append({"role": "user", "content": message})
    # Render the message list as plain text using the model's chat template.
    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Run llama.cpp inference; echo=False returns only the completion text.
    completion = llm(
        prompt_str,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=[tokenizer.eos_token, "<|eot_id|>"],
        echo=False,
    )
    return completion["choices"][0]["text"].strip()
# --- GUI SETUP ---
# ChatInterface wires respond() to the UI. The additional_inputs below are
# passed positionally after (message, history), so their order MUST match
# respond()'s parameter order: dummy system prompt, max_tokens, temperature,
# top_p, repetition_penalty, style_mode.
chatbot = gr.ChatInterface(
    respond,
    type="messages",  # history arrives as a list of {"role", "content"} dicts
    additional_inputs=[
        # Hidden placeholder — respond() ignores it; the real system prompt
        # comes from the style dropdown below.
        gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
        gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
        # Selects which persona get_system_prompt() builds the system message from.
        gr.Dropdown(
            choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
            value="Normal",
            label="Choose the Style / Tone"
        )
    ],
)
# Page layout: title, a sidebar with a login button, and the chat interface.
with gr.Blocks() as demo:
    gr.Markdown("# Styled Chat Bot")
    with gr.Sidebar():
        # NOTE(review): LoginButton enables HF OAuth sign-in; nothing in this
        # file gates functionality on login — presumably informational only.
        gr.LoginButton()
    chatbot.render()
if __name__ == "__main__":
    demo.launch()