# NOTE(review): the lines below are Hugging Face Spaces page metadata
# (space status, file size, commit hashes, blob line numbers) that was
# scraped into the source file; kept as comments so the file stays valid Python.
# Spaces: Sleeping / Sleeping
# File size: 3,381 Bytes
# Commits seen: b49ddd4 8c66df6 795fb06 006fe32 b1d598a 0414a9d e5a7c21 1960bbc 54fe7ee
import gradio as gr
from llama_cpp import Llama
from transformers import AutoTokenizer
# Hugging Face Hub coordinates: a GGUF quantized Llama-3.2-1B fine-tune for
# llama.cpp, plus a separate tokenizer repo that supplies the chat template
# used to serialize conversations into prompt strings.
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
TOKENIZER_ID = "chthees/lora_model_full_finetome-tokenizer"
print("Loading Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
print("Loading Model...")
# n_ctx=2048 caps the context window; n_threads=2 presumably matches a small
# CPU host (e.g. a free Space) — TODO confirm. verbose=False silences
# llama.cpp's per-token logging.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,
    n_threads=2,
    verbose=False
)
# --- SYSTEM PROMPT LOGIC ---
def get_system_prompt(style_mode):
    """Return the system-prompt text for the requested persona.

    Any unrecognized ``style_mode`` falls back to the "Normal" persona.
    """
    base_instruction = "You are a helpful and intelligent AI assistant."
    if style_mode == "Professional":
        return (
            f"{base_instruction} You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented."
        )
    if style_mode == "Shakespeare":
        return (
            f"{base_instruction} You are William Shakespeare. "
            "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
        )
    if style_mode == "Funny/Ironic":
        return (
            f"{base_instruction} You are a sarcastic comedian. "
            "Wrap your answers in dry humor, irony, and witty remarks."
        )
    # "Normal" and any unknown style share the default prompt.
    return f"{base_instruction} Answer clearly and concisely."
# --- CORE RESPONSE FUNCTION ---
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Generate one assistant reply for the Gradio chat interface.

    ``system_message_dummy`` is the hidden textbox placeholder from the UI
    and is deliberately ignored — the real system prompt comes from
    ``get_system_prompt(style_mode)``.
    """
    # Prompt order: persona system message, then recent history, then the
    # new user message. Only the last 10 history entries (i.e. 10 messages,
    # about 5 user/assistant exchanges) are kept so the rendered prompt
    # stays inside the model's 2048-token context window.
    messages = [{"role": "system", "content": get_system_prompt(style_mode)}]
    messages.extend(
        {"role": entry["role"], "content": entry["content"]}
        for entry in history[-10:]
    )
    messages.append({"role": "user", "content": message})
    # Serialize via the tokenizer's chat template into the model's native
    # prompt format, ending with the assistant-turn header.
    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Run generation; echo=False keeps the prompt out of the completion, and
    # the stop strings cut generation at the end-of-turn markers.
    output = llm(
        prompt_str,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=[tokenizer.eos_token, "<|eot_id|>"],
        echo=False
    )
    return output["choices"][0]["text"].strip()
# --- GUI SETUP ---
# ChatInterface wires `respond` to a chat UI. The additional_inputs map
# positionally onto respond()'s parameters after (message, history):
# hidden system-prompt placeholder, max tokens, temperature, top-p,
# repetition penalty, and the persona dropdown.
chatbot = gr.ChatInterface(
    respond,
    type="messages",  # history arrives as a list of {"role", "content"} dicts
    additional_inputs=[
        # Hidden placeholder bound to system_message_dummy; respond() ignores it.
        gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
        gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
        # Choices must match the keys handled by get_system_prompt().
        gr.Dropdown(
            choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
            value="Normal",
            label="Choose the Style / Tone"
        )
    ],
)
# Page layout: title, a sidebar with a login button, and the chat interface
# rendered into the main column.
with gr.Blocks() as demo:
    gr.Markdown("# Styled Chat Bot")
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()