# NOTE: "Spaces / Sleeping" is HuggingFace Spaces page chrome captured during
# extraction — it is not part of the program source.
import gradio as gr
from llama_cpp import Llama
from transformers import AutoTokenizer

# Hub coordinates: quantized GGUF checkpoint and the tokenizer whose chat
# template is used to format prompts for it.
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
TOKENIZER_ID = "chthees/lora_model_full_finetome-tokenizer"

# Both loads run at import time so the Space is ready before the UI starts.
print("Loading Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)

print("Loading Model...")
# n_ctx=2048 / n_threads=2: sized for a small CPU host — presumably the free
# Spaces tier; verify against the deployment hardware.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,
    n_threads=2,
    verbose=False
)
# --- SYSTEM PROMPT LOGIC ---
def get_system_prompt(style_mode):
    """Return the system persona prompt for *style_mode*.

    Unknown modes fall back to the "Normal" prompt.
    """
    base = "You are a helpful and intelligent AI assistant."
    if style_mode == "Professional":
        return (
            f"{base} You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented."
        )
    if style_mode == "Shakespeare":
        return (
            f"{base} You are William Shakespeare. "
            "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
        )
    if style_mode == "Funny/Ironic":
        return (
            f"{base} You are a sarcastic comedian. "
            "Wrap your answers in dry humor, irony, and witty remarks."
        )
    # "Normal" and any unrecognized value.
    return f"{base} Answer clearly and concisely."
# --- CORE RESPONSE FUNCTION ---
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Generate one assistant reply with the local GGUF model.

    Builds an OpenAI-style message list (persona, recent history, new user
    message), renders it through the tokenizer's chat template, and returns
    the stripped completion text. ``system_message_dummy`` is accepted only
    to match the Gradio additional-inputs signature and is ignored.
    """
    # Persona first, then the 10 most recent history messages — the slice
    # keeps the prompt inside the model's 2048-token context window.
    messages = [{"role": "system", "content": get_system_prompt(style_mode)}]
    messages += [
        {"role": entry['role'], "content": entry['content']}
        for entry in history[-10:]
    ]
    messages.append({"role": "user", "content": message})

    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Single (non-streaming) completion; stop on EOS or Llama-3's
    # end-of-turn marker, whichever comes first.
    completion = llm(
        prompt_str,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=[tokenizer.eos_token, "<|eot_id|>"],
        echo=False,
    )
    return completion["choices"][0]["text"].strip()
# --- GUI SETUP ---
# Persona selector shared by the chat interface; its value is forwarded to
# `respond` as `style_mode`.
_style_dropdown = gr.Dropdown(
    choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
    value="Normal",
    label="Choose the Style / Tone",
)

# Chat widget in OpenAI "messages" format. The invisible textbox fills the
# `system_message_dummy` slot of `respond`'s signature; the sliders map
# one-to-one onto its sampling parameters.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
        gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
        _style_dropdown,
    ],
)
# Page layout: title, a sidebar login button, and the chat interface rendered
# inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# Styled Chat Bot")
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

# Launch only when run as a script (Spaces also executes the file directly).
if __name__ == "__main__":
    demo.launch()