| import gradio as gr |
| from unsloth import FastLanguageModel |
| import torch |
|
|
| |
# --- Model configuration -----------------------------------------------------
max_seq_length = 2048  # context window used for tokenization and generation
dtype = None  # None -> let Unsloth auto-detect the dtype for the current GPU
load_in_4bit = True  # quantize weights to 4-bit to reduce VRAM usage


print("Loading model...")
# Load the fine-tuned checkpoint and its tokenizer (downloads on first run).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="FlameF0X/anwgpt4-1.2b",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


# Switch the model into Unsloth's optimized inference mode (disables training
# paths; required before calling model.generate with Unsloth models).
FastLanguageModel.for_inference(model)
print("Model loaded successfully!")
|
|
def generate_response(message, history, max_tokens=256, temperature=0.7, top_p=0.9):
    """
    Generate a response using the fine-tuned model.

    Args:
        message: Current user message.
        history: Chat history as a list of [user_msg, assistant_msg] pairs.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature; 0 switches to greedy decoding.
        top_p: Nucleus sampling parameter.

    Returns:
        The assistant's reply as a plain string.
    """
    # Rebuild the full conversation in the role/content format expected by
    # the tokenizer's chat template.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    # Render with the model's chat template, appending the generation prompt
    # so the model continues as the assistant.
    formatted_input = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer([formatted_input], return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,  # temperature == 0 -> greedy decoding
            use_cache=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # BUGFIX: previously the whole sequence was decoded and then split on the
    # raw user message, which returns the wrong text whenever the message
    # appears inside the templated prompt or inside the reply itself.
    # Decode only the newly generated tokens (everything past the prompt).
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    return response
|
|
| |
# --- Gradio UI ---------------------------------------------------------------
# Components render in the order they are created inside this context manager.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🤖 AnwGPT 4-1.2B Chat
        
        Fine-tuned LFM2.5-1.2B model on the Databricks Dolly-15k dataset.
        Ask questions, request information, or have a conversation!
        
        **Model:** FlameF0X/anwgpt4-1.2b
        """
    )
    
    # Conversation display; history is held as (user, assistant) tuples.
    chatbot = gr.Chatbot(
        label="Chat",
        height=500,
        show_copy_button=True,
    )
    
    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=4,
        )
        submit = gr.Button("Send", variant="primary", scale=1)
    
    # Sampling controls; values are passed straight through to
    # generate_response() on every send.
    with gr.Accordion("⚙️ Generation Settings", open=False):
        max_tokens = gr.Slider(
            minimum=32,
            maximum=512,
            value=256,
            step=32,
            label="Max Tokens",
            info="Maximum number of tokens to generate"
        )
        temperature = gr.Slider(
            minimum=0.0,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Higher = more creative, Lower = more focused"
        )
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top P",
            info="Nucleus sampling threshold"
        )
    
    with gr.Row():
        clear = gr.Button("🗑️ Clear Chat")
    
    # Clicking an example fills the message textbox (does not auto-submit).
    gr.Examples(
        examples=[
            "What is the capital of France?",
            "Explain quantum computing in simple terms.",
            "Write a short poem about technology.",
            "What are the benefits of exercise?",
            "How does photosynthesis work?",
        ],
        inputs=msg,
        label="Example Questions"
    )
    
    
    def respond(message, chat_history, max_tok, temp, top_p_val):
        """Handle one user turn: generate a reply and append it to history.

        Returns ("", updated_history) so the textbox is cleared after send.
        """
        # Ignore empty / whitespace-only submissions.
        if not message.strip():
            return "", chat_history
        
        bot_message = generate_response(message, chat_history, max_tok, temp, top_p_val)
        chat_history.append((message, bot_message))
        return "", chat_history
    
    # Wire both Enter-in-textbox and the Send button to the same handler.
    msg.submit(respond, [msg, chatbot, max_tokens, temperature, top_p], [msg, chatbot])
    submit.click(respond, [msg, chatbot, max_tokens, temperature, top_p], [msg, chatbot])
    # Returning None resets the chatbot component to an empty conversation.
    clear.click(lambda: None, None, chatbot, queue=False)
    
    gr.Markdown(
        """
        ---
        ### About
        This model was fine-tuned using LoRA on the Databricks Dolly-15k instruction dataset.
        Base model: LiquidAI/LFM2.5-1.2B-Base
        """
    )
|
|
def _main() -> None:
    """Launch the Gradio app (blocks until the server is stopped)."""
    demo.launch()


if __name__ == "__main__":
    _main()