import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
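
# MODEL_ID points to the fine-tuned PEFT adapter; BASE_MODEL_ID is the instruct
# model it was trained on top of, which also provides the tokenizer below.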
MODEL_ID = "GhostScientist/qwen25-coder-1.5b-codealpaca-sft"
BASE_MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
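
# The tokenizer (and its chat template) is loaded from the base model; this is
# cheap and CPU-safe, so it happens once at import time.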
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
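
# The model itself is loaded lazily: on a ZeroGPU Space the script is imported
# on CPU, and the weights are only pulled in once a GPU-decorated call runs.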
model = None


def load_model():
    """Load the base model and merge the fine-tuned adapter into it (cached)."""
    global model
    if model is None:
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(base_model, MODEL_ID)
        # merge_and_unload folds the LoRA weights into the base weights, so
        # inference runs without any adapter overhead.
        model = model.merge_and_unload()
    return model
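

# @spaces.GPU requests a ZeroGPU slot for the duration of the call;
# duration=120 allows up to 120 seconds of GPU time per generation.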
@spaces.GPU(duration=120)
def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a response using the fine-tuned Qwen coder model."""
    model = load_model()

    # Rebuild the full conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for item in history:
        # Depending on the Gradio version, history arrives either as
        # (user, assistant) pairs or as {"role": ..., "content": ...} dicts;
        # handle both to be safe.
        if isinstance(item, dict) and item.get("content"):
            messages.append({"role": item["role"], "content": item["content"]})
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_msg, assistant_msg = item
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template and append the
    # generation prompt so the model continues as the assistant.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response
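

# Default system prompt; it is exposed as an editable Textbox in the UI.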
SYSTEM_PROMPT = """You are an expert coding assistant. You help users write, debug, explain, and improve code.
You provide clear, concise, and accurate responses with well-formatted code examples when appropriate.
Always explain your reasoning and suggest best practices."""
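
# Clickable example prompts shown beneath the chat input.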
EXAMPLES = [
    ["Write a Python function to check if a number is prime"],
    ["Explain the difference between a list and a tuple in Python"],
    ["How do I reverse a string in JavaScript?"],
    ["Write a SQL query to find duplicate records in a table"],
    ["Debug this code: def add(a, b): return a - b"],
]
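
# ChatInterface passes the additional inputs to generate_response positionally
# after (message, history): system prompt, max tokens, temperature, top-p.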
demo = gr.ChatInterface(
    fn=generate_response,
    title="Qwen 2.5 Coder Assistant",
    description="""A fine-tuned Qwen 2.5 Coder 1.5B model for code assistance.
Ask me to write code, explain concepts, debug issues, or help with any programming task!

**Model:** [GhostScientist/qwen25-coder-1.5b-codealpaca-sft](https://huggingface.co/GhostScientist/qwen25-coder-1.5b-codealpaca-sft)
""",
    additional_inputs=[
        gr.Textbox(
            value=SYSTEM_PROMPT,
            label="System Prompt",
            lines=3,
        ),
        gr.Slider(
            minimum=64,
            maximum=2048,
            value=512,
            step=64,
            label="Max Tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
    ],
    examples=EXAMPLES,
)
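

# Launch the Gradio app; a Hugging Face Space executes this same entry point,
# so no extra launch configuration is needed.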
if __name__ == "__main__":
    demo.launch()