|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import torch |
|
|
|
|
|
# ---------------------------------------------------------------------------
# One-time model setup (runs at import time).
# ---------------------------------------------------------------------------
print("Downloading the model ...")

# Hugging Face Hub repo id of the fine-tuned model.
model_name = "samzito12/lora_model3"

# Tokenizer setup: the model ships without a dedicated pad token, so reuse
# EOS for padding; left-padding keeps generation anchored to the prompt end.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the weights on CPU in full float32 precision, keeping peak memory
# low during loading.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

print("✅ Downloaded model with CPU optimisations")

# Inference only — disable dropout etc.
model.eval()

# System prompt prepended to every conversation.
SYSTEM_PROMPT = """You are a helpful AI coding assistant based on Meta's Llama-3.2-3B model.


Your task is to assist users with programming-related questions: write code snippets, debug code, explain concepts clearly, and provide best practices.


Always respond in a concise, clear, and friendly manner, and adapt your explanations to the user's level."""
|
|
|
|
|
|
|
|
def _history_to_text(history):
    """Render prior chat turns as 'User: ...' / 'Assistant: ...' lines.

    Accepts either the legacy Gradio tuple format ``[(user, assistant), ...]``
    or the "messages" format ``[{"role": ..., "content": ...}, ...]``.
    Returns a string ending with a newline (or "" for empty history).
    """
    if not history:
        return ""
    lines = []
    if isinstance(history[0], dict):
        # Gradio "messages" format: one dict per message.
        for msg in history:
            role = "User" if msg.get("role") == "user" else "Assistant"
            lines.append(f"{role}: {msg.get('content', '')}")
    else:
        # Legacy format: one (user, assistant) pair per turn.
        for user_msg, assistant_msg in history:
            lines.append(f"User: {user_msg}\nAssistant: {assistant_msg}")
    return "\n".join(lines) + "\n"


def chat(message, history, temperature=1.5, max_tokens=128):
    """Generate an assistant reply for the Gradio ChatInterface.

    Args:
        message: Latest user message.
        history: Prior turns — (user, assistant) tuples or role/content dicts.
        temperature: Sampling temperature from the UI slider; values <= 0
            fall back to greedy decoding. (NOTE(review): the signature default
            1.5 differs from the slider default 0.7 — kept for compatibility.)
        max_tokens: Maximum number of new tokens to generate (UI slider).

    Returns:
        The assistant's response text.
    """
    # Build a plain-text prompt: system prompt, prior turns, new message.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    conversation += _history_to_text(history)
    conversation += f"User: {message}\nAssistant:"

    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
        padding=True,
    )

    # BUG FIX: the `temperature` and `max_tokens` parameters (wired to the UI
    # sliders) were previously ignored — generation always used
    # temperature=0.7 and max_new_tokens=256. Honor them here.
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature and temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = float(temperature)
    else:
        # temperature == 0 is invalid for sampling — decode greedily instead.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text contains the whole prompt; keep only the text after
    # the final "Assistant:" marker (fall back to slicing off the prompt).
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()

    return response
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio front-end: chat UI with sampling controls exposed as sliders.
# ---------------------------------------------------------------------------
DESCRIPTION = """


**Model:** This chatbot was fine-tuned to provide a free coding service, designed to assist users in writing, debugging, and optimizing code across various programming languages.


"""

demo = gr.ChatInterface(
    fn=chat,
    theme="soft",
    title="Your Coding Assistant",
    description=DESCRIPTION,
    # Each example row is [message, temperature, max_tokens], matching the
    # order of `additional_inputs` below.
    examples=[
        ["What model are you?", 0.7, 128],
        ["Explain machine learning in simple terms", 0.7, 128],
        ["Write a Python function to reverse a string", 0.7, 128],
    ],
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=512, value=128, step=16, label="Max Tokens"),
    ],
)

# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()