File size: 2,708 Bytes
9646204 503ff85 9646204 3603079 9646204 3603079 503ff85 27a8bb1 503ff85 5558821 e41281f 27a8bb1 3603079 27a8bb1 246f6b2 9646204 3603079 503ff85 3cbcab1 503ff85 3cbcab1 503ff85 27a8bb1 3cbcab1 503ff85 a8e01ad 503ff85 27a8bb1 503ff85 3cbcab1 503ff85 9646204 3cbcab1 3603079 3cbcab1 3603079 3cbcab1 c5ec3aa 9646204 c5ec3aa 3603079 9853f62 27a8bb1 9646204 1205c6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# One-time setup at import: download the tokenizer and model so every Gradio
# request reuses the same in-memory instances.
print("Downloading the model ...")
# Hugging Face Hub repo id of the fine-tuned model (presumably a merged LoRA
# checkpoint, going by the name — TODO confirm).
model_name = "samzito12/lora_model3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The base model defines no pad token; reuse EOS so padding in chat() works.
tokenizer.pad_token = tokenizer.eos_token
# Decoder-only models must be left-padded for correct generation.
tokenizer.padding_side = "left"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",           # CPU-only deployment
    torch_dtype=torch.float32,  # full precision; CPUs lack fast fp16 kernels
    low_cpu_mem_usage=True      # stream weights to avoid a double allocation
)
print("✅ Downloaded model with CPU optimisations")
# Inference only: switch off dropout / training-mode layers.
model.eval()
# System preamble prepended to every conversation built in chat().
SYSTEM_PROMPT = """You are a helpful AI coding assistant based on Meta's Llama-3.2-3B model.
Your task is to assist users with programming-related questions: write code snippets, debug code, explain concepts clearly, and provide best practices.
Always respond in a concise, clear, and friendly manner, and adapt your explanations to the user's level."""
def chat(message, history, temperature=0.7, max_tokens=128):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as supplied by gr.ChatInterface.
    temperature : float
        Sampling temperature from the UI slider. Default changed from 1.5 to
        0.7 so it matches both the slider's initial value and the value the
        previous implementation actually hard-coded.
    max_tokens : int
        Maximum number of new tokens to generate (UI slider).

    Returns
    -------
    str
        The assistant's reply text.
    """
    # Build a flat prompt: system preamble, prior turns, then the new message.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # Tokenize; truncate long chats so the prompt fits the context window.
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True,
                       max_length=1024, padding=True)

    # BUG FIX: temperature/max_tokens were previously ignored — generation was
    # hard-coded to temperature=0.7 and max_new_tokens=256, so the UI sliders
    # had no effect. Wire the parameters through.
    # Robustness: the slider allows temperature=0, which is invalid for
    # sampling — fall back to greedy decoding in that case.
    do_sample = temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_tokens,
        "do_sample": do_sample,
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        gen_kwargs["temperature"] = temperature

    # Inference only: no gradient tracking.
    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Decode the full sequence (prompt + completion).
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the final "Assistant:" marker; fall back to
    # stripping the prompt prefix if the marker was truncated away.
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()
    return response
# Gradio chat UI. The two sliders in additional_inputs map onto chat()'s
# temperature and max_tokens parameters; each example row therefore carries
# [message, temperature, max_tokens] to match.
demo = gr.ChatInterface(
    chat,
    title="Your Coding Assistant",
    description="""
    **Model:** This chatbot was fine-tuned to provide a free coding service, designed to assist users in writing, debugging, and optimizing code across various programming languages.
    """,
    examples=[
        ["What model are you?", 0.7, 128],
        ["Explain machine learning in simple terms", 0.7, 128],
        ["Write a Python function to reverse a string", 0.7, 128]
    ],
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=512, value=128, step=16, label="Max Tokens")
    ],
    theme="soft",
)

if __name__ == "__main__":
    # FIX: removed the stray trailing "|" (blame-table export artifact) that
    # made this line a syntax error.
    demo.launch()