# iris / app.py
# Author: samzito12
# Last change: Update app.py (commit 246f6b2, verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and model once at import time so every request reuses them.
model_name = "samzito12/lora_model3"

print("Downloading the model ...")

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The base checkpoint ships without a pad token: reuse EOS, and pad on the
# left so generation continues right after the prompt text.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
)
print("✅ Downloaded model with CPU optimisations")
model.eval()
# System prompt prepended (as plain "System: ..." text) to every conversation
# built in chat() below.
SYSTEM_PROMPT = """You are a helpful AI coding assistant based on Meta's Llama-3.2-3B model.
Your task is to assist users with programming-related questions: write code snippets, debug code, explain concepts clearly, and provide best practices.
Always respond in a concise, clear, and friendly manner, and adapt your explanations to the user's level."""
def chat(message, history, temperature=0.7, max_tokens=256):
    """Generate one assistant reply for the Gradio ChatInterface.

    Bug fix vs. the previous version: ``temperature`` and ``max_tokens`` were
    accepted but ignored (generation hardcoded 0.7 / 256), so the UI sliders
    had no effect. They are now wired into ``model.generate``; the defaults
    were changed to 0.7 / 256 so that calling with no arguments behaves
    exactly as before.

    Args:
        message: The latest user message.
        history: Prior turns. Supports both the legacy list of
            ``(user, assistant)`` tuples and Gradio's newer "messages"
            format (list of ``{"role": ..., "content": ...}`` dicts).
        temperature: Sampling temperature. Values <= 0 switch to greedy
            decoding (the UI slider allows 0, which would crash sampling).
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The assistant's reply as a plain string.
    """
    # Build a plain-text transcript: system prompt, then alternating turns.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: one dict per utterance.
            role = "User" if turn.get("role") == "user" else "Assistant"
            conversation += f"{role}: {turn.get('content', '')}\n"
        else:
            # Legacy tuple format: one (user, assistant) pair per turn.
            user_msg, assistant_msg = turn
            conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # Tokenize; cap the prompt at 1024 tokens to bound CPU latency.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
        padding=True,
    )

    # temperature <= 0 is meaningless for sampling -> fall back to greedy.
    do_sample = temperature is not None and temperature > 0
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "do_sample": do_sample,
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        # Only pass temperature when sampling; transformers warns otherwise.
        gen_kwargs["temperature"] = float(temperature)

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # The model echoes the prompt; decode everything, then keep only the text
    # after the last "Assistant:" marker (the reply we just generated).
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "Assistant:" in full_response:
        return full_response.split("Assistant:")[-1].strip()
    return full_response[len(conversation):].strip()
# UI wiring: ChatInterface calls chat(message, history, *additional_inputs),
# so the two sliders below feed chat()'s temperature / max_tokens parameters,
# and each example row carries values for them after the message text.
demo = gr.ChatInterface(
    fn=chat,
    title="Your Coding Assistant",
    description="""
**Model:** This chatbot was fine-tuned to provide a free coding service, designed to assist users in writing, debugging, and optimizing code across various programming languages.
""",
    theme="soft",
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=512, value=128, step=16, label="Max Tokens"),
    ],
    examples=[
        ["What model are you?", 0.7, 128],
        ["Explain machine learning in simple terms", 0.7, 128],
        ["Write a Python function to reverse a string", 0.7, 128],
    ],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()