# Iris — app.py (Hugging Face Space, commit 65c0526)
# Gradio chat UI for the samzito12/lora_model2 fine-tuned Llama-3.2-3B model.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# ---- Model & tokenizer setup (CPU-only) --------------------------------
print("Loading the model...")

model_name = "samzito12/lora_model2"

# Decoder-only models have no pad token by default: reuse EOS and pad on
# the left so generation starts right after the prompt.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the causal LM pinned to CPU in float32, with the reduced
# peak-memory loading path enabled.
_load_kwargs = dict(
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_kwargs)
print("✅ Modèle chargé avec optimisations CPU")  # "model loaded with CPU optimizations"
model.eval()  # inference only — disable dropout etc.

# System prompt prepended to every conversation transcript.
SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on a code dataset."
def chat(message, history, temperature=0.7, max_tokens=128):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: Latest user message (str).
        history: Prior turns as (user_msg, assistant_msg) pairs.
        temperature: Sampling temperature from the UI slider; 0 means
            greedy decoding.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The assistant's reply as a plain string.
    """
    # Build a flat "System/User/Assistant" transcript from the history.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # Tokenize, truncating so very long chats still fit the CPU budget.
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024, padding=True)

    # BUG FIX: the temperature / max_tokens parameters (wired to the UI
    # sliders) were previously ignored in favor of hard-coded 0.7 / 256.
    # Also handle temperature == 0 (slider minimum): HF generate rejects
    # temperature=0 with do_sample=True, so fall back to greedy decoding.
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature and temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = float(temperature)
    else:
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Decode the full sequence, then keep only the newest assistant turn.
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()
    return response
# Gradio chat UI. NOTE(review): the additional_inputs sliders are passed
# to chat() positionally after (message, history), so their order must
# match chat()'s (temperature, max_tokens) parameters — confirm against
# the installed gradio ChatInterface version.
demo = gr.ChatInterface(
chat,
title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
description="""
**Model:** Llama-3.2-3B fine-tuned on a code dataset
it's a custom fine-tuned model for ID2223 Lab 2.
""",
# Clickable example prompts shown under the chat box.
examples=[
["What model are you?"],
["Explain machine learning in simple terms"],
["Write a Python function to reverse a string"]
],
# Extra controls rendered below the chat; forwarded to chat().
additional_inputs=[
gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
gr.Slider(minimum=32, maximum=256, value=128, step=16, label="Max Tokens")
],
)
# Launch the app only when run as a script (HF Spaces executes this file).
if __name__ == "__main__":
    demo.launch()