| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import torch |
|
|
| print("Loading the model...") |
|
|
| model_name = "samzito12/lora_model2" |
|
|
| tokenizer = AutoTokenizer.from_pretrained(model_name) |
| tokenizer.pad_token = tokenizer.eos_token |
| tokenizer.padding_side = "left" |
|
|
| model = AutoModelForCausalLM.from_pretrained( |
| model_name, |
| device_map="cpu", |
| torch_dtype=torch.float32, |
| low_cpu_mem_usage=True |
| ) |
|
|
| print("✅ Modèle chargé avec optimisations CPU") |
| model.eval() |
|
|
| SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on a code dataset." |
|
|
def chat(message, history, temperature=0.7, max_tokens=128):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: Latest user message (str).
        history: Prior turns as (user_msg, assistant_msg) pairs — tuple-style
            history; NOTE(review): newer Gradio versions default to the
            "messages" dict format, confirm the deployed version uses tuples.
        temperature: Sampling temperature from the UI slider; 0 means greedy.
        max_tokens: Maximum number of new tokens to generate (UI slider).

    Returns:
        The assistant's reply text (str).
    """
    # Flatten system prompt + history + new message into a plain-text prompt.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # Truncate long conversations to keep CPU latency bounded.
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024, padding=True)

    # Bug fix: the original hardcoded temperature=0.7 and max_new_tokens=256,
    # silently ignoring the values wired in from the UI sliders.
    do_sample = temperature > 0
    gen_kwargs = dict(
        max_new_tokens=int(max_tokens),
        do_sample=do_sample,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    if do_sample:
        # Only pass temperature when sampling; transformers warns otherwise.
        gen_kwargs["temperature"] = float(temperature)

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Decode only the newly generated tokens. This is more robust than the
    # original decode-everything-and-split-on-"Assistant:" approach, which
    # could truncate a reply that itself contained the string "Assistant:".
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    return response
|
|
# Build the chat UI. The two sliders are passed positionally to `chat` after
# (message, history) via `additional_inputs`, mapping onto its
# `temperature` and `max_tokens` parameters.
demo = gr.ChatInterface(
    chat,
    title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
    description="""
    **Model:** Llama-3.2-3B fine-tuned on a code dataset
    
    it's a custom fine-tuned model for ID2223 Lab 2.
    """,
    # Clickable sample prompts shown under the chat box.
    examples=[
        ["What model are you?"],
        ["Explain machine learning in simple terms"],
        ["Write a Python function to reverse a string"]
    ],
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=256, value=128, step=16, label="Max Tokens")
    ],
    
)
|
|
| if __name__ == "__main__": |
| demo.launch() |