import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

print("Downloading the model ...")
model_name = "samzito12/lora_model3"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Causal LMs typically ship without a pad token; reuse EOS and left-pad so the
# most recent conversation tokens sit at the end of the context window.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
print("✅ Downloaded model with CPU optimisations")
model.eval()

SYSTEM_PROMPT = """You are a helpful AI coding assistant based on Meta's Llama-3.2-3B model. Your task is to assist users with programming-related questions: write code snippets, debug code, explain concepts clearly, and provide best practices. Always respond in a concise, clear, and friendly manner, and adapt your explanations to the user's level."""


def chat(message, history, temperature=0.7, max_tokens=128):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The user's latest message.
        history: Prior turns as (user_msg, assistant_msg) pairs
            (Gradio tuple-style chat history).
        temperature: Sampling temperature from the UI slider. At 0 the model
            decodes greedily instead of sampling (sampling with temperature 0
            would raise inside ``generate``). Default 0.7, matching the
            slider's default value.
        max_tokens: Maximum number of new tokens to generate, from the UI
            slider.

    Returns:
        The assistant's response text.
    """
    # Flatten the system prompt plus history into a plain-text transcript.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # Truncate long conversations so we never exceed the model's context.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
        padding=True,
    )

    # Honor the slider values: previously these were hard-coded (0.7 / 256),
    # which made the Temperature and Max Tokens controls no-ops.
    gen_kwargs = dict(
        max_new_tokens=int(max_tokens),
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    if temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = float(temperature)
    else:
        # Greedy decoding: do_sample=True with temperature 0 is invalid.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the model's continuation after the final "Assistant:" marker;
    # fall back to stripping the prompt prefix if the marker is missing.
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()

    return response


demo = gr.ChatInterface(
    chat,
    title="Your Coding Assistant",
    description="""
    **Model:** This chatbot was fine-tuned to provide a free coding service, designed to assist users in writing, debugging, and optimizing code across various programming languages. 
    """,
    # Each example supplies (message, temperature, max_tokens) to match
    # additional_inputs below.
    examples=[
        ["What model are you?", 0.7, 128],
        ["Explain machine learning in simple terms", 0.7, 128],
        ["Write a Python function to reverse a string", 0.7, 128],
    ],
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=512, value=128, step=16, label="Max Tokens"),
    ],
    theme="soft",
)

if __name__ == "__main__":
    demo.launch()