"""Gradio web UI that turns an engine description into a ``.mr`` script.

The app loads the Qwen2.5-3B-Instruct base model on CPU, applies the LoRA
adapter on top of it, and exposes a single text-in / code-out interface.
"""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# 1. Setup Model IDs
base_model_id = "unsloth/Qwen2.5-3B-Instruct"
lora_model_id = "10Aizen01/qwen-2.5-3b-engine-simulator-beta"

# 2. Load Tokenizer and Base Model
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# We use float32 and force CPU for the free Hugging Face tier
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
    low_cpu_mem_usage=True,
)

# 3. Load your LoRA adapters
model = PeftModel.from_pretrained(base_model, lora_model_id)


def generate_engine_code(prompt: str) -> str:
    """Generate a script for the engine described by *prompt*.

    Args:
        prompt: Free-text engine description entered in the UI.

    Returns:
        The text produced by the model for the assistant turn only (the
        echoed prompt is not included). An empty string is returned when
        *prompt* is empty or whitespace-only.
    """
    # Nothing to describe: skip a long CPU generation that has no input.
    if not prompt or not prompt.strip():
        return ""

    # Single-turn chat prompt; tensors stay on CPU (no .to("cuda")).
    chat_text = (
        f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    )
    inputs = tokenizer(chat_text, return_tensors="pt")

    # Generate on CPU
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
    )

    # generate() returns prompt tokens followed by the new tokens. Decoding
    # the whole sequence would leak "user\n...\nassistant\n" into the code
    # box, so only the tokens after the prompt are decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)


# 4. Create the Web UI
demo = gr.Interface(
    fn=generate_engine_code,
    inputs=gr.Textbox(label="Describe your engine (e.g., V8, 4.0L, 9000 RPM)"),
    outputs=gr.Code(label="Generated .mr Script", language="cpp"),
    title="Engine Simulator AI Assistant",
)

if __name__ == "__main__":
    demo.launch()