import gradio as gr
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

# Base checkpoint and the DPO-finetuned LoRA adapter applied on top of it.
BASE_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
ADAPTER_REPO = "haifasyn/output_dpo"

try:
    # Tokenizer is loaded from the adapter repo so that any tokens
    # added/changed during fine-tuning match what the model expects.
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        device_map="cpu",
        torch_dtype=torch.float32,
        trust_remote_code=True,
        attn_implementation="eager",
    )
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    model.eval()
except Exception as e:
    print(f"Error load model: {e}")
    # Re-raise as-is to preserve the original traceback.
    raise


def predict(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior chat turns supplied by Gradio. Unused: each call
            prompts the model with only the system prompt plus the
            current message, so the bot is stateless across turns.

    Returns:
        str: The model's reply with any ``<think>...</think>`` reasoning
        block and leftover special tokens (e.g. ``<|im_end|>``) removed.
    """
    system_prompt = """ Kamu adalah asisten AI BRKS. Instruction: Jawablah pertanyaan user menggunakan informasi yang telah kamu pelajari sebelumnya dengan singkat dan jelas. ATURAN: 1. Hanya gunakan informasi yang telah kamu pelajari sebelumnya. 2. Jangan menggunakan pengetahuan dari luar. 3. Jika informasi tidak ditemukan, katakan yang sebenarnya bahwa informasi tidak tersedia. 
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    text_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.5,
            top_p=0.95,
            repetition_penalty=1.15,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    # Special tokens are kept here so the <think> / <|...|> markers can be
    # stripped explicitly below.
    full_output = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=False
    )

    # BUG FIX: the original used re.search(r'(.*?)', ...) which always
    # matches the empty string, after which full_output.split('') raised
    # "ValueError: empty separator" on every reply. The intent (confirmed
    # by the special-token cleanup below) was to strip a <think>...</think>
    # reasoning block; that behavior is restored here.
    final_response = full_output
    if re.search(r"<think>(.*?)</think>", full_output, re.DOTALL):
        # Keep only the text after the closing reasoning tag.
        final_response = full_output.split("</think>")[-1].strip()

    # Drop any remaining chat-template special tokens such as <|im_end|>.
    final_response = re.sub(r"<\|.*?\|>", "", final_response).strip()
    return final_response


demo = gr.ChatInterface(
    fn=predict,
    title="Chatbot BRKS",
    description="Model ini dari hasil fine tuning (Qwen)",
    examples=["Dimana alamat cabang brks?"],
)

if __name__ == "__main__":
    demo.launch()