# chatbot-llm / app.py
# Author: haifasyn — "Update app.py" (commit 569f565, verified)
import gradio as gr
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
BASE_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
# Change this to your own DPO adapter path if it differs.
ADAPTER_REPO = "haifasyn/output_dpo"

try:
    # Use the base model's tokenizer — safer than the adapter repo, which
    # may not ship tokenizer files.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        device_map={"": "cpu"},   # force everything onto CPU (free Space has no GPU)
        torch_dtype=torch.float32,
        trust_remote_code=True,
        low_cpu_mem_usage=True,   # important on a free (low-RAM) Space
        attn_implementation="eager",
    )
    # Attach the DPO-trained LoRA adapter on top of the frozen base weights.
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    model.eval()  # inference mode: disable dropout etc.
except Exception as e:
    print(f"Error load model: {e}")
    # Bare `raise` re-raises with the original traceback intact
    # (`raise e` would reset the traceback to this line).
    raise
def _coerce_text(value):
    """Force a chat-message payload into a plain string.

    Gradio can deliver a list for multimodal content; keep only the first
    element in that case (matches the original behaviour).
    """
    if isinstance(value, list):
        value = value[0] if value else ""
    return str(value)


def _build_messages(message, history):
    """Convert Gradio chat state into a chat-template message list.

    Supports both the Gradio 4+ "messages" history (list of role/content
    dicts) and the legacy pair format (list of [user, assistant] tuples).

    Args:
        message: current user input (str, or dict for multimodal input).
        history: prior conversation in either Gradio history format, or None.

    Returns:
        List of {"role", "content"} dicts, starting with the system prompt.
    """
    system_prompt = "Kamu adalah asisten AI BRKS. Jawablah dengan singkat dan jelas berdasarkan informasi yang dipelajari."
    messages = [{"role": "system", "content": system_prompt}]
    for interaction in history or []:
        if isinstance(interaction, dict):
            # Gradio 4+ dictionary format.
            role = interaction.get("role", "user")
            content = _coerce_text(interaction.get("content", ""))
            messages.append({"role": role, "content": content})
        elif isinstance(interaction, (list, tuple)):
            # Legacy [user, assistant] pairs. The assistant slot is None while
            # a reply is still being generated; skip it instead of injecting
            # the literal string "None" into the context (bug in the
            # original, which did str(None)).
            u_msg = interaction[0] if len(interaction) > 0 else ""
            a_msg = interaction[1] if len(interaction) > 1 else None
            messages.append({"role": "user", "content": _coerce_text(u_msg)})
            if a_msg not in (None, ""):
                messages.append({"role": "assistant", "content": _coerce_text(a_msg)})
    # Current user turn; multimodal inputs arrive as {"text": ..., "files": ...}.
    user_input = message.get("text", "") if isinstance(message, dict) else str(message)
    messages.append({"role": "user", "content": user_input})
    return messages


def predict(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: current user input (str, or dict for multimodal input).
        history: prior conversation, in either Gradio history format.

    Returns:
        The model's reply as a string, or a human-readable error message if
        generation fails (the Space should not crash on a single bad turn).
    """
    try:
        messages = _build_messages(message, history)

        # Render the conversation with the model's chat template and append
        # the generation prompt so the model continues as the assistant.
        text_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer(text_prompt, return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=512,
                temperature=0.5,
                top_p=0.95,
                repetition_penalty=1.15,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (drop the echoed prompt).
        input_length = inputs["input_ids"].shape[1]
        full_output = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        # Strip <think>...</think> reasoning tags emitted by some RLHF models.
        return re.sub(r'<think>.*?</think>', '', full_output, flags=re.DOTALL).strip()
    except Exception as e:
        # Surface the failure in the chat UI instead of crashing the Space.
        return f"Terjadi kesalahan teknis: {str(e)}"
# Wire the predictor into a Gradio chat UI.
demo = gr.ChatInterface(
    predict,
    title="Chatbot BRKS - DPO Edition",
    description="Model Qwen2.5 yang telah di-fine-tune dengan metode RLHF/DPO",
)

# Launch only when executed as a script (HF Spaces runs this file directly).
if __name__ == "__main__":
    demo.launch()