# chatbot-llm / app.py
# Author: haifasyn — "Update app.py" (commit 569f565, verified)
import gradio as gr
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
BASE_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
# Change this to your own DPO adapter path if it differs.
ADAPTER_REPO = "haifasyn/output_dpo"

try:
    # Use the base model's tokenizer — safer than the adapter repo, which
    # may not ship tokenizer files.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        device_map={"": "cpu"},   # force everything onto CPU (free Space has no GPU)
        torch_dtype=torch.float32,
        trust_remote_code=True,
        low_cpu_mem_usage=True,   # important on a free (low-RAM) Space
        attn_implementation="eager",
    )
    # Attach the DPO-trained LoRA adapter on top of the frozen base weights.
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    model.eval()  # inference mode: disable dropout etc.
except Exception as e:
    print(f"Error load model: {e}")
    # Bare `raise` re-raises with the original traceback intact
    # (`raise e` would reset the traceback to this line).
    raise
def _coerce_text(value):
    """Force a chat-message payload into a plain string.

    Gradio can deliver a list for multimodal content; keep only the first
    element in that case (matches the original behaviour).
    """
    if isinstance(value, list):
        value = value[0] if value else ""
    return str(value)


def _build_messages(message, history):
    """Convert Gradio chat state into a chat-template message list.

    Supports both the Gradio 4+ "messages" history (list of role/content
    dicts) and the legacy pair format (list of [user, assistant] tuples).

    Args:
        message: current user input (str, or dict for multimodal input).
        history: prior conversation in either Gradio history format, or None.

    Returns:
        List of {"role", "content"} dicts, starting with the system prompt.
    """
    system_prompt = "Kamu adalah asisten AI BRKS. Jawablah dengan singkat dan jelas berdasarkan informasi yang dipelajari."
    messages = [{"role": "system", "content": system_prompt}]
    for interaction in history or []:
        if isinstance(interaction, dict):
            # Gradio 4+ dictionary format.
            role = interaction.get("role", "user")
            content = _coerce_text(interaction.get("content", ""))
            messages.append({"role": role, "content": content})
        elif isinstance(interaction, (list, tuple)):
            # Legacy [user, assistant] pairs. The assistant slot is None while
            # a reply is still being generated; skip it instead of injecting
            # the literal string "None" into the context (bug in the
            # original, which did str(None)).
            u_msg = interaction[0] if len(interaction) > 0 else ""
            a_msg = interaction[1] if len(interaction) > 1 else None
            messages.append({"role": "user", "content": _coerce_text(u_msg)})
            if a_msg not in (None, ""):
                messages.append({"role": "assistant", "content": _coerce_text(a_msg)})
    # Current user turn; multimodal inputs arrive as {"text": ..., "files": ...}.
    user_input = message.get("text", "") if isinstance(message, dict) else str(message)
    messages.append({"role": "user", "content": user_input})
    return messages


def predict(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: current user input (str, or dict for multimodal input).
        history: prior conversation, in either Gradio history format.

    Returns:
        The model's reply as a string, or a human-readable error message if
        generation fails (the Space should not crash on a single bad turn).
    """
    try:
        messages = _build_messages(message, history)

        # Render the conversation with the model's chat template and append
        # the generation prompt so the model continues as the assistant.
        text_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer(text_prompt, return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=512,
                temperature=0.5,
                top_p=0.95,
                repetition_penalty=1.15,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (drop the echoed prompt).
        input_length = inputs["input_ids"].shape[1]
        full_output = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        # Strip <think>...</think> reasoning tags emitted by some RLHF models.
        return re.sub(r'<think>.*?</think>', '', full_output, flags=re.DOTALL).strip()
    except Exception as e:
        # Surface the failure in the chat UI instead of crashing the Space.
        return f"Terjadi kesalahan teknis: {str(e)}"
# Wire the predictor into a Gradio chat UI.
demo = gr.ChatInterface(
    predict,
    title="Chatbot BRKS - DPO Edition",
    description="Model Qwen2.5 yang telah di-fine-tune dengan metode RLHF/DPO",
)

# Launch only when executed as a script (HF Spaces runs this file directly).
if __name__ == "__main__":
    demo.launch()