# brks/app.py — BRKS chatbot Space (update d966550, haifasyn)
import gradio as gr
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
BASE_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
ADAPTER_REPO = "haifasyn/output_dpo"

# Load the tokenizer from the adapter repo (presumably it carries any
# fine-tuned chat-template/vocab changes — confirm against the repo), load the
# frozen base model on CPU, then attach the DPO LoRA adapter on top of it.
try:
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        device_map="cpu",            # CPU-only Space: keep everything on CPU
        torch_dtype=torch.float32,   # full precision; CPU has no fp16 advantage
        trust_remote_code=True,
        attn_implementation="eager",
    )
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    model.eval()  # inference mode: disables dropout etc.
except Exception as e:
    print(f"Error loading model: {e}")
    raise  # bare raise re-raises the original exception with its traceback
def predict(message, history):
    """Generate one chat reply for gr.ChatInterface.

    Fix over the original: ``history`` was accepted but never used, so the bot
    had no conversational memory. Prior turns are now folded into the chat
    template before generation.

    Args:
        message: Latest user message (str).
        history: Prior turns supplied by gr.ChatInterface — either a list of
            ``(user, assistant)`` pairs or a list of ``{"role", "content"}``
            dicts, depending on the Gradio version; both are handled.

    Returns:
        str: Model response with any ``<think>...</think>`` reasoning block
        and residual special tokens (``<|...|>``) stripped.
    """
    system_prompt = """
Kamu adalah asisten AI BRKS.
Instruction:
Jawablah pertanyaan user menggunakan informasi yang telah kamu pelajari sebelumnya dengan singkat dan jelas.
ATURAN:
1. Hanya gunakan informasi yang telah kamu pelajari sebelumnya.
2. Jangan menggunakan pengetahuan dari luar.
3. Jika informasi tidak ditemukan, katakan yang sebenarnya bahwa informasi tidak tersedia.
"""
    messages = [{"role": "system", "content": system_prompt}]

    # Replay the conversation so far so the model sees multi-turn context.
    for turn in history or []:
        if isinstance(turn, dict):
            # Gradio "messages" format: already {"role": ..., "content": ...}.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: (user_text, assistant_text).
            user_text, assistant_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    text_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.5,
            top_p=0.95,
            repetition_penalty=1.15,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    # Special tokens are kept here so the <think> block can be detected.
    full_output = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=False
    )

    # If the model emitted a <think>...</think> reasoning block, keep only
    # the text after it as the visible answer.
    final_response = full_output
    match = re.search(r'<think>(.*?)</think>', full_output, re.DOTALL)
    if match:
        final_response = full_output.split('</think>')[-1].strip()

    # Strip any leftover special tokens such as <|im_end|>.
    final_response = re.sub(r'<\|.*?\|>', '', final_response).strip()
    return final_response
# Wire the prediction function into a standard Gradio chat UI.
demo = gr.ChatInterface(
    fn=predict,
    title="Chatbot BRKS",
    # Typo fixed: "MOdel" -> "Model".
    description="Model ini dari hasil fine tuning (Qwen)",
    examples=["Dimana alamat cabang brks?"],
)

if __name__ == "__main__":
    demo.launch()