Spaces:
Sleeping
import gradio as gr
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model plus the DPO-trained LoRA adapter stacked on top.
BASE_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
# Change this to your own adapter path if it differs.
ADAPTER_REPO = "haifasyn/output_dpo"

try:
    # Use the base model's tokenizer — safer than loading one from the adapter repo.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        device_map={"": "cpu"},   # force CPU (free Space has no GPU)
        torch_dtype=torch.float32,
        trust_remote_code=True,
        low_cpu_mem_usage=True,   # important on the free Space tier
        attn_implementation="eager",
    )
    # Attach the DPO adapter weights on top of the frozen base model.
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    model.eval()
except Exception as e:
    print(f"Error load model: {e}")
    # Bare `raise` re-raises the active exception with its original traceback
    # intact (`raise e` would restart the traceback from this line).
    raise
def predict(message, history):
    """Chat handler for gr.ChatInterface.

    Replays the conversation history into a chat-template prompt, generates
    a reply on CPU, and strips any <think>...</think> reasoning tags before
    returning the final text. On failure, returns an error string so the UI
    keeps working instead of crashing.
    """
    try:
        system_prompt = "Kamu adalah asisten AI BRKS. Jawablah dengan singkat dan jelas berdasarkan informasi yang dipelajari."
        # Seed with the system message, then replay history so the bot has memory.
        messages = [{"role": "system", "content": system_prompt}]
        for turn in history or []:
            if isinstance(turn, dict):
                # Gradio 4+ "messages" format: {"role": ..., "content": ...}
                content = turn.get("content", "")
                if isinstance(content, list):
                    # Multimodal content arrives as a list; keep the first element.
                    content = str(content[0])
                messages.append({"role": turn.get("role", "user"), "content": str(content)})
            elif isinstance(turn, (list, tuple)):
                # Legacy tuple format: [user_text, assistant_text]
                user_part = turn[0] if len(turn) > 0 else ""
                bot_part = turn[1] if len(turn) > 1 else ""
                # Coerce multimodal lists down to plain strings.
                if isinstance(user_part, list):
                    user_part = str(user_part[0])
                if isinstance(bot_part, list):
                    bot_part = str(bot_part[0])
                messages.append({"role": "user", "content": str(user_part)})
                messages.append({"role": "assistant", "content": str(bot_part)})

        # The incoming message may be a multimodal dict carrying a "text" field.
        user_text = message.get("text", "") if isinstance(message, dict) else str(message)
        messages.append({"role": "user", "content": user_text})

        # Render through the model's chat template, then tokenize on CPU.
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

        with torch.no_grad():
            generated = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=512,
                temperature=0.5,
                top_p=0.95,
                repetition_penalty=1.15,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, skipping the prompt echo.
        prompt_len = inputs["input_ids"].shape[1]
        raw_reply = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)

        # Remove <think>...</think> blocks emitted by RLHF/reasoning models.
        return re.sub(r'<think>.*?</think>', '', raw_reply, flags=re.DOTALL).strip()
    except Exception as e:
        return f"Terjadi kesalahan teknis: {str(e)}"
# Wire the chat handler into a standard Gradio chat UI.
demo = gr.ChatInterface(
    fn=predict,
    title="Chatbot BRKS - DPO Edition",
    description="Model Qwen2.5 yang telah di-fine-tune dengan metode RLHF/DPO",
)

if __name__ == "__main__":
    demo.launch()