import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Fix the RNG seed for reproducible sampling (do_sample=True below).
torch.random.manual_seed(0)

# Load the model and tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cpu",  # use 'cpu' when no GPU is available
    torch_dtype="auto",
    trust_remote_code=True,
    attn_implementation="eager",  # eager attention avoids flash-attention issues on CPU
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

# Text-generation pipeline wrapping the model + tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def generate_response(input_text):
    """Generate an assistant reply for *input_text* via the Phi-3 pipeline.

    Builds a two-message chat (fixed system prompt + the user's text) and
    returns only the newly generated text (``return_full_text=False``).
    Empty or whitespace-only input short-circuits with a hint instead of
    invoking the model on an empty prompt.
    """
    # Robustness: don't run generation on an empty prompt.
    if input_text is None or not input_text.strip():
        return "Please enter a question first."

    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": input_text},
    ]
    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,
        "temperature": 0.7,  # can be tuned for more/less varied output
        "do_sample": True,   # enable sampling so outputs vary
    }
    output = pipe(messages, **generation_args)
    return output[0]["generated_text"]


# Build the interface with Gradio Blocks.
with gr.Blocks() as demo:
    gr.Markdown("# AI Chatbot Assistant\nTanya apapun, saya siap membantu!")

    # Notice about CPU-only execution.
    gr.Markdown(
        "### ⚠ Sorry for the inconvenience. The Space is currently running on the CPU, which might affect performance. We appreciate your understanding."
    )

    # Layout: output box above the input box.
    with gr.Row():
        output_box = gr.Textbox(
            label="AI Response",
            placeholder="Respons akan muncul di sini...",
            lines=10,
            interactive=False,  # read-only; filled by the model
        )
    with gr.Row():
        input_box = gr.Textbox(label="Ask me anything!", placeholder="Tanyakan sesuatu...")
    with gr.Row():
        submit_button = gr.Button("Submit")

    # Wire the submit button to the generator.
    submit_button.click(generate_response, inputs=input_box, outputs=output_box)


# Guard the launch so importing this module does not start the server.
if __name__ == "__main__":
    demo.launch()