import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load tokenizer and model (any causal-LM checkpoint from Hugging Face can be
# substituted here).
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision to cut memory for the 7B model
    device_map="auto",          # let accelerate place layers on available devices
)

# Text-generation pipeline wrapping the loaded model/tokenizer pair.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Prompt template so answers follow the required instruction format.
SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER] YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""


def answer_question(user_question: str) -> str:
    """Generate a model answer for *user_question* using the system prompt.

    Args:
        user_question: The question typed by the user in the UI.

    Returns:
        The model's generated completion, stripped of surrounding whitespace.
    """
    full_prompt = f"{SYSTEM_PROMPT}\n\nQuestion: {user_question}\nAnswer:"
    # return_full_text=False makes the pipeline return only the newly generated
    # tokens, not the echoed prompt. The previous split("Answer:")[-1] approach
    # was fragile: "Answer:" appearing inside the model's own output would
    # silently truncate the response.
    output = pipe(
        full_prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]
    return output.strip()


# Build the Gradio UI.
demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Your Question"),
    outputs=gr.Textbox(label="LLM Response"),
    title="LLM Agent - FINAL ANSWER Format",
    description="Ask anything. Model will reason and finish with: FINAL ANSWER: [YOUR FINAL ANSWER]",
)

if __name__ == "__main__":
    demo.launch()