import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load tokenizer and model (any causal-LM checkpoint from Hugging Face can be
# substituted here).
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision to cut memory for the 7B model
    device_map="auto",          # let accelerate place layers on available devices
)

# Text-generation pipeline wrapping the loaded model/tokenizer pair.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Prompt template so answers follow the required instruction format.
SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER] YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""


def answer_question(user_question: str) -> str:
    """Generate a model answer for *user_question* using the system prompt.

    Args:
        user_question: The question typed by the user in the UI.

    Returns:
        The model's generated completion, stripped of surrounding whitespace.
    """
    full_prompt = f"{SYSTEM_PROMPT}\n\nQuestion: {user_question}\nAnswer:"
    # return_full_text=False makes the pipeline return only the newly generated
    # tokens, not the echoed prompt. The previous split("Answer:")[-1] approach
    # was fragile: "Answer:" appearing inside the model's own output would
    # silently truncate the response.
    output = pipe(
        full_prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]
    return output.strip()


# Build the Gradio UI.
demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Your Question"),
    outputs=gr.Textbox(label="LLM Response"),
    title="LLM Agent - FINAL ANSWER Format",
    description="Ask anything. Model will reason and finish with: FINAL ANSWER: [YOUR FINAL ANSWER]",
)

if __name__ == "__main__":
    demo.launch()