import gradio as gr
from llama_cpp import Llama

# Model Configurations
model_path = "gemma-7b-it.Q2_K.gguf"
model_kwargs = {
    "n_ctx": 4096,  
    "n_threads": 4, 
    "n_gpu_layers": 0,  
}

# Instantiate model 
llm = Llama(model_path=model_path, **model_kwargs)

# Generation kwargs
generation_kwargs = {
    "max_tokens": 200, 
    "stop": ["<|endoftext|>", "</s>"],  
    "echo": False,  
    "top_k": 1   
}

# System prompt
system_prompt = "You are an AI assistant specialized in answering common Python programming questions. Provide clear, concise, and accurate responses."

def generate_response(query):
    prompt = f"{system_prompt}\nQuestion: **{query}**\nAnswer:"
    res = llm(prompt, **generation_kwargs)
    response = res["choices"][0]["text"].strip()
    return f"**Question:** '''{query}'''\n\n**Answer:** '''{response}'''"


# Gradio interface
title = "Python Question Answering"
description = "Ask common Python questions and get answers from the LLM model."

interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Enter your Python question"),
    outputs=gr.Textbox(label="Answer"),
    title=title,
    description=description,
    examples=[
        "How do I iterate over a list in Python?",
        "What is the difference between a list and a tuple?",
        "How do I read a file in Python?",
    ],
)

interface.launch(share=True)