import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the GGUF weights from the Hugging Face Hub.
# The repository must be publicly accessible and MODEL_FILENAME must name a
# file that actually exists in MODEL_REPO_ID, otherwise hf_hub_download raises.
# Files in this repository follow the "<model-name>.<quantisation>.gguf"
# pattern; "OpenLLaMA-7B.gguf" is not one of them.
# NOTE(review): confirm the chosen quantisation against the repo's file list.
MODEL_REPO_ID = "TheBloke/LLaMA-Pro-8B-Instruct-GGUF"
MODEL_FILENAME = "llama-pro-8b-instruct.Q4_K_M.gguf"
model_file = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)

# Initialise the GGUF model with llama-cpp-python
# (context size of 2048 tokens, 4 threads).
llm = Llama(model_path=model_file, n_ctx=2048, n_threads=4)

def generate_response(prompt: str) -> str:
    """Generate a completion for *prompt* with the module-level ``llm``.

    Args:
        prompt: Text entered by the user. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        The text of the first choice returned by the model, or an empty
        string when there is no input to answer.
    """
    # Nothing to answer: skip the model call instead of letting it generate
    # unprompted text for up to 150 tokens.
    if not prompt or not prompt.strip():
        return ""

    # Call the model with the given prompt and return the generated text.
    output = llm(prompt, max_tokens=150, temperature=0.7)
    return output["choices"][0]["text"]

# Build the Gradio interface: one text input, one text output, with
# generate_response as the callback (passed positionally as fn, inputs, outputs).
iface = gr.Interface(
    generate_response,
    "text",
    "text",
    title="OpenLLaMA GGUF AI Asistent",
    description="Zadej dotaz v češtině a model se pokusí odpovědět.",
)

# Launch the interface.
iface.launch()