import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the GGUF weights from the Hugging Face Hub (cached locally after the first run).
# The filename must match a file that actually exists in the repo; the original
# "OpenLLaMA-7B.gguf" is not shipped by TheBloke/LLaMA-Pro-8B-Instruct-GGUF.
# The Q4_K_M quantization below assumes TheBloke's usual naming scheme.
model_file = hf_hub_download(
    repo_id="TheBloke/LLaMA-Pro-8B-Instruct-GGUF",
    filename="llama-pro-8b-instruct.Q4_K_M.gguf",
)
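
# If unsure which GGUF files the repo actually ships, they can be listed first
# with huggingface_hub (a quick sketch; run once and pick a filename):
# from huggingface_hub import list_repo_files
# print(list_repo_files("TheBloke/LLaMA-Pro-8B-Instruct-GGUF"))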

# Load the model via the llama.cpp bindings: 2048-token context window, 4 CPU threads.
llm = Llama(model_path=model_file, n_ctx=2048, n_threads=4)
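# Optional: if llama-cpp-python was built with GPU support, layers can be
# offloaded to the GPU via n_gpu_layers (a sketch; the layer count here is an
# assumption to tune for your hardware):
# llm = Llama(model_path=model_file, n_ctx=2048, n_threads=4, n_gpu_layers=32)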

def generate_response(prompt):
    # Generate up to 150 new tokens; temperature 0.7 allows mildly varied output.
    output = llm(prompt, max_tokens=150, temperature=0.7)
    # llama-cpp-python returns an OpenAI-style completion dict.
    return output["choices"][0]["text"]
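
# Quick smoke test before starting the web UI (the prompt is just an example):
# print(generate_response("Jaké je hlavní město České republiky?"))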

# Minimal Gradio UI: one text box in, one text box out.
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="LLaMA-Pro GGUF AI Assistant",
    description="Enter a question in Czech and the model will try to answer.",
)

iface.launch()
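
# By default launch() serves on localhost only. To reach the app from other
# machines or get a temporary public link, gradio supports (both optional):
# iface.launch(server_name="0.0.0.0", share=True)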