"""Minimal Gradio chat UI backed by a local llama.cpp model."""

import gradio as gr
from llama_cpp import Llama

# Load the model once at startup — constructing Llama per request would
# reload the weights from disk every time.
llm = Llama(model_path="mini_llama.gguf")


def chat(message, history):
    """Generate a reply for *message*, conditioning on the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns as supplied by ``gr.ChatInterface`` — either
        ``(user, assistant)`` pairs or OpenAI-style ``{"role", "content"}``
        dicts depending on the Gradio version.

    Returns
    -------
    str
        The raw completion text from the model.

    The original implementation ignored *history*, so the model had no
    conversational context; we fold it into a simple transcript prompt.
    """
    transcript = []
    for turn in history:
        # Support both dict-style and tuple-style history entries.
        if isinstance(turn, dict):
            transcript.append(f"{turn.get('role', 'user')}: {turn.get('content', '')}")
        else:
            user_msg, bot_msg = turn
            transcript.append(f"user: {user_msg}")
            if bot_msg:
                transcript.append(f"assistant: {bot_msg}")
    transcript.append(f"user: {message}")
    transcript.append("assistant:")
    prompt = "\n".join(transcript)

    output = llm(prompt, max_tokens=500)
    return output["choices"][0]["text"]


if __name__ == "__main__":
    # Guarded launch so importing this module doesn't start a server.
    # share=True opens a public Gradio tunnel (kept from the original).
    gr.ChatInterface(
        fn=chat,
        title="Mini Llama",
        description="Чат с моделью",
    ).launch(share=True)