"""Minimal Gradio web UI that forwards a prompt to a local llama.cpp binary."""

import logging
import subprocess

import gradio as gr

LLAMA_BIN = "./llama.cpp/build/bin/main"
MODEL = "./model.gguf"
# Values passed to the binary as the "-n" and "-t" arguments respectively.
N_PREDICT = 200
N_THREADS = 4

logger = logging.getLogger(__name__)


def generate(prompt):
    """Run the llama.cpp binary on *prompt* and return what it wrote to stdout.

    Args:
        prompt: Text passed to the binary as the value of ``-p``.

    Returns:
        The process's standard output, decoded as UTF-8. Bytes that do not
        form valid UTF-8 are replaced rather than raising.

    Raises:
        gr.Error: If the process exits with a non-zero status. The captured
            stderr is logged on the server and is not included in the error
            shown to the user.
    """
    command = [
        LLAMA_BIN,
        "-m", MODEL,
        "-p", prompt,
        "-n", str(N_PREDICT),
        "-t", str(N_THREADS),
    ]
    result = subprocess.run(
        command,
        # Do not let the child read from the server's own stdin.
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        # Decode explicitly instead of relying on the locale encoding, and
        # tolerate invalid byte sequences so decoding itself cannot raise.
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        # Keep the details server-side; only the exit status reaches the UI.
        logger.error(
            "%s exited with status %d: %s",
            LLAMA_BIN,
            result.returncode,
            result.stderr.strip(),
        )
        raise gr.Error(f"llama.cpp exited with status {result.returncode}")
    return result.stdout


# NOTE(review): binding to 0.0.0.0 accepts connections on every network
# interface -- confirm this exposure is intended.
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)