| import gradio as gr |
| import subprocess |
| import os |
| from huggingface_hub import hf_hub_download |
|
|
| |
# Selectable models, keyed by the label shown in the UI dropdown. Each entry
# names the Hugging Face repository and the GGUF file to fetch from it.
MODELS = {
    "Bonsai 1.7B (248MB)": {
        "repo": "prism-ml/Bonsai-1.7B-gguf",
        "file": "Bonsai-1.7B-v1.0.gguf",
    },
    "Bonsai 4B (572MB)": {
        "repo": "prism-ml/Bonsai-4B-gguf",
        "file": "Bonsai-4B-v1.0.gguf",
    },
    "Bonsai 8B (1.15GB)": {
        "repo": "prism-ml/Bonsai-8B-gguf",
        "file": "Bonsai-8B-v1.0.gguf",
    },
}
|
|
def _format_history(history):
    """Render earlier chat turns as ``User:`` / ``Assistant:`` transcript lines."""
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-style history: one {"role": ..., "content": ...} dict
            # per message rather than one pair per exchange.
            speaker = "User" if turn.get("role") == "user" else "Assistant"
            lines.append(f"{speaker}: {turn.get('content', '')}\n")
        else:
            # Pair-style history: one (user, assistant) pair per exchange.
            human, assistant = turn
            lines.append(f"User: {human}\nAssistant: {assistant}\n")
    return "".join(lines)


def chat(message, history, system_prompt, model_choice, temp):
    """Stream a reply to ``message`` by running ``./llama-cli`` on the chosen model.

    Args:
        message: The new user message.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.
        system_prompt: Text placed on the leading ``System:`` line of the prompt.
        model_choice: A key of ``MODELS`` selecting the repo/file to download.
        temp: Sampling temperature, forwarded to ``--temp``.

    Yields:
        The accumulated response text after each line of output, or a single
        ``"Inference Error: ..."`` string when the model cannot be fetched or
        the subprocess cannot be run. If ``llama-cli`` exits with a non-zero
        status, a final value with the error note appended is yielded.
    """
    # Resolve the model inside a guarded section so an unknown choice or a
    # failed download is reported in the chat instead of escaping as a raw
    # exception.
    try:
        config = MODELS[model_choice]
        model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
    except Exception as e:
        yield f"Inference Error: {str(e)}"
        return

    prompt = (
        f"System: {system_prompt}\n"
        f"{_format_history(history)}"
        f"User: {message}\nAssistant:"
    )

    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",
        "--threads", "4",
        "--temp", str(temp),
        "--repeat_penalty", "1.1",
        "--no-display-prompt",
    ]

    response = ""
    try:
        # The context manager closes the pipe and waits for the child on exit,
        # so no zombie process or open file descriptor is left behind.
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1) as process:
            try:
                for line in process.stdout:
                    response += line
                    yield response
            except BaseException:
                # Covers GeneratorExit too: when the consumer stops listening
                # mid-stream, stop the model instead of letting it run on.
                process.kill()
                raise
    except Exception as e:
        yield f"Inference Error: {str(e)}"
        return

    if process.returncode != 0:
        # Surface a failed run rather than showing an empty or truncated reply.
        note = f"Inference Error: llama-cli exited with status {process.returncode}"
        yield f"{response}\n\n{note}" if response else note
|
|
| |
# UI layout: a settings/benchmark column on the left and the chat on the right.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")

    with gr.Row():
        with gr.Column(scale=1):
            model_select = gr.Dropdown(
                list(MODELS.keys()),
                value="Bonsai 1.7B (248MB)",
                label="Model Selector",
            )
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt",
                lines=4,
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")

        with gr.Column(scale=3):
            # The extra inputs are passed to `chat` after (message, history),
            # in this order.
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider],
            )

    # Each benchmark button writes its canned prompt into the chat input box.
    # With `outputs=None` the returned string was discarded and the buttons
    # had no visible effect.
    btn_math.click(
        fn=lambda: "Explain why 1+1=2 logically.",
        outputs=chatbot.textbox,
    )
    btn_code.click(
        fn=lambda: "Write a C function to reverse a string in-place.",
        outputs=chatbot.textbox,
    )

demo.queue().launch(server_name="0.0.0.0", server_port=7860)
|
|