# bldr / app.py
# lainlives — uploaded via huggingface_hub (commit 3820d48, verified)
import os
import shutil
import subprocess
import sys
from pathlib import Path
from sys import argv
from time import strftime, sleep

import gradio as gr
from huggingface_hub import snapshot_download, HfApi
from apscheduler.schedulers.background import BackgroundScheduler

from theme import blurple
# Used for restarting the space
HF_TOKEN = os.environ.get("HF_TOKEN")  # write-capable token for HfApi.restart_space
HOST_REPO = "lainlives/ztestzz"  # the Space that hosts this app (restart target)
LLAMACPP_DIR = Path("./llama.cpp")  # local llama.cpp checkout with built binaries
CONVERT_SCRIPT = LLAMACPP_DIR / "convert_hf_to_gguf.py"  # safetensors -> GGUF converter
QUANTIZE_BIN = LLAMACPP_DIR / "llama-quantize"  # GGUF quantization binary
# Quantization types produced for every model, highest precision first
TARGET_QUANTS = ["Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0"]
def format_log(msg):
    """Return *msg* prefixed with the current wall-clock time: '[HH:MM:SS] msg'."""
    stamp = strftime("%H:%M:%S")
    return "[{}] {}".format(stamp, msg)
def setup_ollama_keys(private_key_content):
    """
    Install the user's signing key at ~/.ollama/id_ed25519.

    'ollama push' authenticates with this key, so without it pushes fail.

    Args:
        private_key_content: the OpenSSH-format private key text, or falsy.

    Returns:
        (success: bool, message: str) — human-readable status for the log.
    """
    if not private_key_content:
        return False, "⚠️ No Private Key provided. Pushing will likely fail."
    ollama_dir = Path.home() / ".ollama"
    ollama_dir.mkdir(parents=True, exist_ok=True)
    key_path = ollama_dir / "id_ed25519"
    try:
        # OpenSSH-format keys must end with a newline or the parser rejects
        # them ("invalid format"), so re-append one after trimming whitespace.
        key_path.write_text(private_key_content.strip() + "\n")
        key_path.chmod(0o600)  # private keys must not be group/world readable
        return True, "πŸ”‘ Private Key installed successfully."
    except Exception as e:
        return False, f"❌ Failed to install keys: {e}"
def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
    """
    Build an Ollama model from *gguf_path* and push it to the registry.

    Args:
        gguf_path: Path to the GGUF file to wrap and push.
        ollama_repo: destination repo, e.g. "user/model".
        tag_suffix: tag for this build (quant name); lowercased for the tag.

    Returns:
        list[str]: timestamped log lines describing what happened.
    """
    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"
    # The CLI needs a physical Modelfile to point to with the '-f' flag.
    modelfile_path = gguf_path.parent / "Modelfile"
    with open(modelfile_path, "w") as f:
        f.write(f"FROM {gguf_path.resolve()}")
    logs = [format_log(f"🐳 Creating Ollama build: {ollama_tag}")]
    try:
        # 'ollama create' registers the GGUF as a local model.
        create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
        subprocess.run(create_cmd, check=True, capture_output=True)
        logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
        # 'ollama push' uploads the local model; returncode checked manually
        # so a failed push is logged rather than raised.
        push_result = subprocess.run(["ollama", "push", ollama_tag],
                                     capture_output=True, text=True)
        if push_result.returncode == 0:
            logs.append(format_log(f"βœ… Successfully pushed {ollama_tag}"))
        else:
            logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
        # Remove the local tag to save disk space in the container.
        subprocess.run(["ollama", "rm", ollama_tag],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        # Raised by check=True when 'ollama create' fails.
        logs.append(format_log(f"❌ Ollama Create Error: {e}"))
    except Exception as e:
        logs.append(format_log(f"❌ Error on {tag_suffix}: {str(e)}"))
    finally:
        # Always remove the temporary Modelfile, even on the failure paths
        # (the original only cleaned it up when 'create' succeeded).
        modelfile_path.unlink(missing_ok=True)
    return logs
def start_ollama_daemon(ollama_key):
    """
    Install the push key and launch 'ollama serve' as a background process.

    The previous version was a generator referencing an undefined 'logs'
    variable, and its caller never iterated it — so its body never ran and
    the daemon was never started.  This is now a plain function: the work
    happens as soon as it is called.

    Args:
        ollama_key: OpenSSH private key text for ~/.ollama/id_ed25519.

    Returns:
        list[str]: timestamped log lines from the startup sequence.
    """
    logs = [format_log("⏳ Starting Ollama daemon in background...")]
    print(logs[-1])
    success, auth_msg = setup_ollama_keys(ollama_key)
    logs.append(format_log(auth_msg))
    if not success:
        logs.append(format_log("❌ Stopping: Authentication setup failed."))
        return logs
    # Fire-and-forget: the server must keep running while conversions push
    # models, so we deliberately do not wait on this process.
    subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        env=os.environ.copy(),
    )
    sleep(5)  # give the server a moment to bind its API port
    return logs
def stop_ollama_daemon():
    """Terminate any running Ollama server processes via pkill."""
    print("⏳ Stopping Ollama daemon...")
    # subprocess.run (not Popen) so the kill has actually completed before we
    # return and the caller tears down the workspace.
    subprocess.run(["pkill", "ollama"],
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
    """
    Download *hf_repo*, convert it to GGUF (BF16 + F16) and quantize the F16
    master to every entry of TARGET_QUANTS, pushing each artifact to
    *ollama_repo* and deleting it afterwards to keep disk usage bounded.

    This is a generator: after every step it yields the full newline-joined
    log so the Gradio TextArea streams progress live.

    Args:
        hf_repo: source Hugging Face repo id, e.g. "org/model".
        ollama_repo: destination Ollama repo name, e.g. "user/model".
        hf_token: optional HF token (gated models / faster downloads).
        progress: Gradio progress tracker (accepted for UI wiring).

    Yields:
        str: the accumulated log, newline-joined.
    """
    logs = []
    work_dir = Path("conversion_work_dir")
    download_dir = work_dir / "downloads"
    output_dir = work_dir / "output"
    # Start from a clean slate in case a previous job crashed before cleanup.
    if work_dir.exists():
        shutil.rmtree(work_dir)
    os.makedirs(download_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    try:
        # Download
        logs.append(format_log(f"⬇️ Downloading {hf_repo}..."))
        yield "\n".join(logs)
        model_path = snapshot_download(
            repo_id=hf_repo,
            local_dir=download_dir,
            token=hf_token if hf_token else None
        )
        logs.append(format_log("βœ… Download complete."))
        yield "\n".join(logs)
        # BF16 — pushed as its own tag then deleted; not used for quantizing.
        bf16_path = output_dir / "model-bf16.gguf"
        logs.append(format_log("βš™οΈ Converting to BF16..."))
        yield "\n".join(logs)
        # sys.executable guarantees the same interpreter/venv that runs this
        # app (a bare "python" may resolve to a different install on PATH).
        cmd = [sys.executable, str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
            os.remove(bf16_path)
            logs.append(format_log("🧹 Cleaned up BF16"))
        else:
            # BF16 is optional: log the failure and continue with F16.
            logs.append(format_log(f"⚠️ BF16 Conversion failed: {result.stderr}"))
        yield "\n".join(logs)
        # FP16 master — required: every quant below is derived from it.
        fp16_path = output_dir / "model-f16.gguf"
        logs.append(format_log("βš™οΈ Converting to FP16 (Master)..."))
        yield "\n".join(logs)
        cmd = [sys.executable, str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            # Abort the job with the converter's stderr in the log (the old
            # check=True path swallowed it as an opaque CalledProcessError).
            raise RuntimeError(f"FP16 conversion failed: {result.stderr}")
        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
        yield "\n".join(logs)
        # Quant Loop — quantize from the F16 master, push, then delete.
        for quant in TARGET_QUANTS:
            logs.append(format_log(f"--- {quant} ---"))
            yield "\n".join(logs)
            final_gguf = output_dir / f"model-{quant}.gguf"
            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
            q_result = subprocess.run(q_cmd, capture_output=True, text=True)
            if q_result.returncode != 0:
                # One bad quant type shouldn't sink the rest of the list.
                logs.append(format_log(f"❌ Quantize failed: {q_result.stderr}"))
                continue
            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
            os.remove(final_gguf)
            logs.append(format_log(f"🧹 Cleaned up {quant}"))
            yield "\n".join(logs)
        if fp16_path.exists():
            os.remove(fp16_path)
            logs.append(format_log("🧹 Cleaned up f16"))
    except Exception as e:
        # Top-level guard: surface the failure in the UI log instead of
        # crashing the Gradio worker.
        logs.append(format_log(f"❌ CRITICAL ERROR: {str(e)}"))
    finally:
        if work_dir.exists():
            shutil.rmtree(work_dir)
        logs.append(format_log("🏁 Job Done. Workspace cleared."))
        yield "\n".join(logs)
def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
    """
    Top-level job wired to the Start button: boot the Ollama daemon, stream
    conversion log snapshots to the UI, and always shut the daemon down.

    Args mirror the four UI inputs plus Gradio's progress tracker.

    Yields:
        str: log snapshots from run_conversion for the Logs TextArea.
    """
    start_ollama_daemon(ollama_key)
    try:
        # Re-yield every snapshot so Gradio streams it into the TextArea.
        yield from run_conversion(hf_repo, ollama_repo, hf_token, progress)
    finally:
        # The original skipped this when conversion raised or the client
        # disconnected mid-stream, leaving the daemon running.
        stop_ollama_daemon()
# --- UI ---
# theme is a gr.Blocks constructor argument; the custom blurple theme was
# imported but never applied here.
with gr.Blocks(title="HF to Ollama", theme=blurple) as demo:
    gr.Markdown("## Convert safetensor HF repos to an Ollama repo. Only creates 4bit to f16+bf16 GGUFs")
    with gr.Row():
        with gr.Column():
            hf_input = gr.Textbox(label="Source HF Repo", placeholder="unsloth/Qwen3.5-9B", value="unsloth/Qwen3.5-0.8B")
            hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)", type="password")
        with gr.Column():
            ollama_input = gr.Textbox(label="Destination Ollama Repo", placeholder="fervent_mcclintock/Qwen3.5-9B", value="fervent_mcclintock/Qwen3.5-9B")
            ollama_key_input = gr.Textbox(label="Ollama Private Key", lines=5, type="password", placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...")
    btn = gr.Button("Start", variant="primary")
    logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)
    # run_pipeline is a generator, so each yield live-updates the Logs box.
    btn.click(
        fn=run_pipeline,
        inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
        outputs=logs
    )
def restart_space():
    """Factory-reboot the hosting Space (fresh disk) via the HF Hub API."""
    api = HfApi()
    api.restart_space(repo_id=HOST_REPO, token=HF_TOKEN, factory_reboot=True)
# Factory-reboot the Space every 21600 s (6 h) to reclaim disk left over
# from crashed conversion jobs.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()
if __name__ == "__main__":
    # launch() takes no 'theme' kwarg (themes belong on the gr.Blocks
    # constructor), so passing one here raised TypeError before startup.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)