# app.py — Streamlit llama.cpp multi-quantization tool (commit 34b694d)
import os
import subprocess
import streamlit as st
from huggingface_hub import snapshot_download, HfApi
# ============================================================
# SESSION STATE
# ============================================================
# Persist the list of produced .gguf files across Streamlit reruns.
st.session_state.setdefault("quantized_models", [])
# ============================================================
# CONFIG
# ============================================================
# Preset model choices offered in the selectbox.
MODELS_LIST = ['rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full']

# Quantization presets accepted by llama-quantize.
# FIX: "Q3_K_l" -> "Q3_K_L"; llama-quantize type names are uppercase and
# the lowercase variant is not recognized, so that preset always failed.
QUANT_TYPES = [
    "Q2_K", "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S",
    "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S",
    "Q6_K", "Q8_0", "BF16", "F16", "F32",
]

# Locations of the llama.cpp tooling baked into the container image.
LLAMA_CPP_PATH = "/app/llama.cpp"
CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"
# ============================================================
# UTILS
# ============================================================
def check_dependencies():
    """Halt the app with an error banner if the llama.cpp tooling is missing."""
    required = {
        CONVERT_SCRIPT: "❌ convert_hf_to_gguf.py not found",
        QUANTIZE_BIN: "❌ llama-quantize binary not found",
    }
    for required_path, message in required.items():
        if not os.path.exists(required_path):
            st.error(message)
            st.stop()
def download_model(hf_model_name, output_dir="/tmp/models"):
    """Download a Hugging Face model snapshot into *output_dir*.

    Parameters
    ----------
    hf_model_name : str
        Repo id, e.g. "org/model".
    output_dir : str
        Local directory where the snapshot files are materialized.

    Returns
    -------
    str
        Path to the downloaded snapshot directory.
    """
    st.write(f"📥 Downloading `{hf_model_name}` ...")
    # FIX: dropped local_dir_use_symlinks — it is deprecated (and ignored)
    # in huggingface_hub >= 0.23; with local_dir set, real files are
    # always written, so behavior is unchanged and the warning goes away.
    model_path = snapshot_download(
        repo_id=hf_model_name,
        local_dir=output_dir,
    )
    st.success("✅ Model downloaded")
    return model_path
def convert_to_gguf(model_path, output_file):
    """Run llama.cpp's HF->GGUF converter.

    Raises
    ------
    RuntimeError
        If the converter subprocess exits non-zero (stderr is surfaced
        in the UI first).
    """
    st.write("🔄 Converting to GGUF...")
    proc = subprocess.run(
        ["python3", CONVERT_SCRIPT, model_path, "--outfile", output_file],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        st.error(proc.stderr)
        raise RuntimeError("Conversion failed")
    st.success("✅ GGUF created")
def quantize_model(gguf_file, quant_type):
    """Quantize *gguf_file* with llama-quantize.

    Returns the quantized file path, or None if the subprocess failed
    (the error output is shown in the UI instead of raising).
    """
    output_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
    st.write(f"⚡ Quantizing → {quant_type}")
    proc = subprocess.run(
        [QUANTIZE_BIN, gguf_file, output_file, quant_type],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        st.error(proc.stderr)
        return None
    st.success(f"✅ {quant_type} done")
    return output_file
def upload_to_huggingface(file_path, repo_id):
    """Push a single file to the given HF model repo.

    Reads the token from the HF_TOKEN environment variable; shows an
    error and returns early if it is absent. Creates the target repo
    if it does not exist yet.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        st.error("❌ HF_TOKEN not found in environment variables")
        return
    client = HfApi(token=token)
    client.create_repo(repo_id, exist_ok=True, repo_type="model")
    client.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=os.path.basename(file_path),
        repo_id=repo_id,
    )
    st.success(f"🚀 Uploaded to https://huggingface.co/{repo_id}")
# ============================================================
# UI
# ============================================================
st.title("🦙 LLaMA.cpp Multi-Quantization Tool")
check_dependencies()

# Model selection: preset dropdown, with a free-text fallback below it.
selected_model = st.selectbox(
    "Select Hugging Face Model",
    MODELS_LIST,
    index=None,
)
hf_model_name = selected_model or st.text_input("Or Enter Custom HF Model ID")

# Quantization presets rendered as a 4-column checkbox grid.
st.subheader("Select Quantization Types")
cols = st.columns(4)
selected_quants = []
for idx, quant in enumerate(QUANT_TYPES):
    if cols[idx % 4].checkbox(quant):
        selected_quants.append(quant)
# Start button
if st.button("🚀 Start Quantization"):
    if not hf_model_name:
        st.warning("Please enter a model name")
        st.stop()
    if not selected_quants:
        st.warning("Select at least one quant type")
        st.stop()
    with st.spinner("Processing..."):
        try:
            # FIX: give each model its own working directory. Reusing a
            # single /tmp/models dir mixed files from previously downloaded
            # models, which could feed stale/foreign files to the converter.
            safe_name = hf_model_name.replace("/", "_")
            base_dir = os.path.join("/tmp/models", safe_name)
            os.makedirs(base_dir, exist_ok=True)
            model_path = download_model(hf_model_name, base_dir)
            gguf_file = os.path.join(base_dir, safe_name + ".gguf")
            convert_to_gguf(model_path, gguf_file)
            # Discard results of any previous run before collecting new ones.
            st.session_state.quantized_models = []
            for quant in selected_quants:
                quant_file = quantize_model(gguf_file, quant)
                if quant_file:
                    st.session_state.quantized_models.append(quant_file)
            st.success("🎉 All quantizations completed")
        except Exception as e:
            st.error(f"❌ Error: {str(e)}")
# ============================================================
# DOWNLOAD + UPLOAD SECTION
# ============================================================
if st.session_state.quantized_models:
    st.subheader("📦 Generated Models")

    # One download button per generated file; the absolute file path
    # doubles as a unique widget key.
    for quant_path in st.session_state.quantized_models:
        quant_name = os.path.basename(quant_path)
        with open(quant_path, "rb") as fh:
            st.download_button(
                label=f"⬇️ Download {quant_name}",
                data=fh,
                file_name=quant_name,
                key=quant_path,
            )

    st.divider()
    st.subheader("🚀 Upload to Hugging Face")
    repo_id = st.text_input("Target Repository (e.g. username/model-quant)")

    if st.button("📤 Upload All to HF"):
        if not repo_id:
            st.warning("Enter repository ID")
        else:
            with st.spinner("Uploading..."):
                for quant_path in st.session_state.quantized_models:
                    upload_to_huggingface(quant_path, repo_id)
            st.success("✅ All files uploaded successfully")