"""Streamlit tool that downloads a Hugging Face model, converts it to GGUF
with llama.cpp's converter script, quantizes it into one or more formats,
and optionally uploads the results back to the Hugging Face Hub."""

import os
import subprocess

import streamlit as st
from huggingface_hub import HfApi, snapshot_download

# ============================================================
# SESSION STATE
# ============================================================
# Persist produced quant files across Streamlit reruns so the
# download/upload section survives later widget interactions.
if "quantized_models" not in st.session_state:
    st.session_state.quantized_models = []

# ============================================================
# CONFIG
# ============================================================
MODELS_LIST = ['rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full']

# llama-quantize type names are uppercase; "Q3_K_l" was a typo for "Q3_K_L".
QUANT_TYPES = [
    "Q2_K", "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S",
    "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S",
    "Q6_K", "Q8_0",
    "BF16", "F16", "F32",
]

LLAMA_CPP_PATH = "/app/llama.cpp"
CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"


# ============================================================
# UTILS
# ============================================================
def check_dependencies():
    """Abort the app early if the llama.cpp converter script or the
    quantize binary is missing from the expected install paths."""
    if not os.path.exists(CONVERT_SCRIPT):
        st.error("❌ convert_hf_to_gguf.py not found")
        st.stop()
    if not os.path.exists(QUANTIZE_BIN):
        st.error("❌ llama-quantize binary not found")
        st.stop()


def download_model(hf_model_name, output_dir="/tmp/models"):
    """Download a model snapshot from the Hub into *output_dir*.

    Returns the local path of the downloaded snapshot.
    """
    st.write(f"📥 Downloading `{hf_model_name}` ...")
    # NOTE: local_dir_use_symlinks is deprecated in huggingface_hub and
    # ignored by current releases, so it is intentionally not passed.
    model_path = snapshot_download(
        repo_id=hf_model_name,
        local_dir=output_dir,
    )
    st.success("✅ Model downloaded")
    return model_path


def convert_to_gguf(model_path, output_file):
    """Convert a downloaded HF model directory to a single GGUF file.

    Raises RuntimeError (after surfacing stderr in the UI) on failure.
    """
    st.write("🔄 Converting to GGUF...")
    cmd = [
        "python3", CONVERT_SCRIPT,
        model_path,
        "--outfile", output_file,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        st.error(result.stderr)
        raise RuntimeError("Conversion failed")
    st.success("✅ GGUF created")


def quantize_model(gguf_file, quant_type):
    """Quantize *gguf_file* to *quant_type*.

    Returns the output file path, or None if llama-quantize failed
    (the error is shown in the UI so remaining types can still run).
    """
    output_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
    st.write(f"⚡ Quantizing → {quant_type}")
    cmd = [
        QUANTIZE_BIN,
        gguf_file,
        output_file,
        quant_type,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        st.error(result.stderr)
        return None
    st.success(f"✅ {quant_type} done")
    return output_file


def upload_to_huggingface(file_path, repo_id):
    """Upload a single file to *repo_id* on the Hub.

    Requires the HF_TOKEN environment variable; shows an error and
    returns without raising when it is missing.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        st.error("❌ HF_TOKEN not found in environment variables")
        return
    api = HfApi(token=hf_token)
    api.create_repo(repo_id, exist_ok=True, repo_type="model")
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=os.path.basename(file_path),
        repo_id=repo_id,
    )
    st.success(f"🚀 Uploaded to https://huggingface.co/{repo_id}")


# ============================================================
# UI
# ============================================================
st.title("🦙 LLaMA.cpp Multi-Quantization Tool")

check_dependencies()

# Model selection: a preset dropdown, with a free-text fallback when
# nothing is selected.
selected_model = st.selectbox(
    "Select Hugging Face Model",
    MODELS_LIST,
    index=None,
)
hf_model_name = selected_model or st.text_input(
    "Or Enter Custom HF Model ID"
)

# Multi-checkbox quant selection laid out in a 4-column grid.
st.subheader("Select Quantization Types")
selected_quants = []
cols = st.columns(4)
for i, quant in enumerate(QUANT_TYPES):
    with cols[i % 4]:
        if st.checkbox(quant):
            selected_quants.append(quant)

# Start button: download once, convert once, then quantize per type.
if st.button("🚀 Start Quantization"):
    if not hf_model_name:
        st.warning("Please enter a model name")
        st.stop()
    if not selected_quants:
        st.warning("Select at least one quant type")
        st.stop()

    with st.spinner("Processing..."):
        try:
            base_dir = "/tmp/models"
            os.makedirs(base_dir, exist_ok=True)

            model_path = download_model(hf_model_name, base_dir)
            gguf_file = os.path.join(
                base_dir,
                hf_model_name.replace("/", "_") + ".gguf",
            )
            convert_to_gguf(model_path, gguf_file)

            # Reset results from any previous run before collecting new ones.
            st.session_state.quantized_models = []
            for quant in selected_quants:
                quant_file = quantize_model(gguf_file, quant)
                if quant_file:
                    st.session_state.quantized_models.append(quant_file)

            st.success("🎉 All quantizations completed")
        except Exception as e:
            st.error(f"❌ Error: {str(e)}")

# ============================================================
# DOWNLOAD + UPLOAD SECTION
# ============================================================
if st.session_state.quantized_models:
    st.subheader("📦 Generated Models")
    for file_path in st.session_state.quantized_models:
        with open(file_path, "rb") as f:
            st.download_button(
                label=f"⬇️ Download {os.path.basename(file_path)}",
                data=f,
                file_name=os.path.basename(file_path),
                key=file_path,  # path is unique per quant file
            )

    st.divider()
    st.subheader("🚀 Upload to Hugging Face")
    repo_id = st.text_input(
        "Target Repository (e.g. username/model-quant)"
    )
    if st.button("📤 Upload All to HF"):
        if not repo_id:
            st.warning("Enter repository ID")
        else:
            with st.spinner("Uploading..."):
                for file_path in st.session_state.quantized_models:
                    upload_to_huggingface(file_path, repo_id)
            st.success("✅ All files uploaded successfully")