Spaces:
Running
Running
| import os | |
| import subprocess | |
| import streamlit as st | |
| from huggingface_hub import snapshot_download, HfApi | |
# ============================================================
# SESSION STATE
# ============================================================
# Persist the list of produced .gguf file paths across Streamlit reruns,
# so the download/upload section below survives widget interactions.
if "quantized_models" not in st.session_state:
    st.session_state.quantized_models = []
# ============================================================
# CONFIG
# ============================================================
# Models pre-offered in the selectbox (a custom HF model ID can also be typed).
MODELS_LIST = ['rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full']
# Quantization presets accepted by llama-quantize.
# FIX: "Q3_K_l" -> "Q3_K_L" — llama.cpp's canonical type names are
# upper-case, and the lowercase 'l' was inconsistent with every other entry.
QUANT_TYPES = [
    "Q2_K", "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S",
    "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S",
    "Q6_K", "Q8_0", "BF16", "F16", "F32"
]
# Paths into the llama.cpp checkout baked into the container image.
LLAMA_CPP_PATH = "/app/llama.cpp"
CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"
| # ============================================================ | |
| # UTILS | |
| # ============================================================ | |
def check_dependencies():
    """Halt the app immediately if the llama.cpp toolchain is missing."""
    required = (
        (CONVERT_SCRIPT, "β convert_hf_to_gguf.py not found"),
        (QUANTIZE_BIN, "β llama-quantize binary not found"),
    )
    for path, message in required:
        if not os.path.exists(path):
            st.error(message)
            st.stop()
def download_model(hf_model_name, output_dir="/tmp/models"):
    """Download a Hugging Face model snapshot into *output_dir*.

    Returns the local filesystem path of the downloaded snapshot.
    """
    st.write(f"π₯ Downloading `{hf_model_name}` ...")
    # NOTE: the original call passed local_dir_use_symlinks=False, but that
    # argument is deprecated and ignored in recent huggingface_hub releases —
    # with local_dir set, real files are always materialized — so it is
    # dropped here.
    model_path = snapshot_download(
        repo_id=hf_model_name,
        local_dir=output_dir,
    )
    st.success("β Model downloaded")
    return model_path
def convert_to_gguf(model_path, output_file):
    """Run llama.cpp's HF-to-GGUF converter on the downloaded model.

    Raises RuntimeError when the converter exits with a non-zero status;
    the converter's stderr is surfaced in the UI first.
    """
    st.write("π Converting to GGUF...")
    command = ["python3", CONVERT_SCRIPT, model_path, "--outfile", output_file]
    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        st.error(proc.stderr)
        raise RuntimeError("Conversion failed")
    st.success("β GGUF created")
def quantize_model(gguf_file, quant_type):
    """Quantize *gguf_file* to *quant_type* using llama-quantize.

    Returns the output file path on success, or None when quantization
    fails (the tool's stderr is shown in the UI).
    """
    # FIX: build "<base>-<quant>.gguf" via splitext instead of
    # str.replace(".gguf", ...) — replace() substitutes the FIRST
    # occurrence anywhere in the path, which would corrupt the name if
    # ".gguf" appeared in an earlier path component.
    base, ext = os.path.splitext(gguf_file)
    output_file = f"{base}-{quant_type}{ext}"
    st.write(f"β‘ Quantizing β {quant_type}")
    cmd = [
        QUANTIZE_BIN,
        gguf_file,
        output_file,
        quant_type
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        st.error(result.stderr)
        return None
    st.success(f"β {quant_type} done")
    return output_file
def upload_to_huggingface(file_path, repo_id):
    """Push a single quantized file to a Hugging Face model repository.

    Requires the HF_TOKEN environment variable; shows an error and returns
    early when it is absent.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        st.error("β HF_TOKEN not found in environment variables")
        return
    client = HfApi(token=token)
    # Idempotent: creating an already-existing repo is a no-op.
    client.create_repo(repo_id, exist_ok=True, repo_type="model")
    remote_name = os.path.basename(file_path)
    client.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=remote_name,
        repo_id=repo_id,
    )
    st.success(f"π Uploaded to https://huggingface.co/{repo_id}")
# ============================================================
# UI
# ============================================================
# Flat Streamlit script: re-executed top-to-bottom on every interaction.
st.title("π¦ LLaMA.cpp Multi-Quantization Tool")
check_dependencies()

# Model selection: curated dropdown, with a free-text fallback for any
# other Hugging Face model ID.
selected_model = st.selectbox(
    "Select Hugging Face Model",
    MODELS_LIST,
    index=None  # no default selection, so the text input below can win
)
hf_model_name = selected_model or st.text_input(
    "Or Enter Custom HF Model ID"
)

# Multi-checkbox quant selection, laid out in a 4-column grid.
st.subheader("Select Quantization Types")
selected_quants = []
cols = st.columns(4)
for i, quant in enumerate(QUANT_TYPES):
    with cols[i % 4]:
        if st.checkbox(quant):
            selected_quants.append(quant)

# Start button: pipeline is download -> convert to GGUF -> quantize each
# selected type. Validation happens first; st.stop() aborts this rerun.
if st.button("π Start Quantization"):
    if not hf_model_name:
        st.warning("Please enter a model name")
        st.stop()
    if not selected_quants:
        st.warning("Select at least one quant type")
        st.stop()
    with st.spinner("Processing..."):
        try:
            base_dir = "/tmp/models"
            os.makedirs(base_dir, exist_ok=True)
            model_path = download_model(hf_model_name, base_dir)
            # One intermediate full-precision GGUF per model; all quantized
            # variants are derived from this file.
            gguf_file = os.path.join(
                base_dir,
                hf_model_name.replace("/", "_") + ".gguf"
            )
            convert_to_gguf(model_path, gguf_file)
            # Discard results from any previous run before collecting new ones.
            st.session_state.quantized_models = []
            for quant in selected_quants:
                quant_file = quantize_model(gguf_file, quant)
                if quant_file:  # None means this quant type failed; skip it
                    st.session_state.quantized_models.append(quant_file)
            st.success("π All quantizations completed")
        except Exception as e:
            # Broad catch is the UI boundary: surface any pipeline failure
            # to the user instead of crashing the app.
            st.error(f"β Error: {str(e)}")
# ============================================================
# DOWNLOAD + UPLOAD SECTION
# ============================================================
# Rendered only after a successful run; session state keeps the file list
# alive across the reruns triggered by the widgets below.
if st.session_state.quantized_models:
    st.subheader("π¦ Generated Models")
    for file_path in st.session_state.quantized_models:
        with open(file_path, "rb") as f:
            st.download_button(
                label=f"β¬οΈ Download {os.path.basename(file_path)}",
                data=f,
                file_name=os.path.basename(file_path),
                key=file_path  # each path is unique, so it is a safe widget key
            )
    st.divider()
    st.subheader("π Upload to Hugging Face")
    repo_id = st.text_input(
        "Target Repository (e.g. username/model-quant)"
    )
    if st.button("π€ Upload All to HF"):
        if not repo_id:
            st.warning("Enter repository ID")
        else:
            with st.spinner("Uploading..."):
                # Upload every generated file into the same target repo.
                for file_path in st.session_state.quantized_models:
                    upload_to_huggingface(file_path, repo_id)
                st.success("β All files uploaded successfully")