File size: 5,787 Bytes
45a8447
 
 
34b694d
45a8447
34b694d
 
 
cb73a75
34b694d
 
45a8447
34b694d
 
 
7490d0a
34b694d
08aff6a
34b694d
 
 
 
 
 
45a8447
34b694d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45a8447
3ec5df4
34b694d
 
 
 
 
 
 
 
 
 
 
45a8447
34b694d
 
 
 
 
45a8447
34b694d
 
 
 
 
 
 
 
 
 
45a8447
34b694d
 
 
3ec5df4
45a8447
34b694d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import os
import subprocess
import streamlit as st
from huggingface_hub import snapshot_download, HfApi

# ============================================================
# SESSION STATE
# ============================================================

# Persist the list of generated .gguf file paths across Streamlit reruns so
# the download/upload section below keeps working after later button clicks.
if "quantized_models" not in st.session_state:
    st.session_state.quantized_models = []

# ============================================================
# CONFIG
# ============================================================

# Preset choices for the model selectbox; a custom repo id can also be typed
# into the text input in the UI section.
MODELS_LIST = ['rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full']

# Quantization formats offered in the UI and passed verbatim to llama-quantize.
# FIX: the correct llama.cpp spelling is "Q3_K_L" (upper-case L); the previous
# "Q3_K_l" would be rejected by the quantize binary.
QUANT_TYPES = [
    "Q2_K", "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S",
    "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S",
    "Q6_K", "Q8_0", "BF16", "F16", "F32"
]

# Location of the llama.cpp checkout and the two tools this app shells out to.
LLAMA_CPP_PATH = "/app/llama.cpp"
CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"

# ============================================================
# UTILS
# ============================================================

def check_dependencies():
    """Halt the Streamlit app early if the llama.cpp tooling is missing."""
    required = [
        (CONVERT_SCRIPT, "❌ convert_hf_to_gguf.py not found"),
        (QUANTIZE_BIN, "❌ llama-quantize binary not found"),
    ]
    for path, message in required:
        if not os.path.exists(path):
            st.error(message)
            st.stop()  # raises internally, so later checks never run

def download_model(hf_model_name, output_dir="/tmp/models"):
    """Download a Hugging Face model snapshot into its own subdirectory.

    Args:
        hf_model_name: Repo id such as "org/model".
        output_dir: Parent directory for downloads (default "/tmp/models").

    Returns:
        Path to the local directory containing the downloaded snapshot.
    """
    st.write(f"📥 Downloading `{hf_model_name}` ...")
    # FIX: give each model its own folder — previously every snapshot landed
    # directly in output_dir, so downloading a second model mixed its files
    # with the first one's.
    local_dir = os.path.join(output_dir, hf_model_name.replace("/", "_"))
    # NOTE: the local_dir_use_symlinks kwarg was dropped — it is deprecated
    # and ignored by recent huggingface_hub releases (real files are always
    # written when local_dir is given).
    model_path = snapshot_download(
        repo_id=hf_model_name,
        local_dir=local_dir,
    )
    st.success("✅ Model downloaded")
    return model_path

def convert_to_gguf(model_path, output_file):
    """Run llama.cpp's convert_hf_to_gguf.py on *model_path*.

    Writes the result to *output_file*; raises RuntimeError (after showing
    the converter's stderr) when the subprocess exits non-zero.
    """
    st.write("🔄 Converting to GGUF...")
    command = ["python3", CONVERT_SCRIPT, model_path, "--outfile", output_file]
    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        st.error(proc.stderr)
        raise RuntimeError("Conversion failed")
    st.success("✅ GGUF created")

def quantize_model(gguf_file, quant_type):
    """Quantize *gguf_file* to *quant_type* with llama-quantize.

    Returns the path of the quantized file, or None if the tool failed
    (its stderr is surfaced in the UI instead of raising).
    """
    target = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
    st.write(f"⚡ Quantizing → {quant_type}")

    proc = subprocess.run(
        [QUANTIZE_BIN, gguf_file, target, quant_type],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        st.error(proc.stderr)
        return None

    st.success(f"✅ {quant_type} done")
    return target

def upload_to_huggingface(file_path, repo_id):
    """Push a single file to a Hugging Face model repository.

    Creates the repo if it does not exist. Requires the HF_TOKEN
    environment variable; shows an error and returns early otherwise.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        st.error("❌ HF_TOKEN not found in environment variables")
        return

    client = HfApi(token=token)
    # exist_ok makes repo creation idempotent across repeated uploads.
    client.create_repo(repo_id, exist_ok=True, repo_type="model")
    client.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=os.path.basename(file_path),
        repo_id=repo_id,
    )
    st.success(f"🚀 Uploaded to https://huggingface.co/{repo_id}")

# ============================================================
# UI
# ============================================================

st.title("πŸ¦™ LLaMA.cpp Multi-Quantization Tool")

check_dependencies()

# Model selection
selected_model = st.selectbox(
    "Select Hugging Face Model",
    MODELS_LIST,
    index=None
)

hf_model_name = selected_model or st.text_input(
    "Or Enter Custom HF Model ID"
)

# Multi-checkbox quant selection
st.subheader("Select Quantization Types")

# One checkbox per quant type, laid out in a 4-column grid; the checked
# ones are re-collected into selected_quants on every Streamlit rerun.
selected_quants = []
cols = st.columns(4)

for i, quant in enumerate(QUANT_TYPES):
    with cols[i % 4]:
        if st.checkbox(quant):
            selected_quants.append(quant)

# Start button
if st.button("πŸš€ Start Quantization"):

    if not hf_model_name:
        st.warning("Please enter a model name")
        st.stop()

    if not selected_quants:
        st.warning("Select at least one quant type")
        st.stop()

    with st.spinner("Processing..."):
        try:
            base_dir = "/tmp/models"
            os.makedirs(base_dir, exist_ok=True)

            model_path = download_model(hf_model_name, base_dir)

            gguf_file = os.path.join(
                base_dir,
                hf_model_name.replace("/", "_") + ".gguf"
            )

            convert_to_gguf(model_path, gguf_file)

            st.session_state.quantized_models = []

            for quant in selected_quants:
                quant_file = quantize_model(gguf_file, quant)
                if quant_file:
                    st.session_state.quantized_models.append(quant_file)

            st.success("πŸŽ‰ All quantizations completed")

        except Exception as e:
            st.error(f"❌ Error: {str(e)}")

# ============================================================
# DOWNLOAD + UPLOAD SECTION
# ============================================================

if st.session_state.quantized_models:

    st.subheader("πŸ“¦ Generated Models")

    for file_path in st.session_state.quantized_models:

        with open(file_path, "rb") as f:
            st.download_button(
                label=f"⬇️ Download {os.path.basename(file_path)}",
                data=f,
                file_name=os.path.basename(file_path),
                key=file_path
            )

    st.divider()

    st.subheader("πŸš€ Upload to Hugging Face")

    repo_id = st.text_input(
        "Target Repository (e.g. username/model-quant)"
    )

    if st.button("πŸ“€ Upload All to HF"):
        if not repo_id:
            st.warning("Enter repository ID")
        else:
            with st.spinner("Uploading..."):
                for file_path in st.session_state.quantized_models:
                    upload_to_huggingface(file_path, repo_id)

                st.success("βœ… All files uploaded successfully")