# new01 / app.py
# codemichaeld's picture
# Update app.py
# 39680ea verified
import gradio as gr
import os
import tempfile
import shutil
import subprocess
import re
import json
import datetime
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download
from safetensors.torch import save_file
import torch
# --- Utility: GGUF to FP8 Safetensors using gguf-connector CLI ---
def convert_gguf_to_fp8_safetensors(gguf_path, output_dir, progress=gr.Progress()):
    """
    Convert a GGUF file to FP8 safetensors using the gguf-connector CLI.

    Pipeline: GGUF -> intermediate safetensors (``ggc t3a``), then
    safetensors -> FP8 safetensors (``ggc q8``). A minimal config.json and
    tokenizer.json are written next to the converted model.

    Args:
        gguf_path: Path to the source .gguf file.
        output_dir: Directory receiving model.safetensors, config.json and
            tokenizer.json.
        progress: Gradio progress callback (injected by Gradio at runtime).

    Returns:
        Tuple ``(success: bool, message: str)``. Never raises; all failures
        are reported through the message.
    """
    temp_safetensors_dir = None  # initialized up front so cleanup is unconditional
    progress(0.1, desc="Starting GGUF to FP8 conversion...")
    try:
        # Probe for the 'ggc' command and fail early with an actionable
        # message instead of a bare FileNotFoundError from subprocess.
        try:
            subprocess.run(["ggc", "--version"], check=True, capture_output=True)
        except (FileNotFoundError, subprocess.CalledProcessError) as exc:
            raise RuntimeError(
                "gguf-connector CLI ('ggc') is not available. "
                "Install it with: pip install gguf-connector"
            ) from exc
        temp_safetensors_dir = tempfile.mkdtemp()
        safetensors_path = os.path.join(temp_safetensors_dir, "intermediate.safetensors")
        fp8_safetensors_path = os.path.join(output_dir, "model.safetensors")
        progress(0.3, desc="Converting GGUF to Safetensors...")
        # Step 1: GGUF -> Safetensors ('t3a' subcommand, positional arguments).
        result1 = subprocess.run(
            ["ggc", "t3a", gguf_path, safetensors_path],
            capture_output=True,
            text=True
        )
        if result1.returncode != 0:
            raise RuntimeError(f"GGUF to Safetensors failed: {result1.stderr}")
        progress(0.6, desc="Quantizing Safetensors to FP8...")
        # Step 2: Safetensors -> FP8 Safetensors ('q8' subcommand).
        result2 = subprocess.run(
            ["ggc", "q8", safetensors_path, fp8_safetensors_path],
            capture_output=True,
            text=True
        )
        if result2.returncode != 0:
            raise RuntimeError(f"Safetensors to FP8 failed: {result2.stderr}")
        # Minimal metadata files so the output directory looks like a model repo.
        # NOTE(review): metadata is hard-coded for Qwen -- assumes the source
        # GGUF is a Qwen model; confirm before converting other architectures.
        config_path = os.path.join(output_dir, "config.json")
        with open(config_path, "w") as f:
            json.dump({
                "model_type": "qwen",
                "quantization": "fp8",
                "architectures": ["QwenForCausalLM"]
            }, f)
        tokenizer_path = os.path.join(output_dir, "tokenizer.json")
        with open(tokenizer_path, "w") as f:
            json.dump({"model_type": "qwen", "vocab_size": 152064}, f)
        progress(1.0, desc="Conversion to FP8 Safetensors complete!")
        return True, "Conversion successful."
    except Exception as e:
        # Swallow everything by design: the caller renders the message in the UI.
        return False, str(e)
    finally:
        if temp_safetensors_dir is not None:
            shutil.rmtree(temp_safetensors_dir, ignore_errors=True)
# --- Main Processing Function ---
def process_and_upload(gguf_url, hf_token, new_repo_id, private_repo, progress=gr.Progress()):
    """
    Download a GGUF file from the Hub, convert it to FP8 safetensors and
    upload the result to a new repository on the caller's account.

    Args:
        gguf_url: Direct URL to a .gguf file on huggingface.co.
        hf_token: Hugging Face access token with write permission.
        new_repo_id: Target repository in 'username/model-name' form.
        private_repo: Whether the target repository is created private.
        progress: Gradio progress callback (injected by Gradio at runtime).

    Returns:
        Exactly two values ``(repo_link_html_or_None, status_markdown)`` --
        matching the two output components wired to the Convert button.
        (The original returned a third, unused value, which makes Gradio
        fail with an output-count mismatch.)
    """
    if not all([gguf_url, hf_token, new_repo_id]):
        return None, "❌ Error: Please fill in all fields."
    if not re.match(r"^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$", new_repo_id):
        return None, "❌ Error: Invalid repository ID format. Use 'username/model-name'."
    temp_download_dir = tempfile.mkdtemp()
    final_output_dir = tempfile.mkdtemp()
    try:
        # Authenticate and resolve the username for the model card.
        progress(0.05, desc="Logging into Hugging Face...")
        api = HfApi(token=hf_token)
        user_info = api.whoami()
        user_name = user_info['name']
        progress(0.1, desc=f"Logged in as {user_name}.")
        # Parse a URL of the form
        #   https://huggingface.co/<user>/<repo>/resolve/<rev>/<path/to/file.gguf>
        clean_url = gguf_url.strip()
        if "huggingface.co" not in clean_url:
            return None, "❌ Error: URL must be from Hugging Face."
        parts = clean_url.replace("https://huggingface.co/", "").split("/")
        if len(parts) < 3 or not parts[-1].endswith(".gguf"):
            return None, "❌ Error: Invalid GGUF URL format."
        repo_id = "/".join(parts[:2])
        # Keep subfolder components so files below the repo root resolve
        # correctly; fall back to the bare basename for short URLs.
        if len(parts) >= 5 and parts[2] == "resolve":
            filename = "/".join(parts[4:])
        else:
            filename = parts[-1]
        # Download the GGUF into a throwaway cache directory.
        progress(0.15, desc="Downloading GGUF file...")
        gguf_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            cache_dir=temp_download_dir,
            resume_download=True,
            token=hf_token
        )
        progress(0.3, desc=f"Downloaded '{filename}'.")
        # Convert GGUF -> FP8 safetensors into final_output_dir.
        success, msg = convert_gguf_to_fp8_safetensors(gguf_path, final_output_dir, progress)
        if not success:
            return None, f"❌ Conversion failed: {msg}"
        progress(0.8, desc="Preparing upload...")
        # Create (or reuse) the destination repository.
        api.create_repo(
            repo_id=new_repo_id,
            private=private_repo,
            repo_type="model",
            exist_ok=True
        )
        # Generate README with valid model-index YAML ('results' must be
        # indented under the '- name:' entry).
        readme_content = f"""---
license: other
library_name: transformers
tags:
- gguf
- fp8
- safetensors
- converted-by-gradio
- gguf-to-fp8
model-index:
- name: {new_repo_id.split('/')[-1]}
  results: []
---
# Model Card for {new_repo_id}
Converted from GGUF:
- **Source:** `{gguf_url}`
- **Filename:** `{filename}`
## Conversion
Dequantized from GGUF and requantized to **FP8** using `gguf-connector`.
- **Converted by:** {user_name}
- **Date:** {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""
        with open(os.path.join(final_output_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme_content)
        # Upload everything (model, config, tokenizer, README) in one commit.
        progress(0.9, desc="Uploading to Hugging Face Hub...")
        api.upload_folder(
            repo_id=new_repo_id,
            folder_path=final_output_dir,
            repo_type="model",
            token=hf_token,
            commit_message="Upload FP8 Safetensors model converted via gguf-connector"
        )
        progress(1.0, desc="βœ… Upload complete!")
        result_html = f"""
βœ… Success!
Your FP8 Safetensors model is ready.
**Repository:** [{new_repo_id}](https://huggingface.co/{new_repo_id})
**Visibility:** {'Private' if private_repo else 'Public'}
"""
        return gr.HTML(result_html), "βœ… Conversion and upload completed!"
    except Exception as e:
        # Top-level UI boundary: surface any failure as a status message.
        return None, f"❌ Unexpected error: {str(e)}"
    finally:
        # Always drop both scratch directories, success or failure.
        shutil.rmtree(temp_download_dir, ignore_errors=True)
        shutil.rmtree(final_output_dir, ignore_errors=True)
# --- Gradio Interface ---
with gr.Blocks(title="GGUF β†’ FP8 Safetensors Converter") as demo:
    # Header and short usage note.
    gr.Markdown("# πŸ”„ GGUF to FP8 Safetensors Converter")
    gr.Markdown("Uses `gguf-connector` to dequantize GGUF β†’ Safetensors β†’ FP8, then uploads to your Hugging Face account.")

    # Inputs: source URL + token on the left, target repo settings on the right.
    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(
                info="Must be a direct .gguf file URL from Hugging Face.",
                placeholder="https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/qwen3-4b.Q5_K_M.gguf",
                label="GGUF File URL",
            )
            token_input = gr.Textbox(
                info="Token with write access. Get it at https://huggingface.co/settings/tokens",
                type="password",
                label="Hugging Face Token",
            )
        with gr.Column():
            repo_input = gr.Textbox(
                info="Format: username/model-name",
                placeholder="your-username/qwen3-4b-fp8",
                label="New Repository ID",
            )
            private_input = gr.Checkbox(label="Make Repository Private", value=False)

    run_button = gr.Button("πŸš€ Convert & Upload", variant="primary")

    # Result area: status text plus an HTML card linking to the new repo.
    with gr.Row():
        status_box = gr.Markdown()
        link_box = gr.HTML()

    # First handler return value feeds the HTML card, second the status text.
    run_button.click(
        fn=process_and_upload,
        inputs=[url_input, token_input, repo_input, private_input],
        outputs=[link_box, status_box],
        show_progress=True,
    )

    gr.Examples(
        examples=[
            ["https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/qwen3-4b.Q5_K_M.gguf"]
        ],
        inputs=[url_input],
    )

demo.launch()