Spaces:

rootlocalghost
/

Z-Image-Model-Quantizer

Running

App Files Files Community

rootlocalghost committed 8 days ago

Commit

98c9be7

verified ·

1 Parent(s): 26a6e34

Create app.py

Browse files

Files changed (1) hide show

app.py +142 -0

app.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import os
+import gc
+import torch
+import shutil
+import gradio as gr
+from huggingface_hub import HfApi, hf_hub_download
+from safetensors.torch import load_file, save_file
+SOURCE_REPO = "Tongyi-MAI/Z-Image-Turbo"  # Hub repo whose files are listed and downloaded
+TARGET_REPO = "rootlocalghost/Z-Image-Turbo-FP8"  # Hub repo that is created if missing and receives every upload
+TEMP_DIR = "temp_processing_dir"  # local scratch directory for downloads and the converted file; deleted after the run
+def convert_and_upload(token):
+    if not token:
+        yield "❌ Error: Please provide a valid Hugging Face Write Token."
+        return
+    api = HfApi(token=token)
+    yield f"🔄 Connecting to Hugging Face and verifying target repo: {TARGET_REPO}..."
+    # Ensure the target repo exists, create it if it doesn't
+    try:
+        api.create_repo(repo_id=TARGET_REPO, exist_ok=True, private=False)
+    except Exception as e:
+        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
+        return
+    yield "📋 Fetching file list from the source repository..."
+    try:
+        files = api.list_repo_files(SOURCE_REPO)
+    except Exception as e:
+        yield f"❌ Error fetching files: {str(e)}"
+        return
+    # Create a temporary directory for safe local processing
+    os.makedirs(TEMP_DIR, exist_ok=True)
+    for file in files:
+        yield f"⏳ Processing {file}..."
+        try:
+            # Download file locally without using the central symlink cache
+            # This is critical to prevent the 50GB Space disk from filling up
+            local_path = hf_hub_download(
+                repo_id=SOURCE_REPO,
+                filename=file,
+                local_dir=TEMP_DIR,
+                local_dir_use_symlinks=False
+            )
+            # Check if it's a safetensor file inside the target directories
+            if file.endswith(".safetensors") and ("text_encoder/" in file or "transformer/" in file):
+                yield f"🧠 Quantizing {file} to FP8 (This may take a minute)..."
+                # Load tensors into RAM
+                tensors = load_file(local_path)
+                # Cast all floating point tensors to FP8
+                keys = list(tensors.keys())
+                for k in keys:
+                    if tensors[k].is_floating_point():
+                        tensors[k] = tensors[k].to(torch.float8_e4m3fn)
+                # Save the quantized tensors to a new temp file
+                converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
+                save_file(tensors, converted_path)
+                # Wipe the tensors from RAM immediately to stay under the 16GB limit
+                del tensors
+                gc.collect()
+                yield f"☁️ Uploading FP8 version of {file}..."
+                api.upload_file(
+                    path_or_fileobj=converted_path,
+                    path_in_repo=file,
+                    repo_id=TARGET_REPO,
+                    commit_message=f"Upload FP8 quantized {file}"
+                )
+                # Clean up the converted file
+                os.remove(converted_path)
+            else:
+                yield f"☁️ Copying {file} as-is..."
+                api.upload_file(
+                    path_or_fileobj=local_path,
+                    path_in_repo=file,
+                    repo_id=TARGET_REPO,
+                    commit_message=f"Copy {file} from original repo"
+                )
+            # Delete the downloaded original file to free up disk space
+            if os.path.exists(local_path):
+                os.remove(local_path)
+            # Final sweep of memory before the next file
+            gc.collect()
+        except Exception as e:
+            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
+    # Clean up the processing directory
+    if os.path.exists(TEMP_DIR):
+        shutil.rmtree(TEMP_DIR)
+    yield "✅ All files processed and successfully uploaded to your repository!"
+# Build the Gradio Web Interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Z-Image-Turbo FP8 Quantizer & Uploader")
+    gr.Markdown(
+        f"This tool sequentially downloads files from `{SOURCE_REPO}`, quantizes the **text_encoder** and **transformer** "
+        f"`.safetensors` files to FP8 (`float8_e4m3fn`), and uploads everything to `{TARGET_REPO}`.\n\n"
+        "**Note:** Because we are using a free Space (2 vCPUs, 16GB RAM), this script is designed to process one file at a time "
+        "and aggressively clear memory/disk caches. It will take some time, but it won't crash."
+    )
+    with gr.Row():
+        with gr.Column(scale=2):
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (Needs Write Access)",
+                type="password",
+                placeholder="hf_..."
+            )
+            start_btn = gr.Button("Start Quantization & Upload", variant="primary")
+        with gr.Column(scale=3):
+            output_log = gr.Textbox(
+                label="Operation Logs",
+                lines=15,
+                interactive=False,
+                max_lines=20
+            )
+    start_btn.click(
+        fn=convert_and_upload,
+        inputs=[hf_token],
+        outputs=[output_log]
+    )
+if __name__ == "__main__":
+    demo.launch()