Spaces:

silveroxides
/

Quick-Quantize

Running on Zero

App Files Community

silveroxides committed on 1 day ago

Commit

13e5408

verified ·

1 Parent(s): de0e007

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +53 -35

app.py CHANGED Viewed

@@ -17,20 +17,17 @@ def run_quantization(
     layer_filter,
     exclude_layers_regex,
     full_precision_matrix_mult,
-    oauth_token: gr.OAuthToken | None = None
 ):
-    if not oauth_token:
-        yield "Please sign in with Hugging Face using the login button in the top right.", ""
-        return
     if not all([source_repo, source_file, target_repo, target_filename_base]):
-        yield "Please fill in all repository and filename fields.", ""
         return
     try:
         # Download
-        yield f"Downloading {source_file} from {source_repo}...", ""
-        local_input_path = hf_hub_download(repo_id=source_repo, filename=source_file, token=oauth_token.token)
         # Setup quant arguments based on UI
         quant_args = {
@@ -74,40 +71,45 @@ def run_quantization(
         if exclude_layers_regex:
             quant_args["exclude-layers"] = exclude_layers_regex
-        yield f"Quantizing to {output_filename}...\nThis may take a few minutes.", ""
-        # We need to print output or capture it, convert_to_quant probably prints. We'll just run it.
         do_quantize(quant_args)
-        yield f"Uploading {output_filename} to {target_repo}...", ""
-        # Upload
-        api = HfApi(token=oauth_token.token)
-        commit_info = api.upload_file(
-            path_or_fileobj=output_path,
-            path_in_repo=output_filename,
-            repo_id=target_repo,
-            commit_message=f"Add {output_filename} quantized model",
-            create_pr=True
-        )
-        pr_url = commit_info.pr_url if hasattr(commit_info, 'pr_url') else f"https://huggingface.co/{target_repo}"
-        yield f"Complete! Uploaded to {target_repo}", f"<a href='{pr_url}' target='_blank' style='color: #3b82f6; text-decoration: underline; font-weight: bold;'>Click here to view the Pull Request</a>"
     except Exception as e:
-        yield f"Error: {str(e)}", ""
 # Build UI
-with gr.Blocks(css_paths=["assets/responsive.css"], theme=gr.themes.Soft()) as demo:
     with gr.Row(elem_id="topbar"):
         gr.Markdown("## 🤗 Model Quantizer", elem_classes=["brand"])
-        gr.LoginButton()
     with gr.Row(elem_id="main-row", equal_height=True):
         with gr.Column(scale=4, min_width=280, elem_id="input-panel"):
             gr.Markdown("### Input Model")
             source_repo = gr.Textbox(label="Source HF Repo (e.g. author/model)")
             source_file = gr.Textbox(label="Source Filename (e.g. model.safetensors)")
@@ -131,22 +133,38 @@ with gr.Blocks(css_paths=["assets/responsive.css"], theme=gr.themes.Soft()) as d
             exclude_layers = gr.Textbox(label="Exclude Layers Regex (Optional)", placeholder="(substring_1|substring_2)")
-            run_btn = gr.Button("Quantize & Upload PR", variant="primary", size="lg")
             gr.Markdown("ℹ️ *For more advanced quantization modes, install and use [convert-to-quant](https://pypi.org/project/convert-to-quant/) locally.*", elem_classes=["text-sm", "mt-4"])
         with gr.Column(scale=8, elem_id="output-panel"):
             status_text = gr.Textbox(label="Status Log", lines=10, interactive=False)
-            pr_link = gr.HTML("")
     run_btn.click(
-        fn=run_quantization,
         inputs=[
             source_repo, source_file, target_repo, target_file_base,
-            quant_format, layer_filter, exclude_layers, full_precision
         ],
-        outputs=[status_text, pr_link]
     )
 if __name__ == "__main__":
-    demo.launch()

     layer_filter,
     exclude_layers_regex,
     full_precision_matrix_mult,
+    hf_token
 ):
     if not all([source_repo, source_file, target_repo, target_filename_base]):
+        yield "Please fill in all repository and filename fields.", gr.update(visible=False)
         return
     try:
         # Download
+        yield f"Downloading {source_file} from {source_repo}...", gr.update(visible=False)
+        # We use token for download if provided, otherwise anonymous
+        local_input_path = hf_hub_download(repo_id=source_repo, filename=source_file, token=hf_token if hf_token else None)
         # Setup quant arguments based on UI
         quant_args = {
         if exclude_layers_regex:
             quant_args["exclude-layers"] = exclude_layers_regex
+        yield f"Quantizing to {output_filename}...\nThis may take a few minutes.", gr.update(visible=False)
         do_quantize(quant_args)
+        if hf_token:
+            yield f"Uploading {output_filename} to {target_repo}...", gr.update(visible=False)
+            # Upload
+            api = HfApi(token=hf_token)
+            commit_info = api.upload_file(
+                path_or_fileobj=output_path,
+                path_in_repo=output_filename,
+                repo_id=target_repo,
+                commit_message=f"Add {output_filename} quantized model",
+                create_pr=True
+            )
+            pr_url = commit_info.pr_url if hasattr(commit_info, 'pr_url') else f"https://huggingface.co/{target_repo}"
+            yield f"Complete! Uploaded to {target_repo}", gr.update(value=f"<a href='{pr_url}' target='_blank' style='color: #3b82f6; text-decoration: underline; font-weight: bold;'>Click here to view the Pull Request</a>", visible=True)
+        else:
+            yield f"Complete! Ready for download below.", gr.update(value=output_path, visible=True)
     except Exception as e:
+        yield f"Error: {str(e)}", gr.update(visible=False)
 # Build UI
+with gr.Blocks() as demo:
     with gr.Row(elem_id="topbar"):
         gr.Markdown("## 🤗 Model Quantizer", elem_classes=["brand"])
     with gr.Row(elem_id="main-row", equal_height=True):
         with gr.Column(scale=4, min_width=280, elem_id="input-panel"):
+            gr.Markdown("### Authentication (Optional)")
+            hf_token = gr.Textbox(label="HF Token (WRITE)", type="password", placeholder="Paste your WRITE token for PR upload")
+            gr.Markdown("*If no token is provided, the quantized model will be available for direct download instead of uploading as a PR.*", elem_classes=["text-sm"])
             gr.Markdown("### Input Model")
             source_repo = gr.Textbox(label="Source HF Repo (e.g. author/model)")
             source_file = gr.Textbox(label="Source Filename (e.g. model.safetensors)")
             exclude_layers = gr.Textbox(label="Exclude Layers Regex (Optional)", placeholder="(substring_1|substring_2)")
+            run_btn = gr.Button("Quantize Model", variant="primary", size="lg")
             gr.Markdown("ℹ️ *For more advanced quantization modes, install and use [convert-to-quant](https://pypi.org/project/convert-to-quant/) locally.*", elem_classes=["text-sm", "mt-4"])
         with gr.Column(scale=8, elem_id="output-panel"):
             status_text = gr.Textbox(label="Status Log", lines=10, interactive=False)
+            output_link = gr.HTML(visible=False)
+            output_file = gr.File(label="Download Quantized Model", visible=False)
+    def route_output(*args):
+        # We handle routing the result to HTML vs File based on whether token is provided.
+        # However, it's easier to just yield both appropriately.
+        for status, result in run_quantization(*args):
+            # If the result is a string with <a href=, it's HTML. Otherwise it's a file path.
+            if result and isinstance(result, dict) and "value" in result:
+                val = result["value"]
+                if isinstance(val, str) and val.startswith("<a"):
+                    yield status, result, gr.update(visible=False)
+                else:
+                    yield status, gr.update(visible=False), result
+            else:
+                yield status, gr.update(visible=False), gr.update(visible=False)
     run_btn.click(
+        fn=route_output,
         inputs=[
             source_repo, source_file, target_repo, target_file_base,
+            quant_format, layer_filter, exclude_layers, full_precision,
+            hf_token
         ],
+        outputs=[status_text, output_link, output_file]
     )
 if __name__ == "__main__":
+    demo.launch(css_paths=["assets/responsive.css"], theme=gr.themes.Soft())