rootlocalghost committed on
Commit
eb37dfe
·
verified ·
1 Parent(s): a8a35df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import torch
4
+ import shutil
5
+ import uuid
6
+ import gradio as gr
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ from safetensors.torch import load_file, save_file
9
+
10
def convert_and_upload(token, source_repo, target_repo, precision, target_components):
    """Copy a source HF repo into a target repo, casting selected components to a lower precision.

    Streams human-readable status strings (it is a generator) so Gradio can
    live-update the log textbox.

    Args:
        token: Hugging Face token with write access to ``target_repo``.
        source_repo: Repo id to read files from (e.g. "meituan-longcat/LongCat-Image").
        target_repo: Repo id to create/write (e.g. "user/LongCat-Image-FP8").
        precision: One of "FP8", "FP16", "BF16" (anything else copies files unchanged).
        target_components: Top-level folder names whose ``.safetensors`` files
            should be cast; everything else is copied as-is.

    Yields:
        Progress / error message strings.
    """
    if not token:
        yield "❌ Error: Please provide a valid Hugging Face Write Token."
        return
    if not target_repo.strip() or "your-username" in target_repo:
        yield "❌ Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
        return
    if not target_components:
        yield "❌ Error: Please select at least one component to quantize."
        return

    # Map precision string to PyTorch dtype; unknown values mean "copy only".
    dtype_map = {
        "FP8": torch.float8_e4m3fn,
        "FP16": torch.float16,
        "BF16": torch.bfloat16,
    }
    target_dtype = dtype_map.get(precision)

    api = HfApi(token=token)
    yield f"🔄 Connecting to Hugging Face and verifying target repo: {target_repo}..."

    try:
        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
    except Exception as e:
        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield f"📋 Fetching file list from {source_repo}..."
    try:
        files = api.list_repo_files(source_repo)
    except Exception as e:
        yield f"❌ Error fetching files: {str(e)}"
        return

    # Unique cache directory per run so concurrent runs don't collide and the
    # aggressive per-file cleanup below cannot delete another run's downloads.
    cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"

    success_count = 0
    error_count = 0

    for file in files:
        # Root-level .safetensors are the monolithic single-file checkpoints;
        # skip them (and best-effort remove any stale copy in the target repo).
        is_root_safetensor = "/" not in file and file.endswith(".safetensors")

        if is_root_safetensor:
            yield f"🗑️ Auto-skipping massive root model: {file}..."
            try:
                api.delete_file(path_in_repo=file, repo_id=target_repo, token=token, commit_message=f"Auto-deleted root file {file}")
                yield f"✅ Ensured {file} is removed from target repository."
            except Exception:
                # Best-effort: the file may simply not exist in the target repo.
                pass
            continue

        yield f"⏳ Processing {file}..."

        converted_path = None  # set only when we actually write a quantized copy
        try:
            os.makedirs(cache_dir, exist_ok=True)

            # Download with the token so gated/large files don't fail silently.
            local_path = hf_hub_download(
                repo_id=source_repo,
                filename=file,
                cache_dir=cache_dir,
                token=token
            )

            # FIX: anchor the match at the start of the path. The original
            # substring test (f"{comp}/" in file) would let component "vae"
            # wrongly match a path such as "extra_vae/model.safetensors".
            in_target_component = any(file.startswith(f"{comp}/") for comp in target_components)

            if file.endswith(".safetensors") and in_target_component:
                yield f"🧠 Quantizing {file} to {precision} (This will take a few minutes)..."

                tensors = load_file(local_path)

                if target_dtype:
                    # Cast only floating-point tensors; ints/bools keep their dtype.
                    for k in list(tensors.keys()):
                        if tensors[k].is_floating_point():
                            tensors[k] = tensors[k].to(target_dtype)

                # FIX: unique temp filename — the fixed "converted.safetensors"
                # collided between concurrent runs and leaked on upload failure.
                converted_path = f"converted_{uuid.uuid4().hex[:8]}.safetensors"
                save_file(tensors, converted_path)

                # Aggressive memory flush (crucial for the ~11.68 GB transformer file).
                del tensors
                gc.collect()

                yield f"☁️ Uploading {precision} version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Upload {precision} quantized {file}"
                )
            else:
                yield f"☁️ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Copy {file} from original repo"
                )

            success_count += 1

        except Exception as e:
            error_count += 1
            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
        finally:
            # FIX: cleanup now runs even when processing raised. The original
            # performed the rmtree inside the try, so a failed file left its
            # download on disk — exactly the Space-killing bloat this tool is
            # meant to prevent.
            if converted_path and os.path.exists(converted_path):
                os.remove(converted_path)
            if os.path.exists(cache_dir):
                shutil.rmtree(cache_dir)
            gc.collect()

    # Final cleanup sweep (e.g. if the repo had no processable files).
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."
137
def update_target_repo(username, source, precision):
    """Suggest a target repo id: "<username>/<source-model-name>-<precision>".

    Falls back to the "your-username" placeholder when no username was typed,
    which the conversion routine rejects as invalid.
    """
    owner = username.strip() or "your-username"
    # rpartition yields the last path segment, or the whole string when
    # there is no "/" — exactly matching split("/")[-1] with the guard.
    model_name = source.rpartition("/")[2]
    return f"{owner}/{model_name}-{precision}"
143
# Build the Gradio UI. Everything is declared inside one Blocks context so the
# event wiring at the bottom can reference the components created above.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 LongCat Dedicated Quantizer")
    gr.Markdown(
        "Convert the **LongCat-Image** family of models to lower precisions (FP8, FP16, BF16).\n\n"
        "**Memory & Disk Protection:** This tool is specifically tuned to survive the massive 11.68 GB single-file `transformer` "
        "shard. It aggressively purges Hugging Face's download cache and PyTorch's RAM buffer after every single step to keep the free Space alive."
    )

    with gr.Row():
        # Left column: credentials, source selection, and conversion options.
        with gr.Column(scale=2):
            hf_token = gr.Textbox(
                label="Hugging Face Token (Write Access Required)",
                type="password",
                placeholder="hf_..."
            )
            hf_username = gr.Textbox(
                label="Your Hugging Face Username",
                placeholder="e.g., rootlocalghost"
            )
            # Locked down to LongCat models (allow_custom_value=False below).
            source_repo = gr.Dropdown(
                choices=[
                    "meituan-longcat/LongCat-Image-Edit-Turbo",
                    "meituan-longcat/LongCat-Image-Edit",
                    "meituan-longcat/LongCat-Image"
                ],
                value="meituan-longcat/LongCat-Image-Edit-Turbo",
                label="Source Repository",
                allow_custom_value=False
            )

            # Folder names must match top-level directories in the source repo;
            # convert_and_upload only quantizes .safetensors under these.
            target_components = gr.CheckboxGroup(
                choices=["text_encoder", "transformer", "vae"],
                value=["text_encoder", "transformer"],
                label="Components to Quantize",
                info="Select which folders should be cast to the new precision. Unselected folders will be copied as-is."
            )

            precision = gr.Dropdown(
                choices=["FP8", "FP16", "BF16"],
                value="FP8",
                label="Target Precision"
            )
            # Auto-filled by update_target_repo but kept editable (interactive=True)
            # so the user can override the suggested repo id.
            target_repo = gr.Textbox(
                label="Target Repository (Auto-generated)",
                value="your-username/LongCat-Image-Edit-Turbo-FP8",
                interactive=True
            )
            start_btn = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: streaming log output fed by the generator's yields.
        with gr.Column(scale=3):
            output_log = gr.Textbox(
                label="Operation Logs",
                lines=20,
                interactive=False,
                max_lines=25
            )

    # Any change to username / source repo / precision regenerates the
    # suggested target repo name.
    inputs_to_watch = [hf_username, source_repo, precision]
    for inp in inputs_to_watch:
        inp.change(
            fn=update_target_repo,
            inputs=inputs_to_watch,
            outputs=[target_repo]
        )

    # convert_and_upload is a generator, so each yield streams into output_log.
    start_btn.click(
        fn=convert_and_upload,
        inputs=[hf_token, source_repo, target_repo, precision, target_components],
        outputs=[output_log]
    )
216
# Launch the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()