Vishwas1 committed on
Commit
17f949e
·
verified ·
1 Parent(s): e4e0279

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from huggingface_hub import HfApi
6
+
7
+ def prune_to_single_layer_and_push(base_model_id: str, new_repo_id: str, make_private: bool):
8
+ hf_token = os.getenv("HF_TOKEN")
9
+ if not hf_token:
10
+ return "❌ Set HF_TOKEN secret in Space Settings first!"
11
+
12
+ if not base_model_id or not new_repo_id:
13
+ return "❌ Fill model and repo name"
14
+
15
+ try:
16
+ # Load safely on CPU (free Space friendly)
17
+ model = AutoModelForCausalLM.from_pretrained(
18
+ base_model_id,
19
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
20
+ device_map="cpu",
21
+ trust_remote_code=True,
22
+ low_cpu_mem_usage=True
23
+ )
24
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
25
+
26
+ # === SINGLE LAYER MAGIC ===
27
+ if hasattr(model, "model") and hasattr(model.model, "layers"):
28
+ model.model.layers = torch.nn.ModuleList([model.model.layers[-1]]) # keep LAST layer only
29
+ model.config.num_hidden_layers = 1
30
+ else:
31
+ return "❌ Model type not supported (needs standard .model.layers)"
32
+
33
+ # Push
34
+ model.push_to_hub(new_repo_id, private=make_private, token=hf_token, safe_serialization=True)
35
+ tokenizer.push_to_hub(new_repo_id, private=make_private, token=hf_token)
36
+
37
+ link = f"https://huggingface.co/{new_repo_id}"
38
+ return f"""✅ SUCCESS!
39
+
40
+ Single-layer model is live at:
41
+ **{link}**
42
+
43
+ Size: ~0.3–0.8 GB → runs at 40–100+ tokens/s on any CPU!
44
+
45
+ Load it anywhere:
46
+ ```python
47
+ from transformers import AutoModelForCausalLM
48
+ model = AutoModelForCausalLM.from_pretrained("{new_repo_id}")