Vishwas1 committed on
Commit
8449b1c
·
verified ·
1 Parent(s): 80b56c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -14,7 +14,6 @@ def prune_to_single_layer(model_id: str):
14
  status_lines = []
15
  status_lines.append(f"Loading base model: {model_id}")
16
 
17
- # Initialize as None to prevent UnboundLocalError in the finally block
18
  model = None
19
  tokenizer = None
20
 
@@ -65,8 +64,8 @@ def prune_to_single_layer(model_id: str):
65
  outputs = model.generate(
66
  **inputs.to(model.device),
67
  max_new_tokens=40,
68
- do_sample=False, # Temperature removed to prevent conflict
69
- pad_token_id=tokenizer.eos_token_id # Prevents warnings/crashes on Llama
70
  )
71
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
72
  status_lines.append("\nQuick generation test (should be at least semi-coherent):")
@@ -86,7 +85,6 @@ def prune_to_single_layer(model_id: str):
86
  return "\n".join(status_lines) + f"\n\n❌ Failed: {err_msg}"
87
 
88
  finally:
89
- # Safely try to free memory even on failure
90
  if model is not None:
91
  del model
92
  if tokenizer is not None:
@@ -98,7 +96,8 @@ def prune_to_single_layer(model_id: str):
98
  # ────────────────────────────────────────────────
99
  CSS = """.gradio-container { max-width: 780px !important; }"""
100
 
101
- with gr.Blocks(title="Minimal Single-Layer Pruner", css=CSS, theme=gr.themes.Default()) as demo:
 
102
  gr.Markdown("""
103
  # Single-Layer Pruner (test version)
104
  Loads a small model → keeps **only the last layer** → shows result + quick generation test.
@@ -117,11 +116,11 @@ with gr.Blocks(title="Minimal Single-Layer Pruner", css=CSS, theme=gr.themes.Def
117
  value="Qwen/Qwen2.5-0.5B-Instruct"
118
  )
119
 
 
120
  status = gr.Textbox(
121
  label="Pruning log",
122
  lines=18,
123
- interactive=False,
124
- show_copy_button=True
125
  )
126
 
127
  btn = gr.Button("Prune to 1 layer β†’ Test", variant="primary")
@@ -140,4 +139,5 @@ with gr.Blocks(title="Minimal Single-Layer Pruner", css=CSS, theme=gr.themes.Def
140
  """)
141
 
142
  if __name__ == "__main__":
143
- demo.launch()
 
 
14
  status_lines = []
15
  status_lines.append(f"Loading base model: {model_id}")
16
 
 
17
  model = None
18
  tokenizer = None
19
 
 
64
  outputs = model.generate(
65
  **inputs.to(model.device),
66
  max_new_tokens=40,
67
+ do_sample=False,
68
+ pad_token_id=tokenizer.eos_token_id
69
  )
70
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
71
  status_lines.append("\nQuick generation test (should be at least semi-coherent):")
 
85
  return "\n".join(status_lines) + f"\n\n❌ Failed: {err_msg}"
86
 
87
  finally:
 
88
  if model is not None:
89
  del model
90
  if tokenizer is not None:
 
96
  # ────────────────────────────────────────────────
97
  CSS = """.gradio-container { max-width: 780px !important; }"""
98
 
99
+ # FIX 1: Removed css and theme from Blocks()
100
+ with gr.Blocks(title="Minimal Single-Layer Pruner") as demo:
101
  gr.Markdown("""
102
  # Single-Layer Pruner (test version)
103
  Loads a small model → keeps **only the last layer** → shows result + quick generation test.
 
116
  value="Qwen/Qwen2.5-0.5B-Instruct"
117
  )
118
 
119
+ # FIX 2: Removed show_copy_button=True
120
  status = gr.Textbox(
121
  label="Pruning log",
122
  lines=18,
123
+ interactive=False
 
124
  )
125
 
126
  btn = gr.Button("Prune to 1 layer β†’ Test", variant="primary")
 
139
  """)
140
 
141
  if __name__ == "__main__":
142
+ # FIX 3: Moved css and theme into launch()
143
+ demo.launch(css=CSS, theme=gr.themes.Default())