llaa33219 committed on
Commit
2e2d23d
·
verified ·
1 Parent(s): adbe710

Upload 3 files

Browse files
Files changed (1) hide show
  1. app.py +63 -11
app.py CHANGED
@@ -88,7 +88,7 @@ def load_model_with_extension(model_id, extension_method, new_context_length, ro
88
  return result
89
 
90
 
91
- @spaces.GPU(duration=120)
92
  def generate(model_id, extension_method, new_context_length, rope_type, rope_factor, prompt, max_new_tokens, temperature, top_p):
93
  if not model_id.strip():
94
  return "Error: Please enter a model ID"
@@ -142,15 +142,7 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
142
  ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
143
  ], inputs=model_id)
144
 
145
- with gr.Column(scale=1):
146
- # Context multiplier selector
147
- context_multiplier = gr.Dropdown(
148
- choices=["2x", "5x", "10x", "20x", "50x", "100x"],
149
- value="2x",
150
- label="πŸ“ˆ Context Multiplier",
151
- info="Expand context window by this factor"
152
- )
153
-
154
  with gr.Row():
155
  with gr.Column():
156
  extension_method = gr.Radio(
@@ -174,6 +166,66 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
174
  visible=True
175
  )
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  # Show context info
178
  with gr.Row():
179
  base_ctx = gr.Number(value=32768, label="Base Context", interactive=False)
@@ -226,7 +278,7 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
226
  gr.Markdown("### πŸ’¬ Chat with the Model")
227
 
228
  # Conversational chat interface
229
- @spaces.GPU(duration=120)
230
  def respond(
231
  message: str,
232
  history: list,
 
88
  return result
89
 
90
 
91
+ @spaces.GPU(duration=300)
92
  def generate(model_id, extension_method, new_context_length, rope_type, rope_factor, prompt, max_new_tokens, temperature, top_p):
93
  if not model_id.strip():
94
  return "Error: Please enter a model ID"
 
142
  ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
143
  ], inputs=model_id)
144
 
145
+ # Define these first so they can be used in buttons
 
 
 
 
 
 
 
 
146
  with gr.Row():
147
  with gr.Column():
148
  extension_method = gr.Radio(
 
166
  visible=True
167
  )
168
 
169
+ # Define context_multiplier BEFORE it's used in buttons
170
+ context_multiplier = gr.Dropdown(
171
+ choices=["2x", "5x", "10x", "20x", "50x", "100x"],
172
+ value="2x",
173
+ label="πŸ“ˆ Context Multiplier",
174
+ info="Expand context window by this factor"
175
+ )
176
+
177
+ with gr.Row():
178
+ with gr.Column(scale=2):
179
+ # Model selection
180
+ model_id = gr.Textbox(
181
+ value=DEFAULT_MODEL,
182
+ label="πŸ€— Model ID",
183
+ placeholder="Enter Hugging Face model ID..."
184
+ )
185
+ gr.Examples([
186
+ ["Qwen/Qwen3-30B-A3B-Thinking-2507"],
187
+ ["Qwen/Qwen2.5-1.5B-Instruct"],
188
+ ["Qwen/Qwen2.5-3B-Instruct"],
189
+ ["microsoft/phi-4-mini-instruct"],
190
+ ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
191
+ ], inputs=model_id)
192
+
193
+ with gr.Row():
194
+ download_btn = gr.Button("πŸ“₯ Download Model", variant="secondary")
195
+ load_btn = gr.Button("πŸš€ Load Model", variant="primary")
196
+
197
+ model_status = gr.Textbox(label="Model Status", interactive=False)
198
+
199
+ # Download model function (runs outside ZeroGPU)
200
+ def download_model(mid):
201
+ if not mid.strip():
202
+ return "Error: Please enter a model ID"
203
+ try:
204
+ # Download tokenizer and config first
205
+ from transformers import AutoTokenizer, AutoConfig
206
+ tokenizer = AutoTokenizer.from_pretrained(mid, trust_remote_code=True)
207
+ config = AutoConfig.from_pretrained(mid, trust_remote_code=True)
208
+ return f"βœ… Model downloaded: {mid}"
209
+ except Exception as e:
210
+ return f"❌ Download failed: {str(e)}"
211
+
212
+ download_btn.click(download_model, inputs=[model_id], outputs=[model_status])
213
+
214
+ # Load model function (runs inside ZeroGPU)
215
+ @spaces.GPU(duration=300)
216
+ def load_model(mid, ext_method, ctx_mult, rt, rf):
217
+ if not mid.strip():
218
+ return "Error: Please enter a model ID"
219
+ try:
220
+ base_ctx = 32768
221
+ new_ctx = calculate_context_length(base_ctx, ctx_mult)
222
+ model_data = load_model_with_extension(mid, ext_method, new_ctx, rt, rf)
223
+ return f"βœ… Model loaded: {mid} (context: {new_ctx})"
224
+ except Exception as e:
225
+ return f"❌ Load failed: {str(e)}"
226
+
227
+ load_btn.click(load_model, inputs=[model_id, extension_method, context_multiplier, rope_type, rope_factor], outputs=[model_status])
228
+
229
  # Show context info
230
  with gr.Row():
231
  base_ctx = gr.Number(value=32768, label="Base Context", interactive=False)
 
278
  gr.Markdown("### πŸ’¬ Chat with the Model")
279
 
280
  # Conversational chat interface
281
+ @spaces.GPU(duration=300)
282
  def respond(
283
  message: str,
284
  history: list,