Spaces:

llaa33219
/

context-window-extender

Running on Zero

App Files Files Community

llaa33219 committed on Mar 8

Commit

2e2d23d

verified ·

1 Parent(s): adbe710

Upload 3 files

Browse files

Files changed (1) hide show

app.py +63 -11

app.py CHANGED Viewed

@@ -88,7 +88,7 @@ def load_model_with_extension(model_id, extension_method, new_context_length, ro
     return result
-@spaces.GPU(duration=120)
 def generate(model_id, extension_method, new_context_length, rope_type, rope_factor, prompt, max_new_tokens, temperature, top_p):
     if not model_id.strip():
         return "Error: Please enter a model ID"
@@ -142,15 +142,7 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
                 ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
             ], inputs=model_id)
-        with gr.Column(scale=1):
-            # Context multiplier selector
-            context_multiplier = gr.Dropdown(
-                choices=["2x", "5x", "10x", "20x", "50x", "100x"],
-                value="2x",
-                label="📈 Context Multiplier",
-                info="Expand context window by this factor"
-            )
     with gr.Row():
         with gr.Column():
             extension_method = gr.Radio(
@@ -174,6 +166,66 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
                 visible=True
             )
     # Show context info
     with gr.Row():
         base_ctx = gr.Number(value=32768, label="Base Context", interactive=False)
@@ -226,7 +278,7 @@ with gr.Blocks(title="Context Window Extender - Chat") as demo:
     gr.Markdown("### 💬 Chat with the Model")
     # Conversational chat interface
-    @spaces.GPU(duration=120)
     def respond(
         message: str,
         history: list,

     return result
+@spaces.GPU(duration=300)
 def generate(model_id, extension_method, new_context_length, rope_type, rope_factor, prompt, max_new_tokens, temperature, top_p):
     if not model_id.strip():
         return "Error: Please enter a model ID"
                 ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
             ], inputs=model_id)
+    # Define these first so they can be used in buttons
     with gr.Row():
         with gr.Column():
             extension_method = gr.Radio(
                 visible=True
             )
+    # Define context_multiplier BEFORE it's used in buttons
+    context_multiplier = gr.Dropdown(
+        choices=["2x", "5x", "10x", "20x", "50x", "100x"],
+        value="2x",
+        label="📈 Context Multiplier",
+        info="Expand context window by this factor"
+    )
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Model selection
+            model_id = gr.Textbox(
+                value=DEFAULT_MODEL,
+                label="🤗 Model ID",
+                placeholder="Enter Hugging Face model ID..."
+            )
+            gr.Examples([
+                ["Qwen/Qwen3-30B-A3B-Thinking-2507"],
+                ["Qwen/Qwen2.5-1.5B-Instruct"],
+                ["Qwen/Qwen2.5-3B-Instruct"],
+                ["microsoft/phi-4-mini-instruct"],
+                ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],
+            ], inputs=model_id)
+            with gr.Row():
+                download_btn = gr.Button("📥 Download Model", variant="secondary")
+                load_btn = gr.Button("🚀 Load Model", variant="primary")
+            model_status = gr.Textbox(label="Model Status", interactive=False)
+            # Download model function (runs outside ZeroGPU)
+            def download_model(mid):
+                if not mid.strip():
+                    return "Error: Please enter a model ID"
+                try:
+                    # Download tokenizer and config first
+                    from transformers import AutoTokenizer, AutoConfig
+                    tokenizer = AutoTokenizer.from_pretrained(mid, trust_remote_code=True)
+                    config = AutoConfig.from_pretrained(mid, trust_remote_code=True)
+                    return f"✅ Model downloaded: {mid}"
+                except Exception as e:
+                    return f"❌ Download failed: {str(e)}"
+            download_btn.click(download_model, inputs=[model_id], outputs=[model_status])
+            # Load model function (runs inside ZeroGPU)
+            @spaces.GPU(duration=300)
+            def load_model(mid, ext_method, ctx_mult, rt, rf):
+                if not mid.strip():
+                    return "Error: Please enter a model ID"
+                try:
+                    base_ctx = 32768
+                    new_ctx = calculate_context_length(base_ctx, ctx_mult)
+                    model_data = load_model_with_extension(mid, ext_method, new_ctx, rt, rf)
+                    return f"✅ Model loaded: {mid} (context: {new_ctx})"
+                except Exception as e:
+                    return f"❌ Load failed: {str(e)}"
+            load_btn.click(load_model, inputs=[model_id, extension_method, context_multiplier, rope_type, rope_factor], outputs=[model_status])
     # Show context info
     with gr.Row():
         base_ctx = gr.Number(value=32768, label="Base Context", interactive=False)
     gr.Markdown("### 💬 Chat with the Model")
     # Conversational chat interface
+    @spaces.GPU(duration=300)
     def respond(
         message: str,
         history: list,