Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed on Nov 8, 2025

Commit

a217627

1 Parent(s): 9a4d6d3

Fix: Remove context manager usage for spaces.GPU decorator

- spaces.GPU is a decorator, not a context manager
- Use decorator with maximum duration (1800s) to allow user flexibility
- Keep gpu_duration parameter for user awareness/UI
- Fix the `AttributeError: __enter__` raised when spaces.GPU was used in a `with` statement

Files changed (1) hide show

app.py +13 -10

app.py CHANGED Viewed

@@ -291,9 +291,7 @@ def _generate_router_plan_streaming_internal(
         yield "", {}, f"❌ Invalid model choice: {model_choice}. Available: {list(MODELS.keys())}", ""
         return
-    # Use GPU context manager with user-specified duration
-    with spaces.GPU(duration=gpu_duration):
-        try:
             prompt = build_router_prompt(
                 user_task=user_task,
                 context=context,
@@ -382,14 +380,14 @@ def _generate_router_plan_streaming_internal(
                     parsed_plan = {}
                     validation_msg = f"❌ JSON parsing failed: {exc}"
-            yield completion, parsed_plan, validation_msg, prompt
-        except Exception as exc:
-            error_msg = f"❌ Generation failed: {str(exc)}"
-            yield "", {}, error_msg, ""
-@spaces.GPU(duration=600)  # Default wrapper for backward compatibility
 def generate_router_plan_streaming(
     user_task: str,
     context: str,
@@ -403,7 +401,12 @@ def generate_router_plan_streaming(
     top_p: float,
     gpu_duration: int = 600,
 ):
-    """Wrapper function that calls internal generator with GPU duration."""
     yield from _generate_router_plan_streaming_internal(
         user_task, context, acceptance, extra_guidance,
         difficulty, tags, model_choice, max_new_tokens,

         yield "", {}, f"❌ Invalid model choice: {model_choice}. Available: {list(MODELS.keys())}", ""
         return
+    try:
             prompt = build_router_prompt(
                 user_task=user_task,
                 context=context,
                     parsed_plan = {}
                     validation_msg = f"❌ JSON parsing failed: {exc}"
+        yield completion, parsed_plan, validation_msg, prompt
+    except Exception as exc:
+        error_msg = f"❌ Generation failed: {str(exc)}"
+        yield "", {}, error_msg, ""
+@spaces.GPU(duration=1800)  # Use maximum duration to allow user flexibility
 def generate_router_plan_streaming(
     user_task: str,
     context: str,
     top_p: float,
     gpu_duration: int = 600,
 ):
+    """
+    Generate router plan with streaming output.
+    Note: gpu_duration parameter is for user awareness. The actual GPU allocation
+    uses the decorator's duration (1800s max) to allow flexibility.
+    """
     yield from _generate_router_plan_streaming_internal(
         user_task, context, acceptance, extra_guidance,
         difficulty, tags, model_choice, max_new_tokens,