Mbanksbey committed on
Commit
082db39
·
verified ·
1 Parent(s): 5db4c96

Refactor for ZeroGPU: lazy loading, @spaces.GPU decorator

Browse files
Files changed (1) hide show
  1. app.py +54 -19
app.py CHANGED
@@ -2,28 +2,48 @@ import gradio as gr
2
  import os
3
  import json
4
  from pathlib import Path
 
5
 
6
- # TEQUMSA Space Kernel
7
- try:
8
- from tequmsa_space_kernel import TEQUMSAInferenceNode
9
- INFERENCE_NODE = TEQUMSAInferenceNode()
10
- except ImportError:
11
- INFERENCE_NODE = None
12
 
13
- # Inference Router
14
- try:
15
- from inference_router import InferenceRouter
16
- ROUTER = InferenceRouter()
17
- except ImportError:
18
- ROUTER = None
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def process_request(prompt: str, model_selection: str, mode: str):
21
- """Process inference request through TEQUMSA orchestration."""
 
 
 
22
  if not prompt or not prompt.strip():
23
  return "Please enter a prompt."
24
 
25
- if INFERENCE_NODE:
26
- result = INFERENCE_NODE.process(
 
 
 
27
  prompt=prompt,
28
  model_selection=model_selection,
29
  mode=mode
@@ -39,15 +59,23 @@ def process_request(prompt: str, model_selection: str, mode: str):
39
  }, indent=2)
40
 
41
  def route_inference(prompt: str, target_model: str):
42
- """Route inference through the router."""
43
- if ROUTER:
44
- route = ROUTER.route(prompt, target_model)
 
 
 
 
 
 
45
  return json.dumps(route, indent=2)
46
  return json.dumps({"status": "router_unavailable"}, indent=2)
47
 
 
48
  with gr.Blocks(title="TEQUMSA Inference Node") as demo:
49
  gr.Markdown("# TEQUMSA Symbiotic Orchestrator - Inference Node")
50
  gr.Markdown("Autonomous multi-agent inference routing and execution.")
 
51
 
52
  with gr.Tab("Inference"):
53
  prompt_input = gr.Textbox(
@@ -55,6 +83,7 @@ with gr.Blocks(title="TEQUMSA Inference Node") as demo:
55
  placeholder="Enter your prompt here...",
56
  lines=5
57
  )
 
58
  with gr.Row():
59
  model_dropdown = gr.Dropdown(
60
  choices=["claude", "gpt", "gemini", "perplexity", "auto"],
@@ -66,8 +95,10 @@ with gr.Blocks(title="TEQUMSA Inference Node") as demo:
66
  value="standard",
67
  label="Execution Mode"
68
  )
 
69
  process_btn = gr.Button("Process Request", variant="primary")
70
  output = gr.Textbox(label="Inference Output", lines=10)
 
71
  process_btn.click(
72
  fn=process_request,
73
  inputs=[prompt_input, model_dropdown, mode_dropdown],
@@ -83,8 +114,12 @@ with gr.Blocks(title="TEQUMSA Inference Node") as demo:
83
  router_model = gr.Textbox(label="Target Model", value="auto")
84
  route_btn = gr.Button("Analyze Route", variant="secondary")
85
  route_output = gr.Textbox(label="Route Analysis", lines=8)
 
86
  route_btn.click(
87
  fn=route_inference,
88
  inputs=[router_prompt, router_model],
89
  outputs=route_output
90
- )
 
 
 
 
2
  import os
3
  import json
4
  from pathlib import Path
5
+ import spaces
6
 
7
+ # TEQUMSA Space Kernel - Lazy loading pattern
8
+ INFERENCE_NODE = None
9
+ ROUTER = None
 
 
 
10
 
11
+ def get_inference_node():
12
+ """Lazy-load inference node only when needed."""
13
+ global INFERENCE_NODE
14
+ if INFERENCE_NODE is None:
15
+ try:
16
+ from tequmsa_space_kernel import TEQUMSAInferenceNode
17
+ INFERENCE_NODE = TEQUMSAInferenceNode()
18
+ except ImportError:
19
+ pass
20
+ return INFERENCE_NODE
21
 
22
+ def get_router():
23
+ """Lazy-load router only when needed."""
24
+ global ROUTER
25
+ if ROUTER is None:
26
+ try:
27
+ from inference_router import InferenceRouter
28
+ ROUTER = InferenceRouter()
29
+ except ImportError:
30
+ pass
31
+ return ROUTER
32
+
33
+ @spaces.GPU
34
  def process_request(prompt: str, model_selection: str, mode: str):
35
+ """Process inference request through TEQUMSA orchestration.
36
+
37
+ ZeroGPU decorator ensures GPU is allocated only when this function runs.
38
+ """
39
  if not prompt or not prompt.strip():
40
  return "Please enter a prompt."
41
 
42
+ # Get inference node lazily
43
+ inference_node = get_inference_node()
44
+
45
+ if inference_node:
46
+ result = inference_node.process(
47
  prompt=prompt,
48
  model_selection=model_selection,
49
  mode=mode
 
59
  }, indent=2)
60
 
61
  def route_inference(prompt: str, target_model: str):
62
+ """Route inference through the router.
63
+
64
+ This is CPU-only routing logic, no GPU needed.
65
+ """
66
+ # Get router lazily
67
+ router = get_router()
68
+
69
+ if router:
70
+ route = router.route(prompt, target_model)
71
  return json.dumps(route, indent=2)
72
  return json.dumps({"status": "router_unavailable"}, indent=2)
73
 
74
+ # Gradio UI - lightweight setup, no heavy models loaded at startup
75
  with gr.Blocks(title="TEQUMSA Inference Node") as demo:
76
  gr.Markdown("# TEQUMSA Symbiotic Orchestrator - Inference Node")
77
  gr.Markdown("Autonomous multi-agent inference routing and execution.")
78
+ gr.Markdown("*Powered by ZeroGPU (NVIDIA H200) - GPU allocated on-demand*")
79
 
80
  with gr.Tab("Inference"):
81
  prompt_input = gr.Textbox(
 
83
  placeholder="Enter your prompt here...",
84
  lines=5
85
  )
86
+
87
  with gr.Row():
88
  model_dropdown = gr.Dropdown(
89
  choices=["claude", "gpt", "gemini", "perplexity", "auto"],
 
95
  value="standard",
96
  label="Execution Mode"
97
  )
98
+
99
  process_btn = gr.Button("Process Request", variant="primary")
100
  output = gr.Textbox(label="Inference Output", lines=10)
101
+
102
  process_btn.click(
103
  fn=process_request,
104
  inputs=[prompt_input, model_dropdown, mode_dropdown],
 
114
  router_model = gr.Textbox(label="Target Model", value="auto")
115
  route_btn = gr.Button("Analyze Route", variant="secondary")
116
  route_output = gr.Textbox(label="Route Analysis", lines=8)
117
+
118
  route_btn.click(
119
  fn=route_inference,
120
  inputs=[router_prompt, router_model],
121
  outputs=route_output
122
+ )
123
+
124
+ if __name__ == "__main__":
125
+ demo.launch()