spanofzero committed on
Commit
836f78e
·
verified ·
1 Parent(s): 2ef87cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -52
app.py CHANGED
@@ -1,75 +1,87 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  import os
4
 
 
5
  HF_TOKEN = os.getenv("HF_TOKEN")
6
 
7
- # Utilizing the conversational task through chat_completion
8
  client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN)
9
 
10
class StateController:
    """Maintains a 121-cell diagnostic grid plus a small status register."""

    def __init__(self):
        # 121 cells, all zeroed; cells become dicts once the grid is initialized.
        self.state_array = [0] * 121
        self.base_metric = 60
        self.batch_unit = 10
        self.memory_register = {}

    def initialize_grid(self):
        """Populate the first 51 cells with block/unit coordinates."""
        for idx in range(51):
            self.state_array[idx] = {
                "Blocks": idx // self.batch_unit,
                "Units": idx % self.batch_unit,
            }
        return "Grid initialized: 5 active blocks."

    def render_grid(self):
        """Return the fixed 121-point ASCII visualization of the grid."""
        # Cell 120 gets a label, every 10th cell a marker, the rest a dot.
        cells = (
            " [NODE_120] " if idx == 120 else ("<" if idx % 10 == 0 else ".")
            for idx in range(121)
        )
        return "".join(cells)

    def resolve_grid(self):
        """Mark the register resolved and zero the state array."""
        self.memory_register["STATUS"] = "RESOLVED"
        self.state_array = [0] * 121
        return "System resolved. State array reset to zero."
 
 
37
 
38
def generate_response(message, history):
    """Return a reply: a local grid diagnostic, or a model chat completion."""
    # Hardware diagnostic override — handled entirely on-device, no API call.
    if "run grid diagnostic" in message.lower():
        controller = StateController()
        report = [
            "Diagnostic sequence initiated.",
            "",
            controller.initialize_grid(),
            "",
            "Rendering 121-point array:",
            controller.render_grid(),
            "",
            "Executing state resolution:",
            controller.resolve_grid(),
        ]
        return "\n".join(report)

    persona = (
        "You are a logic-focused inference engine. "
        "You utilize strict state-hold memory and parallel integer blocks. "
        "Provide direct, technical, and accurate responses."
    )

    # Conversational payload: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": persona}]
    for user_msg, assistant_msg in history:
        messages.extend(
            (
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": assistant_msg},
            )
        )
    messages.append({"role": "user", "content": message})

    try:
        # chat_completion matches the model's conversational task.
        response = client.chat_completion(
            messages,
            max_tokens=1024,
            stream=False
        )
        return response.choices[0].message.content
    except Exception as error:
        return f"System Error: {str(error)}. Verify your token permissions."
 
 
 
 
73
 
74
  custom_css = """
75
  body, .gradio-container { background-color: #0b0f19 !important; }
@@ -87,8 +99,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css=custom_css) as demo
87
  "Run grid diagnostic",
88
  "Calculate the integer distribution for 120 units across 3 nodes.",
89
  "Explain network latency using technical terminology."
90
- ]
 
91
  )
92
 
93
  if __name__ == "__main__":
94
- demo.launch()
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from array import array
4
  import os
5
 
6
+ # Securely retrieve the token from Space secrets
7
  HF_TOKEN = os.getenv("HF_TOKEN")
8
 
9
+ # Initialize the inference client
10
  client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN)
11
 
12
class StateController:
    """Holds the diagnostic grid state and a cached 121-point rendering."""

    __slots__ = ("_state", "_metric", "_batch", "_reg", "_rendered")

    def __init__(self):
        # Unsigned 8-bit backing store: 121 zeroed cells, compact C storage.
        self._state = array("B", bytes(121))
        self._metric = 60   # base metric; informational, not read elsewhere here
        self._batch = 10    # cells per block in the grid layout
        self._reg = {}      # status register written by diagnostic()
        # The visualization is fixed, so build it once and cache the string.
        self._rendered = self._build_render()

    def _build_render(self) -> str:
        """Construct the fixed 121-point visualization once during initialization."""
        return "".join(
            " [NODE_120] " if i == 120 else ("<" if i % 10 == 0 else ".")
            for i in range(121)
        )

    def diagnostic(self) -> str:
        """Execute the diagnostic sequence and return the formatted status report.

        Writes the first 51 cells, sets the status register, then zeroes the
        state array again so the report's "reset to zero" claim is true.
        """
        # Update state array for diagnostic verification.
        for i in range(51):
            self._state[i] = i % self._batch

        self._reg.clear()
        self._reg["STATUS"] = "RESOLVED"

        # BUG FIX: the report below claims the array was reset, but the
        # previous version left the 51 written cells in place. Reset for real.
        self._state = array("B", bytes(121))

        return (
            "Diagnostic sequence initiated.\n\n"
            "Grid initialized: 5 active blocks.\n\n"
            "Rendering 121-point array:\n"
            f"{self._rendered}\n\n"
            "Executing state resolution:\n"
            "System resolved. State array reset to zero."
        )
46
+
47
# Shared controller instance — the diagnostic output is stateless between
# calls, so a single module-level object serves every request.
controller = StateController()

# Static system prompt, built once and prepended to every conversation.
SYSTEM_MSG = [
    {
        "role": "system",
        "content": (
            "You are a logic-focused inference engine. "
            "Utilize strict state-hold memory and parallel integer blocks. "
            "Provide direct, technical, and accurate responses."
        ),
    }
]
56
 
57
def generate_response(message: str, history: list):
    """Yield chat responses, streaming partial text as the model produces it.

    Args:
        message: Latest user message.
        history: Prior turns as (user, assistant) pairs (Gradio tuple format —
            assumed; confirm against the ChatInterface configuration).

    Yields:
        Progressively longer partial responses while streaming, or a single
        complete diagnostic/error string.
    """
    # Hardware diagnostic override — served locally, no model call.
    if message.lower().strip() == "run grid diagnostic":
        yield controller.diagnostic()
        return

    # BUG FIX: the previous version used `SYSTEM_MSG + [{...}, {...} for turn
    # in history]`, which is a SyntaxError (two elements before `for` in a
    # comprehension). Build the message list explicitly instead.
    messages = list(SYSTEM_MSG)
    for turn in history:
        messages.append({"role": "user", "content": turn[0]})
        messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    try:
        # Streaming reduces time-to-first-token in the UI.
        stream = client.chat_completion(
            messages,
            max_tokens=1024,
            stream=True,
            temperature=0.15
        )
        partial_response = ""
        for chunk in stream:
            # Delta content may be None on some chunks (e.g. finish events).
            token = chunk.choices[0].delta.content or ""
            partial_response += token
            yield partial_response
    except Exception as exc:
        yield f"System Error: {str(exc)}. Verify API token and permissions."
85
 
86
  custom_css = """
87
  body, .gradio-container { background-color: #0b0f19 !important; }
 
99
  "Run grid diagnostic",
100
  "Calculate the integer distribution for 120 units across 3 nodes.",
101
  "Explain network latency using technical terminology."
102
+ ],
103
+ cache_examples=False
104
  )
105
 
106
if __name__ == "__main__":
    # Launch with queue enabled — required for generator (streaming) handlers
    # to deliver partial responses to the UI.
    demo.queue().launch(show_api=False)