Vishwas1 commited on
Commit
804d64f
·
verified ·
1 Parent(s): 7d4c953

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -19
  2. model_manager.py +50 -36
app.py CHANGED
@@ -28,7 +28,13 @@ if "messages" not in st.session_state:
28
  # Sidebar for controls and monitoring
29
  with st.sidebar:
30
  st.header("⚙️ Settings")
31
- model_id = st.selectbox("Select Model", ["microsoft/bitnet-b1.58-3B", "microsoft/bitnet-b1.58-large-4t"])
 
 
 
 
 
 
32
 
33
  st.header("📈 System Resources")
34
  cpu_usage = psutil.cpu_percent()
@@ -41,10 +47,19 @@ with st.sidebar:
41
  </div>
42
  """, unsafe_allow_html=True)
43
 
44
- if st.button("Initialize Engine"):
45
- manager = BitNetManager()
46
- if manager.setup_engine():
47
- st.success("BitNet.cpp Ready!")
 
 
 
 
 
 
 
 
 
48
 
49
  # Main Chat Interface
50
  for message in st.session_state.messages:
@@ -52,24 +67,43 @@ for message in st.session_state.messages:
52
  st.markdown(message["content"])
53
 
54
  if prompt := st.chat_input("Ask me anything..."):
55
- st.session_state.messages.append({"role": "user", "content": prompt})
56
- with st.chat_message("user"):
57
- st.markdown(prompt)
58
-
59
- with st.chat_message("assistant"):
60
- message_placeholder = st.empty()
61
- full_response = ""
62
 
63
- # In a real HF space, we'd trigger the inference runner here
64
- # For now, we simulate the logic with a placeholder as compilation takes time
65
- manager = BitNetManager()
66
- model_path = manager.download_model(model_id=model_id)
 
 
 
67
 
68
  if model_path:
69
  st.info("Generating response using 1-bit kernels...")
70
- # Note: Streaming implementation would capture stdout from the process
71
- full_response = "Engine is currently setting up the 1-bit kernels. Once deployed to HF Spaces with Clang 18, I will be able to stream responses at 10+ tokens/sec on this CPU!"
72
- message_placeholder.markdown(full_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  else:
74
  st.error("Model not available.")
75
 
 
28
  # Sidebar for controls and monitoring
29
  with st.sidebar:
30
  st.header("⚙️ Settings")
31
+ # Corrected IDs from the official setup_env.py usage
32
+ model_options = {
33
+ "1bitLLM/bitnet_b1_58-3B": "3B Optimized (Recommended)",
34
+ "1bitLLM/bitnet_b1_58-large": "Large (Efficient)",
35
+ "microsoft/BitNet-b1.58-2B-4T": "2B Specialized"
36
+ }
37
+ model_id = st.selectbox("Select Model", options=list(model_options.keys()), format_func=lambda x: model_options[x])
38
 
39
  st.header("📈 System Resources")
40
  cpu_usage = psutil.cpu_percent()
 
47
  </div>
48
  """, unsafe_allow_html=True)
49
 
50
+ if "engine_ready" not in st.session_state:
51
+ st.session_state.engine_ready = False
52
+
53
+ if st.button("Initialize Engine") or not st.session_state.engine_ready:
54
+ if not st.session_state.engine_ready:
55
+ manager = BitNetManager()
56
+ if manager.setup_engine():
57
+ st.session_state.engine_ready = True
58
+ st.success("BitNet.cpp Ready!")
59
+ else:
60
+ st.error("Engine setup failed. Check logs above.")
61
+ else:
62
+ st.info("Engine is already initialized and ready.")
63
 
64
  # Main Chat Interface
65
  for message in st.session_state.messages:
 
67
  st.markdown(message["content"])
68
 
69
  if prompt := st.chat_input("Ask me anything..."):
70
+ if not st.session_state.get("engine_ready"):
71
+ st.warning("Please initialize the engine in the sidebar first!")
72
+ else:
73
+ st.session_state.messages.append({"role": "user", "content": prompt})
74
+ with st.chat_message("user"):
75
+ st.markdown(prompt)
 
76
 
77
+ with st.chat_message("assistant"):
78
+ message_placeholder = st.empty()
79
+ full_response = ""
80
+
81
+ manager = BitNetManager()
82
+ # Use i2_s as default quant as specified in previous logs
83
+ model_path = manager.download_model(model_id=model_id, filename="ggml-model-i2_s.gguf")
84
 
85
  if model_path:
86
  st.info("Generating response using 1-bit kernels...")
87
+
88
+ # Execute real inference
89
+ process = manager.run_inference(prompt, model_path)
90
+
91
+ if process:
92
+ full_response = ""
93
+ # Stream the output tokens
94
+ for line in process.stdout:
95
+ # BitNet binary usually prints the prompt then the response
96
+ # We'll collect and display as we go
97
+ full_response += line
98
+ message_placeholder.markdown(full_response + "▌")
99
+
100
+ # Check for errors
101
+ stderr = process.stderr.read()
102
+ if stderr:
103
+ st.warning(f"Engine Warning: {stderr}")
104
+ else:
105
+ full_response = "Failed to launch inference engine."
106
+ message_placeholder.markdown(full_response)
107
  else:
108
  st.error("Model not available.")
109
 
model_manager.py CHANGED
@@ -32,75 +32,89 @@ class BitNetManager:
32
  st.warning("Patch target line not found. It might have been already patched or updated.")
33
 
34
  def setup_engine(self):
35
- """Clone and compile utilizing official setup_env.py."""
36
  if not os.path.exists(self.bitnet_dir):
37
  st.info("Cloning BitNet repository...")
38
  subprocess.run(["git", "clone", "--recursive", self.repo_url], check=True)
39
 
40
- # Always try to patch before build to ensure errors are fixed
41
  self.patch_source()
42
 
43
- # We'll use the official setup_env.py which handles kernel injection
44
- st.info("Running official BitNet setup/compilation (setup_env.py)...")
45
  try:
46
- # We pass a model ID to trigger the full build and conversion sequence
47
- # --hf-repo specifies the model, --model-dir specifies where to save it
48
  # We'll use 1bitLLM/bitnet_b1_58-3B as it's a supported option
49
  cmd = ["python", "setup_env.py", "--hf-repo", "1bitLLM/bitnet_b1_58-3B", "--use-pretuned"]
50
 
51
- # Use Popen to stream logs to streamlit if possible, or just run and capture
52
- st.warning("This process injects custom 1-bit kernels and compiles the engine. Please wait...")
53
- process = subprocess.run(cmd, cwd=self.bitnet_dir, capture_output=True, text=True)
 
 
 
 
 
 
 
 
 
54
 
55
  if process.returncode != 0:
56
- st.error(f"Official Setup failed (Exit {process.returncode})")
57
-
58
- # Try to read the specific compilation log generated by setup_env.py
59
- log_path = os.path.join(self.bitnet_dir, "logs", "compile.log")
60
- if os.path.exists(log_path):
61
- with open(log_path, "r") as f:
62
- st.info("Compilation Log (logs/compile.log):")
63
- st.code(f.read()[-3000:]) # Show last 3000 chars
64
- else:
65
- st.code(process.stderr)
66
- st.info("Detailed Output:")
67
- st.code(process.stdout[-2000:])
68
  return False
69
 
70
  st.success("Official Setup Completed Successfully!")
71
  return True
72
  except Exception as e:
73
- st.error(f"Execution error during setup: {e}")
74
  return False
75
 
76
- def download_model(self, model_id="microsoft/bitnet-b1.58-3B", filename="ggml-model-i2_s.gguf"):
77
- """Download model weights from Hugging Face."""
78
- st.info(f"Checking for model: {model_id}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  try:
80
  path = hf_hub_download(repo_id=model_id, filename=filename)
81
- st.success(f"Model ready at: {path}")
82
  return path
83
  except Exception as e:
84
  st.error(f"Model download failed: {e}")
85
  return None
86
 
87
  def run_inference(self, prompt, model_path):
88
- """Execute the bitnet_inference binary."""
89
- # This is a placeholder for the actual command-line call
90
- # Typically: ./build/bitnet_inference -m model.gguf -p "prompt"
91
- cmd = [
92
- os.path.join(self.build_dir, "bitnet_inference"),
93
- "-m", model_path,
94
- "-p", prompt,
95
- "-n", "128" # tokens
96
- ]
97
 
 
 
 
 
 
 
98
  try:
 
99
  process = subprocess.Popen(
100
  cmd,
101
  stdout=subprocess.PIPE,
102
  stderr=subprocess.PIPE,
103
- text=True
 
104
  )
105
  return process
106
  except Exception as e:
 
32
  st.warning("Patch target line not found. It might have been already patched or updated.")
33
 
34
  def setup_engine(self):
35
+ """Clone and compile utilizing official setup_env.py with log streaming."""
36
  if not os.path.exists(self.bitnet_dir):
37
  st.info("Cloning BitNet repository...")
38
  subprocess.run(["git", "clone", "--recursive", self.repo_url], check=True)
39
 
 
40
  self.patch_source()
41
 
42
+ st.info("Running official BitNet setup (setup_env.py)...")
 
43
  try:
 
 
44
  # We'll use 1bitLLM/bitnet_b1_58-3B as it's a supported option
45
  cmd = ["python", "setup_env.py", "--hf-repo", "1bitLLM/bitnet_b1_58-3B", "--use-pretuned"]
46
 
47
+ # Stream the stdout to Streamlit in real-time
48
+ process = subprocess.Popen(cmd, cwd=self.bitnet_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
49
+
50
+ log_container = st.empty()
51
+ logs = []
52
+
53
+ for line in process.stdout:
54
+ logs.append(line)
55
+ # Keep only the last 15 lines or so for UI clarity
56
+ log_container.code("".join(logs[-15:]))
57
+
58
+ process.wait()
59
 
60
  if process.returncode != 0:
61
+ st.error(f"Setup failed (Exit {process.returncode})")
 
 
 
 
 
 
 
 
 
 
 
62
  return False
63
 
64
  st.success("Official Setup Completed Successfully!")
65
  return True
66
  except Exception as e:
67
+ st.error(f"Execution error during setup: {e}")
68
  return False
69
 
70
+ def get_binary_path(self):
71
+ """Locate the bitnet binary based on platform/build structure."""
72
+ possible_paths = [
73
+ os.path.join(self.bitnet_dir, "build", "bitnet"), # Linux
74
+ os.path.join(self.bitnet_dir, "build", "Release", "bitnet.exe"), # Windows
75
+ os.path.join(self.bitnet_dir, "run_inference.py") # Script fallback
76
+ ]
77
+ for p in possible_paths:
78
+ if os.path.exists(p):
79
+ return p
80
+ return None
81
+
82
+ def download_model(self, model_id="1bitLLM/bitnet_b1_58-3B", filename="ggml-model-i2_s.gguf"):
83
+ """Download model weights if not already handled by setup_env.py."""
84
+ # setup_env.py usually downloads to models/ (relative to bitnet_dir)
85
+ local_model_path = os.path.join(self.bitnet_dir, "models", model_id, filename)
86
+ if os.path.exists(local_model_path):
87
+ return local_model_path
88
+
89
+ st.info(f"Downloading model {model_id} via Hub...")
90
  try:
91
  path = hf_hub_download(repo_id=model_id, filename=filename)
 
92
  return path
93
  except Exception as e:
94
  st.error(f"Model download failed: {e}")
95
  return None
96
 
97
  def run_inference(self, prompt, model_path):
98
+ """Execute the bitnet binary with the provided prompt."""
99
+ binary = self.get_binary_path()
100
+ if not binary:
101
+ st.error("Inference binary not found. Please re-run Initialization.")
102
+ return None
 
 
 
 
103
 
104
+ # Build the command. bitnet binary usually takes -m and -p
105
+ if binary.endswith(".py"):
106
+ cmd = ["python", binary, "-m", model_path, "-p", prompt, "-n", "128"]
107
+ else:
108
+ cmd = [binary, "-m", model_path, "-p", prompt, "-n", "128"]
109
+
110
  try:
111
+ # We'll return a Popen object so the app can stream the response
112
  process = subprocess.Popen(
113
  cmd,
114
  stdout=subprocess.PIPE,
115
  stderr=subprocess.PIPE,
116
+ text=True,
117
+ bufsize=1
118
  )
119
  return process
120
  except Exception as e: