Vishwas1 commited on
Commit
804d64f
·
verified ·
1 Parent(s): 7d4c953

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -19
  2. model_manager.py +50 -36
app.py CHANGED
@@ -28,7 +28,13 @@ if "messages" not in st.session_state:
28
  # Sidebar for controls and monitoring
29
  with st.sidebar:
30
  st.header("⚙️ Settings")
31
- model_id = st.selectbox("Select Model", ["microsoft/bitnet-b1.58-3B", "microsoft/bitnet-b1.58-large-4t"])
 
 
 
 
 
 
32
 
33
  st.header("📈 System Resources")
34
  cpu_usage = psutil.cpu_percent()
@@ -41,10 +47,19 @@ with st.sidebar:
41
  </div>
42
  """, unsafe_allow_html=True)
43
 
44
- if st.button("Initialize Engine"):
45
- manager = BitNetManager()
46
- if manager.setup_engine():
47
- st.success("BitNet.cpp Ready!")
 
 
 
 
 
 
 
 
 
48
 
49
  # Main Chat Interface
50
  for message in st.session_state.messages:
@@ -52,24 +67,43 @@ for message in st.session_state.messages:
52
  st.markdown(message["content"])
53
 
54
  if prompt := st.chat_input("Ask me anything..."):
55
- st.session_state.messages.append({"role": "user", "content": prompt})
56
- with st.chat_message("user"):
57
- st.markdown(prompt)
58
-
59
- with st.chat_message("assistant"):
60
- message_placeholder = st.empty()
61
- full_response = ""
62
 
63
- # In a real HF space, we'd trigger the inference runner here
64
- # For now, we simulate the logic with a placeholder as compilation takes time
65
- manager = BitNetManager()
66
- model_path = manager.download_model(model_id=model_id)
 
 
 
67
 
68
  if model_path:
69
  st.info("Generating response using 1-bit kernels...")
70
- # Note: Streaming implementation would capture stdout from the process
71
- full_response = "Engine is currently setting up the 1-bit kernels. Once deployed to HF Spaces with Clang 18, I will be able to stream responses at 10+ tokens/sec on this CPU!"
72
- message_placeholder.markdown(full_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  else:
74
  st.error("Model not available.")
75
 
 
28
  # Sidebar for controls and monitoring
29
  with st.sidebar:
30
  st.header("⚙️ Settings")
31
+ # Corrected IDs from the official setup_env.py usage
32
+ model_options = {
33
+ "1bitLLM/bitnet_b1_58-3B": "3B Optimized (Recommended)",
34
+ "1bitLLM/bitnet_b1_58-large": "Large (Efficient)",
35
+ "microsoft/BitNet-b1.58-2B-4T": "2B Specialized"
36
+ }
37
+ model_id = st.selectbox("Select Model", options=list(model_options.keys()), format_func=lambda x: model_options[x])
38
 
39
  st.header("📈 System Resources")
40
  cpu_usage = psutil.cpu_percent()
 
47
  </div>
48
  """, unsafe_allow_html=True)
49
 
50
+ if "engine_ready" not in st.session_state:
51
+ st.session_state.engine_ready = False
52
+
53
+ if st.button("Initialize Engine") or not st.session_state.engine_ready:
54
+ if not st.session_state.engine_ready:
55
+ manager = BitNetManager()
56
+ if manager.setup_engine():
57
+ st.session_state.engine_ready = True
58
+ st.success("BitNet.cpp Ready!")
59
+ else:
60
+ st.error("Engine setup failed. Check logs above.")
61
+ else:
62
+ st.info("Engine is already initialized and ready.")
63
 
64
  # Main Chat Interface
65
  for message in st.session_state.messages:
 
67
  st.markdown(message["content"])
68
 
69
  if prompt := st.chat_input("Ask me anything..."):
70
+ if not st.session_state.get("engine_ready"):
71
+ st.warning("Please initialize the engine in the sidebar first!")
72
+ else:
73
+ st.session_state.messages.append({"role": "user", "content": prompt})
74
+ with st.chat_message("user"):
75
+ st.markdown(prompt)
 
76
 
77
+ with st.chat_message("assistant"):
78
+ message_placeholder = st.empty()
79
+ full_response = ""
80
+
81
+ manager = BitNetManager()
82
+ # Use i2_s as default quant as specified in previous logs
83
+ model_path = manager.download_model(model_id=model_id, filename="ggml-model-i2_s.gguf")
84
 
85
  if model_path:
86
  st.info("Generating response using 1-bit kernels...")
87
+
88
+ # Execute real inference
89
+ process = manager.run_inference(prompt, model_path)
90
+
91
+ if process:
92
+ full_response = ""
93
+ # Stream the output tokens
94
+ for line in process.stdout:
95
+ # BitNet binary usually prints the prompt then the response
96
+ # We'll collect and display as we go
97
+ full_response += line
98
+ message_placeholder.markdown(full_response + "▌")
99
+
100
+ # Check for errors
101
+ stderr = process.stderr.read()
102
+ if stderr:
103
+ st.warning(f"Engine Warning: {stderr}")
104
+ else:
105
+ full_response = "Failed to launch inference engine."
106
+ message_placeholder.markdown(full_response)
107
  else:
108
  st.error("Model not available.")
109
 
model_manager.py CHANGED
@@ -32,75 +32,89 @@ class BitNetManager:
32
  st.warning("Patch target line not found. It might have been already patched or updated.")
33
 
34
  def setup_engine(self):
35
- """Clone and compile utilizing official setup_env.py."""
36
  if not os.path.exists(self.bitnet_dir):
37
  st.info("Cloning BitNet repository...")
38
  subprocess.run(["git", "clone", "--recursive", self.repo_url], check=True)
39
 
40
- # Always try to patch before build to ensure errors are fixed
41
  self.patch_source()
42
 
43
- # We'll use the official setup_env.py which handles kernel injection
44
- st.info("Running official BitNet setup/compilation (setup_env.py)...")
45
  try:
46
- # We pass a model ID to trigger the full build and conversion sequence
47
- # --hf-repo specifies the model, --model-dir specifies where to save it
48
  # We'll use 1bitLLM/bitnet_b1_58-3B as it's a supported option
49
  cmd = ["python", "setup_env.py", "--hf-repo", "1bitLLM/bitnet_b1_58-3B", "--use-pretuned"]
50
 
51
- # Use Popen to stream logs to streamlit if possible, or just run and capture
52
- st.warning("This process injects custom 1-bit kernels and compiles the engine. Please wait...")
53
- process = subprocess.run(cmd, cwd=self.bitnet_dir, capture_output=True, text=True)
 
 
 
 
 
 
 
 
 
54
 
55
  if process.returncode != 0:
56
- st.error(f"Official Setup failed (Exit {process.returncode})")
57
-
58
- # Try to read the specific compilation log generated by setup_env.py
59
- log_path = os.path.join(self.bitnet_dir, "logs", "compile.log")
60
- if os.path.exists(log_path):
61
- with open(log_path, "r") as f:
62
- st.info("Compilation Log (logs/compile.log):")
63
- st.code(f.read()[-3000:]) # Show last 3000 chars
64
- else:
65
- st.code(process.stderr)
66
- st.info("Detailed Output:")
67
- st.code(process.stdout[-2000:])
68
  return False
69
 
70
  st.success("Official Setup Completed Successfully!")
71
  return True
72
  except Exception as e:
73
- st.error(f"Execution error during setup: {e}")
74
  return False
75
 
76
- def download_model(self, model_id="microsoft/bitnet-b1.58-3B", filename="ggml-model-i2_s.gguf"):
77
- """Download model weights from Hugging Face."""
78
- st.info(f"Checking for model: {model_id}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  try:
80
  path = hf_hub_download(repo_id=model_id, filename=filename)
81
- st.success(f"Model ready at: {path}")
82
  return path
83
  except Exception as e:
84
  st.error(f"Model download failed: {e}")
85
  return None
86
 
87
  def run_inference(self, prompt, model_path):
88
- """Execute the bitnet_inference binary."""
89
- # This is a placeholder for the actual command-line call
90
- # Typically: ./build/bitnet_inference -m model.gguf -p "prompt"
91
- cmd = [
92
- os.path.join(self.build_dir, "bitnet_inference"),
93
- "-m", model_path,
94
- "-p", prompt,
95
- "-n", "128" # tokens
96
- ]
97
 
 
 
 
 
 
 
98
  try:
 
99
  process = subprocess.Popen(
100
  cmd,
101
  stdout=subprocess.PIPE,
102
  stderr=subprocess.PIPE,
103
- text=True
 
104
  )
105
  return process
106
  except Exception as e:
 
32
  st.warning("Patch target line not found. It might have been already patched or updated.")
33
 
34
  def setup_engine(self):
35
+ """Clone and compile utilizing official setup_env.py with log streaming."""
36
  if not os.path.exists(self.bitnet_dir):
37
  st.info("Cloning BitNet repository...")
38
  subprocess.run(["git", "clone", "--recursive", self.repo_url], check=True)
39
 
 
40
  self.patch_source()
41
 
42
+ st.info("Running official BitNet setup (setup_env.py)...")
 
43
  try:
 
 
44
  # We'll use 1bitLLM/bitnet_b1_58-3B as it's a supported option
45
  cmd = ["python", "setup_env.py", "--hf-repo", "1bitLLM/bitnet_b1_58-3B", "--use-pretuned"]
46
 
47
+ # Stream the stdout to Streamlit in real-time
48
+ process = subprocess.Popen(cmd, cwd=self.bitnet_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
49
+
50
+ log_container = st.empty()
51
+ logs = []
52
+
53
+ for line in process.stdout:
54
+ logs.append(line)
55
+ # Keep only the last 15 lines or so for UI clarity
56
+ log_container.code("".join(logs[-15:]))
57
+
58
+ process.wait()
59
 
60
  if process.returncode != 0:
61
+ st.error(f"Setup failed (Exit {process.returncode})")
 
 
 
 
 
 
 
 
 
 
 
62
  return False
63
 
64
  st.success("Official Setup Completed Successfully!")
65
  return True
66
  except Exception as e:
67
+ st.error(f"Execution error during setup: {e}")
68
  return False
69
 
70
+ def get_binary_path(self):
71
+ """Locate the bitnet binary based on platform/build structure."""
72
+ possible_paths = [
73
+ os.path.join(self.bitnet_dir, "build", "bitnet"), # Linux
74
+ os.path.join(self.bitnet_dir, "build", "Release", "bitnet.exe"), # Windows
75
+ os.path.join(self.bitnet_dir, "run_inference.py") # Script fallback
76
+ ]
77
+ for p in possible_paths:
78
+ if os.path.exists(p):
79
+ return p
80
+ return None
81
+
82
+ def download_model(self, model_id="1bitLLM/bitnet_b1_58-3B", filename="ggml-model-i2_s.gguf"):
83
+ """Download model weights if not already handled by setup_env.py."""
84
+ # setup_env.py usually downloads to models/ (relative to bitnet_dir)
85
+ local_model_path = os.path.join(self.bitnet_dir, "models", model_id, filename)
86
+ if os.path.exists(local_model_path):
87
+ return local_model_path
88
+
89
+ st.info(f"Downloading model {model_id} via Hub...")
90
  try:
91
  path = hf_hub_download(repo_id=model_id, filename=filename)
 
92
  return path
93
  except Exception as e:
94
  st.error(f"Model download failed: {e}")
95
  return None
96
 
97
  def run_inference(self, prompt, model_path):
98
+ """Execute the bitnet binary with the provided prompt."""
99
+ binary = self.get_binary_path()
100
+ if not binary:
101
+ st.error("Inference binary not found. Please re-run Initialization.")
102
+ return None
 
 
 
 
103
 
104
+ # Build the command. bitnet binary usually takes -m and -p
105
+ if binary.endswith(".py"):
106
+ cmd = ["python", binary, "-m", model_path, "-p", prompt, "-n", "128"]
107
+ else:
108
+ cmd = [binary, "-m", model_path, "-p", prompt, "-n", "128"]
109
+
110
  try:
111
+ # We'll return a Popen object so the app can stream the response
112
  process = subprocess.Popen(
113
  cmd,
114
  stdout=subprocess.PIPE,
115
  stderr=subprocess.PIPE,
116
+ text=True,
117
+ bufsize=1
118
  )
119
  return process
120
  except Exception as e: