# BitNet CPU Assistant — Streamlit front-end for CPU-only 1-bit LLM inference.
import streamlit as st
import psutil
from model_manager import BitNetManager

# ---------------------------------------------------------------------------
# Page shell: dark GitHub-like theme injected as raw CSS. Streamlit has no
# first-class hook for per-element colors, hence unsafe_allow_html.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="BitNet CPU Assistant", page_icon="π§ ", layout="wide")

_THEME_CSS = """
<style>
.stApp { background-color: #0d1117; color: #c9d1d9; }
.status-card {
    background: rgba(30, 41, 59, 0.5);
    border: 1px solid #30363d;
    border-radius: 10px;
    padding: 15px;
    margin-bottom: 10px;
}
.metric-value { color: #58a6ff; font-weight: bold; }
h1, h2, h3 { color: #58a6ff; }
</style>
"""
st.markdown(_THEME_CSS, unsafe_allow_html=True)

st.title("π§ BitNet CPU Assistant")
st.caption("Blazingly fast 1-bit LLM Inference on CPU-only Environments")

# The chat transcript must survive Streamlit's top-to-bottom reruns, so it
# lives in session_state rather than a plain module variable.
if "messages" not in st.session_state:
    st.session_state.messages = []
# ---------------------------------------------------------------------------
# Sidebar: model selection, live resource readout, engine bootstrap.
# ---------------------------------------------------------------------------
with st.sidebar:
    st.header("βοΈ Settings")

    # Keys are repo IDs taken from the official setup_env.py usage; values are
    # the human-readable labels shown in the selectbox.
    model_options = {
        "1bitLLM/bitnet_b1_58-3B": "3B Optimized (Recommended)",
        "1bitLLM/bitnet_b1_58-large": "Large (Efficient)",
        "microsoft/BitNet-b1.58-2B-4T": "2B Specialized",
    }
    model_id = st.selectbox(
        "Select Model",
        options=list(model_options.keys()),
        format_func=lambda x: model_options[x],
    )

    st.header("π System Resources")
    # NOTE(review): cpu_percent() with no interval measures against the
    # previous call, so the very first rerun reports 0.0 — acceptable for a
    # live dashboard that refreshes on every rerun.
    cpu_usage = psutil.cpu_percent()
    ram_usage = psutil.virtual_memory().percent
    st.markdown(f"""
    <div class="status-card">
        CPU Usage: <span class="metric-value">{cpu_usage}%</span><br>
        RAM Usage: <span class="metric-value">{ram_usage}%</span>
    </div>
    """, unsafe_allow_html=True)

    if "engine_ready" not in st.session_state:
        st.session_state.engine_ready = False

    if st.button("π Initialize Engine"):
        manager = BitNetManager()
        if manager.setup_engine(model_id=model_id):
            st.session_state.engine_ready = True
            # Fix: keep the configured manager instead of discarding it when
            # the button handler ends, so later reruns can reuse the already
            # set-up engine rather than rebuilding from scratch.
            st.session_state.manager = manager
            st.success("BitNet.cpp Ready!")
        else:
            st.error("Engine setup failed. Check logs above.")
# ---------------------------------------------------------------------------
# Main chat area: readiness banner plus transcript replay.
# ---------------------------------------------------------------------------
status_placeholder = st.empty()
if not st.session_state.engine_ready:
    status_placeholder.warning(
        "β οΈ Engine not initialized. Please click 'Initialize Engine' in the sidebar to start."
    )

# Replay the saved transcript so the conversation persists across reruns.
for entry in st.session_state.messages:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])
# ---------------------------------------------------------------------------
# Chat input: run one prompt through bitnet.cpp and stream the answer.
# ---------------------------------------------------------------------------

# Lines starting with any of these prefixes are llama.cpp/bitnet.cpp engine
# chatter rather than generated text; they are routed to the log expander
# instead of the chat bubble. Hoisted out of the token loop (it is invariant —
# the original rebuilt this tuple on every stdout line).
METADATA_PREFIXES = ("llama", "main", "llm_", "llm-", "system_info", "sampler",
                     "generate", "common", "BOS", "EOS", "UNK", "PAD")

if prompt := st.chat_input("Ask me anything..."):
    if not st.session_state.engine_ready:
        st.error("You must initialize the engine before chatting.")
    else:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # Reuse the manager built by "Initialize Engine" when available;
            # fall back to a fresh one so this handler also works on its own.
            manager = st.session_state.get("manager") or BitNetManager()
            model_path = manager.download_model(model_id=model_id, filename="ggml-model-i2_s.gguf")
            if model_path:
                with st.status("π Initializing inference engine...") as status:
                    # Launch the real inference subprocess.
                    process = manager.run_inference(prompt, model_path)
                    if process:
                        status.update(label="π§ Generating response using 1-bit kernels...", state="running")
                        log_buffer = []
                        # Expander keeps engine noise out of the chat bubble.
                        with st.expander("π οΈ Engine Metadata & Logs", expanded=False):
                            log_placeholder = st.empty()
                        # Stream stdout line by line as tokens arrive.
                        for line in process.stdout:
                            if (any(line.strip().startswith(p) for p in METADATA_PREFIXES)
                                    or "..." in line or "init:" in line):
                                log_buffer.append(line)
                                log_placeholder.code("".join(log_buffer[-10:]))
                            else:
                                full_response += line
                                message_placeholder.markdown(full_response + "β")
                        status.update(label="β Generation Complete", state="complete")
                        # Surface engine errors in the log expander, not chat.
                        # NOTE(review): stderr is drained only after stdout is
                        # exhausted — if the engine writes heavily to stderr
                        # mid-run this could block; confirm run_inference keeps
                        # stderr small or redirects it.
                        stderr = process.stderr.read()
                        if stderr:
                            log_buffer.append(f"\nSTDERR: {stderr}")
                            log_placeholder.code("".join(log_buffer[-15:]))
                        # Fix: reap the child so completed runs do not linger
                        # as zombie processes.
                        process.wait()
                    else:
                        status.update(label="β Failed to launch engine", state="error")
                        full_response = "Failed to launch inference engine."
                message_placeholder.markdown(full_response)
            else:
                st.error("Model not available.")
        st.session_state.messages.append({"role": "assistant", "content": full_response})