Update app.py
app.py CHANGED

@@ -1,68 +1,59 @@
 import streamlit as st
+import os
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
-import os

-
-st.set_page_config(...)
-st.title("🤖 Qwen3-1.7B (Streamlit SDK)")
+st.set_page_config(page_title="Qwen3 Docker Chat", page_icon="🐳")
+st.title("🐳 Qwen3-1.7B (Docker Optimized)")

-#
+# --- Model Loading with Absolute Path Fix ---
 @st.cache_resource
-def load_model():
+def load_model():
     repo_id = "Qwen/Qwen3-1.7B-GGUF"
     filename = "Qwen3-1.7B-Q8_0.gguf"

-    with st.spinner("Downloading model..."):
-        # hf_hub_download returns the local cache path,
-        # e.g., "/home/user/.cache/huggingface/hub/..."
+    with st.spinner("🚀 Downloading model (this happens once)..."):
+        # Returns the direct local string path
         model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-
-
+
+    # Verify the file actually exists to avoid ValueError
+    if not os.path.exists(model_path):
+        st.error(f"Failed to find model at {model_path}")
+        return None
+
     return Llama(
         model_path=model_path,
-        n_ctx=4096,
-        n_threads=2,
-
+        n_ctx=4096,
+        n_threads=2,  # Optimized for HF Free Tier
+        chat_format="chatml",
+        verbose=False
     )

-llm = load_model()
+llm = load_model()

-#
+# --- Simple Chat Logic ---
 if "messages" not in st.session_state:
-    st.session_state.messages = [
-        {"role": "system", "content": "You are a helpful assistant."}
-    ]
+    st.session_state.messages = [{"role": "system", "content": "You are a fast AI."}]

-
-# Display chat history
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
+for msg in st.session_state.messages:
+    if msg["role"] != "system":
+        st.chat_message(msg["role"]).write(msg["content"])

-
-if prompt := st.chat_input("How can I help you today?"):
-    # Add user message
+if prompt := st.chat_input():
     st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.markdown(prompt)
+    st.chat_message("user").write(prompt)

-    # Generate assistant response
     with st.chat_message("assistant"):
         stream = llm.create_chat_completion(
             messages=st.session_state.messages,
             stream=True,
-            max_tokens=1024,
-            temperature=0.7
+            max_tokens=1024
         )

-
-        def stream_response():
+        def response_gen():
             for chunk in stream:
-                delta = chunk['choices'][0]['delta']
-                if 'content' in delta:
-                    yield delta['content']
+                if 'content' in chunk['choices'][0]['delta']:
+                    yield chunk['choices'][0]['delta']['content']

-        full_response = st.write_stream(stream_response())
-
-        st.session_state.messages.append({"role": "assistant", "content": full_response})
+        response = st.write_stream(response_gen())
+        st.session_state.messages.append({"role": "assistant", "content": response})
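
The guard in response_gen() matters because llama-cpp-python's create_chat_completion(stream=True) yields OpenAI-style chunks, and the first delta typically carries only the role, with no 'content' key. A minimal sketch of the shape being consumed (the chunk values here are illustrative, not captured output):

    # Illustrative chunk shapes; the nesting matches what response_gen() indexes into.
    first_chunk = {"choices": [{"delta": {"role": "assistant"}}]}  # no 'content' yet
    later_chunk = {"choices": [{"delta": {"content": "Hello"}}]}   # text arrives here

    for chunk in (first_chunk, later_chunk):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:  # skip the role-only chunk
            print(delta["content"], end="")

Without the membership test, the role-only first chunk would raise a KeyError on delta["content"].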
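
To try the updated app outside the Space (assuming no dependencies beyond the three imports), a minimal local run looks like:

    pip install streamlit llama-cpp-python huggingface_hub
    streamlit run app.py

On first launch, hf_hub_download pulls the Q8_0 GGUF into the local Hugging Face cache; @st.cache_resource then keeps the loaded Llama instance alive across Streamlit reruns, so the model is loaded once per process rather than on every chat message.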