SiennaClarke committed on
Commit
78d1301
·
verified ·
1 Parent(s): 5f88dc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -40
app.py CHANGED
# --- Previous revision of app.py (removed side of this diff), reconstructed ---
import streamlit as st
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Set page title
st.set_page_config(page_title="Qwen3 Chat", page_icon="🤖")
st.title("🤖 Qwen3-1.7B (Streamlit SDK)")


# 1. Reliable model loading (cached once per server process)
@st.cache_resource
def load_qwen_model():
    repo_id = "Qwen/Qwen3-1.7B-GGUF"
    filename = "Qwen3-1.7B-Q8_0.gguf"

    with st.spinner("Downloading model... this may take a minute"):
        # hf_hub_download gives us a direct, absolute path string,
        # e.g. "/home/user/.cache/huggingface/hub/..."
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # Passing the absolute path directly to Llama() fixes the ValueError.
    return Llama(
        model_path=model_path,
        n_ctx=4096,     # context window
        n_threads=2,    # use the 2 vCPUs available on Free Tier
        verbose=False   # set True to surface C++ logs in HF Logs
    )


llm = load_qwen_model()

# 2. Setup chat session: seed the system prompt on the first run only.
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]

# Display history; the system prompt is never rendered.
for message in st.session_state.messages:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

# 3. Chat logic: one turn per rerun when the user submits input.
if prompt := st.chat_input("How can I help you today?"):
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate assistant response
    with st.chat_message("assistant"):
        stream = llm.create_chat_completion(
            messages=st.session_state.messages,
            stream=True,
            max_tokens=1024,
            temperature=0.7
        )

        # Relay only the text deltas from the streaming chunks.
        def stream_response():
            for chunk in stream:
                delta = chunk['choices'][0]['delta']
                if 'content' in delta:
                    yield delta['content']

        full_response = st.write_stream(stream_response())

        st.session_state.messages.append({"role": "assistant", "content": full_response})
 
1
  import streamlit as st
2
+ import os
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
# Page chrome: browser-tab title/icon (must be the first Streamlit call)
# followed by the on-page heading.
st.set_page_config(page_title="Qwen3 Docker Chat", page_icon="🐳")
st.title("🐳 Qwen3-1.7B (Docker Optimized)")
 
8
 
9
# --- Model Loading with Absolute Path Fix ---
@st.cache_resource
def load_model():
    """Download the Qwen3-1.7B GGUF weights and return a ready Llama handle.

    Cached by Streamlit (`st.cache_resource`), so the download and the model
    construction happen a single time per server process.

    Returns:
        llama_cpp.Llama: model loaded from the locally cached GGUF file.

    Raises:
        FileNotFoundError: if the downloaded file is missing on disk.
    """
    repo_id = "Qwen/Qwen3-1.7B-GGUF"
    filename = "Qwen3-1.7B-Q8_0.gguf"

    with st.spinner("🚀 Downloading model (this happens once)..."):
        # hf_hub_download returns the direct local string path to the file.
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # Fail loudly if the path is bogus: the previous behavior returned None
    # here, which made the later llm.create_chat_completion(...) call crash
    # with an opaque AttributeError instead of a clear error message.
    if not os.path.exists(model_path):
        st.error(f"Failed to find model at {model_path}")
        raise FileNotFoundError(model_path)

    return Llama(
        model_path=model_path,
        n_ctx=4096,
        n_threads=2,  # Optimized for HF Free Tier
        chat_format="chatml",
        verbose=False
    )
31
 
32
# Build (or fetch the cached) model handle for this server process.
llm = load_model()

# --- Simple Chat Logic ---
# Seed the conversation with the system prompt on the first run only.
if "messages" not in st.session_state:
    seed = {"role": "system", "content": "You are a fast AI."}
    st.session_state.messages = [seed]
 
 
37
 
38
# Re-render the visible transcript on every rerun; the system prompt stays hidden.
for entry in st.session_state.messages:
    role = entry["role"]
    if role == "system":
        continue
    st.chat_message(role).write(entry["content"])
 
 
41
 
42
# Handle one chat turn: record the user's message, stream the model's reply
# into the assistant bubble, then persist the full reply in session history.
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    with st.chat_message("assistant"):
        stream = llm.create_chat_completion(
            messages=st.session_state.messages,
            stream=True,
            max_tokens=1024
        )

        def response_gen():
            # Yield only the text deltas; chunks without 'content'
            # (e.g. the initial role header) are skipped.
            for chunk in stream:
                delta = chunk['choices'][0]['delta']
                if 'content' in delta:
                    yield delta['content']

        # st.write_stream renders the stream live and returns the joined text.
        response = st.write_stream(response_gen())
        st.session_state.messages.append({"role": "assistant", "content": response})