SiennaClarke committed
Commit 4ed8d9c · verified · 1 Parent(s): bd4eccf

Update app.py

Files changed (1): app.py +47 -68
app.py CHANGED
@@ -1,90 +1,69 @@
  import streamlit as st
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
- import torch
+ from llama_cpp import Llama
+ import os

- # App Configuration
- st.set_page_config(page_title="Klove AI ChatBox", page_icon="")
+ # Page configuration
+ st.set_page_config(page_title="Qwen3 GGUF Chat", page_icon="🤖")
+ st.title("🤖 Qwen3-1.7B (8-bit GGUF)")

- # 1. Load Qwen3 (Cached for efficiency)
+ # 1. Load Model (Cached to prevent reloading on every interaction)
  @st.cache_resource
- def load_qwen3():
-     model_id = "Qwen/Qwen3-1.7B"
-     tokenizer = AutoTokenizer.from_pretrained(model_id)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_id,
-         torch_dtype="auto",
-         device_map="auto"
+ def load_qwen_gguf():
+     # Quantized Qwen3 repo and 8-bit GGUF file on the Hugging Face Hub
+     repo_id = "Qwen/Qwen3-1.7B-GGUF"
+     filename = "Qwen3-1.7B-Q8_0.gguf"
+
+     # from_pretrained automatically downloads and caches the .gguf file
+     return Llama.from_pretrained(
+         repo_id=repo_id,
+         filename=filename,
+         n_ctx=4096,      # Context window (Qwen3 supports up to 32k, but 4k is safer for free RAM)
+         n_threads=2,     # Match the 2 vCPUs on the HF Free Tier
+         verbose=False    # Reduces log clutter
      )
-     return tokenizer, model
-
- tokenizer, model = load_qwen3()
-
- # 2. System Prompt Selection (Main UI, No Sidebar)
- st.title("⚡ Klove AI ChatBox")
-
- system_options = {
-     "General Assistant": "You are a helpful and concise assistant.",
-     "Python Expert": "You are an expert Python developer. Provide clean, efficient code with brief explanations.",
-     "Creative Storyteller": "You are a creative writer. Use vivid imagery and engaging narrative styles."
- }
-
- # Horizontal layout for the selector and a clear button
- col1, col2 = st.columns([3, 1])
- with col1:
-     selected_role = st.selectbox("Choose AI Personality:", list(system_options.keys()))
- with col2:
-     if st.button("Clear History", use_container_width=True):
-         st.session_state.messages = []
-         st.rerun()
-
- system_prompt = system_options[selected_role]
+
+ llm = load_qwen_gguf()

- # 3. Setup Session State
+ # 2. Chat History Initialization
  if "messages" not in st.session_state:
-     st.session_state.messages = []
+     st.session_state.messages = [
+         {"role": "system", "content": "You are a helpful AI assistant. Answer concisely and accurately."}
+     ]

- # Display Chat History
+ # Display existing chat messages
  for message in st.session_state.messages:
-     if message["role"] != "system":  # Hide system prompt from UI
+     if message["role"] != "system":
          with st.chat_message(message["role"]):
              st.markdown(message["content"])

- # 4. Chat Input & Streaming
- if prompt := st.chat_input("Message Qwen3..."):
-     # Insert system prompt if history is empty
-     if len(st.session_state.messages) == 0:
-         st.session_state.messages.append({"role": "system", "content": system_prompt})
-
-     # Add User Message
+ # 3. Chat Logic & Streaming
+ if prompt := st.chat_input("Ask Qwen3..."):
+     # Add user message to history
      st.session_state.messages.append({"role": "user", "content": prompt})
      with st.chat_message("user"):
          st.markdown(prompt)

-     # Generate Assistant Response
+     # Generate assistant response
      with st.chat_message("assistant"):
-         # Prepare input with chat template
-         input_ids = tokenizer.apply_chat_template(
-             st.session_state.messages,
-             tokenize=True,
-             add_generation_prompt=True,
-             return_tensors="pt"
-         ).to(model.device)
-
-         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-         generation_kwargs = dict(
-             input_ids=input_ids,
-             streamer=streamer,
-             max_new_tokens=1024,
-             temperature=0.7
+         # Use create_chat_completion for OpenAI-style streaming
+         stream = llm.create_chat_completion(
+             messages=st.session_state.messages,
+             stream=True,
+             max_tokens=1024,
+             temperature=0.7,
+             top_p=0.8,
+             repeat_penalty=1.1
          )
-
-         # Threaded generation for real-time streaming
-         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-         thread.start()
-
-         # Stream the output
-         full_response = st.write_stream(streamer)
+
+         # Generator for Streamlit streaming
+         def stream_response():
+             for chunk in stream:
+                 delta = chunk['choices'][0]['delta']
+                 if 'content' in delta:
+                     yield delta['content']
+
+         # write_stream handles the "typewriter" effect automatically
+         full_response = st.write_stream(stream_response())
+
+         # Save the assistant's final answer
          st.session_state.messages.append({"role": "assistant", "content": full_response})
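
A note on the n_ctx=4096 choice above: a back-of-envelope KV-cache estimate shows why the smaller window is kinder to RAM. The architecture numbers below (28 layers, 8 grouped-query KV heads, head dim 128 for Qwen3-1.7B) and the fp16 cache default are assumptions for illustration, not values taken from this commit; verify them against the model card.

# Rough KV-cache sizing behind the "4k is safer for free RAM" comment.
# Assumed for Qwen3-1.7B: 28 layers, 8 KV heads, head_dim 128, fp16 (2-byte) cache.
n_layers, n_kv_heads, head_dim, bytes_per_elem = 28, 8, 128, 2

for n_ctx in (4096, 32768):
    # 2x for the separate key and value tensors in every layer
    kv_bytes = 2 * n_layers * n_ctx * n_kv_heads * head_dim * bytes_per_elem
    print(f"n_ctx={n_ctx:>6}: ~{kv_bytes / 2**30:.2f} GiB KV cache")

# Under these assumptions: ~0.44 GiB at 4k vs ~3.50 GiB at 32k,
# on top of roughly 1.8 GB of Q8_0 weights for a 1.7B-parameter model.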
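
The 'content' in delta guard in stream_response() is load-bearing: with stream=True, llama-cpp-python yields OpenAI-style chunks in which the first delta typically carries only the role and the final one is empty. A minimal self-contained sketch of that behavior (the chunks list is fabricated for illustration):

# Fabricated chunks mimicking the OpenAI-style streaming shape.
chunks = [
    {"choices": [{"delta": {"role": "assistant"}}]},  # first chunk: role only
    {"choices": [{"delta": {"content": "Hello"}}]},
    {"choices": [{"delta": {"content": " there!"}}]},
    {"choices": [{"delta": {}}]},                     # final chunk: empty delta
]

def stream_response(stream):
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:  # skip role-only and empty deltas
            yield delta["content"]

assert "".join(stream_response(chunks)) == "Hello there!"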