SiennaClarke committed
Commit f292bd2 · verified · 1 Parent(s): 1d5a30d

Update app.py

Files changed (1)
  1. app.py +107 -59
app.py CHANGED
@@ -2,34 +2,76 @@ import streamlit as st
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
  from threading import Thread
  import torch
+ from pypdf import PdfReader
+ import io

- # 1. Page Configuration
- st.set_page_config(page_title="Qwen Chat", page_icon="🧠", layout="centered")
+ # 1. Page Configuration & Aesthetic Injection
+ st.set_page_config(page_title="Claude", page_icon="☁️", layout="wide")

- # Custom CSS for a cleaner "Claude-like" feel
+ # CSS to override Streamlit's look to match Claude's 2026 UI
  st.markdown("""
  <style>
- [data-testid="stSidebar"] {display: none;}
- .stChatMessage { border-radius: 10px; margin-bottom: 5px; }
- .stChatInputContainer { padding-bottom: 20px; }
+ /* Claude's specific background and fonts */
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');
+
+ .stApp {
+     background-color: #ffffff;
+     font-family: 'Inter', sans-serif;
+ }
+
+ /* Sidebar styling: Off-white with a very thin border */
+ [data-testid="stSidebar"] {
+     background-color: #f9f9f8 !important;
+     border-right: 1px solid #e5e5e5;
+     width: 300px !important;
+ }
+
+ /* The Main Chat Container (Centered) */
+ .block-container {
+     max-width: 850px;
+     padding-top: 3rem;
+ }
+
+ /* Message Bubbles: Claude uses a clean, borderless look */
+ .stChatMessage {
+     border: none !important;
+     padding: 1.5rem 0 !important;
+     background-color: transparent !important;
+ }
+
+ /* Assistant Message (Bot) specific styling */
+ [data-testid="chatAvatarAssistant"] {
+     background-color: #d97757 !important; /* Claude's signature orange */
+     border-radius: 6px !important;
+ }
+
+ /* Floating, Centered Chat Input */
+ [data-testid="stBottom"] {
+     background-color: white !important;
+     border-top: none !important;
+ }
+
+ .stChatInputContainer {
+     border: 1px solid #d1d1d1 !important;
+     border-radius: 14px !important;
+     box-shadow: 0 4px 24px rgba(0,0,0,0.06) !important;
+     max-width: 800px !important;
+     margin: 0 auto 20px auto !important;
+ }
+
+ /* Hide default Streamlit elements */
+ header {visibility: hidden;}
+ footer {visibility: hidden;}
  </style>
  """, unsafe_allow_html=True)

- st.title("Qwen 2.5 3B Chat 🚀")
- st.caption("A balanced, high-performance model for local CPU/GPU inference.")
-
- # 2. Optimized Model Loading
+ # 2. Optimized 2026 Local Model (Qwen 2.5 3B)
  @st.cache_resource
- def load_model():
-     # '3B' is the most feasible mid-point for modern laptops/PCs
+ def load_llm():
      model_id = "Qwen/Qwen2.5-3B-Instruct"
      tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-     # Auto-detect device (Use GPU if available, else CPU)
      device = "cuda" if torch.cuda.is_available() else "cpu"
-
-     # Use float16 for GPU or bfloat16 for modern CPUs to save memory
-     dtype = torch.float16 if device == "cuda" else torch.bfloat16
+     dtype = torch.bfloat16

      model = AutoModelForCausalLM.from_pretrained(
          model_id,
@@ -38,63 +80,69 @@ def load_model():
      )
      return model, tokenizer, device

- model, tokenizer, device = load_model()
+ model, tokenizer, device = load_llm()

- # 3. Session State for Chat History
+ # 3. Sidebar: "Projects" & Document Upload
+ with st.sidebar:
+     st.markdown("<div style='font-size:1.1rem; font-weight:600; color:#1a1a1a; margin-bottom:1rem;'>Claude 3.5 Sonnet</div>", unsafe_allow_html=True)
+     if st.button("+ Start New Chat", use_container_width=True):
+         st.session_state.messages = []
+         st.session_state.pdf_text = ""
+         st.rerun()
+
+     st.divider()
+     st.caption("UPLOAD DOCUMENTS")
+     uploaded_file = st.file_uploader("Drop a PDF here to analyze", type="pdf")
+
+     if uploaded_file:
+         reader = PdfReader(uploaded_file)
+         text = "".join([page.extract_text() for page in reader.pages])
+         st.session_state.pdf_text = text
+         st.success(f"Attached: {uploaded_file.name}")
+
+     st.divider()
+     st.caption("RECENT CHATS")
+     st.markdown("📝 **LOS and LMS Training**")
+
+ # 4. Main Chat Interface Logic
  if "messages" not in st.session_state:
      st.session_state.messages = []
+ if "pdf_text" not in st.session_state:
+     st.session_state.pdf_text = ""
+
+ # Empty State: Landing screen
+ if not st.session_state.messages:
+     st.markdown("<div style='height: 12vh;'></div>", unsafe_allow_html=True)
+     st.markdown("<h1 style='text-align: center; font-weight: 500; color: #1a1a1a;'>How can I help you today?</h1>", unsafe_allow_html=True)

- # Display Chat History
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
+ # Render Chat History
+ for msg in st.session_state.messages:
+     with st.chat_message(msg["role"]):
+         st.markdown(msg["content"])

- # 4. Chat Input & Streaming Logic
- if prompt := st.chat_input("How can I help you today?"):
-     # Add user message to history
+ # 5. Chat Input & Generation
+ if prompt := st.chat_input("Ask anything..."):
      st.session_state.messages.append({"role": "user", "content": prompt})
-
      with st.chat_message("user"):
          st.markdown(prompt)

      with st.chat_message("assistant"):
-         # Setup Streamer
          streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

-         # Format conversation using the model's chat template
-         # Limit history to the last 5 turns (10 messages) to prevent CPU slowdown
-         context_messages = st.session_state.messages[-10:]
-         full_prompt = [{"role": "system", "content": "You are Qwen, a helpful and concise AI assistant."}] + context_messages
+         # Build context (System Prompt + PDF Content + History)
+         sys_msg = "You are a world-class AI assistant. Be helpful and professional."
+         if st.session_state.pdf_text:
+             sys_msg += f"\n\nContext from uploaded document:\n{st.session_state.pdf_text[:5000]}"  # Limit to first 5k chars for speed
+
+         messages = [{"role": "system", "content": sys_msg}] + st.session_state.messages[-8:]

-         model_inputs = tokenizer.apply_chat_template(
-             full_prompt,
-             tokenize=True,
-             add_generation_prompt=True,
-             return_tensors="pt"
-         ).to(device)
-
-         # Generation Arguments
-         generation_kwargs = dict(
-             input_ids=model_inputs,
-             streamer=streamer,
-             max_new_tokens=1024,
-             do_sample=True,
-             temperature=0.7,
-             top_p=0.8,
-             repetition_penalty=1.1,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-         # Start thread
-         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-         thread.start()
+         inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(device)

-         # Stream the output
-         response_container = st.empty()
-         full_response = ""
+         gen_kwargs = dict(input_ids=inputs, streamer=streamer, max_new_tokens=1024, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tokenizer.eos_token_id)
+
+         thread = Thread(target=model.generate, kwargs=gen_kwargs)
+         thread.start()

-         # Use st.write_stream for a native feel
          full_response = st.write_stream(streamer)

-         # Save assistant response to history
          st.session_state.messages.append({"role": "assistant", "content": full_response})
 
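A note on the streaming pattern this commit keeps from the old version: `model.generate()` blocks until the entire completion is produced, so the app starts it on a background `Thread` and lets `TextIteratorStreamer`, which is a plain Python iterator, hand decoded text to `st.write_stream` on the UI thread as tokens arrive. Below is a minimal standalone sketch of that pattern, assuming the same Qwen/Qwen2.5-3B-Instruct checkpoint; the hard-coded prompt is illustrative only.

```python
# Minimal sketch of the thread + streamer pattern used in app.py.
from threading import Thread

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# The streamer is an iterator: generate() pushes decoded chunks into it from
# the worker thread while the UI thread drains it below.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Say hello."}],  # illustrative prompt
    add_generation_prompt=True,
    return_tensors="pt",
)

# generate() blocks until the last token, so it runs off the UI thread.
Thread(target=model.generate, kwargs=dict(input_ids=inputs, streamer=streamer, max_new_tokens=64)).start()

# st.write_stream renders chunks as they arrive and returns the full string.
reply = st.write_stream(streamer)
```

Without the background thread, the UI could not render anything until generation had finished.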
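To try the commit locally, the imports imply a minimal dependency set of streamlit (1.31 or newer, which introduced `st.write_stream`), transformers, torch, and pypdf; this is inferred from the code above, as the diff does not pin versions. `streamlit run app.py` then starts the UI; the first call to `load_llm()` downloads the Qwen2.5-3B-Instruct weights, roughly 6 GB in bfloat16.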