GhufranAI committed on
Commit
1b9aa7b
·
verified ·
1 Parent(s): bb07c8b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +319 -0
app.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Advanced RAG System - Streamlit Web UI
3
+ ==========================================
4
+
5
+ Professional web interface with real-time chat and document management.
6
+
7
+ Run with:
8
+ streamlit run app.py
9
+
10
+ Make sure to have the main AdvancedRAGSystem code in a file named 'advanced_rag.py'
11
+ """
12
+
13
+ import streamlit as st
14
+ import os
15
+ from datetime import datetime
16
+ from pathlib import Path
17
+
# Import the RAG system (expected in 'advanced_rag.py' next to this file).
# Only ImportError is handled: it covers a missing module as well as missing
# dependencies of that module. Any other failure raised while importing
# advanced_rag is left to propagate so its real traceback is visible instead
# of being hidden behind a "file not found" style message.
RAG_IMPORT_ERROR = None
try:
    from advanced_rag import AdvancedRAGSystem, Config
    SYSTEM_AVAILABLE = True
except ImportError as exc:
    SYSTEM_AVAILABLE = False
    RAG_IMPORT_ERROR = exc  # kept so the UI can report why the import failed

# ═══════════════════════════════════════════════════════════════════════════
# PAGE CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════

# Page configuration is issued before any other Streamlit command: Streamlit
# releases that enforce "set_page_config first" raise if an element (such as
# the import error below) is rendered ahead of it.
st.set_page_config(
    page_title="Advanced RAG System 2025",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Report the failed import only now that the page has been configured.
if not SYSTEM_AVAILABLE:
    st.error("⚠️ Please save the Advanced RAG System code as 'advanced_rag.py' in the same directory")
    st.caption(f"Import error: {RAG_IMPORT_ERROR}")
37
+
# Custom CSS
# The stylesheet lives in a named constant and is injected with a single
# st.markdown call; unsafe_allow_html=True is passed exactly as before so the
# <style> element reaches the page as markup.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
padding: 1rem 0;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 2rem;
}
.chat-message {
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
.user-message {
background-color: #e3f2fd;
border-left: 4px solid #2196f3;
}
.assistant-message {
background-color: #f5f5f5;
border-left: 4px solid #4caf50;
}
.source-box {
background-color: #fff3cd;
padding: 0.5rem;
border-radius: 0.25rem;
border-left: 3px solid #ffc107;
margin: 0.25rem 0;
}
.stat-box {
background-color: #f0f8ff;
padding: 1rem;
border-radius: 0.5rem;
text-align: center;
}
</style>
"""

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
82
+
# ═══════════════════════════════════════════════════════════════════════════
# SESSION STATE INITIALIZATION
# ═══════════════════════════════════════════════════════════════════════════

# Each key is written only when it is absent from st.session_state, so values
# already stored for the session are never overwritten. Factories (rather
# than shared literals) give every list-valued key its own fresh list.
for _state_key, _make_default in (
    ('system', lambda: None),        # the AdvancedRAGSystem instance, once created
    ('chat_history', list),          # one dict per question/answer exchange
    ('documents_loaded', list),      # names of the files ingested so far
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
95
+
# ═══════════════════════════════════════════════════════════════════════════
# SIDEBAR - CONFIGURATION & DOCUMENT MANAGEMENT
# ═══════════════════════════════════════════════════════════════════════════

with st.sidebar:
    st.markdown("## ⚙️ Configuration")

    # API token entry. The panel is expanded for as long as no system has
    # been stored in the session.
    with st.expander("🔑 Hugging Face Token", expanded=not st.session_state.system):
        hf_token = st.text_input(
            "Enter your token",
            type="password",
            help="Get your token from https://huggingface.co/settings/tokens"
        )

        if st.button("Initialize System", disabled=not hf_token):
            if SYSTEM_AVAILABLE:
                with st.spinner("Initializing Advanced RAG System..."):
                    try:
                        st.session_state.system = AdvancedRAGSystem(token=hf_token)
                        st.success("✅ System initialized!")
                    except Exception as e:
                        # UI boundary: show the failure instead of crashing the app.
                        st.error(f"❌ Initialization failed: {e}")
            else:
                st.error("System code not available")

    st.markdown("---")

    # Document upload
    st.markdown("## 📁 Document Management")

    uploaded_files = st.file_uploader(
        "Upload documents",
        type=['pdf', 'txt'],
        accept_multiple_files=True,
        help="Upload PDF or TXT files to add to the knowledge base"
    )

    if st.button("Process Documents", disabled=not uploaded_files or not st.session_state.system):
        if uploaded_files and st.session_state.system:
            with st.spinner("Processing documents..."):
                try:
                    # Uploaded files are written to disk because
                    # ingest_documents is handed file paths.
                    temp_dir = Path("temp_uploads")
                    temp_dir.mkdir(exist_ok=True)

                    file_paths = []
                    for uploaded_file in uploaded_files:
                        # The file name comes from the client. Keep only its
                        # final component (for both separator styles) so a
                        # name such as "../../x.txt" cannot escape temp_dir.
                        safe_name = Path(uploaded_file.name.replace("\\", "/")).name
                        if safe_name in ("", ".", ".."):
                            raise ValueError(f"Invalid file name: {uploaded_file.name!r}")
                        file_path = temp_dir / safe_name
                        file_path.write_bytes(uploaded_file.getbuffer())
                        file_paths.append(str(file_path))

                    # Ingest documents
                    st.session_state.system.ingest_documents(file_paths)
                    st.session_state.documents_loaded.extend([f.name for f in uploaded_files])

                    st.success(f"✅ Processed {len(uploaded_files)} documents!")

                except Exception as e:
                    # UI boundary: report the problem and keep the app usable.
                    st.error(f"❌ Error processing documents: {e}")

    # Show loaded documents
    if st.session_state.documents_loaded:
        st.markdown("### 📚 Loaded Documents")
        for doc in st.session_state.documents_loaded:
            st.markdown(f"- {doc}")

    st.markdown("---")

    # Advanced options. These names are read later by the chat section of
    # the script (a `with` block does not create a new scope).
    with st.expander("🔧 Advanced Options"):
        use_multi_query = st.checkbox("Multi-Query Retrieval", value=True,
                                      help="Generate multiple query variations (improves accuracy)")
        use_reranking = st.checkbox("Re-ranking", value=True,
                                    help="Re-rank results using cross-encoder (40% better)")
        show_sources = st.checkbox("Show Source Details", value=True)
        show_queries = st.checkbox("Show Generated Queries", value=False)

    # Reset button: clears the stored chat history and, when a system exists,
    # asks it to reset its own conversation state before rerunning.
    if st.button("🔄 Reset Conversation"):
        if st.session_state.system:
            st.session_state.system.reset_conversation()
        st.session_state.chat_history = []
        st.rerun()

    st.markdown("---")

    # Stats
    if st.session_state.system:
        st.markdown("### 📊 Statistics")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Documents", len(st.session_state.documents_loaded))
        with col2:
            st.metric("Messages", len(st.session_state.chat_history))
192
+
# ═══════════════════════════════════════════════════════════════════════════
# MAIN AREA - HEADER
# ═══════════════════════════════════════════════════════════════════════════

# Title and subtitle are static markup styled by the .main-header and
# .sub-header CSS classes, hence unsafe_allow_html=True.
st.markdown('<div class="main-header">🤖 Advanced RAG System 2025</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">State-of-the-art Retrieval-Augmented Generation with Multi-Query, Hybrid Search & Re-ranking</div>', unsafe_allow_html=True)

# System status indicator
# NOTE(review): the model names below are hard-coded text, not read from the
# running system - confirm they match what advanced_rag actually loads.
if st.session_state.system:
    st.success("✅ System Active | Models: meta-llama/Llama-3.1-8B (LLM) + all-MiniLM-L6-v2 (Embeddings)")
else:
    st.warning("⚠️ Please initialize the system in the sidebar")

st.markdown("---")
207
+
# ═══════════════════════════════════════════════════════════════════════════
# MAIN AREA - CHAT INTERFACE
# ═══════════════════════════════════════════════════════════════════════════

import html  # needed here to escape text that is placed inside raw HTML


def _escape(text):
    """Return ``text`` as a string with ``&``, ``<`` and ``>`` HTML-escaped."""
    return html.escape(str(text), quote=False)


def _chat_bubble(css_class, speaker, text):
    """Return the HTML for one chat bubble.

    Args:
        css_class: Extra CSS class for the bubble ("user-message" or
            "assistant-message").
        speaker: Label shown in bold before the text.
        text: Message body. It is escaped because the bubble is rendered with
            ``unsafe_allow_html=True`` and the text comes from the user or
            from the model, so it must not be interpreted as markup.

    The template lines carry no leading indentation, so a multi-line ``text``
    cannot change how the surrounding markup is dedented.
    """
    return (
        f'<div class="chat-message {css_class}">\n'
        f"<strong>{speaker}:</strong><br>\n"
        f"{_escape(text)}\n"
        "</div>"
    )


# Display chat history
chat_container = st.container()

with chat_container:
    for message in st.session_state.chat_history:
        # User message
        st.markdown(
            _chat_bubble("user-message", "🧑 You", message['question']),
            unsafe_allow_html=True,
        )

        # Assistant message
        st.markdown(
            _chat_bubble("assistant-message", "🤖 Assistant", message['answer']),
            unsafe_allow_html=True,
        )

        # Sources: file name plus the first 200 characters of each document.
        if show_sources and 'sources' in message:
            with st.expander(f"📚 Sources ({message['num_sources']} documents)"):
                for i, doc in enumerate(message['sources'], 1):
                    source = _escape(doc.metadata.get('filename', 'Unknown'))
                    excerpt = _escape(doc.page_content[:200])
                    st.markdown(
                        '<div class="source-box">\n'
                        f"<strong>Source {i}:</strong> {source}<br>\n"
                        f"<em>{excerpt}...</em>\n"
                        "</div>",
                        unsafe_allow_html=True,
                    )

        # Generated queries (only shown when more than one query was used).
        # Rendered as plain markdown, so no HTML escaping is required here.
        if show_queries and 'queries_used' in message and len(message['queries_used']) > 1:
            with st.expander(f"🔍 Generated Queries ({len(message['queries_used'])})"):
                for i, query in enumerate(message['queries_used'], 1):
                    st.markdown(f"{i}. {query}")
250
+
# Chat input
st.markdown("---")

if st.session_state.system:
    user_input = st.chat_input("Ask a question about your documents...")

    # Ignore submissions that contain nothing but whitespace.
    if user_input and user_input.strip():
        with st.spinner("🤔 Thinking..."):
            try:
                # Query the system with the retrieval options chosen in the sidebar
                result = st.session_state.system.query(
                    user_input,
                    use_multi_query=use_multi_query,
                    use_reranking=use_reranking
                )

                # Build the history entry inside the try so that a result
                # missing an expected key is reported like any other failure.
                entry = {
                    'question': user_input,
                    'answer': result['answer'],
                    'sources': result['sources'],
                    'num_sources': result['num_sources'],
                    'queries_used': result['queries_used'],
                    'timestamp': datetime.now().isoformat()
                }
            except Exception as e:
                # UI boundary: show the failure and keep the app running.
                st.error(f"❌ Error: {e}")
            else:
                # Add to chat history, then rerun so the history section
                # (rendered earlier in the script) shows the new exchange.
                # The rerun is requested outside the try body so the control
                # flow signal it raises never passes through the handler.
                st.session_state.chat_history.append(entry)
                st.rerun()
else:
    st.info("👈 Initialize the system in the sidebar to start chatting")
285
+
# ═══════════════════════════════════════════════════════════════════════════
# FOOTER
# ═══════════════════════════════════════════════════════════════════════════

st.markdown("---")

# Three equal columns of static informational text.
col1, col2, col3 = st.columns(3)

with col1:
    st.markdown("""
    **2025 Features:**
    - ✅ Multi-Query Retrieval
    - ✅ Hybrid Search
    - ✅ Re-ranking
    """)

with col2:
    st.markdown("""
    **Technologies:**
    - LangChain
    - Hugging Face
    - ChromaDB
    """)

with col3:
    # NOTE(review): all three links point at "#" placeholders - fill in real URLs.
    st.markdown("""
    **Links:**
    - [GitHub](#)
    - [Documentation](#)
    - [Report Issue](#)
    """)

st.markdown("---")
st.markdown("<div style='text-align: center; color: #666;'>Built with ❤️ using state-of-the-art 2025 techniques</div>", unsafe_allow_html=True)