cryogenic22 committed on
Commit
cf3279a
·
verified ·
1 Parent(s): 62f0b1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +257 -74
app.py CHANGED
@@ -14,6 +14,7 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain_community.chat_models import ChatOpenAI
15
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
16
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage
 
17
  import tempfile
18
  from utils.database import (
19
  create_connection,
@@ -33,7 +34,8 @@ from utils.database import (
33
  get_chat_messages,
34
  get_document_tags,
35
  add_document_tags,
36
- delete_collection)
 
37
 
38
 
39
  @dataclass
@@ -49,73 +51,54 @@ class SessionState:
49
  reinitialize_chat: bool = False
50
 
51
 
52
- def display_header():
53
- """Display the application header with navigation."""
54
- # Add custom CSS for header styling
55
- st.markdown(
56
- """
57
- <style>
58
- .stButton > button {
59
- width: 100%;
60
- margin-bottom: 0;
61
- }
62
- .header-button {
63
- margin: 0 5px;
64
- }
65
- </style>
66
- """,
67
- unsafe_allow_html=True
68
- )
69
-
70
- # Create header layout
71
- header_container = st.container()
72
- with header_container:
73
- # Main header row
74
- col1, col2, col3, col4, col5, col6 = st.columns([1.5, 2.5, 1, 1, 1, 1])
75
-
76
- # Logo
77
- with col1:
78
- if os.path.exists("img/logo.png"):
79
- st.image("img/logo.png", width=150)
80
- else:
81
- st.info("Logo missing: img/logo.png")
82
-
83
- # Title
84
- with col2:
85
- st.markdown("##### Synaptyx RFP Analyzer Agent")
86
-
87
- # Navigation Buttons
88
- with col3:
89
- if st.button("🏠 Home", use_container_width=True, key="home_btn"):
90
- st.session_state.chat_ready = False
91
- st.session_state.messages = []
92
- st.session_state.current_chat_id = None
93
- st.session_state.show_explorer = False
94
- st.rerun()
95
-
96
- with col4:
97
- if st.button("📚 Explorer", use_container_width=True, key="explorer_btn"):
98
- st.session_state.show_explorer = True
99
- st.session_state.chat_ready = False
100
- st.rerun()
101
-
102
- with col5:
103
- if st.session_state.chat_ready:
104
- if st.button("💭 New Chat", use_container_width=True, key="chat_btn"):
105
- st.session_state.messages = []
106
- st.session_state.current_chat_id = None
107
- st.rerun()
108
-
109
- with col6:
110
- if st.button("📁 Upload", use_container_width=True, key="upload_btn"):
111
- st.session_state.show_collection_dialog = True
112
- st.rerun()
113
-
114
- # Add divider after header
115
- st.divider()
116
 
117
 
118
- async def process_document(file_path: str, collection_id: Optional[int] = None) -> Tuple[List, str]:
119
  """Process a document with automatic tagging."""
120
  try:
121
  # Load PDF
@@ -126,7 +109,7 @@ async def process_document(file_path: str, collection_id: Optional[int] = None)
126
  full_content = "\n".join(doc.page_content for doc in documents)
127
 
128
  # Generate tags
129
- tags = await generate_document_tags(full_content)
130
 
131
  # Create text splitter for chunks
132
  text_splitter = RecursiveCharacterTextSplitter(
@@ -153,7 +136,7 @@ async def process_document(file_path: str, collection_id: Optional[int] = None)
153
  return [], "", []
154
 
155
 
156
- async def handle_document_upload(uploaded_files: List, collection_id: Optional[int] = None) -> bool:
157
  """Handle document upload with progress tracking and auto-tagging."""
158
  try:
159
  progress_container = st.empty()
@@ -183,7 +166,7 @@ async def handle_document_upload(uploaded_files: List, collection_id: Optional[i
183
  tmp_file.flush()
184
 
185
  # Process document with tagging
186
- chunks, content, tags = await process_document(tmp_file.name, collection_id)
187
 
188
  # Store in database
189
  doc_id = insert_document(st.session_state.db_conn, uploaded_file.name, content)
@@ -228,6 +211,72 @@ async def handle_document_upload(uploaded_files: List, collection_id: Optional[i
228
  return False
229
 
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  def display_collection_management():
232
  """Display collection management interface."""
233
  st.header("📁 Collection Management")
@@ -271,7 +320,7 @@ def display_collection_management():
271
  if uploaded_files:
272
  collection_id = selected_collection[1]
273
  with st.spinner("Processing documents..."):
274
- if await handle_document_upload(uploaded_files, collection_id=collection_id):
275
  st.success("Documents added to collection successfully!")
276
  st.rerun()
277
 
@@ -350,6 +399,140 @@ def display_chat_interface():
350
  st.rerun()
351
 
352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  def main():
354
  """Main application function with improved state management."""
355
  st.set_page_config(
@@ -358,7 +541,7 @@ def main():
358
  initial_sidebar_state="collapsed"
359
  )
360
 
361
- # Initialize session state
362
  initialize_session_state()
363
 
364
  # Initialize database connection
@@ -372,13 +555,13 @@ def main():
372
  # Display header
373
  display_header()
374
 
375
- # Show collection management if triggered
376
  if st.session_state.show_collection_dialog:
377
  display_collection_management()
378
- # Display chat interface if ready
379
  elif st.session_state.chat_ready:
380
  display_chat_interface()
381
- # Show welcome screen
 
382
  else:
383
  display_welcome_screen()
384
 
 
14
  from langchain_community.chat_models import ChatOpenAI
15
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
16
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage
17
+ from langchain_community.document_loaders import PyPDFLoader
18
  import tempfile
19
  from utils.database import (
20
  create_connection,
 
34
  get_chat_messages,
35
  get_document_tags,
36
  add_document_tags,
37
+ delete_collection,
38
+ insert_document)
39
 
40
 
41
  @dataclass
 
51
  reinitialize_chat: bool = False
52
 
53
 
54
def initialize_session_state():
    """Seed ``st.session_state`` with defaults on the first run of a session.

    Does nothing once the ``initialized`` key is present. On the first run it
    chooses the data directory (``/data`` when that path exists, otherwise the
    relative ``data``), creates it together with its ``vector_stores``
    subdirectory, and stores the ``SessionState`` defaults, both paths, and
    the ``initialized`` / ``show_explorer`` flags in the session state.
    """
    template = SessionState()
    if 'initialized' in st.session_state:
        return

    # Resolve the storage locations and make sure they exist on disk.
    base_dir = Path('/data') if os.path.exists('/data') else Path('data')
    store_dir = base_dir / 'vector_stores'
    for directory in (base_dir, store_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # A missing message list is replaced by a fresh empty one.
    history = template.messages if template.messages is not None else []

    initial_values = {
        'show_collection_dialog': template.show_collection_dialog,
        'selected_collection': template.selected_collection,
        'chat_ready': template.chat_ready,
        'messages': history,
        'current_chat_id': template.current_chat_id,
        'vector_store': template.vector_store,
        'qa_system': template.qa_system,
        'reinitialize_chat': template.reinitialize_chat,
        'initialized': True,
        'data_path': base_dir,
        'vector_store_path': store_dir,
        'show_explorer': False,
    }
    st.session_state.update(initial_values)
81
+
82
+
83
def generate_document_tags(content: str) -> List[str]:
    """Generate tags for a document using an OpenAI chat model.

    Only the first 2000 characters of ``content`` are sent to the model, which
    is asked to answer with a comma-separated list of tags.

    Args:
        content: Full text of the document.

    Returns:
        The tags with surrounding whitespace stripped and empty entries
        removed. An empty list is returned (after reporting the problem with
        ``st.error``) if anything fails.
    """
    try:
        llm = ChatOpenAI(temperature=0.2, model="gpt-3.5-turbo")

        prompt = """Analyze the following document content and generate relevant tags/keywords.
        Focus on key themes, topics, and important terminology.
        Return only the tags as a comma-separated list.
        Content: {content}"""

        # Use first 2000 chars
        response = llm.invoke(prompt.format(content=content[:2000]))

        # A chat model returns a message object whose text is in ``.content``;
        # calling ``.split`` on the message itself raises AttributeError, which
        # the handler below would turn into "no tags". Plain strings are still
        # accepted in case a text-completion model is substituted.
        raw_text = response if isinstance(response, str) else response.content

        # Skip blanks produced by trailing or doubled commas.
        return [tag for tag in (part.strip() for part in raw_text.split(',')) if tag]
    except Exception as e:
        st.error(f"Error generating tags: {e}")
        return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
+ def process_document(file_path: str, collection_id: Optional[int] = None) -> Tuple[List, str, List[str]]:
102
  """Process a document with automatic tagging."""
103
  try:
104
  # Load PDF
 
109
  full_content = "\n".join(doc.page_content for doc in documents)
110
 
111
  # Generate tags
112
+ tags = generate_document_tags(full_content)
113
 
114
  # Create text splitter for chunks
115
  text_splitter = RecursiveCharacterTextSplitter(
 
136
  return [], "", []
137
 
138
 
139
+ def handle_document_upload(uploaded_files: List, collection_id: Optional[int] = None) -> bool:
140
  """Handle document upload with progress tracking and auto-tagging."""
141
  try:
142
  progress_container = st.empty()
 
166
  tmp_file.flush()
167
 
168
  # Process document with tagging
169
+ chunks, content, tags = process_document(tmp_file.name, collection_id)
170
 
171
  # Store in database
172
  doc_id = insert_document(st.session_state.db_conn, uploaded_file.name, content)
 
211
  return False
212
 
213
 
214
def display_header():
    """Display the application header with navigation.

    Renders a small CSS block, then a six-column row holding the logo, the
    title and the Home / Explorer / New Chat / Upload buttons, followed by a
    divider. Each button updates ``st.session_state`` and triggers a rerun.
    """
    # Custom CSS so the header buttons fill their columns.
    st.markdown(
        """
        <style>
        .stButton > button {
            width: 100%;
            margin-bottom: 0;
        }
        .header-button {
            margin: 0 5px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    header_container = st.container()
    with header_container:
        # Main header row
        col1, col2, col3, col4, col5, col6 = st.columns([1.5, 2.5, 1, 1, 1, 1])

        # Logo (falls back to a notice when the image file is absent)
        with col1:
            if os.path.exists("img/logo.png"):
                st.image("img/logo.png", width=150)
            else:
                st.info("Logo missing: img/logo.png")

        # Title
        with col2:
            st.markdown("##### Synaptyx RFP Analyzer Agent")

        # Navigation buttons
        with col3:
            if st.button("🏠 Home", use_container_width=True, key="home_btn"):
                st.session_state.chat_ready = False
                st.session_state.messages = []
                st.session_state.current_chat_id = None
                st.session_state.show_explorer = False
                # main() tests show_collection_dialog before any other view,
                # so it must be cleared or Home cannot leave that view.
                st.session_state.show_collection_dialog = False
                st.rerun()

        with col4:
            if st.button("📚 Explorer", use_container_width=True, key="explorer_btn"):
                st.session_state.show_explorer = True
                st.session_state.chat_ready = False
                # Same reason as Home: the collection view would otherwise
                # take precedence over the explorer.
                st.session_state.show_collection_dialog = False
                st.rerun()

        with col5:
            # "New Chat" is only offered while a chat is active.
            if st.session_state.chat_ready:
                if st.button("💭 New Chat", use_container_width=True, key="chat_btn"):
                    st.session_state.messages = []
                    st.session_state.current_chat_id = None
                    st.rerun()

        with col6:
            if st.button("📁 Upload", use_container_width=True, key="upload_btn"):
                st.session_state.show_collection_dialog = True
                st.rerun()

    # Add divider after header
    st.divider()
278
+
279
+
280
  def display_collection_management():
281
  """Display collection management interface."""
282
  st.header("📁 Collection Management")
 
320
  if uploaded_files:
321
  collection_id = selected_collection[1]
322
  with st.spinner("Processing documents..."):
323
+ if handle_document_upload(uploaded_files, collection_id=collection_id):
324
  st.success("Documents added to collection successfully!")
325
  st.rerun()
326
 
 
399
  st.rerun()
400
 
401
 
402
def display_welcome_screen():
    """Display welcome screen with quick actions.

    Left column: optional collection picker, a button that opens the
    collection dialog, and a PDF uploader. Right column: one expander per
    existing collection and per recent document, each with a "Start Chat"
    button.

    NOTE(review): the nesting of the sections below was reconstructed from a
    flattened view of this function; confirm it against the real file.
    """
    st.header("Quick Start")

    col1, col2 = st.columns([3, 2])

    with col1:
        # Upload new documents
        st.markdown("### Upload Documents")
        collection_id = None
        collections = get_collections(st.session_state.db_conn)

        if collections:
            # Options are (label, id) pairs; the first pair means "no collection".
            selected_collection = st.selectbox(
                "Select Collection (Optional)",
                options=[("None", None)] + [(c["name"], c["id"]) for c in collections],
                format_func=lambda x: x[0]
            )
            collection_id = selected_collection[1] if selected_collection[0] != "None" else None

        # Add new collection button
        if st.button("Create New Collection", use_container_width=True):
            st.session_state.show_collection_dialog = True
            st.rerun()

        uploaded_files = st.file_uploader(
            "Upload Documents",
            type=['pdf'],
            accept_multiple_files=True,
            help="Upload PDF documents to start analyzing"
        )

        if uploaded_files:
            with st.spinner("Processing documents..."):
                if handle_document_upload(uploaded_files, collection_id=collection_id):
                    # Rerun only when the chat system came up, as the other
                    # call sites below do. Rerunning unconditionally returns
                    # to this screen with the files still in the uploader and
                    # processes them again.
                    # Assumes initialize_chat_system returns a truthy value on
                    # success — TODO confirm.
                    if initialize_chat_system(collection_id):
                        st.rerun()

    with col2:
        # Display existing collections
        st.header("Collections")
        if collections:
            for collection in collections:
                with st.expander(f"📁 {collection['name']} ({collection['doc_count']} documents)"):
                    st.write(collection.get('description', ''))
                    if st.button("Start Chat", key=f"chat_{collection['id']}", use_container_width=True):
                        st.session_state.selected_collection = collection
                        if initialize_chat_system(collection['id']):
                            st.rerun()

        # Show recent documents
        st.header("Recent Documents")
        recent_docs = get_recent_documents(st.session_state.db_conn, limit=5)
        for doc in recent_docs:
            with st.expander(f"📄 {doc['name']}"):
                st.caption(f"Upload date: {doc['upload_date']}")
                if doc['collections']:
                    st.caption(f"Collections: {', '.join(doc['collections'])}")
                if st.button("Start Chat", key=f"doc_{doc['id']}", use_container_width=True):
                    # NOTE(review): the chat is started without reference to
                    # the chosen document — confirm this is intended.
                    if initialize_chat_system():
                        st.rerun()
463
+
464
+
465
def display_document_chunks():
    """Display document chunks with search and filtering capabilities.

    Lets the user pick a document, optionally enter a search query, and then
    lists chunks returned by the vector store held in
    ``st.session_state.vector_store``. Any exception raised while doing so is
    reported with ``st.error``.
    """
    st.subheader("Document Chunk Explorer")

    # Get all documents
    documents = get_all_documents(st.session_state.db_conn)
    if not documents:
        st.info("No documents available.")
        return

    # Document selection
    # NOTE(review): the selection only gates the early return below; the
    # chunks listed are not filtered by it.
    selected_doc = st.selectbox(
        "Select Document",
        options=documents,
        format_func=lambda x: x['name']
    )
    if not selected_doc:
        return

    try:
        # NOTE(review): the embeddings model is fetched but not used in this
        # function; kept so any error it raises is still surfaced below.
        embeddings = get_embeddings_model()
        chunks = []

        # Search functionality
        search_query = st.text_input("🔍 Search within chunks")

        if search_query and st.session_state.vector_store:
            chunks = st.session_state.vector_store.similarity_search(search_query, k=5)
        elif st.session_state.vector_store:
            # No query: request up to 100 chunks using an empty search string.
            chunks = st.session_state.vector_store.similarity_search("", k=100)

        # Display chunks with metadata
        st.markdown("### Document Chunks")

        # Filtering options
        col1, col2 = st.columns(2)
        with col1:
            chunk_size = st.slider("Preview Size", 100, 1000, 500)
        with col2:
            # NOTE(review): the chosen sort order is not applied anywhere yet.
            sort_by = st.selectbox("Sort By", ["Relevance", "Position"])

        # Display chunks in an organized way
        for i, chunk in enumerate(chunks):
            with st.expander(f"Chunk {i+1} | Source: {chunk.metadata.get('source', 'Unknown')}"):
                # Content preview, truncated to the slider value
                st.markdown("**Content:**")
                st.text(chunk.page_content[:chunk_size] + "..." if len(chunk.page_content) > chunk_size else chunk.page_content)

                # Metadata
                st.markdown("**Metadata:**")
                for key, value in chunk.metadata.items():
                    st.text(f"{key}: {value}")

                # Actions
                col1, col2 = st.columns(2)
                with col1:
                    if st.button("Copy", key=f"copy_{i}"):
                        # NOTE(review): nothing is placed on the clipboard
                        # here; only this message is shown.
                        st.write("Content copied to clipboard!")
                with col2:
                    if st.button("Start Chat", key=f"chat_{i}"):
                        # Seed the conversation and rerun only when the chat
                        # system initialized, matching the other callers that
                        # check this result.
                        # Assumes a truthy return means success — TODO confirm.
                        if initialize_chat_system():
                            st.session_state.messages.append(
                                HumanMessage(content=f"Tell me about: {chunk.page_content[:100]}...")
                            )
                            st.rerun()

    except Exception as e:
        st.error(f"Error loading document chunks: {e}")
534
+
535
+
536
  def main():
537
  """Main application function with improved state management."""
538
  st.set_page_config(
 
541
  initial_sidebar_state="collapsed"
542
  )
543
 
544
+ # Initialize session state with paths
545
  initialize_session_state()
546
 
547
  # Initialize database connection
 
555
  # Display header
556
  display_header()
557
 
558
+ # Show different views based on application state
559
  if st.session_state.show_collection_dialog:
560
  display_collection_management()
 
561
  elif st.session_state.chat_ready:
562
  display_chat_interface()
563
+ elif st.session_state.show_explorer:
564
+ display_document_chunks()
565
  else:
566
  display_welcome_screen()
567