dnj0 committed on
Commit
f482e1d
·
verified ·
1 Parent(s): a6680e7

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +74 -135
src/app.py CHANGED
@@ -1,33 +1,21 @@
1
- """
2
- Multimodal RAG LLM System - Streamlit App
3
- Complete working version with VISUAL image analysis using gpt-4o
4
- """
5
 
6
  import streamlit as st
7
  import os
8
  from pathlib import Path
9
 
10
- # Import optimized versions
11
  from pdf_parser import PDFParser
12
  from vector_store import VectorStore
13
- from rag_system import VisualMultimodalRAG # NEW - Vision model
14
  from config import UPLOAD_FOLDER, MAX_PDF_SIZE_MB
15
 
16
 
17
- # ============================================================================
18
- # PAGE CONFIGURATION
19
- # ============================================================================
20
-
21
  st.set_page_config(
22
- page_title="📄 Multimodal RAG LLM System",
23
- page_icon="🤖",
24
  layout="wide",
25
  initial_sidebar_state="expanded"
26
  )
27
 
28
- # ============================================================================
29
- # SESSION STATE INITIALIZATION
30
- # ============================================================================
31
 
32
  if 'api_key_set' not in st.session_state:
33
  st.session_state.api_key_set = False
@@ -35,7 +23,7 @@ if 'api_key_set' not in st.session_state:
35
  if 'api_key' not in st.session_state:
36
  st.session_state.api_key = None
37
 
38
- if 'visual_rag_system' not in st.session_state: # NEW - Vision model
39
  st.session_state.visual_rag_system = None
40
 
41
  if 'vector_store' not in st.session_state:
@@ -56,30 +44,17 @@ if 'current_images' not in st.session_state:
56
  if 'current_tables' not in st.session_state:
57
  st.session_state.current_tables = None
58
 
59
- if 'processing_results' not in st.session_state: # NEW
60
  st.session_state.processing_results = None
61
 
62
  if 'answering_rag' not in st.session_state:
63
  st.session_state.answering_rag = None
64
 
65
 
66
- # ============================================================================
67
- # MAIN HEADER
68
- # ============================================================================
69
 
70
- st.title("📄 Multimodal RAG LLM System")
71
- st.markdown("""
72
- Process PDF documents with visual image analysis:
73
- - **PDF Parser** with OCR for Russian & English
74
- - **Visual Analysis** (gpt-4o) for image understanding
75
- - **Vector Store** (ChromaDB) for semantic search
76
- - **Individual Component** summarization and storage
77
- """)
78
 
79
 
80
- # ============================================================================
81
- # SIDEBAR - CONFIGURATION
82
- # ============================================================================
83
 
84
  with st.sidebar:
85
  st.header("βš™οΈ Configuration")
@@ -97,10 +72,9 @@ with st.sidebar:
97
  st.session_state.api_key = api_key
98
  st.session_state.api_key_set = True
99
 
100
- # Initialize RAG systems if not already done
101
  if st.session_state.visual_rag_system is None:
102
  try:
103
- st.session_state.visual_rag_system = VisualMultimodalRAG(api_key=api_key, debug=True) # NEW
104
  st.session_state.vector_store = VectorStore()
105
  st.session_state.parser = PDFParser(debug=True)
106
  st.success("βœ… API Key set & systems initialized")
@@ -112,7 +86,6 @@ with st.sidebar:
112
 
113
  st.divider()
114
 
115
- # Vector Store Status
116
  st.subheader("πŸ“Š Vector Store Status")
117
  if st.session_state.vector_store:
118
  try:
@@ -127,7 +100,6 @@ with st.sidebar:
127
 
128
  st.divider()
129
 
130
- # Document Management
131
  st.subheader("πŸ“ Document Management")
132
  if st.button("πŸ”„ Clear Vector Store"):
133
  if st.session_state.vector_store:
@@ -138,11 +110,7 @@ with st.sidebar:
138
  st.error(f"Error clearing store: {e}")
139
 
140
 
141
- # ============================================================================
142
- # MAIN CONTENT
143
- # ============================================================================
144
 
145
- # Upload Section
146
  st.header("πŸ“€ Upload PDF Document")
147
 
148
  uploaded_file = st.file_uploader(
@@ -152,7 +120,6 @@ uploaded_file = st.file_uploader(
152
  )
153
 
154
  if uploaded_file is not None:
155
- # Save uploaded file
156
  upload_path = Path(UPLOAD_FOLDER)
157
  upload_path.mkdir(exist_ok=True)
158
 
@@ -193,16 +160,16 @@ if uploaded_file is not None:
193
  st.metric("πŸ“‹ Tables", len(tables))
194
 
195
  # Show image OCR details
196
- if images:
197
- st.subheader("πŸ–ΌοΈ Extracted Images")
198
- for idx, img in enumerate(images):
199
- ocr_text = img.get('ocr_text', '')
200
- ocr_len = len(ocr_text)
201
-
202
- if ocr_len > 0:
203
- st.success(f"βœ… Image {idx}: {ocr_len} characters (OCR)")
204
- else:
205
- st.warning(f"⚠️ Image {idx}: No OCR text (will use visual analysis)")
206
 
207
  st.success("βœ… PDF parsing complete!")
208
 
@@ -211,40 +178,29 @@ if uploaded_file is not None:
211
  print(f"Error: {e}")
212
 
213
 
214
- # ============================================================================
215
- # VISUAL IMAGE ANALYSIS & COMPONENT STORAGE
216
- # ============================================================================
217
 
218
  st.divider()
219
- st.header("πŸ–ΌοΈ Visual Analysis & Storage")
220
 
221
- st.info("""
222
- **How it works:**
223
- 1. Images are sent to gpt-4o for visual analysis (not just text OCR)
224
- 2. Text is split into chunks and each chunk is summarized
225
- 3. Tables are analyzed individually
226
- 4. ALL summaries are stored in the vector store for semantic search
227
- """)
228
 
229
- if st.button("πŸ–ΌοΈ Analyze Images Visually & Store Components"):
230
  if not st.session_state.api_key_set:
231
  st.error("❌ Please set OpenAI API key first")
232
  elif st.session_state.current_text is None:
233
  st.error("❌ Please parse a PDF document first")
234
  else:
235
  try:
236
- with st.spinner("πŸ–ΌοΈ Analyzing images visually with gpt-4o..."):
237
  print(f"\n{'='*70}")
238
  print(f"VISUAL IMAGE ANALYSIS")
239
  print(f"{'='*70}")
240
 
241
- # Process with visual analysis
242
  visual_rag = st.session_state.visual_rag_system
243
  vector_store = st.session_state.vector_store
244
 
245
  results = visual_rag.process_and_store_document(
246
  text=st.session_state.current_text,
247
- images=st.session_state.current_images, # Actual images sent to gpt-4o
248
  tables=st.session_state.current_tables,
249
  vector_store=vector_store,
250
  doc_id=st.session_state.current_document or "current_doc"
@@ -266,65 +222,59 @@ if st.button("πŸ–ΌοΈ Analyze Images Visually & Store Components"):
266
  st.metric("πŸ“Š Total Stored in Vector", results['total_stored'])
267
 
268
  # Show image visual analyses
269
- if results['image_visual_analyses']:
270
- st.subheader("πŸ–ΌοΈ Visual Image Analyses (gpt-4o)")
271
- for img_analysis in results['image_visual_analyses']:
272
- with st.expander(f"Image {img_analysis['image_index']} - Visual Analysis"):
273
- st.write("**Visual Analysis by gpt-4o:**")
274
- st.write(img_analysis['visual_analysis'])
275
-
276
- st.write("**Image Path:**")
277
- st.code(img_analysis['image_path'])
278
-
279
- if img_analysis['ocr_text']:
280
- st.write("**OCR Text (backup):**")
281
- st.text(img_analysis['ocr_text'][:500])
282
 
283
  # Show text chunk summaries
284
- if results['text_summaries']:
285
- st.subheader("πŸ“ Text Chunk Summaries")
286
- for chunk_summary in results['text_summaries']:
287
- with st.expander(
288
- f"Chunk {chunk_summary['chunk_index']} "
289
- f"({chunk_summary['chunk_length']} chars)"
290
- ):
291
- st.write("**Summary:**")
292
- st.write(chunk_summary['summary'])
293
- st.write("**Original Text (first 500 chars):**")
294
- st.text(chunk_summary['original_text'])
295
 
296
  # Show table analyses
297
- if results['table_summaries']:
298
- st.subheader("πŸ“‹ Table Analyses")
299
- for table_summary in results['table_summaries']:
300
- with st.expander(
301
- f"Table {table_summary['table_index']} "
302
- f"({table_summary['table_length']} chars)"
303
- ):
304
- st.write("**Analysis:**")
305
- st.write(table_summary['summary'])
306
- st.write("**Original Content (first 500 chars):**")
307
- st.text(table_summary['original_content'])
308
 
309
- print(f"\nβœ… Visual analysis processing complete!")
310
 
311
  except Exception as e:
312
- st.error(f"❌ Error during visual analysis: {e}")
313
  print(f"Error: {e}")
314
 
315
 
316
- # ============================================================================
317
- # QUESTION & ANSWERING
318
- # ============================================================================
319
-
320
  st.divider()
321
  st.header("❓ Ask Questions About Document")
322
 
323
- # Initialize answering system if not done
324
  if 'answering_rag' not in st.session_state:
325
  st.session_state.answering_rag = None
326
 
327
- # Create answering system when API key is set
328
  if st.session_state.api_key_set and st.session_state.answering_rag is None:
329
  from rag_system import AnsweringRAG
330
  st.session_state.answering_rag = AnsweringRAG(api_key=st.session_state.api_key, debug=True)
@@ -349,10 +299,8 @@ if st.button("πŸ” Search & Generate Answer"):
349
  print(f"QUESTION: {question}")
350
  print(f"{'='*70}")
351
 
352
- # Search vector store
353
  store = st.session_state.vector_store
354
 
355
- # Add documents to store if needed
356
  doc_name = st.session_state.current_document or "current_doc"
357
  doc_data = {
358
  'text': st.session_state.current_text,
@@ -361,27 +309,23 @@ if st.button("πŸ” Search & Generate Answer"):
361
  }
362
  store.add_documents(doc_data, doc_name)
363
 
364
- # Search for relevant results
365
  search_results = store.search(question, n_results=5)
366
 
367
  print(f"\nπŸ“Š Search Results Found: {len(search_results)}")
368
 
369
- # Analyze results and generate answer
370
  answering_rag = st.session_state.answering_rag
371
  result = answering_rag.analyze_and_answer(question, search_results)
372
 
373
- # Display answer prominently
374
  st.success("βœ… Analysis complete!")
375
 
376
  st.subheader("πŸ“ Answer")
377
 
378
- # Show confidence level
379
  col1, col2, col3 = st.columns(3)
380
  with col1:
381
  confidence_color = {
382
- 'high': '🟢',
383
- 'medium': '🟡',
384
- 'low': '🔴'
385
  }.get(result['confidence'], '⚪')
386
  st.metric("Confidence", f"{confidence_color} {result['confidence'].upper()}")
387
  with col2:
@@ -390,7 +334,6 @@ if st.button("πŸ” Search & Generate Answer"):
390
  if result['sources_used'] > 0:
391
  st.metric("Avg Relevance", f"{sum(1-r.get('distance',0) for r in search_results)/len(search_results):.0%}")
392
 
393
- # Display the generated answer
394
  st.write(result['answer'])
395
 
396
  # Show sources
@@ -413,26 +356,22 @@ if st.button("πŸ” Search & Generate Answer"):
413
  print(f"Error: {e}")
414
 
415
 
416
- # ============================================================================
417
- # FOOTER
418
- # ============================================================================
419
-
420
  st.divider()
421
 
422
- col1, col2, col3 = st.columns(3)
423
 
424
- with col1:
425
- st.info("πŸ“– **Text Processing**: PyPDF2 extraction with UTF-8 support")
426
 
427
- with col2:
428
- st.info("πŸ–ΌοΈ **Visual Analysis**: GPT-4o vision for image understanding")
429
 
430
- with col3:
431
- st.info("πŸ“Š **Vector Storage**: ChromaDB with auto-persist")
432
 
433
- st.caption(
434
- "Multimodal RAG System | "
435
- "Visual Image Analysis | "
436
- "Russian Language Support | "
437
- "Individual Component Summarization"
438
- )
 
 
 
 
 
1
 
2
  import streamlit as st
3
  import os
4
  from pathlib import Path
5
 
6
+
7
  from pdf_parser import PDFParser
8
  from vector_store import VectorStore
9
+ from rag_system import VisualMultimodalRAG
10
  from config import UPLOAD_FOLDER, MAX_PDF_SIZE_MB
11
 
12
 
 
 
 
 
13
  st.set_page_config(
14
+ page_title="📄 Multimodal RAG LLM System (PDF Parsing)",
 
15
  layout="wide",
16
  initial_sidebar_state="expanded"
17
  )
18
 
 
 
 
19
 
20
  if 'api_key_set' not in st.session_state:
21
  st.session_state.api_key_set = False
 
23
  if 'api_key' not in st.session_state:
24
  st.session_state.api_key = None
25
 
26
+ if 'visual_rag_system' not in st.session_state:
27
  st.session_state.visual_rag_system = None
28
 
29
  if 'vector_store' not in st.session_state:
 
44
  if 'current_tables' not in st.session_state:
45
  st.session_state.current_tables = None
46
 
47
+ if 'processing_results' not in st.session_state:
48
  st.session_state.processing_results = None
49
 
50
  if 'answering_rag' not in st.session_state:
51
  st.session_state.answering_rag = None
52
 
53
 
54
+ st.title("📄 Multimodal RAG LLM System (PDF Parsing)")
 
 
55
 
 
 
 
 
 
 
 
 
56
 
57
 
 
 
 
58
 
59
  with st.sidebar:
60
  st.header("⚙️ Configuration")
 
72
  st.session_state.api_key = api_key
73
  st.session_state.api_key_set = True
74
 
 
75
  if st.session_state.visual_rag_system is None:
76
  try:
77
+ st.session_state.visual_rag_system = VisualMultimodalRAG(api_key=api_key, debug=True)
78
  st.session_state.vector_store = VectorStore()
79
  st.session_state.parser = PDFParser(debug=True)
80
  st.success("βœ… API Key set & systems initialized")
 
86
 
87
  st.divider()
88
 
 
89
  st.subheader("📊 Vector Store Status")
90
  if st.session_state.vector_store:
91
  try:
 
100
 
101
  st.divider()
102
 
 
103
  st.subheader("📁 Document Management")
104
  if st.button("🔄 Clear Vector Store"):
105
  if st.session_state.vector_store:
 
110
  st.error(f"Error clearing store: {e}")
111
 
112
 
 
 
 
113
 
 
114
  st.header("📤 Upload PDF Document")
115
 
116
  uploaded_file = st.file_uploader(
 
120
  )
121
 
122
  if uploaded_file is not None:
 
123
  upload_path = Path(UPLOAD_FOLDER)
124
  upload_path.mkdir(exist_ok=True)
125
 
 
160
  st.metric("πŸ“‹ Tables", len(tables))
161
 
162
  # Show image OCR details
163
+ #if images:
164
+ # st.subheader("πŸ–ΌοΈ Extracted Images")
165
+ # for idx, img in enumerate(images):
166
+ # ocr_text = img.get('ocr_text', '')
167
+ # ocr_len = len(ocr_text)
168
+ #
169
+ # if ocr_len > 0:
170
+ # st.success(f"βœ… Image {idx}: {ocr_len} characters (OCR)")
171
+ # else:
172
+ # st.warning(f"⚠️ Image {idx}: No OCR text (will use visual analysis)")
173
 
174
  st.success("βœ… PDF parsing complete!")
175
 
 
178
  print(f"Error: {e}")
179
 
180
 
 
 
 
181
 
182
  st.divider()
183
+ st.header("🖼️ Analysis & Storage")
184
 
 
 
 
 
 
 
 
185
 
186
+ if st.button("🖼️ Analyze & Store Components"):
187
  if not st.session_state.api_key_set:
188
  st.error("❌ Please set OpenAI API key first")
189
  elif st.session_state.current_text is None:
190
  st.error("❌ Please parse a PDF document first")
191
  else:
192
  try:
193
+ with st.spinner("🖼️ Analyzing..."):
194
  print(f"\n{'='*70}")
195
  print(f"VISUAL IMAGE ANALYSIS")
196
  print(f"{'='*70}")
197
 
 
198
  visual_rag = st.session_state.visual_rag_system
199
  vector_store = st.session_state.vector_store
200
 
201
  results = visual_rag.process_and_store_document(
202
  text=st.session_state.current_text,
203
+ images=st.session_state.current_images,
204
  tables=st.session_state.current_tables,
205
  vector_store=vector_store,
206
  doc_id=st.session_state.current_document or "current_doc"
 
222
  st.metric("πŸ“Š Total Stored in Vector", results['total_stored'])
223
 
224
  # Show image visual analyses
225
+ #if results['image_visual_analyses']:
226
+ # st.subheader("πŸ–ΌοΈ Visual Image Analyses (gpt-4o)")
227
+ # for img_analysis in results['image_visual_analyses']:
228
+ # with st.expander(f"Image {img_analysis['image_index']} - Visual Analysis"):
229
+ # st.write("**Visual Analysis by gpt-4o:**")
230
+ # st.write(img_analysis['visual_analysis'])
231
+ #
232
+ # st.write("**Image Path:**")
233
+ # st.code(img_analysis['image_path'])
234
+ #
235
+ # if img_analysis['ocr_text']:
236
+ # st.write("**OCR Text (backup):**")
237
+ # st.text(img_analysis['ocr_text'][:500])
238
 
239
  # Show text chunk summaries
240
+ #if results['text_summaries']:
241
+ # st.subheader("πŸ“ Text Chunk Summaries")
242
+ # for chunk_summary in results['text_summaries']:
243
+ # with st.expander(
244
+ # f"Chunk {chunk_summary['chunk_index']} "
245
+ # f"({chunk_summary['chunk_length']} chars)"
246
+ # ):
247
+ # st.write("**Summary:**")
248
+ # st.write(chunk_summary['summary'])
249
+ # st.write("**Original Text (first 500 chars):**")
250
+ # st.text(chunk_summary['original_text'])
251
 
252
  # Show table analyses
253
+ #if results['table_summaries']:
254
+ # st.subheader("πŸ“‹ Table Analyses")
255
+ # for table_summary in results['table_summaries']:
256
+ # with st.expander(
257
+ # f"Table {table_summary['table_index']} "
258
+ # f"({table_summary['table_length']} chars)"
259
+ # ):
260
+ # st.write("**Analysis:**")
261
+ # st.write(table_summary['summary'])
262
+ # st.write("**Original Content (first 500 chars):**")
263
+ # st.text(table_summary['original_content'])
264
 
265
+ print(f"\nβœ… Analysis processing complete!")
266
 
267
  except Exception as e:
268
+ st.error(f"❌ Error during analysis: {e}")
269
  print(f"Error: {e}")
270
 
271
 
 
 
 
 
272
  st.divider()
273
  st.header("❓ Ask Questions About Document")
274
 
 
275
  if 'answering_rag' not in st.session_state:
276
  st.session_state.answering_rag = None
277
 
 
278
  if st.session_state.api_key_set and st.session_state.answering_rag is None:
279
  from rag_system import AnsweringRAG
280
  st.session_state.answering_rag = AnsweringRAG(api_key=st.session_state.api_key, debug=True)
 
299
  print(f"QUESTION: {question}")
300
  print(f"{'='*70}")
301
 
 
302
  store = st.session_state.vector_store
303
 
 
304
  doc_name = st.session_state.current_document or "current_doc"
305
  doc_data = {
306
  'text': st.session_state.current_text,
 
309
  }
310
  store.add_documents(doc_data, doc_name)
311
 
 
312
  search_results = store.search(question, n_results=5)
313
 
314
  print(f"\nπŸ“Š Search Results Found: {len(search_results)}")
315
 
 
316
  answering_rag = st.session_state.answering_rag
317
  result = answering_rag.analyze_and_answer(question, search_results)
318
 
 
319
  st.success("βœ… Analysis complete!")
320
 
321
  st.subheader("πŸ“ Answer")
322
 
 
323
  col1, col2, col3 = st.columns(3)
324
  with col1:
325
  confidence_color = {
326
+ 'high': 'high',
327
+ 'medium': 'medium',
328
+ 'low': 'low'
329
  }.get(result['confidence'], '⚪')
330
  st.metric("Confidence", f"{confidence_color} {result['confidence'].upper()}")
331
  with col2:
 
334
  if result['sources_used'] > 0:
335
  st.metric("Avg Relevance", f"{sum(1-r.get('distance',0) for r in search_results)/len(search_results):.0%}")
336
 
 
337
  st.write(result['answer'])
338
 
339
  # Show sources
 
356
  print(f"Error: {e}")
357
 
358
 
 
 
 
 
359
  st.divider()
360
 
361
+ #col1, col2, col3 = st.columns(3)
362
 
363
+ #with col1:
364
+ # st.info("πŸ“– **Text Processing**: PyPDF2 extraction with UTF-8 support")
365
 
366
+ #with col2:
367
+ # st.info("πŸ–ΌοΈ **Visual Analysis**: GPT-4o vision for image understanding")
368
 
369
+ #with col3:
370
+ # st.info("πŸ“Š **Vector Storage**: ChromaDB with auto-persist")
371
 
372
+ #st.caption(
373
+ # "Multimodal RAG System | "
374
+ # "Visual Image Analysis | "
375
+ # "Russian Language Support | "
376
+ # "Individual Component Summarization"
377
+ #)