dnj0 committed on
Commit
f482e1d
·
verified ·
1 Parent(s): a6680e7

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +74 -135
src/app.py CHANGED
@@ -1,33 +1,21 @@
1
- """
2
- Multimodal RAG LLM System - Streamlit App
3
- Complete working version with VISUAL image analysis using gpt-4o
4
- """
5
 
6
  import streamlit as st
7
  import os
8
  from pathlib import Path
9
 
10
- # Import optimized versions
11
  from pdf_parser import PDFParser
12
  from vector_store import VectorStore
13
- from rag_system import VisualMultimodalRAG # NEW - Vision model
14
  from config import UPLOAD_FOLDER, MAX_PDF_SIZE_MB
15
 
16
 
17
- # ============================================================================
18
- # PAGE CONFIGURATION
19
- # ============================================================================
20
-
21
  st.set_page_config(
22
- page_title="📄 Multimodal RAG LLM System",
23
- page_icon="🤖",
24
  layout="wide",
25
  initial_sidebar_state="expanded"
26
  )
27
 
28
- # ============================================================================
29
- # SESSION STATE INITIALIZATION
30
- # ============================================================================
31
 
32
  if 'api_key_set' not in st.session_state:
33
  st.session_state.api_key_set = False
@@ -35,7 +23,7 @@ if 'api_key_set' not in st.session_state:
35
  if 'api_key' not in st.session_state:
36
  st.session_state.api_key = None
37
 
38
- if 'visual_rag_system' not in st.session_state: # NEW - Vision model
39
  st.session_state.visual_rag_system = None
40
 
41
  if 'vector_store' not in st.session_state:
@@ -56,30 +44,17 @@ if 'current_images' not in st.session_state:
56
  if 'current_tables' not in st.session_state:
57
  st.session_state.current_tables = None
58
 
59
- if 'processing_results' not in st.session_state: # NEW
60
  st.session_state.processing_results = None
61
 
62
  if 'answering_rag' not in st.session_state:
63
  st.session_state.answering_rag = None
64
 
65
 
66
- # ============================================================================
67
- # MAIN HEADER
68
- # ============================================================================
69
 
70
- st.title("📄 Multimodal RAG LLM System")
71
- st.markdown("""
72
- Process PDF documents with visual image analysis:
73
- - **PDF Parser** with OCR for Russian & English
74
- - **Visual Analysis** (gpt-4o) for image understanding
75
- - **Vector Store** (ChromaDB) for semantic search
76
- - **Individual Component** summarization and storage
77
- """)
78
 
79
 
80
- # ============================================================================
81
- # SIDEBAR - CONFIGURATION
82
- # ============================================================================
83
 
84
  with st.sidebar:
85
  st.header("βš™οΈ Configuration")
@@ -97,10 +72,9 @@ with st.sidebar:
97
  st.session_state.api_key = api_key
98
  st.session_state.api_key_set = True
99
 
100
- # Initialize RAG systems if not already done
101
  if st.session_state.visual_rag_system is None:
102
  try:
103
- st.session_state.visual_rag_system = VisualMultimodalRAG(api_key=api_key, debug=True) # NEW
104
  st.session_state.vector_store = VectorStore()
105
  st.session_state.parser = PDFParser(debug=True)
106
  st.success("βœ… API Key set & systems initialized")
@@ -112,7 +86,6 @@ with st.sidebar:
112
 
113
  st.divider()
114
 
115
- # Vector Store Status
116
  st.subheader("πŸ“Š Vector Store Status")
117
  if st.session_state.vector_store:
118
  try:
@@ -127,7 +100,6 @@ with st.sidebar:
127
 
128
  st.divider()
129
 
130
- # Document Management
131
  st.subheader("πŸ“ Document Management")
132
  if st.button("πŸ”„ Clear Vector Store"):
133
  if st.session_state.vector_store:
@@ -138,11 +110,7 @@ with st.sidebar:
138
  st.error(f"Error clearing store: {e}")
139
 
140
 
141
- # ============================================================================
142
- # MAIN CONTENT
143
- # ============================================================================
144
 
145
- # Upload Section
146
  st.header("πŸ“€ Upload PDF Document")
147
 
148
  uploaded_file = st.file_uploader(
@@ -152,7 +120,6 @@ uploaded_file = st.file_uploader(
152
  )
153
 
154
  if uploaded_file is not None:
155
- # Save uploaded file
156
  upload_path = Path(UPLOAD_FOLDER)
157
  upload_path.mkdir(exist_ok=True)
158
 
@@ -193,16 +160,16 @@ if uploaded_file is not None:
193
  st.metric("πŸ“‹ Tables", len(tables))
194
 
195
  # Show image OCR details
196
- if images:
197
- st.subheader("πŸ–ΌοΈ Extracted Images")
198
- for idx, img in enumerate(images):
199
- ocr_text = img.get('ocr_text', '')
200
- ocr_len = len(ocr_text)
201
-
202
- if ocr_len > 0:
203
- st.success(f"βœ… Image {idx}: {ocr_len} characters (OCR)")
204
- else:
205
- st.warning(f"⚠️ Image {idx}: No OCR text (will use visual analysis)")
206
 
207
  st.success("βœ… PDF parsing complete!")
208
 
@@ -211,40 +178,29 @@ if uploaded_file is not None:
211
  print(f"Error: {e}")
212
 
213
 
214
- # ============================================================================
215
- # VISUAL IMAGE ANALYSIS & COMPONENT STORAGE
216
- # ============================================================================
217
 
218
  st.divider()
219
- st.header("πŸ–ΌοΈ Visual Analysis & Storage")
220
 
221
- st.info("""
222
- **How it works:**
223
- 1. Images are sent to gpt-4o for visual analysis (not just text OCR)
224
- 2. Text is split into chunks and each chunk is summarized
225
- 3. Tables are analyzed individually
226
- 4. ALL summaries are stored in the vector store for semantic search
227
- """)
228
 
229
- if st.button("πŸ–ΌοΈ Analyze Images Visually & Store Components"):
230
  if not st.session_state.api_key_set:
231
  st.error("❌ Please set OpenAI API key first")
232
  elif st.session_state.current_text is None:
233
  st.error("❌ Please parse a PDF document first")
234
  else:
235
  try:
236
- with st.spinner("πŸ–ΌοΈ Analyzing images visually with gpt-4o..."):
237
  print(f"\n{'='*70}")
238
  print(f"VISUAL IMAGE ANALYSIS")
239
  print(f"{'='*70}")
240
 
241
- # Process with visual analysis
242
  visual_rag = st.session_state.visual_rag_system
243
  vector_store = st.session_state.vector_store
244
 
245
  results = visual_rag.process_and_store_document(
246
  text=st.session_state.current_text,
247
- images=st.session_state.current_images, # Actual images sent to gpt-4o
248
  tables=st.session_state.current_tables,
249
  vector_store=vector_store,
250
  doc_id=st.session_state.current_document or "current_doc"
@@ -266,65 +222,59 @@ if st.button("πŸ–ΌοΈ Analyze Images Visually & Store Components"):
266
  st.metric("πŸ“Š Total Stored in Vector", results['total_stored'])
267
 
268
  # Show image visual analyses
269
- if results['image_visual_analyses']:
270
- st.subheader("πŸ–ΌοΈ Visual Image Analyses (gpt-4o)")
271
- for img_analysis in results['image_visual_analyses']:
272
- with st.expander(f"Image {img_analysis['image_index']} - Visual Analysis"):
273
- st.write("**Visual Analysis by gpt-4o:**")
274
- st.write(img_analysis['visual_analysis'])
275
-
276
- st.write("**Image Path:**")
277
- st.code(img_analysis['image_path'])
278
-
279
- if img_analysis['ocr_text']:
280
- st.write("**OCR Text (backup):**")
281
- st.text(img_analysis['ocr_text'][:500])
282
 
283
  # Show text chunk summaries
284
- if results['text_summaries']:
285
- st.subheader("πŸ“ Text Chunk Summaries")
286
- for chunk_summary in results['text_summaries']:
287
- with st.expander(
288
- f"Chunk {chunk_summary['chunk_index']} "
289
- f"({chunk_summary['chunk_length']} chars)"
290
- ):
291
- st.write("**Summary:**")
292
- st.write(chunk_summary['summary'])
293
- st.write("**Original Text (first 500 chars):**")
294
- st.text(chunk_summary['original_text'])
295
 
296
  # Show table analyses
297
- if results['table_summaries']:
298
- st.subheader("πŸ“‹ Table Analyses")
299
- for table_summary in results['table_summaries']:
300
- with st.expander(
301
- f"Table {table_summary['table_index']} "
302
- f"({table_summary['table_length']} chars)"
303
- ):
304
- st.write("**Analysis:**")
305
- st.write(table_summary['summary'])
306
- st.write("**Original Content (first 500 chars):**")
307
- st.text(table_summary['original_content'])
308
 
309
- print(f"\nβœ… Visual analysis processing complete!")
310
 
311
  except Exception as e:
312
- st.error(f"❌ Error during visual analysis: {e}")
313
  print(f"Error: {e}")
314
 
315
 
316
- # ============================================================================
317
- # QUESTION & ANSWERING
318
- # ============================================================================
319
-
320
  st.divider()
321
  st.header("❓ Ask Questions About Document")
322
 
323
- # Initialize answering system if not done
324
  if 'answering_rag' not in st.session_state:
325
  st.session_state.answering_rag = None
326
 
327
- # Create answering system when API key is set
328
  if st.session_state.api_key_set and st.session_state.answering_rag is None:
329
  from rag_system import AnsweringRAG
330
  st.session_state.answering_rag = AnsweringRAG(api_key=st.session_state.api_key, debug=True)
@@ -349,10 +299,8 @@ if st.button("πŸ” Search & Generate Answer"):
349
  print(f"QUESTION: {question}")
350
  print(f"{'='*70}")
351
 
352
- # Search vector store
353
  store = st.session_state.vector_store
354
 
355
- # Add documents to store if needed
356
  doc_name = st.session_state.current_document or "current_doc"
357
  doc_data = {
358
  'text': st.session_state.current_text,
@@ -361,27 +309,23 @@ if st.button("πŸ” Search & Generate Answer"):
361
  }
362
  store.add_documents(doc_data, doc_name)
363
 
364
- # Search for relevant results
365
  search_results = store.search(question, n_results=5)
366
 
367
  print(f"\nπŸ“Š Search Results Found: {len(search_results)}")
368
 
369
- # Analyze results and generate answer
370
  answering_rag = st.session_state.answering_rag
371
  result = answering_rag.analyze_and_answer(question, search_results)
372
 
373
- # Display answer prominently
374
  st.success("βœ… Analysis complete!")
375
 
376
  st.subheader("πŸ“ Answer")
377
 
378
- # Show confidence level
379
  col1, col2, col3 = st.columns(3)
380
  with col1:
381
  confidence_color = {
382
- 'high': '🟢',
383
- 'medium': '🟡',
384
- 'low': '🔴'
385
  }.get(result['confidence'], '⚪')
386
  st.metric("Confidence", f"{confidence_color} {result['confidence'].upper()}")
387
  with col2:
@@ -390,7 +334,6 @@ if st.button("πŸ” Search & Generate Answer"):
390
  if result['sources_used'] > 0:
391
  st.metric("Avg Relevance", f"{sum(1-r.get('distance',0) for r in search_results)/len(search_results):.0%}")
392
 
393
- # Display the generated answer
394
  st.write(result['answer'])
395
 
396
  # Show sources
@@ -413,26 +356,22 @@ if st.button("πŸ” Search & Generate Answer"):
413
  print(f"Error: {e}")
414
 
415
 
416
- # ============================================================================
417
- # FOOTER
418
- # ============================================================================
419
-
420
  st.divider()
421
 
422
- col1, col2, col3 = st.columns(3)
423
 
424
- with col1:
425
- st.info("πŸ“– **Text Processing**: PyPDF2 extraction with UTF-8 support")
426
 
427
- with col2:
428
- st.info("πŸ–ΌοΈ **Visual Analysis**: GPT-4o vision for image understanding")
429
 
430
- with col3:
431
- st.info("πŸ“Š **Vector Storage**: ChromaDB with auto-persist")
432
 
433
- st.caption(
434
- "Multimodal RAG System | "
435
- "Visual Image Analysis | "
436
- "Russian Language Support | "
437
- "Individual Component Summarization"
438
- )
 
 
 
 
 
1
 
2
  import streamlit as st
3
  import os
4
  from pathlib import Path
5
 
6
+
7
  from pdf_parser import PDFParser
8
  from vector_store import VectorStore
9
+ from rag_system import VisualMultimodalRAG
10
  from config import UPLOAD_FOLDER, MAX_PDF_SIZE_MB
11
 
12
 
 
 
 
 
13
  st.set_page_config(
14
+ page_title="📄 Multimodal RAG LLM System (PDF Parsing)",
 
15
  layout="wide",
16
  initial_sidebar_state="expanded"
17
  )
18
 
 
 
 
19
 
20
  if 'api_key_set' not in st.session_state:
21
  st.session_state.api_key_set = False
 
23
  if 'api_key' not in st.session_state:
24
  st.session_state.api_key = None
25
 
26
+ if 'visual_rag_system' not in st.session_state:
27
  st.session_state.visual_rag_system = None
28
 
29
  if 'vector_store' not in st.session_state:
 
44
  if 'current_tables' not in st.session_state:
45
  st.session_state.current_tables = None
46
 
47
+ if 'processing_results' not in st.session_state:
48
  st.session_state.processing_results = None
49
 
50
  if 'answering_rag' not in st.session_state:
51
  st.session_state.answering_rag = None
52
 
53
 
54
+ st.title("📄 Multimodal RAG LLM System (PDF Parsing)")
 
 
55
 
 
 
 
 
 
 
 
 
56
 
57
 
 
 
 
58
 
59
  with st.sidebar:
60
  st.header("⚙️ Configuration")
 
72
  st.session_state.api_key = api_key
73
  st.session_state.api_key_set = True
74
 
 
75
  if st.session_state.visual_rag_system is None:
76
  try:
77
+ st.session_state.visual_rag_system = VisualMultimodalRAG(api_key=api_key, debug=True)
78
  st.session_state.vector_store = VectorStore()
79
  st.session_state.parser = PDFParser(debug=True)
80
  st.success("βœ… API Key set & systems initialized")
 
86
 
87
  st.divider()
88
 
 
89
  st.subheader("📊 Vector Store Status")
90
  if st.session_state.vector_store:
91
  try:
 
100
 
101
  st.divider()
102
 
 
103
  st.subheader("📁 Document Management")
104
  if st.button("🔄 Clear Vector Store"):
105
  if st.session_state.vector_store:
 
110
  st.error(f"Error clearing store: {e}")
111
 
112
 
 
 
 
113
 
 
114
  st.header("📤 Upload PDF Document")
115
 
116
  uploaded_file = st.file_uploader(
 
120
  )
121
 
122
  if uploaded_file is not None:
 
123
  upload_path = Path(UPLOAD_FOLDER)
124
  upload_path.mkdir(exist_ok=True)
125
 
 
160
  st.metric("πŸ“‹ Tables", len(tables))
161
 
162
  # Show image OCR details
163
+ #if images:
164
+ # st.subheader("πŸ–ΌοΈ Extracted Images")
165
+ # for idx, img in enumerate(images):
166
+ # ocr_text = img.get('ocr_text', '')
167
+ # ocr_len = len(ocr_text)
168
+ #
169
+ # if ocr_len > 0:
170
+ # st.success(f"βœ… Image {idx}: {ocr_len} characters (OCR)")
171
+ # else:
172
+ # st.warning(f"⚠️ Image {idx}: No OCR text (will use visual analysis)")
173
 
174
  st.success("βœ… PDF parsing complete!")
175
 
 
178
  print(f"Error: {e}")
179
 
180
 
 
 
 
181
 
182
  st.divider()
183
+ st.header("🖼️ Analysis & Storage")
184
 
 
 
 
 
 
 
 
185
 
186
+ if st.button("🖼️ Analyze & Store Components"):
187
  if not st.session_state.api_key_set:
188
  st.error("❌ Please set OpenAI API key first")
189
  elif st.session_state.current_text is None:
190
  st.error("❌ Please parse a PDF document first")
191
  else:
192
  try:
193
+ with st.spinner("🖼️ Analyzing..."):
194
  print(f"\n{'='*70}")
195
  print(f"VISUAL IMAGE ANALYSIS")
196
  print(f"{'='*70}")
197
 
 
198
  visual_rag = st.session_state.visual_rag_system
199
  vector_store = st.session_state.vector_store
200
 
201
  results = visual_rag.process_and_store_document(
202
  text=st.session_state.current_text,
203
+ images=st.session_state.current_images,
204
  tables=st.session_state.current_tables,
205
  vector_store=vector_store,
206
  doc_id=st.session_state.current_document or "current_doc"
 
222
  st.metric("πŸ“Š Total Stored in Vector", results['total_stored'])
223
 
224
  # Show image visual analyses
225
+ #if results['image_visual_analyses']:
226
+ # st.subheader("πŸ–ΌοΈ Visual Image Analyses (gpt-4o)")
227
+ # for img_analysis in results['image_visual_analyses']:
228
+ # with st.expander(f"Image {img_analysis['image_index']} - Visual Analysis"):
229
+ # st.write("**Visual Analysis by gpt-4o:**")
230
+ # st.write(img_analysis['visual_analysis'])
231
+ #
232
+ # st.write("**Image Path:**")
233
+ # st.code(img_analysis['image_path'])
234
+ #
235
+ # if img_analysis['ocr_text']:
236
+ # st.write("**OCR Text (backup):**")
237
+ # st.text(img_analysis['ocr_text'][:500])
238
 
239
  # Show text chunk summaries
240
+ #if results['text_summaries']:
241
+ # st.subheader("πŸ“ Text Chunk Summaries")
242
+ # for chunk_summary in results['text_summaries']:
243
+ # with st.expander(
244
+ # f"Chunk {chunk_summary['chunk_index']} "
245
+ # f"({chunk_summary['chunk_length']} chars)"
246
+ # ):
247
+ # st.write("**Summary:**")
248
+ # st.write(chunk_summary['summary'])
249
+ # st.write("**Original Text (first 500 chars):**")
250
+ # st.text(chunk_summary['original_text'])
251
 
252
  # Show table analyses
253
+ #if results['table_summaries']:
254
+ # st.subheader("πŸ“‹ Table Analyses")
255
+ # for table_summary in results['table_summaries']:
256
+ # with st.expander(
257
+ # f"Table {table_summary['table_index']} "
258
+ # f"({table_summary['table_length']} chars)"
259
+ # ):
260
+ # st.write("**Analysis:**")
261
+ # st.write(table_summary['summary'])
262
+ # st.write("**Original Content (first 500 chars):**")
263
+ # st.text(table_summary['original_content'])
264
 
265
+ print(f"\nβœ… Analysis processing complete!")
266
 
267
  except Exception as e:
268
+ st.error(f"❌ Error during analysis: {e}")
269
  print(f"Error: {e}")
270
 
271
 
 
 
 
 
272
  st.divider()
273
  st.header("❓ Ask Questions About Document")
274
 
 
275
  if 'answering_rag' not in st.session_state:
276
  st.session_state.answering_rag = None
277
 
 
278
  if st.session_state.api_key_set and st.session_state.answering_rag is None:
279
  from rag_system import AnsweringRAG
280
  st.session_state.answering_rag = AnsweringRAG(api_key=st.session_state.api_key, debug=True)
 
299
  print(f"QUESTION: {question}")
300
  print(f"{'='*70}")
301
 
 
302
  store = st.session_state.vector_store
303
 
 
304
  doc_name = st.session_state.current_document or "current_doc"
305
  doc_data = {
306
  'text': st.session_state.current_text,
 
309
  }
310
  store.add_documents(doc_data, doc_name)
311
 
 
312
  search_results = store.search(question, n_results=5)
313
 
314
  print(f"\nπŸ“Š Search Results Found: {len(search_results)}")
315
 
 
316
  answering_rag = st.session_state.answering_rag
317
  result = answering_rag.analyze_and_answer(question, search_results)
318
 
 
319
  st.success("βœ… Analysis complete!")
320
 
321
  st.subheader("πŸ“ Answer")
322
 
 
323
  col1, col2, col3 = st.columns(3)
324
  with col1:
325
  confidence_color = {
326
+ 'high': 'high',
327
+ 'medium': 'medium',
328
+ 'low': 'low'
329
  }.get(result['confidence'], '⚪')
330
  st.metric("Confidence", f"{confidence_color} {result['confidence'].upper()}")
331
  with col2:
 
334
  if result['sources_used'] > 0:
335
  st.metric("Avg Relevance", f"{sum(1-r.get('distance',0) for r in search_results)/len(search_results):.0%}")
336
 
 
337
  st.write(result['answer'])
338
 
339
  # Show sources
 
356
  print(f"Error: {e}")
357
 
358
 
 
 
 
 
359
  st.divider()
360
 
361
+ #col1, col2, col3 = st.columns(3)
362
 
363
+ #with col1:
364
+ # st.info("πŸ“– **Text Processing**: PyPDF2 extraction with UTF-8 support")
365
 
366
+ #with col2:
367
+ # st.info("πŸ–ΌοΈ **Visual Analysis**: GPT-4o vision for image understanding")
368
 
369
+ #with col3:
370
+ # st.info("πŸ“Š **Vector Storage**: ChromaDB with auto-persist")
371
 
372
+ #st.caption(
373
+ # "Multimodal RAG System | "
374
+ # "Visual Image Analysis | "
375
+ # "Russian Language Support | "
376
+ # "Individual Component Summarization"
377
+ #)