Spaces:

sukhrobnurali
/

financial-document-analyzer

Running

sukhrobnurali committed on Nov 27, 2025

Commit

76cdde2

1 Parent(s): cdadb63

Bug fixes

Files changed (2) hide show

app.py CHANGED Viewed

@@ -120,6 +120,8 @@ def initialize_session_state():
         st.session_state.analysis_result = None
     if 'document_loaded' not in st.session_state:
         st.session_state.document_loaded = False
 def display_criteria_rules(criteria):
@@ -281,13 +283,23 @@ def main():
         )
         if uploaded_file is not None:
             # Save to temp file
             with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                 tmp_file.write(uploaded_file.getvalue())
                 tmp_path = tmp_file.name
-            # Load document
-            if not st.session_state.document_loaded or st.session_state.processor is None:
                 with st.spinner("Loading and indexing document..."):
                     try:
                         processor = InvestmentDocumentProcessor(api_key)

         st.session_state.analysis_result = None
     if 'document_loaded' not in st.session_state:
         st.session_state.document_loaded = False
+    if 'current_file_name' not in st.session_state:
+        st.session_state.current_file_name = None
 def display_criteria_rules(criteria):
         )
         if uploaded_file is not None:
+            # Check if file has changed
+            file_changed = (uploaded_file.name != st.session_state.current_file_name)
+            if file_changed:
+                # Reset session state for new file
+                st.session_state.current_file_name = uploaded_file.name
+                st.session_state.document_loaded = False
+                st.session_state.analysis_result = None
+                st.session_state.processor = None
             # Save to temp file
             with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                 tmp_file.write(uploaded_file.getvalue())
                 tmp_path = tmp_file.name
+            # Load document if not already loaded
+            if not st.session_state.document_loaded:
                 with st.spinner("Loading and indexing document..."):
                     try:
                         processor = InvestmentDocumentProcessor(api_key)

document_processor.py CHANGED Viewed

@@ -28,9 +28,10 @@ class InvestmentDocumentProcessor:
         Settings.llm = self.llm
         # Node parser to chunk documents while preserving metadata
         self.node_parser = SimpleNodeParser.from_defaults(
-            chunk_size=1024,
-            chunk_overlap=200
         )
         self.index = None
@@ -69,8 +70,8 @@ class InvestmentDocumentProcessor:
         # Create query engine with citation tracking
         query_engine = self.index.as_query_engine(
-            similarity_top_k=10,  # Get more context
-            response_mode="tree_summarize"
         )
         # Query with the criteria prompt
@@ -107,9 +108,9 @@ class InvestmentDocumentProcessor:
                 "index": idx + 1,
                 "page": page,
                 "score": node.score,
-                "text_preview": node.node.text[:200] + "..." if len(node.node.text) > 200 else node.node.text,
                 "full_text": node.node.text,
-                "is_truncated": len(node.node.text) > 200,
                 "file_name": node.node.metadata.get('file_name', 'Unknown')
             }
             citations.append(citation)

         Settings.llm = self.llm
         # Node parser to chunk documents while preserving metadata
+        # Larger chunks to capture complete financial statements/tables
         self.node_parser = SimpleNodeParser.from_defaults(
+            chunk_size=2048,
+            chunk_overlap=400
         )
         self.index = None
         # Create query engine with citation tracking
         query_engine = self.index.as_query_engine(
+            similarity_top_k=20,  # Increased to get more diverse context
+            response_mode="compact"  # More focused on relevant chunks
         )
         # Query with the criteria prompt
                 "index": idx + 1,
                 "page": page,
                 "score": node.score,
+                "text_preview": node.node.text[:350] + "..." if len(node.node.text) > 350 else node.node.text,
                 "full_text": node.node.text,
+                "is_truncated": len(node.node.text) > 350,
                 "file_name": node.node.metadata.get('file_name', 'Unknown')
             }
             citations.append(citation)