Gowthamvemula committed on
Commit
009a93d
·
verified ·
1 Parent(s): a866ebf

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +32 -33
src/streamlit_app.py CHANGED
@@ -14,7 +14,7 @@ from langchain.docstore.document import Document
14
  @st.cache_resource
15
  def load_models():
16
  llm = Ollama(model="llama3")
17
- sentence_transformer = SentenceTransformer('all-MiniLM-L6-v2')
18
  return llm, sentence_transformer
19
 
20
  llm, sentence_transformer = load_models()
@@ -55,31 +55,31 @@ def init_database():
55
  conn.commit()
56
  conn.close()
57
 
58
- # Process uploaded files and text
59
  @st.cache_resource
60
  def process_documents(_uploaded_files, manual_text=""):
61
  init_database()
62
  conn = sqlite3.connect('itc_finance.db')
63
  cursor = conn.cursor()
64
-
65
  text_splitter = RecursiveCharacterTextSplitter(
66
  chunk_size=1000,
67
  chunk_overlap=200
68
  )
69
-
70
  chroma_db = Chroma(
71
  embedding_function=sentence_transformer_embedding,
72
  persist_directory="./chroma_db"
73
  )
74
-
75
  documents = []
76
-
77
  # Process uploaded files
78
  for uploaded_file in _uploaded_files:
79
  file_path = f"./temp_{uploaded_file.name}"
80
  with open(file_path, "wb") as f:
81
  f.write(uploaded_file.getbuffer())
82
-
83
  if uploaded_file.name.endswith('.pdf'):
84
  loader = PyPDFLoader(file_path)
85
  pages = loader.load_and_split()
@@ -87,7 +87,7 @@ def process_documents(_uploaded_files, manual_text=""):
87
  with open(file_path, 'r') as f:
88
  text = f.read()
89
  pages = [Document(page_content=text)]
90
-
91
  for page in pages:
92
  chunks = text_splitter.split_text(page.page_content)
93
  for chunk in chunks:
@@ -96,20 +96,19 @@ def process_documents(_uploaded_files, manual_text=""):
96
  (uploaded_file.name, chunk)
97
  )
98
  doc_id = cursor.lastrowid
99
-
100
  chroma_db.add_texts(
101
  texts=[chunk],
102
  metadatas=[{"source": uploaded_file.name, "sql_id": doc_id}]
103
  )
104
-
105
  cursor.execute(
106
  "UPDATE documents SET embedding_id = ? WHERE id = ?",
107
  (str(doc_id), doc_id)
108
- )
109
-
110
  os.remove(file_path)
111
  documents.append(uploaded_file.name)
112
-
113
  # Process manual text
114
  if manual_text:
115
  chunks = text_splitter.split_text(manual_text)
@@ -119,18 +118,18 @@ def process_documents(_uploaded_files, manual_text=""):
119
  ("Manual Input", chunk)
120
  )
121
  doc_id = cursor.lastrowid
122
-
123
  chroma_db.add_texts(
124
  texts=[chunk],
125
  metadatas=[{"source": "Manual Input", "sql_id": doc_id}]
126
  )
127
-
128
  cursor.execute(
129
  "UPDATE documents SET embedding_id = ? WHERE id = ?",
130
  (str(doc_id), doc_id)
131
  )
132
  documents.append("Manual Input")
133
-
134
  conn.commit()
135
  conn.close()
136
  return chroma_db, documents
@@ -143,20 +142,20 @@ def get_query_engine(chroma_db):
143
  Use only the provided context to answer.
144
  Cite sources like: [Source: {source}, page X]
145
  <</SYS>>
146
-
147
  Context: {context}
148
-
149
  Question: {question}[/INST]
150
  """)
151
-
152
  def format_docs(docs):
153
  return "\n\n".join(
154
  f"Document Excerpt: {doc.page_content}\nSource: {doc.metadata['source']}"
155
  for doc in docs
156
  )
157
-
158
  retriever = chroma_db.as_retriever(search_kwargs={"k": 3})
159
-
160
  return (
161
  {
162
  "context": retriever | format_docs,
@@ -171,21 +170,21 @@ def get_query_engine(chroma_db):
171
  if uploaded_files or manual_text:
172
  with st.spinner("Processing documents..."):
173
  chroma_db, processed_docs = process_documents(uploaded_files, manual_text)
174
-
175
  st.success(f"Processed {len(processed_docs)} documents")
176
  query_engine = get_query_engine(chroma_db)
177
-
178
  # Query interface
179
  st.divider()
180
  question = st.text_input("Ask about ITC's finances:", placeholder="E.g. What was the revenue growth in 2023?")
181
-
182
  if question:
183
  with st.spinner("Analyzing..."):
184
  answer = query_engine.invoke({"question": question})
185
-
186
  st.subheader("Analysis Result")
187
  st.markdown(answer)
188
-
189
  with st.expander("View source documents"):
190
  st.write(chroma_db.similarity_search(question))
191
  else:
@@ -195,17 +194,17 @@ else:
195
  with st.sidebar:
196
  st.markdown("## How to Use")
197
  st.markdown("""
198
- 1. Upload PDF reports/presentations
199
- 2. Or paste financial text
200
  3. Ask questions about the data
201
  """)
202
-
203
  st.markdown("## Sample Questions")
204
  st.markdown("""
205
- - What was ITC's net profit in 2023?
206
- - Compare revenue between 20222024
207
  - Show me key financial ratios
208
  """)
209
-
210
  st.markdown("## System Info")
211
- st.code("Using: Llama 3 (local)\nEmbeddings: all-MiniLM-L6-v2")
 
14
  @st.cache_resource
15
  def load_models():
16
  llm = Ollama(model="llama3")
17
+ sentence_transformer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
18
  return llm, sentence_transformer
19
 
20
  llm, sentence_transformer = load_models()
 
55
  conn.commit()
56
  conn.close()
57
 
58
+ # Process uploaded files
59
  @st.cache_resource
60
  def process_documents(_uploaded_files, manual_text=""):
61
  init_database()
62
  conn = sqlite3.connect('itc_finance.db')
63
  cursor = conn.cursor()
64
+
65
  text_splitter = RecursiveCharacterTextSplitter(
66
  chunk_size=1000,
67
  chunk_overlap=200
68
  )
69
+
70
  chroma_db = Chroma(
71
  embedding_function=sentence_transformer_embedding,
72
  persist_directory="./chroma_db"
73
  )
74
+
75
  documents = []
76
+
77
  # Process uploaded files
78
  for uploaded_file in _uploaded_files:
79
  file_path = f"./temp_{uploaded_file.name}"
80
  with open(file_path, "wb") as f:
81
  f.write(uploaded_file.getbuffer())
82
+
83
  if uploaded_file.name.endswith('.pdf'):
84
  loader = PyPDFLoader(file_path)
85
  pages = loader.load_and_split()
 
87
  with open(file_path, 'r') as f:
88
  text = f.read()
89
  pages = [Document(page_content=text)]
90
+
91
  for page in pages:
92
  chunks = text_splitter.split_text(page.page_content)
93
  for chunk in chunks:
 
96
  (uploaded_file.name, chunk)
97
  )
98
  doc_id = cursor.lastrowid
99
+
100
  chroma_db.add_texts(
101
  texts=[chunk],
102
  metadatas=[{"source": uploaded_file.name, "sql_id": doc_id}]
103
  )
104
+
105
  cursor.execute(
106
  "UPDATE documents SET embedding_id = ? WHERE id = ?",
107
  (str(doc_id), doc_id)
108
+
 
109
  os.remove(file_path)
110
  documents.append(uploaded_file.name)
111
+
112
  # Process manual text
113
  if manual_text:
114
  chunks = text_splitter.split_text(manual_text)
 
118
  ("Manual Input", chunk)
119
  )
120
  doc_id = cursor.lastrowid
121
+
122
  chroma_db.add_texts(
123
  texts=[chunk],
124
  metadatas=[{"source": "Manual Input", "sql_id": doc_id}]
125
  )
126
+
127
  cursor.execute(
128
  "UPDATE documents SET embedding_id = ? WHERE id = ?",
129
  (str(doc_id), doc_id)
130
  )
131
  documents.append("Manual Input")
132
+
133
  conn.commit()
134
  conn.close()
135
  return chroma_db, documents
 
142
  Use only the provided context to answer.
143
  Cite sources like: [Source: {source}, page X]
144
  <</SYS>>
145
+
146
  Context: {context}
147
+
148
  Question: {question}[/INST]
149
  """)
150
+
151
  def format_docs(docs):
152
  return "\n\n".join(
153
  f"Document Excerpt: {doc.page_content}\nSource: {doc.metadata['source']}"
154
  for doc in docs
155
  )
156
+
157
  retriever = chroma_db.as_retriever(search_kwargs={"k": 3})
158
+
159
  return (
160
  {
161
  "context": retriever | format_docs,
 
170
  if uploaded_files or manual_text:
171
  with st.spinner("Processing documents..."):
172
  chroma_db, processed_docs = process_documents(uploaded_files, manual_text)
173
+
174
  st.success(f"Processed {len(processed_docs)} documents")
175
  query_engine = get_query_engine(chroma_db)
176
+
177
  # Query interface
178
  st.divider()
179
  question = st.text_input("Ask about ITC's finances:", placeholder="E.g. What was the revenue growth in 2023?")
180
+
181
  if question:
182
  with st.spinner("Analyzing..."):
183
  answer = query_engine.invoke({"question": question})
184
+
185
  st.subheader("Analysis Result")
186
  st.markdown(answer)
187
+
188
  with st.expander("View source documents"):
189
  st.write(chroma_db.similarity_search(question))
190
  else:
 
194
  with st.sidebar:
195
  st.markdown("## How to Use")
196
  st.markdown("""
197
+ 1. Upload PDF reports/presentations
198
+ 2. Or paste financial text
199
  3. Ask questions about the data
200
  """)
201
+
202
  st.markdown("## Sample Questions")
203
  st.markdown("""
204
+ - What was ITC's net profit in 2023?
205
+ - Compare revenue between 2022-2024
206
  - Show me key financial ratios
207
  """)
208
+
209
  st.markdown("## System Info")
210
+ st.code(f"Using: Llama 3 (local)\nEmbeddings: sentence-transformers/all-MiniLM-L6-v2")