Spaces:

Vikrant26
/

Finance_Bot

Sleeping

App Files Files Community

Vikrant26 committed on Jan 18, 2025

Commit

65cdc34

verified ·

1 Parent(s): 8ddef96

Upload 6 files

Browse files

Files changed (6) hide show

.env +6 -0
.gitignore +2 -0
PL_image-removebg-preview.png +0 -0
app.py +108 -0
rag.py +123 -0
requirements.txt +11 -0

.env ADDED Viewed

	@@ -0,0 +1,6 @@

+GOOGLE_API_KEY="AIzaSyA6pBfBHg3zK_3JtB6fRoYUcG4589RjSjg"
+PINECONE_API_KEY="pcsk_3oYE7o_3JP3Y1f9zveyQYJxUy4WGwZy4TKqCWyemLAqUeCqpM6UPK8Ne1Bx2KGCkmDS3eq"
+PINECONE_ENV="us-west1-gcp-free"
+# Optional: ChromaDB Settings
+CHROMA_DB_IMPL=duckdb+parquet
+PERSIST_DIRECTORY=db

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ myenv
2	+ .env

PL_image-removebg-preview.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import streamlit as st
+from rag import RAGProcessor
+import os
+from dotenv import load_dotenv
+import tempfile
+# Load environment variables
+load_dotenv()
+# Check for API key
+if not os.getenv('GOOGLE_API_KEY'):
+    st.error("Please set the GOOGLE_API_KEY in your .env file.")
+    st.stop()
+def initialize_session_state():
+    """Initialize session state variables."""
+    if "rag_processor" not in st.session_state:
+        st.session_state.rag_processor = RAGProcessor()
+    if "vector_store" not in st.session_state:
+        st.session_state.vector_store = None
+def save_uploaded_files(uploaded_files):
+    """Save uploaded files to a temporary directory and return file paths."""
+    try:
+        temp_dir = tempfile.mkdtemp()
+        file_paths = []
+        for uploaded_file in uploaded_files:
+            file_path = os.path.join(temp_dir, uploaded_file.name)
+            with open(file_path, "wb") as f:
+                f.write(uploaded_file.getbuffer())
+            file_paths.append(file_path)
+        return file_paths
+    except Exception as e:
+        st.error(f"Error saving uploaded files: {e}")
+        return []
+def main():
+    st.set_page_config(
+        page_title="Finance Buddy",
+        page_icon="💰",
+        layout="wide"
+    )
+    initialize_session_state()
+    # Main header with emoji
+    st.markdown("<div class='main-header'>", unsafe_allow_html=True)
+    st.markdown(
+        "<h1 style='text-align: center;'>💰 Finance Buddy</h1>",
+        unsafe_allow_html=True
+    )
+    st.markdown("</div>", unsafe_allow_html=True)
+    # Sidebar
+    with st.sidebar:
+        st.image("PL_image-removebg-preview.png", use_column_width=True)
+        st.title("📄 Document Analysis")
+        uploaded_files = st.file_uploader(
+            "Upload P&L Documents (PDF)",
+            accept_multiple_files=True,
+            type=['pdf']
+        )
+        if uploaded_files and st.button("Process Documents", key="process_docs"):
+            with st.spinner("Processing documents..."):
+                try:
+                    # Save uploaded files and process them
+                    file_paths = save_uploaded_files(uploaded_files)
+                    if file_paths:
+                        st.session_state.vector_store = st.session_state.rag_processor.process_documents(file_paths)
+                        st.success("✅ Documents processed successfully!")
+                except Exception as e:
+                    st.error(f"Error processing documents: {e}")
+    # Main content
+    st.markdown("""
+    💡 **Ask questions about your P&L statements and financial data.**
+    """)
+    # Query input
+    query = st.text_input("🔍 Ask your question:", key="query")
+    if query:
+        if not st.session_state.vector_store:
+            st.warning("Please upload and process documents first!")
+        else:
+            with st.spinner("Analyzing..."):
+                try:
+                    response = st.session_state.rag_processor.generate_response(
+                        query,
+                        st.session_state.vector_store
+                    )
+                    st.markdown("### 📋 Response:")
+                    st.markdown(f">{response}")
+                except Exception as e:
+                    st.error(f"Error generating response: {e}")
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        "<p style='text-align: center;'>💼 Built with Streamlit & Google Generative AI</p>",
+        unsafe_allow_html=True
+    )
+if __name__ == "__main__":
+    main()

rag.py ADDED Viewed

	@@ -0,0 +1,123 @@

+from typing import List
+import google.generativeai as genai
+from langchain.embeddings.base import Embeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from PyPDF2 import PdfReader
+import pandas as pd
+import os
+class CustomGoogleEmbeddings(Embeddings):
+    """Custom Embedding Class for Google Generative AI"""
+    def __init__(self, model='models/embedding-001'):
+        self.client = genai
+        self.model = model
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        embeddings = []
+        for text in texts:
+            text = text[:2048] if len(text) > 2048 else text
+            try:
+                embedding = self.client.embed_content(
+                    model=self.model,
+                    content=text,
+                    task_type="retrieval_document"
+                )['embedding']
+                embeddings.append(embedding)
+            except Exception as e:
+                print(f"Embedding error: {e}")
+                embeddings.append([0.0] * 768)
+        return embeddings
+    def embed_query(self, text: str) -> List[float]:
+        text = text[:2048] if len(text) > 2048 else text
+        try:
+            return self.client.embed_content(
+                model=self.model,
+                content=text,
+                task_type="retrieval_query"
+            )['embedding']
+        except Exception as e:
+            print(f"Query embedding error: {e}")
+            return [0.0] * 768
+class RAGProcessor:
+    def __init__(self):
+        self.embeddings = CustomGoogleEmbeddings()
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200,
+            separators=["\n\n", "\n", ".", ",", " ", ""]
+        )
+        genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
+        self.model = genai.GenerativeModel('gemini-pro')
+    def extract_text_from_pdf(self, pdf_file) -> str:
+        """Extract text from PDF with focus on structured content"""
+        try:
+            pdf_reader = PdfReader(pdf_file)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text() + "\n\n"
+            # Basic structure preservation
+            # Look for common P&L statement patterns
+            lines = text.split('\n')
+            structured_text = ""
+            for line in lines:
+                # Identify potential financial entries (e.g., "Revenue: $1000")
+                if any(keyword in line.lower() for keyword in ['revenue', 'profit', 'loss', 'expenses', 'income', 'cost', 'margin', 'ebitda', 'tax']):
+                    structured_text += f"FINANCIAL_ENTRY: {line}\n"
+                else:
+                    structured_text += line + "\n"
+            return structured_text
+        except Exception as e:
+            print(f"Error extracting text from PDF: {e}")
+            return ""
+    def process_documents(self, pdf_files: List[str]) -> FAISS:
+        """Process multiple PDF documents and create vector store"""
+        combined_text = ""
+        for pdf in pdf_files:
+            combined_text += self.extract_text_from_pdf(pdf)
+        # Create more focused chunks
+        text_chunks = self.text_splitter.split_text(combined_text)
+        # Create vector store
+        try:
+            vector_store = FAISS.from_texts(text_chunks, embedding=self.embeddings)
+            return vector_store
+        except Exception as e:
+            print(f"Error creating vector store: {e}")
+            raise
+    def generate_response(self, question: str, vector_store: FAISS) -> str:
+        """Generate response using RAG approach"""
+        # Retrieve relevant context
+        docs = vector_store.similarity_search(question, k=4)
+        context = "\n".join([doc.page_content for doc in docs])
+        prompt = f"""
+        You are a financial analyst assistant. Using the following financial data context,
+        answer the question accurately and professionally. Include specific numbers and
+        calculations when relevant.
+        Context: {context}
+        Question: {question}
+        If the context doesn't contain enough information to answer accurately,
+        please state that clearly. Focus on P&L related information and financial metrics.
+        When providing financial figures, please format them clearly with appropriate units
+        (e.g., "$1,234,567" or "1.2M" for millions).
+        """
+        try:
+            response = self.model.generate_content(prompt)
+            return response.text
+        except Exception as e:
+            return f"Error generating response: {e}"

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+streamlit
+python-dotenv
+google-generativeai
+langchain
+langchain-community
+faiss-cpu
+PyPDF2
+tabula-py
+pandas
+numpy
+python-multipart