# ITC_Financial_Assistant / src/streamlit_app.py
# Author: Gowthamvemula — "Update src/streamlit_app.py" (commit 009a93d, verified)
import streamlit as st
import sqlite3
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import PyPDFLoader
from langchain.docstore.document import Document
# ---- Model initialization (cached once per Streamlit session) ----
@st.cache_resource
def load_models():
    """Load the local Llama 3 LLM and the MiniLM sentence encoder.

    Returns:
        tuple: (Ollama LLM handle, SentenceTransformer encoder).
    """
    language_model = Ollama(model="llama3")
    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return language_model, encoder


llm, sentence_transformer = load_models()
# Custom embedding function
def sentence_transformer_embedding(texts):
    """Encode *texts* with the shared sentence-transformer model.

    Returns the embeddings as plain Python lists (JSON-serializable),
    as expected by the Chroma vector store.
    """
    vectors = sentence_transformer.encode(texts)
    return vectors.tolist()
# ---- Streamlit UI: header and document inputs ----
st.title("📊 ITC Financial Analysis Assistant")
st.markdown("""
Analyze ITC's financial documents using local AI (Llama 3).
Upload annual reports, presentations, or paste text below.
""")

# Documents can come from uploaded files and/or pasted text.
uploaded_files = st.file_uploader(
    "Upload financial documents (PDF or TXT)",
    type=["pdf", "txt"],
    accept_multiple_files=True,
)
manual_text = st.text_area("Or paste financial text directly:")
# Database setup
def init_database():
    """Create the SQLite ``documents`` table if it does not already exist.

    The table stores one row per text chunk: its source document name,
    the raw chunk content, and the id it was stored under in Chroma.
    Idempotent — safe to call on every rerun.
    """
    conn = sqlite3.connect('itc_finance.db')
    try:
        # Connection.execute creates an implicit cursor; no separate
        # cursor object is needed for a single DDL statement.
        conn.execute('''
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT,
                content TEXT,
                embedding_id TEXT
            )
        ''')
        conn.commit()
    finally:
        # Original leaked the connection if execute() raised.
        conn.close()
# Process uploaded files
@st.cache_resource
def process_documents(_uploaded_files, manual_text=""):
    """Chunk the input documents and index them in SQLite + Chroma.

    Args:
        _uploaded_files: Streamlit UploadedFile objects (PDF or TXT).
            Leading underscore tells st.cache_resource not to hash them.
        manual_text: Optional free text pasted by the user.

    Returns:
        tuple: (Chroma vector store, list of processed source names).
    """
    init_database()
    conn = sqlite3.connect('itc_finance.db')
    try:
        cursor = conn.cursor()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        # NOTE(review): Chroma normally expects a LangChain Embeddings
        # object, not a bare function — confirm this works with the
        # installed langchain_community version.
        chroma_db = Chroma(
            embedding_function=sentence_transformer_embedding,
            persist_directory="./chroma_db",
        )

        def _index_chunk(chunk, source):
            """Insert one chunk into SQLite and Chroma, linking the ids."""
            cursor.execute(
                "INSERT INTO documents (source, content) VALUES (?, ?)",
                (source, chunk),
            )
            doc_id = cursor.lastrowid
            chroma_db.add_texts(
                texts=[chunk],
                metadatas=[{"source": source, "sql_id": doc_id}],
            )
            # BUG FIX: the original was missing the closing parenthesis
            # on this call, which was a SyntaxError.
            cursor.execute(
                "UPDATE documents SET embedding_id = ? WHERE id = ?",
                (str(doc_id), doc_id),
            )

        documents = []

        # Process uploaded files: spill each to disk so PyPDFLoader can
        # read it, then split into pages/chunks and index.
        for uploaded_file in _uploaded_files:
            file_path = f"./temp_{uploaded_file.name}"
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            if uploaded_file.name.endswith('.pdf'):
                loader = PyPDFLoader(file_path)
                pages = loader.load_and_split()
            else:
                # Explicit encoding: default was platform-dependent.
                with open(file_path, 'r', encoding='utf-8') as f:
                    text = f.read()
                pages = [Document(page_content=text)]
            for page in pages:
                for chunk in text_splitter.split_text(page.page_content):
                    _index_chunk(chunk, uploaded_file.name)
            os.remove(file_path)  # clean up the temp copy
            documents.append(uploaded_file.name)

        # Process manual text the same way, under a synthetic source name.
        if manual_text:
            for chunk in text_splitter.split_text(manual_text):
                _index_chunk(chunk, "Manual Input")
            documents.append("Manual Input")

        conn.commit()
    finally:
        # Original leaked the connection on any exception mid-indexing.
        conn.close()
    return chroma_db, documents
# Query engine
def get_query_engine(chroma_db):
    """Build the retrieval-augmented LCEL chain over *chroma_db*.

    The returned runnable is invoked with ``{"question": str}`` and
    yields the model's answer as a plain string.
    """
    # BUG FIX: the citation example's braces must be doubled ({{source}})
    # — otherwise ChatPromptTemplate treats "source" as a required input
    # variable and every invoke() fails with a missing-variable error.
    prompt = ChatPromptTemplate.from_template("""
[INST] <<SYS>>
You are an expert financial analyst for ITC Limited.
Use only the provided context to answer.
Cite sources like: [Source: {{source}}, page X]
<</SYS>>
Context: {context}
Question: {question}[/INST]
""")

    def format_docs(docs):
        """Render retrieved documents as a plain-text context block."""
        return "\n\n".join(
            f"Document Excerpt: {doc.page_content}\nSource: {doc.metadata['source']}"
            for doc in docs
        )

    retriever = chroma_db.as_retriever(search_kwargs={"k": 3})
    return (
        {
            # BUG FIX: the retriever expects a query string, but the
            # original piped it the whole input dict; extract the
            # question first (the callable is coerced to a Runnable).
            "context": (lambda x: x["question"]) | retriever | format_docs,
            "question": lambda x: x["question"],
        }
        | prompt
        | llm
        | StrOutputParser()
    )
# ---- Main app flow ----
if uploaded_files or manual_text:
    with st.spinner("Processing documents..."):
        chroma_db, processed_docs = process_documents(uploaded_files, manual_text)
        st.success(f"Processed {len(processed_docs)} documents")
        query_engine = get_query_engine(chroma_db)

    # Query interface
    st.divider()
    question = st.text_input(
        "Ask about ITC's finances:",
        placeholder="E.g. What was the revenue growth in 2023?",
    )
    if question:
        with st.spinner("Analyzing..."):
            answer = query_engine.invoke({"question": question})
            st.subheader("Analysis Result")
            st.markdown(answer)
            with st.expander("View source documents"):
                st.write(chroma_db.similarity_search(question))
else:
    st.info("Please upload documents or enter text to begin analysis")
# ---- Sidebar: usage help and system info ----
with st.sidebar:
    st.markdown("## How to Use")
    st.markdown("""
1. Upload PDF reports/presentations
2. Or paste financial text
3. Ask questions about the data
""")
    st.markdown("## Sample Questions")
    st.markdown("""
- What was ITC's net profit in 2023?
- Compare revenue between 2022-2024
- Show me key financial ratios
""")
    st.markdown("## System Info")
    # Plain string: the original's f-prefix had no placeholders.
    st.code("Using: Llama 3 (local)\nEmbeddings: sentence-transformers/all-MiniLM-L6-v2")