File size: 6,412 Bytes
c3b001b 3940386 ecf4a3e 3940386 009a93d 3940386 ecf4a3e 3940386 ecf4a3e 3940386 ecf4a3e 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 f3eb4c3 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 f3eb4c3 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d 3940386 009a93d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
import streamlit as st
import sqlite3
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import PyPDFLoader
from langchain.docstore.document import Document
# Initialize models
@st.cache_resource
def load_models():
    """Construct the LLM client and the sentence-embedding model.

    The function is decorated with ``st.cache_resource``, so Streamlit keeps
    the returned objects and hands them back on later script reruns instead
    of rebuilding them.

    Returns:
        tuple: ``(llm, sentence_transformer)`` -- an ``Ollama`` client for
        the ``llama3`` model and a ``SentenceTransformer`` loaded from
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    chat_model = Ollama(model="llama3")
    embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return chat_model, embedder


# Module-level handles shared by the embedding helper and the query chain.
llm, sentence_transformer = load_models()
# Custom embedding function
def sentence_transformer_embedding(texts):
    """Embed ``texts`` with the module-level ``sentence_transformer`` model.

    Args:
        texts: Value forwarded unchanged to ``sentence_transformer.encode``.

    Returns:
        The encoder output converted to plain Python lists via ``.tolist()``.
    """
    vectors = sentence_transformer.encode(texts)
    return vectors.tolist()
# Streamlit UI: page header and short usage blurb.
st.title("π ITC Financial Analysis Assistant")
st.markdown(
    "\n"
    "Analyze ITC's financial documents using local AI (Llama 3).\n"
    "Upload annual reports, presentations, or paste text below.\n"
)

# File upload section: several PDF/TXT files may be selected at once.
uploaded_files = st.file_uploader(
    "Upload financial documents (PDF or TXT)",
    accept_multiple_files=True,
    type=["pdf", "txt"],
)

# Text input alternative: free-form text the user pastes directly.
manual_text = st.text_area("Or paste financial text directly:")
# Database setup
def init_database(db_path='itc_finance.db'):
    """Create the ``documents`` table if it does not already exist.

    The table stores one row per text chunk: an auto-incrementing ``id``,
    the ``source`` label, the chunk ``content`` and an ``embedding_id``.
    Calling this function repeatedly is safe because the DDL uses
    ``CREATE TABLE IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite database file to open (created by
            SQLite when missing). Defaults to ``'itc_finance.db'``, the
            file the rest of the app connects to.
    """
    from contextlib import closing

    # closing() releases the connection even when the DDL raises; the inner
    # ``with conn`` commits on success and rolls back on failure.
    with closing(sqlite3.connect(db_path)) as conn, conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT,
                content TEXT,
                embedding_id TEXT
            )
        ''')
# Process uploaded files
class _SentenceTransformerEmbeddings:
    """Adapter exposing the embedding helper through LangChain's interface.

    LangChain's Chroma wrapper calls ``embed_documents`` / ``embed_query`` on
    the object passed as ``embedding_function``; a bare function offers
    neither method, so it is wrapped here.
    """

    def embed_documents(self, texts):
        """Return one embedding vector per entry of ``texts``."""
        return sentence_transformer_embedding(list(texts))

    def embed_query(self, text):
        """Return the embedding vector of a single query string."""
        return sentence_transformer_embedding([text])[0]


def _store_chunks(cursor, chroma_db, source, chunks):
    """Insert each chunk into SQLite and index it in Chroma under the same id."""
    for chunk in chunks:
        cursor.execute(
            "INSERT INTO documents (source, content) VALUES (?, ?)",
            (source, chunk)
        )
        doc_id = cursor.lastrowid
        # Reuse the SQL row id as the Chroma id so the ``embedding_id``
        # column written below really identifies the stored vector.
        chroma_db.add_texts(
            texts=[chunk],
            metadatas=[{"source": source, "sql_id": doc_id}],
            ids=[str(doc_id)],
        )
        cursor.execute(
            "UPDATE documents SET embedding_id = ? WHERE id = ?",
            (str(doc_id), doc_id)
        )


@st.cache_resource
def process_documents(_uploaded_files, manual_text=""):
    """Chunk the uploads and pasted text, store them, and index them in Chroma.

    Every chunk is written to the ``documents`` table of ``itc_finance.db``
    and added to a Chroma collection persisted under ``./chroma_db``.

    NOTE(review): Streamlit does not hash parameters whose names start with
    an underscore, so the cache key is effectively ``manual_text`` alone. A
    different set of uploads with unchanged text returns the cached result;
    confirm this is intended.

    Args:
        _uploaded_files: Iterable of uploaded-file objects providing ``name``
            and ``getbuffer()``; ``None`` is treated as no files.
        manual_text: Optional pasted text, stored under the source label
            ``"Manual Input"``.

    Returns:
        tuple: ``(chroma_db, documents)`` -- the Chroma store and the list of
        source labels that were processed.
    """
    init_database()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    chroma_db = Chroma(
        embedding_function=_SentenceTransformerEmbeddings(),
        persist_directory="./chroma_db"
    )
    documents = []

    conn = sqlite3.connect('itc_finance.db')
    try:
        cursor = conn.cursor()

        # Process uploaded files
        for uploaded_file in _uploaded_files or []:
            # basename() keeps the scratch file in the working directory even
            # if the reported name contains path separators.
            file_path = f"./temp_{os.path.basename(uploaded_file.name)}"
            try:
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # Case-insensitive so "REPORT.PDF" is not parsed as text.
                if uploaded_file.name.lower().endswith('.pdf'):
                    pages = PyPDFLoader(file_path).load_and_split()
                else:
                    # Explicit encoding avoids depending on the platform
                    # default; undecodable bytes are replaced, not fatal.
                    with open(file_path, 'r', encoding="utf-8",
                              errors="replace") as f:
                        pages = [Document(page_content=f.read())]
            finally:
                # Always remove the scratch file, including on loader errors.
                if os.path.exists(file_path):
                    os.remove(file_path)

            for page in pages:
                _store_chunks(
                    cursor,
                    chroma_db,
                    uploaded_file.name,
                    text_splitter.split_text(page.page_content),
                )
            documents.append(uploaded_file.name)

        # Process manual text
        if manual_text:
            _store_chunks(
                cursor,
                chroma_db,
                "Manual Input",
                text_splitter.split_text(manual_text),
            )
            documents.append("Manual Input")

        conn.commit()
    finally:
        # Closing without a commit discards any partial SQL writes.
        conn.close()

    return chroma_db, documents
# Query engine
def get_query_engine(chroma_db):
    """Build the retrieval-augmented chain that answers questions.

    The chain expects a dict input with a ``"question"`` key. It retrieves
    the three most similar chunks from ``chroma_db``, formats them as the
    prompt context, sends the prompt to the module-level ``llm`` and parses
    the model output to a string.

    Args:
        chroma_db: Vector store providing ``as_retriever``.

    Returns:
        A runnable chain; call ``.invoke({"question": ...})`` to get the
        answer text.
    """
    from operator import itemgetter

    prompt = ChatPromptTemplate.from_template(
        "\n"
        "[INST] <<SYS>>\n"
        "You are an expert financial analyst for ITC Limited.\n"
        "Use only the provided context to answer.\n"
        # Doubled braces keep "{source}" as literal text; with single braces
        # the template would demand a "source" input the chain never supplies.
        "Cite sources like: [Source: {{source}}, page X]\n"
        "<</SYS>>\n"
        "Context: {context}\n"
        "Question: {question}[/INST]\n"
    )

    def format_docs(docs):
        """Join retrieved documents into a single context string."""
        return "\n\n".join(
            f"Document Excerpt: {doc.page_content}\n"
            f"Source: {doc.metadata.get('source', 'unknown')}"
            for doc in docs
        )

    retriever = chroma_db.as_retriever(search_kwargs={"k": 3})
    # The chain input is a dict, so the question string is extracted before
    # it reaches the retriever (which searches on a plain query string).
    pick_question = itemgetter("question")

    return (
        {
            "context": pick_question | retriever | format_docs,
            "question": pick_question,
        }
        | prompt
        | llm
        | StrOutputParser()
    )
# Main app logic: index whatever the user supplied, then answer questions.
if uploaded_files or manual_text:
    with st.spinner("Processing documents..."):
        chroma_db, processed_docs = process_documents(uploaded_files, manual_text)
    st.success(f"Processed {len(processed_docs)} documents")

    query_engine = get_query_engine(chroma_db)

    # Query interface
    st.divider()
    question = st.text_input(
        "Ask about ITC's finances:",
        placeholder="E.g. What was the revenue growth in 2023?"
    )

    if question:
        with st.spinner("Analyzing..."):
            answer = query_engine.invoke({"question": question})

        st.subheader("Analysis Result")
        st.markdown(answer)

        with st.expander("View source documents"):
            # k=3 mirrors the retriever configured in get_query_engine, so
            # the excerpts listed here match what the model was given.
            st.write(chroma_db.similarity_search(question, k=3))
else:
    st.info("Please upload documents or enter text to begin analysis")
# Sidebar with info: static usage help, sample questions and model details.
with st.sidebar:
    st.markdown("## How to Use")
    st.markdown(
        "\n"
        "1. Upload PDF reports/presentations\n"
        "2. Or paste financial text\n"
        "3. Ask questions about the data\n"
    )

    st.markdown("## Sample Questions")
    st.markdown(
        "\n"
        "- What was ITC's net profit in 2023?\n"
        "- Compare revenue between 2022-2024\n"
        "- Show me key financial ratios\n"
    )

    st.markdown("## System Info")
    # Plain string: there are no placeholders, so no f-string is needed.
    st.code("Using: Llama 3 (local)\nEmbeddings: sentence-transformers/all-MiniLM-L6-v2")