Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import
|
| 3 |
import re
|
| 4 |
import traceback
|
| 5 |
import faiss
|
|
@@ -16,11 +16,9 @@ st.set_page_config(page_title="Financial Insights Chatbot", page_icon="π", la
|
|
| 16 |
|
| 17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 18 |
|
| 19 |
-
|
| 20 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 21 |
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
|
| 22 |
|
| 23 |
-
|
| 24 |
try:
|
| 25 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
| 26 |
st.success("✅ LLM initialized successfully. Using llama3-70b-8192")
|
|
@@ -31,6 +29,7 @@ except Exception as e:
|
|
| 31 |
embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
|
| 32 |
|
| 33 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
|
|
|
| 34 |
def fetch_financial_data(company_ticker):
|
| 35 |
if not company_ticker:
|
| 36 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
|
@@ -61,19 +60,17 @@ def fetch_financial_data(company_ticker):
|
|
| 61 |
traceback.print_exc()
|
| 62 |
return "Error fetching financial data."
|
| 63 |
|
| 64 |
-
|
| 65 |
def extract_and_embed_text(pdf_file):
|
| 66 |
-
"""Processes PDFs and generates embeddings with GPU acceleration."""
|
| 67 |
try:
|
| 68 |
docs, tokenized_texts = [], []
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
tokenized_texts.append(chunk.split())
|
| 77 |
|
| 78 |
embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
|
| 79 |
|
|
@@ -121,7 +118,6 @@ def generate_response(user_query, company_ticker, mode, uploaded_file):
|
|
| 121 |
traceback.print_exc()
|
| 122 |
return "Error generating response."
|
| 123 |
|
| 124 |
-
|
| 125 |
st.markdown(
|
| 126 |
"<h1 style='text-align: center; color: #4CAF50;'>π AI-Powered Financial Insights Chatbot</h1>",
|
| 127 |
unsafe_allow_html=True
|
|
@@ -141,7 +137,6 @@ with col2:
|
|
| 141 |
st.markdown("### π **Enter Your Query**")
|
| 142 |
user_query = st.text_input("π¬ What financial insights are you looking for?")
|
| 143 |
|
| 144 |
-
st.markdown("---")
|
| 145 |
if mode == "π PDF Upload Mode":
|
| 146 |
st.markdown("### π Upload Your Financial Report")
|
| 147 |
uploaded_file = st.file_uploader("πΌ Upload PDF (Only for PDF Mode)", type=["pdf"])
|
|
@@ -157,7 +152,7 @@ if st.button("π Analyze Now"):
|
|
| 157 |
elif mode == "π Live Data Mode" and not company_ticker:
|
| 158 |
st.error("β Please enter a valid company ticker symbol.")
|
| 159 |
else:
|
| 160 |
-
with st.spinner("π Your Query is Processing, this can take
|
| 161 |
response = generate_response(user_query, company_ticker, mode, uploaded_file)
|
| 162 |
st.markdown("---")
|
| 163 |
st.markdown("<h3 style='color: #4CAF50;'>π‘ AI Response</h3>", unsafe_allow_html=True)
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pymupdf # Using pymupdf directly
|
| 3 |
import re
|
| 4 |
import traceback
|
| 5 |
import faiss
|
|
|
|
| 16 |
|
| 17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 18 |
|
|
|
|
| 19 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 20 |
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
|
| 21 |
|
|
|
|
| 22 |
try:
|
| 23 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
| 24 |
st.success("✅ LLM initialized successfully. Using llama3-70b-8192")
|
|
|
|
| 29 |
embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
|
| 30 |
|
| 31 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
| 32 |
+
|
| 33 |
def fetch_financial_data(company_ticker):
|
| 34 |
if not company_ticker:
|
| 35 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
|
|
|
| 60 |
traceback.print_exc()
|
| 61 |
return "Error fetching financial data."
|
| 62 |
|
|
|
|
| 63 |
def extract_and_embed_text(pdf_file):
|
| 64 |
+
"""Processes PDFs and generates embeddings with GPU acceleration using pymupdf."""
|
| 65 |
try:
|
| 66 |
docs, tokenized_texts = [], []
|
| 67 |
+
|
| 68 |
+
with pymupdf.open(stream=pdf_file.read(), filetype="pdf") as doc:
|
| 69 |
+
full_text = "\n".join(page.get_text("text") for page in doc)
|
| 70 |
+
chunks = text_splitter.split_text(full_text)
|
| 71 |
+
for chunk in chunks:
|
| 72 |
+
docs.append(chunk)
|
| 73 |
+
tokenized_texts.append(chunk.split())
|
|
|
|
| 74 |
|
| 75 |
embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
|
| 76 |
|
|
|
|
| 118 |
traceback.print_exc()
|
| 119 |
return "Error generating response."
|
| 120 |
|
|
|
|
| 121 |
st.markdown(
|
| 122 |
"<h1 style='text-align: center; color: #4CAF50;'>π AI-Powered Financial Insights Chatbot</h1>",
|
| 123 |
unsafe_allow_html=True
|
|
|
|
| 137 |
st.markdown("### π **Enter Your Query**")
|
| 138 |
user_query = st.text_input("π¬ What financial insights are you looking for?")
|
| 139 |
|
|
|
|
| 140 |
if mode == "π PDF Upload Mode":
|
| 141 |
st.markdown("### π Upload Your Financial Report")
|
| 142 |
uploaded_file = st.file_uploader("πΌ Upload PDF (Only for PDF Mode)", type=["pdf"])
|
|
|
|
| 152 |
elif mode == "π Live Data Mode" and not company_ticker:
|
| 153 |
st.error("β Please enter a valid company ticker symbol.")
|
| 154 |
else:
|
| 155 |
+
with st.spinner("π Your Query is Processing, this can take up to 5 - 7 minutes ⏳"):
|
| 156 |
response = generate_response(user_query, company_ticker, mode, uploaded_file)
|
| 157 |
st.markdown("---")
|
| 158 |
st.markdown("<h3 style='color: #4CAF50;'>π‘ AI Response</h3>", unsafe_allow_html=True)
|