avimittal30 committed on
Commit
0e2d97d
·
1 Parent(s): 7adfdd6

push files for app

Browse files
Files changed (4) hide show
  1. .env +2 -0
  2. app.py +192 -0
  3. helper.py +185 -0
  4. requirements.txt +15 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ OPENAI_KEY="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"
2
+ Alphavantage_key="8FI4KAKZWM1Z4LBU"
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from dotenv import load_dotenv
4
+ from helper import get_model_response, get_income_statement, get_balance_sheet, get_ticker, process_pdf
5
+ import matplotlib.pyplot as plt
6
+ import pandas as pd
7
+
8
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Dark-theme overrides injected into the page as a raw <style> element.
_PAGE_CSS = """
<style>
.reportview-container, .main, .block-container {
background-color: black;
color: white;
}
.stTextArea, .stTextArea textarea {
background-color: #333333;
color: white;
}
.stDataFrame {
color: white;
}
th {
color: white;
font-weight: bold;
}
td {
color: white;
}
.stButton button {
background-color: #333333;
color: white;
}
h1, h2, h3, h4, h5 {
color: #ffcc00 !important; /* Brighter color for title and subtitles */
}
</style>
"""

# Configure the page first, then apply the custom stylesheet.
st.set_page_config(layout="wide", page_title="Fundamental Analysis Dashboard")
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
45
+
46
+ # Function to style the dataframe with black background and white text/borders
47
+ def style_dataframe(df, highlight_columns=None, highlight_rows=None):
48
+ # Apply comma formatting to numeric columns
49
+ df = df.applymap(lambda x: f"{x:,.0f}" if isinstance(x, (int, float)) else x)
50
+
51
+ # Style DataFrame with black background, white text, and yellow borders
52
+ styled_df = df.style.set_properties(
53
+ **{
54
+ 'background-color': 'black',
55
+ 'color': 'white',
56
+ 'border-color': '#ffcc00', # Yellow border (same as title color)
57
+ 'border-style': 'solid',
58
+ 'border-width': '1px'
59
+ }
60
+ ).set_table_styles(
61
+ [
62
+ {'selector': 'thead th', 'props': [('color', 'white'), ('font-weight', 'bold'), ('border-color', '#ffcc00')]},
63
+ {'selector': 'thead', 'props': [('border-color', '#ffcc00')]},
64
+ {'selector': 'td', 'props': [('color', 'white'), ('border-color', '#ffcc00')]}, # Yellow borders in body
65
+ ]
66
+ )
67
+
68
+ # Set text within white background to black
69
+ styled_df = styled_df.set_properties(subset=df.columns, **{'background-color': 'white', 'color': 'black'})
70
+
71
+ # Highlight specific columns (e.g., dates) with black text
72
+ if highlight_columns:
73
+ styled_df = styled_df.set_properties(subset=highlight_columns, **{'color': 'black', 'background-color': 'white'})
74
+
75
+ # Highlight specific rows (e.g., KPIs like total_rev, ebitda, net_income) with black text
76
+ if highlight_rows:
77
+ for row in highlight_rows:
78
+ styled_df = styled_df.set_properties(subset=pd.IndexSlice[row, :], **{'color': 'black', 'background-color': 'white'})
79
+
80
+ return styled_df
81
+
82
+
83
def _plot_yearly_series(frame, column, title, y_label, line_color):
    """Render a dark-themed line chart of ``frame[column]`` against ``frame['year']``."""
    fig, ax = plt.subplots()
    fig.patch.set_facecolor('black')
    ax.set_facecolor('black')
    ax.plot(frame['year'], frame[column], color=line_color, marker='o')
    ax.set_title(title, color='#ffcc00')  # Same bright yellow as the page headings
    ax.set_ylabel(y_label, color='white')
    ax.set_xlabel('Year', color='white')
    ax.tick_params(colors='white')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; without
    # this, figures pile up in memory each time the script is executed.
    plt.close(fig)


# Title of the app
st.title("Fundamental Analysis Dashboard with LLM Insights")

# Upload widget for the annual report
st.header("Upload Annual Report")
uploaded_file = st.file_uploader("Choose an annual report (PDF format)", type="pdf")

# Fixed question put to the LLM about the uploaded report
query = "How has the performance been in this year compared to last year?"

if uploaded_file is not None:
    # Keep only the final path component so a client-supplied name cannot
    # direct the write outside the working directory.
    pdf_name = os.path.basename(uploaded_file.name)
    with open(pdf_name, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Embed the PDF and retrieve the chunk most similar to the query
    db = process_pdf(pdf_name)
    docs = db.similarity_search(query)

    # The company name is taken from the file name (text before the first dot)
    company_name = pdf_name.split('.')[0]

    st.subheader("Insights from Annual Report")
    if docs:
        insights = get_model_response(query, docs[0].page_content)
        st.text_area("Report Insights", value=insights, height=180)
    else:
        # The search can come back empty; avoid indexing into an empty list.
        st.write("No relevant text could be extracted from the uploaded report.")

    st.write(f"Analyzing the report for: {company_name}")

    # Resolve the ticker symbol used for the financial-statement lookups
    ticker = get_ticker(company_name)

    if ticker:
        st.write(f"Ticker Symbol: {ticker}")

        # ---- Income statement ----
        st.header(f"Profit and Loss KPIs for {company_name} (Last 9 Years)")
        income_statement_df = get_income_statement(ticker)

        if income_statement_df is not None:
            st.write("All figures are in millions.")
            # Transpose so that each fiscal date becomes a column
            transposed_df = income_statement_df.set_index('dates').T

            col1, col2 = st.columns([2, 1])

            with col1:
                st.dataframe(style_dataframe(transposed_df))

            # The chart's x axis uses the calendar year of each fiscal date
            income_statement_df['dates'] = pd.to_datetime(income_statement_df['dates'])
            income_statement_df['year'] = income_statement_df['dates'].dt.year

            with col2:
                _plot_yearly_series(
                    income_statement_df,
                    'net_income',
                    'Net Income Over Years',
                    'Net Income (millions)',
                    'cyan',
                )
        else:
            st.write("No income statement data available.")

        # ---- Balance sheet ----
        st.header(f"Balance Sheet KPIs for {company_name} (Last 9 Years)")
        balance_sheet_df = get_balance_sheet(ticker)

        if balance_sheet_df is not None:
            st.write("All figures are in millions.")
            transposed_balance_sheet_df = balance_sheet_df.set_index('dates').T

            col1, col2 = st.columns([2, 1])

            with col1:
                st.dataframe(style_dataframe(transposed_balance_sheet_df))

            balance_sheet_df['dates'] = pd.to_datetime(balance_sheet_df['dates'])
            balance_sheet_df['year'] = balance_sheet_df['dates'].dt.year

            with col2:
                _plot_yearly_series(
                    balance_sheet_df,
                    'current_assets',
                    'Current Assets Over Years',
                    'Current Assets (millions)',
                    'green',
                )
        else:
            st.write("No balance sheet data available.")
    else:
        st.write(f"Unable to retrieve ticker symbol for {company_name}.")

# Note or disclaimer
st.markdown("**Note:** Data is fetched from Alphavantage API based on the uploaded PDF file name.")
helper.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import OpenAI
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_text_splitters import CharacterTextSplitter
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain_chroma import Chroma
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import requests
9
+ import pandas as pd
10
+
11
+
12
# Make values from a local .env file visible through the environment.
load_dotenv()

# API credentials; either is None when the variable is not defined.
OPENAI_API_KEY = os.environ.get('OPENAI_KEY')
AV_API_KEY = os.environ.get('Alphavantage_key')

# Module-level LLM handle built with the OpenAI key.
# NOTE(review): nothing in the visible code uses `llm` - confirm it is still needed.
llm = OpenAI(
    model_name="gpt-3.5-turbo-instruct",
    temperature=0,
    max_tokens=-1,
    openai_api_key=OPENAI_API_KEY,
)
18
+
19
def process_pdf(file_path):
    """Load a PDF, split it into text chunks and index them in Chroma.

    The OpenAI key read at import time (``OPENAI_API_KEY``) is exported as
    the ``OPENAI_API_KEY`` environment variable so that the embeddings
    client can pick it up.

    Args:
        file_path (str): Path to the PDF file on disk.

    Returns:
        The Chroma vector store built from the embedded chunks.

    Raises:
        ValueError: If no OpenAI key is configured, or if no text chunks
            could be produced from the PDF.
    """
    # Assigning None into os.environ raises an opaque TypeError, so fail
    # early with a message that says what is actually missing.
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_KEY is not set; add it to the environment or the .env file.")
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

    # Load the PDF
    loader = PyPDFLoader(file_path)
    pages = loader.load_and_split()

    # Re-split the loaded pages into chunks of at most ~1000 characters
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(pages)

    # Nothing to embed (e.g. the loader produced no text): report it
    # explicitly instead of handing an empty list to the vector store.
    if not documents:
        raise ValueError(f"No text could be extracted from {file_path}")

    # Embed the chunks with OpenAI embeddings and store them in Chroma
    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embeddings)
48
+
49
+
50
+ from openai import OpenAI
51
def get_model_response(query, context):
    """Ask the chat model to answer ``query`` using only ``context``.

    Args:
        query (str): The user's question.
        context (str): Text the model is instructed to base its answer on.

    Returns:
        str: The model's reply, or an empty string if the reply carried no
        text content.

    Raises:
        ValueError: If the ``OPENAI_KEY`` variable is not configured.
    """
    load_dotenv()
    api_key = os.getenv('OPENAI_KEY')
    # Assigning None into os.environ raises an opaque TypeError; fail with
    # a message that names the missing setting instead.
    if not api_key:
        raise ValueError("OPENAI_KEY is not set; add it to the environment or the .env file.")
    # The client below is created without arguments, so the key is
    # provided through the OPENAI_API_KEY environment variable.
    os.environ['OPENAI_API_KEY'] = api_key

    prompt = f"""
You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
You are given the following context:
{context}
You are asked to generate a short and accurate answer to the following question using the above context.
question: {query}
strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
Remove any unwanted characters or symbols.
"""

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        max_tokens=1024,
        temperature=0,
        messages=[
            {"role": "system", "content": prompt}
        ],
    )

    # The message content may be None; callers display the result as text.
    model_response = response.choices[0].message.content
    return model_response if model_response is not None else ""
78
+
79
+ # query = "How has the performance been in this year compared to last year?"
80
+ # docs = db.similarity_search(query)
81
+ # print(docs[0].page_content)
82
+ # context=docs[0].page_content
83
+
84
+
85
def get_income_statement(symbol='INFY'):
    """Fetch annual income-statement KPIs for ``symbol`` from Alpha Vantage.

    Up to the 9 most recent annual reports returned by the API are used.
    Amounts are converted to millions and rounded to whole numbers; a value
    that is missing or not an integer string (e.g. ``'None'``) is reported
    as 0.

    Args:
        symbol (str): Ticker symbol to query.

    Returns:
        pandas.DataFrame | None: Columns ``dates``, ``total_rev``,
        ``ebitda`` and ``net_income``, sorted by date ascending; ``None``
        when the request fails or the response contains no annual reports.
    """
    load_dotenv()

    def _millions(raw):
        """Convert a raw amount string to millions, treating unusable values as 0."""
        try:
            return int(raw) / 1_000_000
        except (TypeError, ValueError):
            return 0

    url = "https://www.alphavantage.co/query"
    params = {
        "function": "INCOME_STATEMENT",
        "symbol": symbol,
        "apikey": AV_API_KEY
    }

    # A timeout keeps the app from hanging forever on a stalled connection.
    try:
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        data = None

    # A 200 response can still lack 'annualReports' (for example when the
    # body is an error or informational message), so check before indexing.
    reports = data.get('annualReports') if isinstance(data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    # Fewer than 9 reports may be available; use what is there.
    reports = reports[:9]
    rev = {
        'dates': [report['fiscalDateEnding'] for report in reports],
        'total_rev': [_millions(report.get('totalRevenue')) for report in reports],
        'ebitda': [_millions(report.get('ebitda')) for report in reports],
        'net_income': [_millions(report.get('netIncome')) for report in reports],
    }

    is_df = pd.DataFrame(rev)
    is_df = is_df.sort_values(by=['dates'], ascending=True)
    kpi_columns = ['total_rev', 'ebitda', 'net_income']
    is_df[kpi_columns] = is_df[kpi_columns].round(0).astype(int)
    return is_df
117
+
118
+
119
+
120
def get_balance_sheet(symbol='INFY'):
    """Fetch annual balance-sheet KPIs for ``symbol`` from Alpha Vantage.

    Up to the 9 most recent annual reports returned by the API are used.
    Amounts are converted to millions and rounded to whole numbers; a value
    that is missing or not an integer string (e.g. ``'None'``) is reported
    as 0, which extends to every field the treatment previously applied
    only to long-term debt.

    Args:
        symbol (str): Ticker symbol to query.

    Returns:
        pandas.DataFrame | None: Columns ``dates``, ``debt``,
        ``current_assets`` and ``cash_equivalents``, sorted by date
        ascending; ``None`` when the request fails or the response contains
        no annual reports.
    """
    load_dotenv()

    def _millions(raw):
        """Convert a raw amount string to millions, treating unusable values as 0."""
        try:
            return int(raw) / 1_000_000
        except (TypeError, ValueError):
            return 0

    url = "https://www.alphavantage.co/query"
    params = {
        "function": "BALANCE_SHEET",
        "symbol": symbol,
        "apikey": AV_API_KEY
    }

    # A timeout keeps the app from hanging forever on a stalled connection.
    try:
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    try:
        bs_data = response.json()
    except ValueError:
        bs_data = None

    # A 200 response can still lack 'annualReports' (for example when the
    # body is an error or informational message), so check before indexing.
    reports = bs_data.get('annualReports') if isinstance(bs_data, dict) else None
    if not reports:
        print(f"No data found for {symbol}")
        return None

    # Fewer than 9 reports may be available; use what is there.
    reports = reports[:9]
    bs = {
        'dates': [report['fiscalDateEnding'] for report in reports],
        'debt': [_millions(report.get('longTermDebt')) for report in reports],
        'current_assets': [_millions(report.get('totalCurrentAssets')) for report in reports],
        'cash_equivalents': [
            _millions(report.get('cashAndCashEquivalentsAtCarryingValue')) for report in reports
        ],
    }

    bs_df = pd.DataFrame(bs)
    kpi_columns = ['debt', 'current_assets', 'cash_equivalents']
    bs_df[kpi_columns] = bs_df[kpi_columns].round(0).astype(int)
    bs_df = bs_df.sort_values(by=['dates'], ascending=True)
    return bs_df
152
+
153
+
154
+
155
def get_ticker(company):
    """Look up a ticker symbol for ``company`` via Alpha Vantage symbol search.

    The search results are scanned in order and the first one listed in
    either the 'India/Bombay' or the 'United States' region is returned.
    For 'India/Bombay' the exchange suffix (the part after the first dot in
    the symbol) is dropped.

    Args:
        company (str): Company name or keywords to search for.

    Returns:
        str | None: The matching symbol, or ``None`` if the request fails
        or no result belongs to one of the supported regions.
    """
    desired_market = 'India/Bombay'

    # Pass the query as params so the company name is URL-encoded properly
    # (names with spaces or '&' would otherwise corrupt the query string).
    url = 'https://www.alphavantage.co/query'
    params = {
        'function': 'SYMBOL_SEARCH',
        'keywords': company,
        'apikey': AV_API_KEY,
    }

    try:
        r = requests.get(url, params=params, timeout=30)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching ticker for {company}: {exc}")
        return None

    matches = data.get('bestMatches', []) if isinstance(data, dict) else []

    # Examine every result: a non-matching entry must not end the search,
    # otherwise only the very first result would ever be considered.
    for match in matches:
        region = match.get('4. region')
        symbol = match.get('1. symbol')
        if not symbol:
            continue
        if region == desired_market:
            symbol = symbol.split('.')[0]
            print(f"The symbol for {desired_market} is: {symbol}")
            return symbol
        if region == 'United States':
            print(f"The symbol for United States is: {symbol}")
            return symbol

    print(f"No symbol found for the market: {desired_market}")
    return None
181
+
182
+
183
+
184
+
185
+
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ pypdf
3
+ langchain_community
4
+ langchain-chroma
5
+ openai
6
+ langchain-openai
7
+ python-dotenv
8
+ matplotlib
9
+ langchain_chroma
10
+ streamlit
11
+ langchain
12
+ langchain_community
13
+ langchain_openai
14
+ matplotlib
15
+ pypdf