Spaces:

avimittal30
/

fundamental_analysis

Sleeping

App Files Files Community

avimittal30 committed on Mar 20, 2025

Commit

0e2d97d

1 Parent(s): 7adfdd6

push files for app

Browse files

Files changed (4) hide show

.env +2 -0
app.py +192 -0
helper.py +185 -0
requirements.txt +15 -0

.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ OPENAI_KEY="<your-openai-api-key>"
2	+ Alphavantage_key="<your-alphavantage-api-key>"

app.py ADDED Viewed

	@@ -0,0 +1,192 @@

+import os
+import streamlit as st
+from dotenv import load_dotenv
+from helper import get_model_response, get_income_statement, get_balance_sheet, get_ticker, process_pdf
+import matplotlib.pyplot as plt
+import pandas as pd
+# Load environment variables
+load_dotenv()
+# Set Streamlit page config with black background and colored text
+st.set_page_config(page_title="Fundamental Analysis Dashboard", layout="wide")
+st.markdown(
+    """
+    <style>
+    .reportview-container, .main, .block-container {
+        background-color: black;
+        color: white;
+    }
+    .stTextArea, .stTextArea textarea {
+        background-color: #333333;
+        color: white;
+    }
+    .stDataFrame {
+        color: white;
+    }
+    th {
+        color: white;
+        font-weight: bold;
+    }
+    td {
+        color: white;
+    }
+    .stButton button {
+        background-color: #333333;
+        color: white;
+    }
+    h1, h2, h3, h4, h5 {
+        color: #ffcc00 !important; /* Brighter color for title and subtitles */
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+# Function to style the dataframe with black background and white text/borders
+def style_dataframe(df, highlight_columns=None, highlight_rows=None):
+    # Apply comma formatting to numeric columns
+    df = df.applymap(lambda x: f"{x:,.0f}" if isinstance(x, (int, float)) else x)
+    # Style DataFrame with black background, white text, and yellow borders
+    styled_df = df.style.set_properties(
+        **{
+            'background-color': 'black',
+            'color': 'white',
+            'border-color': '#ffcc00',  # Yellow border (same as title color)
+            'border-style': 'solid',
+            'border-width': '1px'
+        }
+    ).set_table_styles(
+        [
+            {'selector': 'thead th', 'props': [('color', 'white'), ('font-weight', 'bold'), ('border-color', '#ffcc00')]},
+            {'selector': 'thead', 'props': [('border-color', '#ffcc00')]},
+            {'selector': 'td', 'props': [('color', 'white'), ('border-color', '#ffcc00')]},  # Yellow borders in body
+        ]
+    )
+    # Set text within white background to black
+    styled_df = styled_df.set_properties(subset=df.columns, **{'background-color': 'white', 'color': 'black'})
+    # Highlight specific columns (e.g., dates) with black text
+    if highlight_columns:
+        styled_df = styled_df.set_properties(subset=highlight_columns, **{'color': 'black', 'background-color': 'white'})
+    # Highlight specific rows (e.g., KPIs like total_rev, ebitda, net_income) with black text
+    if highlight_rows:
+        for row in highlight_rows:
+            styled_df = styled_df.set_properties(subset=pd.IndexSlice[row, :], **{'color': 'black', 'background-color': 'white'})
+    return styled_df
+# Title of the app
+st.title("Fundamental Analysis Dashboard with LLM Insights")
+# Placeholder for uploaded report
+st.header("Upload Annual Report")
+# File uploader for the annual report
+uploaded_file = st.file_uploader("Choose an annual report (PDF format)", type="pdf")
+# Define the query for LLM
+query = "How has the performance been in this year compared to last year?"
+# Check if a file has been uploaded
+if uploaded_file is not None:
+    # Save the uploaded file locally
+    with open(uploaded_file.name, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    # Process the PDF and get the database (Chroma object)
+    db = process_pdf(uploaded_file.name)
+    # Use the LLM to search for relevant context
+    docs = db.similarity_search(query)
+    context = docs[0].page_content
+    # Extract company name from the uploaded file name
+    company_name = uploaded_file.name.split('.')[0]
+    # Display insights generated from LLM
+    insights = get_model_response(query, context)
+    st.subheader("Insights from Annual Report")
+    st.text_area("Report Insights", value=insights, height=180)
+    st.write(f"Analyzing the report for: {company_name}")
+    # Get the ticker symbol for the company
+    ticker = get_ticker(company_name)
+    if ticker:
+        st.write(f"Ticker Symbol: {ticker}")
+        # Get income statement and balance sheet data
+        st.header(f"Profit and Loss KPIs for {company_name} (Last 9 Years)")
+        income_statement_df = get_income_statement(ticker)
+        if income_statement_df is not None:
+            st.write("All figures are in millions.")
+            transposed_df = income_statement_df.set_index('dates').T  # Transpose to make years as columns
+            # Create columns for layout
+            col1, col2 = st.columns([2, 1])
+            # Display income statement table in the left column with styled DataFrame
+            with col1:
+                st.dataframe(style_dataframe(transposed_df))
+            # Extract year from dates for graph
+            income_statement_df['dates'] = pd.to_datetime(income_statement_df['dates'])
+            income_statement_df['year'] = income_statement_df['dates'].dt.year
+            # Display net_income graph in the right column
+            with col2:
+                fig, ax = plt.subplots()
+                fig.patch.set_facecolor('black')
+                ax.set_facecolor('black')
+                ax.plot(income_statement_df['year'], income_statement_df['net_income'], color='cyan', marker='o')
+                ax.set_title('Net Income Over Years', color='#ffcc00')  # Brighter color for the graph title
+                ax.set_ylabel('Net Income (millions)', color='white')
+                ax.set_xlabel('Year', color='white')
+                ax.tick_params(colors='white')
+                st.pyplot(fig)
+        else:
+            st.write("No income statement data available.")
+        st.header(f"Balance Sheet KPIs for {company_name} (Last 9 Years)")
+        balance_sheet_df = get_balance_sheet(ticker)
+        if balance_sheet_df is not None:
+            st.write("All figures are in millions.")
+            transposed_balance_sheet_df = balance_sheet_df.set_index('dates').T
+            # Create columns for layout
+            col1, col2 = st.columns([2, 1])
+            # Display balance sheet table in the left column with styled DataFrame
+            with col1:
+                st.dataframe(style_dataframe(transposed_balance_sheet_df))
+            # Extract year from dates for graph
+            balance_sheet_df['dates'] = pd.to_datetime(balance_sheet_df['dates'])
+            balance_sheet_df['year'] = balance_sheet_df['dates'].dt.year
+            # Display current_assets graph in the right column
+            with col2:
+                fig, ax = plt.subplots()
+                fig.patch.set_facecolor('black')
+                ax.set_facecolor('black')
+                ax.plot(balance_sheet_df['year'], balance_sheet_df['current_assets'], color='green', marker='o')
+                ax.set_title('Current Assets Over Years', color='#ffcc00')  # Brighter color for the graph title
+                ax.set_ylabel('Current Assets (millions)', color='white')
+                ax.set_xlabel('Year', color='white')
+                ax.tick_params(colors='white')
+                st.pyplot(fig)
+        else:
+            st.write("No balance sheet data available.")
+    else:
+        st.write(f"Unable to retrieve ticker symbol for {company_name}.")
+# Note or disclaimer
+st.markdown("**Note:** Data is fetched from Alphavantage API based on the uploaded PDF file name.")

helper.py ADDED Viewed

	@@ -0,0 +1,185 @@

+from langchain import OpenAI
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain_chroma import Chroma
+from dotenv import load_dotenv
+import os
+import requests
+import pandas as pd
+load_dotenv()
+OPENAI_API_KEY=os.getenv('OPENAI_KEY')
+AV_API_KEY = os.getenv('Alphavantage_key')
+llm = OpenAI(openai_api_key=OPENAI_API_KEY,temperature=0, model_name="gpt-3.5-turbo-instruct", max_tokens=-1)
+def process_pdf(file_path):
+    """
+    This function processes the uploaded PDF, splits it into text chunks,
+    and stores them in a Chroma database using OpenAI embeddings.
+    Args:
+        file_path (str): The path to the uploaded PDF file.
+        openai_api_key (str): Your OpenAI API key for embeddings.
+    Returns:
+        db: The Chroma database containing the embedded documents.
+    """
+    # Set up OpenAI API key
+    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+    # Load the PDF file
+    loader = PyPDFLoader(file_path)
+    pages = loader.load_and_split()
+    # Split text into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    documents = text_splitter.split_documents(pages)
+    # Create a Chroma database from the documents using OpenAI embeddings
+    embeddings = OpenAIEmbeddings()
+    db = Chroma.from_documents(documents, embeddings)
+    # Return the Chroma database
+    return db
+from openai import OpenAI
+def get_model_response(query, context):
+    load_dotenv()
+    OPENAI_API_KEY = os.getenv('OPENAI_KEY')
+    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+    prompt = f"""
+            You are a chatbot that is supposed to give response to user query's about a company's financials based on the following context.
+            You are given the following context:
+            {context}
+            You are asked to generate a short and accurate answer to the following question using the above context.
+            question: {query}
+            strictly do not hallucinate. Only use the above context to generate an answer. Please give your response in bullet points.
+            Remove any unwanted characters or symbols.
+            """
+    client = OpenAI()
+    response = client.chat.completions.create(
+        model="gpt-4-1106-preview",
+        max_tokens=1024,
+        temperature=0,
+        messages=[
+            {"role": "system", "content": prompt}
+        ]
+    )
+    model_response = response.choices[0].message.content
+    return model_response
+# query = "How has the performance been in this year compared to last year?"
+# docs = db.similarity_search(query)
+# print(docs[0].page_content)
+# context=docs[0].page_content
+def get_income_statement(symbol='INFY'):
+    load_dotenv()
+    url = "https://www.alphavantage.co/query"
+    params = {
+        "function": "INCOME_STATEMENT",
+        "symbol": symbol,
+        "apikey": AV_API_KEY
+    }
+    response = requests.get(url, params=params)
+    if response.status_code == 200:
+        data = response.json()
+        if not data:
+            print(f"No data found for {symbol}")
+            return None
+        rev = {'dates': [], 'total_rev': [], 'ebitda': [], 'net_income': []}
+        for i in range(0, 9):
+            rev['dates'].append(data['annualReports'][i]['fiscalDateEnding'])
+            rev['total_rev'].append(int(data['annualReports'][i]['totalRevenue']) / 1_000_000)
+            rev['ebitda'].append(int(data['annualReports'][i]['ebitda']) / 1_000_000)
+            rev['net_income'].append(int(data['annualReports'][i]['netIncome']) / 1_000_000)
+        is_df = pd.DataFrame(rev)
+        is_df= is_df.sort_values(by=['dates'], ascending=True)
+        is_df[['total_rev', 'ebitda', 'net_income']] = is_df[['total_rev', 'ebitda', 'net_income']].round(0).astype(int)
+        return is_df
+    else:
+        print(f"Error fetching data: {response.status_code}")
+        return None
+def get_balance_sheet(symbol='INFY'):
+    load_dotenv()
+    url = "https://www.alphavantage.co/query"
+    params = {
+        "function": "BALANCE_SHEET",
+        "symbol": symbol,
+        "apikey": AV_API_KEY
+    }
+    response = requests.get(url, params=params)
+    if response.status_code == 200:
+        bs_data = response.json()
+        if not bs_data:
+            print(f"No data found for {symbol}")
+            return None
+        bs = {'dates': [], 'debt': [], 'current_assets': [], 'cash_equivalents': []}
+        for i in range(0, 9):
+            bs['dates'].append(bs_data['annualReports'][i]['fiscalDateEnding'])
+            long_term_debt = bs_data['annualReports'][i].get('longTermDebt', '0')
+            bs['debt'].append(int(long_term_debt) / 1_000_000 if long_term_debt not in ['0', None, 'None', ''] else 0)
+            bs['current_assets'].append(int(bs_data['annualReports'][i]['totalCurrentAssets']) / 1_000_000)
+            bs['cash_equivalents'].append(int(bs_data['annualReports'][i]['cashAndCashEquivalentsAtCarryingValue']) / 1_000_000)
+        bs_df = pd.DataFrame(bs)
+        bs_df[['debt', 'current_assets', 'cash_equivalents']] = bs_df[['debt', 'current_assets', 'cash_equivalents']].round(0).astype(int)
+        bs_df= bs_df.sort_values(by=['dates'], ascending=True)
+        return bs_df
+    else:
+        print(f"Error fetching data: {response.status_code}")
+        return None
+def get_ticker(company):
+    # Define the desired market
+    desired_market = 'India/Bombay'
+    # API URL to search for the company symbol
+    url = f'https://www.alphavantage.co/query?function=SYMBOL_SEARCH&keywords={company}&apikey={AV_API_KEY}'
+    # Make a GET request
+    r = requests.get(url)
+    # Parse the JSON response
+    data = r.json()
+    # Iterate over the bestMatches to find the symbol for the desired market
+    for match in data.get('bestMatches', []):
+        if match['4. region'] == desired_market   :
+            symbol = match['1. symbol'].split('.')[0]
+            print(f"The symbol for {desired_market} is: {symbol}")
+            return symbol
+        elif match['4. region'] == 'United States'   :
+            symbol = match['1. symbol']
+            print(f"The symbol for {desired_market} is: {symbol}")
+            return symbol
+    else:
+        print(f"No symbol found for the market: {desired_market}")
+        return None

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+langchain
+pypdf
+langchain_community
+langchain-chroma
+openai
+langchain-openai
+python-dotenv
+matplotlib
+langchain_chroma
+streamlit
+langchain
+langchain_community
+langchain_openai
+matplotlib
+pypdf