File size: 8,528 Bytes
3616a04
406fad4
 
1e16f51
87bc35c
406fad4
 
 
87bc35c
 
3616a04
 
 
 
 
 
b06b2f5
a0c903a
 
8390921
b06b2f5
3616a04
9672ec8
 
3616a04
 
 
 
 
 
 
 
 
 
 
 
a0c903a
 
 
 
 
 
 
 
14db34a
8390921
 
 
 
 
 
 
a0c903a
8390921
 
 
 
 
 
a0c903a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3616a04
a0c903a
5b46520
 
3616a04
 
 
a0c903a
5b46520
3616a04
 
 
 
 
 
 
 
1e16f51
7155194
 
 
 
 
 
 
 
1e16f51
135137f
1e16f51
3616a04
1e16f51
 
bea36fa
1e16f51
 
 
66876cf
1e16f51
 
bea36fa
5acc872
 
6c1639c
1e16f51
 
 
 
6c1639c
5acc872
1e16f51
5acc872
1e16f51
 
6c1639c
1e16f51
 
6c1639c
1e16f51
 
 
 
3616a04
1e16f51
 
3616a04
1e16f51
 
 
 
3616a04
1e16f51
 
 
 
3616a04
1e16f51
 
 
 
 
3616a04
1e16f51
 
3616a04
 
1e16f51
 
3616a04
 
 
 
 
a0c903a
3616a04
 
 
 
 
 
 
 
 
 
a0c903a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import os
import nltk

# Keep NLTK resources in a per-user directory: "~" is expanded to the current
# user's home, and the directory is created if it does not exist yet.
# NOTE(review): this setup runs before the remaining imports below —
# presumably so the data is already in place when they load; confirm before
# reordering.
nltk_data_dir = os.path.expanduser("~/.nltk_data")
os.makedirs(nltk_data_dir, exist_ok=True)
# Register the directory with NLTK's resource search path.
nltk.data.path.append(nltk_data_dir)

# Fetch the 'punkt' resource into that directory at import time; quiet=True
# suppresses the downloader's console output.
nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
import chainlit as cl
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.core import ServiceContext
from llama_index.core.node_parser import SentenceSplitter
from dotenv import load_dotenv
import yfinance as yf
import pandas as pd

# Load settings from a .env file (if any) before reading the environment.
load_dotenv()

# Groq credential taken from the environment; None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Models shared by every chat session: a sentence-transformers embedder and
# a Groq-hosted chat LLM.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
llm = Groq(api_key=GROQ_API_KEY, model="llama3-70b-8192")

# Bundle the LLM, the embedder and the chunking policy (1000-unit chunks with
# 200 overlap) that is handed to the index builder.
# NOTE(review): ServiceContext is reported as deprecated in newer llama-index
# releases in favour of Settings — confirm against the pinned version.
service_context = ServiceContext.from_defaults(
    node_parser=SentenceSplitter(chunk_overlap=200, chunk_size=1000),
    embed_model=embed_model,
    llm=llm,
)

def format_financial_data(company_info, analysts_target, income_statement, balance_sheet, cash_flow):
    """Build a Markdown financial summary for one company.

    Args:
        company_info: Mapping of company metadata. Only 'longName' and
            'longBusinessSummary' are read, each defaulting to ''. ``None``
            is treated as an empty mapping.
        analysts_target: Accepted for interface compatibility only; the
            analyst price target section is currently disabled, so this
            value is ignored.
        income_statement: DataFrame for the quarterly income statement, or
            ``None``.
        balance_sheet: DataFrame for the quarterly balance sheet, or ``None``.
        cash_flow: DataFrame for the quarterly cash flow statement, or
            ``None``.

    Returns:
        A Markdown string: a title, a company overview, then one table per
        statement that is neither ``None`` nor empty.
    """
    # Tolerate a missing info mapping instead of failing on ``.get``.
    company_info = company_info or {}

    def format_currency(value):
        # Only non-null plain numbers are rendered as whole-dollar amounts;
        # everything else (text, NaN, ...) passes through untouched.
        if pd.notnull(value) and isinstance(value, (int, float)):
            return f"${value:,.0f}"
        return value

    def format_financial_table(df, title):
        """Render one statement as a titled Markdown table."""
        table = df.copy()  # never mutate the caller's frame
        # Date-like column labels become ISO dates; other labels are kept.
        table.columns = [
            col.strftime('%Y-%m-%d') if hasattr(col, 'strftime') else col
            for col in table.columns
        ]
        # Split camelCase row labels into words. Labels are cast to str first
        # so an index that does not hold strings cannot break ``.str``.
        table.index = table.index.astype(str).str.replace(
            r'([a-z])([A-Z])', r'\1 \2', regex=True
        )
        table = table.apply(lambda column: column.apply(format_currency))
        return f"\n## {title}\n\n" + table.to_markdown()

    sections = [
        f"# {company_info.get('longName', '')} Financial Summary\n",
        "\n## Company Overview\n",
        f"{company_info.get('longBusinessSummary', '')}\n",
    ]

    statements = (
        (income_statement, "Quarterly Income Statement"),
        (balance_sheet, "Quarterly Balance Sheet"),
        (cash_flow, "Quarterly Cash Flow Statement"),
    )
    for frame, title in statements:
        # Skip statements that were not supplied or carry no rows/columns.
        if frame is not None and not frame.empty:
            sections.append(format_financial_table(frame, title))

    return "".join(sections)

# Prompt sent to the query engine to produce the initial financial summary.
summary_prompt = (
    "You are a world-class financial analyst with extensive experience analyzing financial data. "
    "Give me a comprehensive summary of the financial data. Provide key insights on the company's financial performance, including trends, strengths, weaknesses, and any potential concerns."
    # Disabled extra instruction: "Answer in extensive bullet points please."
)

# Prompt sent to the query engine to produce a numbered list of follow-up
# questions. The fragments are joined by implicit string concatenation, so
# every fragment except the last must end with a space.
question_prompt = (
    "You are a financial analyst with extensive experience analyzing financial data. "
    "Read the financial summary and generate 10 questions focusing financial performance, including trends, strengths, weaknesses, and any potential concerns. "
    "Ask questions that require precise answers and provide strategic insight into the company's financial and strategic performance, such as revenue growth, market trends, profit margins, and more. "
    "Only ask questions that can be answered using the provided document, without making any assumptions or inferences beyond the text. "
    "Please format the questions as a list with a simple '1. Question 1', '2. Question 2', etc. structure. "
    "Unless retrievable from the documents, don't ask questions which cannot be compared to previous periods."
)

@cl.on_chat_start
async def on_chat_start():
    """Ask for a ticker, index its financial data and post a first analysis.

    Steps: prompt the user for a ticker symbol, fetch company info and the
    quarterly statements via yfinance, turn them into a Markdown summary,
    build a vector index over it, store the index in the user session, then
    post an LLM summary followed by generated questions and their answers.

    If no usable ticker is received the handler tells the user and returns.
    Any exception raised during processing is reported to the user as a chat
    message rather than propagated.
    """
    ticker_response = await cl.AskUserMessage(
        content=(
            "This tool is designed analyze detailed financial data for publicly traded companies. "
            "Provide company's ticker symbol and the tool fetches financial information, including income statements, "
            "balance sheets, and cash flow reports. It generates summaries and strategic due diligence.\n\n"
            "Please enter the ticker symbol for the company you want to analyze:"
        )
    ).send()

    # The ask can come back empty (e.g. when the user does not answer before
    # the timeout); indexing into it would raise outside the try block below.
    if not ticker_response:
        await cl.Message(
            content="No ticker symbol was received. Please restart the chat and enter a ticker symbol."
        ).send()
        return

    # NOTE(review): the reply text is read from 'content' as before; newer
    # Chainlit releases appear to use 'output' instead, so that key is
    # accepted as a fallback — confirm against the pinned Chainlit version.
    raw_ticker = ticker_response.get('content') or ticker_response.get('output') or ''
    ticker_symbol = raw_ticker.strip().upper()

    if not ticker_symbol:
        await cl.Message(
            content="No ticker symbol was received. Please restart the chat and enter a ticker symbol."
        ).send()
        return

    msg = cl.Message(content=f"Retrieving financial data for {ticker_symbol}...")
    await msg.send()

    try:
        # Get the data for the company
        company = yf.Ticker(ticker_symbol)

        # Extract company information
        company_info = company.info

        # Retrieve the Quarterly Financial Statements
        quarterly_income_statement = company.quarterly_financials
        quarterly_balance_sheet = company.quarterly_balance_sheet
        quarterly_cash_flow = company.quarterly_cashflow

        # Analyst price targets are not fetched, so None is passed in their
        # place.
        financial_summary = format_financial_data(
            company_info, None, quarterly_income_statement,
            quarterly_balance_sheet, quarterly_cash_flow
        )

        # Create a Document object with the financial summary
        document = Document(text=financial_summary, metadata={"company": ticker_symbol})

        # Create index
        index = VectorStoreIndex.from_documents(
            [document], service_context=service_context
        )

        # Store the index in the user session so on_message can query it
        cl.user_session.set("index", index)

        # Generate summary; the blocking query call is wrapped with
        # cl.make_async so it can be awaited
        query_engine = index.as_query_engine()
        summary_response = await cl.make_async(query_engine.query)(summary_prompt)
        await cl.Message(content=f"**Summary:**\n{summary_response}").send()

        # Generate questions and keep only the lines that start with a digit,
        # i.e. the numbered list entries requested by the prompt
        questions_response = await cl.make_async(query_engine.query)(question_prompt)
        questions_format = str(questions_response).split('\n')
        relevant_questions = [question.strip() for question in questions_format if question.strip() and question.strip()[0].isdigit()]

        # Answer generated questions one at a time
        await cl.Message(content="Generated questions and answers:").send()
        for question in relevant_questions:
            response = await cl.make_async(query_engine.query)(question)
            await cl.Message(content=f"**{question}**\n{response}").send()

        # Replace the "Retrieving..." status message with a completion notice
        msg.content = "Processing done. You can now ask more questions about the financial data!"
        await msg.update()

    except Exception as e:
        # Top-level handler boundary: surface the failure in the chat
        await cl.Message(content=f"An error occurred during processing: {str(e)}").send()
        
@cl.on_message
async def main(message: cl.Message):
    """Answer a follow-up question using the index stored for this session.

    Args:
        message: The incoming chat message; its ``content`` is used as the
            query text.

    If no index has been stored in the user session yet, the user is asked to
    provide a ticker symbol first. A failure while querying is reported to
    the user as a chat message, matching the error handling in the chat-start
    handler, instead of propagating out of the handler.
    """
    index = cl.user_session.get("index")

    if index is None:
        await cl.Message(content="Please provide a ticker symbol first before asking questions.").send()
        return

    try:
        query_engine = index.as_query_engine()
        # The blocking query call is wrapped with cl.make_async so it can be
        # awaited.
        response = await cl.make_async(query_engine.query)(message.content)
    except Exception as e:
        # Top-level handler boundary: surface the failure in the chat.
        await cl.Message(content=f"An error occurred during processing: {str(e)}").send()
        return

    # The answer is already complete at this point; iterating the string
    # streams it into the message one character at a time.
    response_message = cl.Message(content="")
    for token in str(response):
        await response_message.stream_token(token=token)

    await response_message.send()