# Space25 / app.py
# QuantumLearner's picture
# Update app.py
# d081699 verified
import os
import nltk
import requests
import datetime
# Keep NLTK resources in a per-user directory under the home folder
# ("~" is expanded by os.path.expanduser), creating it on first run.
nltk_data_dir = os.path.expanduser("~/.nltk_data")
os.makedirs(nltk_data_dir, exist_ok=True)
# Register the directory on NLTK's search path so resources stored there are found.
nltk.data.path.append(nltk_data_dir)
# Fetch the 'punkt' resource into that directory at import time (quiet=True
# suppresses download output).
# NOTE(review): presumably required by the sentence splitting used further down,
# which would explain why this runs before the remaining imports - confirm.
nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)
import chainlit as cl
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.core import ServiceContext
from llama_index.core.node_parser import SentenceSplitter
from dotenv import load_dotenv
import yfinance as yf
import pandas as pd
# Populate the process environment from a .env file, then read the two API
# keys this module uses. Either key is None when its variable is not set.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
FMP_API_KEY = os.environ.get("FMP_API_KEY")

# Embedding model and chat LLM shared by every index built in this module.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)

# Bundle the LLM, the embedder and a sentence splitter (chunk size 1000 with
# an overlap of 200) into the context handed to VectorStoreIndex.
# NOTE(review): ServiceContext appears to be deprecated in newer llama_index
# releases - confirm against the pinned version before upgrading.
service_context = ServiceContext.from_defaults(
    node_parser=SentenceSplitter(chunk_size=1000, chunk_overlap=200),
    embed_model=embed_model,
    llm=llm,
)
def fetch_annual_report_10k(symbol: str, max_years: int = 5) -> str:
    """Fetch the most recent available 10-K for *symbol* from Financial Modeling Prep.

    Starting with the current calendar year and walking backward one year at
    a time, the FMP ``financial-reports-json`` endpoint is queried for a
    full-year (``FY``) report. The body of the first usable response is
    returned verbatim.

    Args:
        symbol: Ticker symbol to look up. Surrounding whitespace is ignored.
        max_years: Number of years to try, counting the current year.
            Defaults to 5; a value of 0 or less performs no requests.

    Returns:
        The raw response text of the first usable report. When nothing usable
        is retrieved, a human-readable message that always begins with
        ``"No 10-K data found"``. Request and HTTP errors are not raised to
        the caller; they only cause the next (older) year to be tried.
    """
    ticker = (symbol or "").strip()
    if not ticker:
        # Nothing to look up: avoid firing a request per year with an empty symbol.
        return "No 10-K data found: no ticker symbol was provided."

    endpoint = "https://financialmodelingprep.com/api/v4/financial-reports-json"
    current_year = datetime.datetime.now().year

    for year_try in range(current_year, current_year - max_years, -1):
        # Let requests build the query string so the user-supplied ticker is
        # URL-encoded instead of being spliced into the URL as-is.
        query = {
            "symbol": ticker,
            "year": year_try,
            "period": "FY",
            "apikey": FMP_API_KEY,
        }
        try:
            response = requests.get(endpoint, params=query, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Deliberate best-effort: a failed request for one year just means
            # we fall back to the previous year. The exception is not surfaced
            # because its message contains the request URL, API key included.
            continue

        text_data = response.text
        # FMP can report errors inside the response body, and a near-empty
        # body carries no report; in both cases try the previous year.
        if "Error Message" in text_data or len(text_data.strip()) < 10:
            continue

        return text_data

    # Every year in the window failed or came back empty.
    return (
        f"No 10-K data found for {ticker} in the last {max_years} years "
        "(or API returned an error)."
    )
# Instruction given to the query engine to produce the bullet-point summary
# of the report. Sentences are joined with single spaces.
summary_prompt = " ".join(
    [
        "You are a world-class financial analyst with extensive experience analyzing annual reports.",
        "Provide a comprehensive summary of the 10-K report. Focus on Strategic Insights, Key Financial Figures, and Risk Factors.",
        "Answer in extensive bullet points, summarizing the company's performance, strengths, and weaknesses.",
    ]
)

# Instruction given to the query engine to produce ten numbered strategic
# questions about the report. Sentences are joined with single spaces.
question_prompt = " ".join(
    [
        "You are a financial analyst with extensive experience analyzing annual reports.",
        "Read the 10-K report and generate 10 strategic questions focusing on the company's performance, risks, and financial figures.",
        "Ask questions that provide strategic insights into the company's long-term goals, revenue trends, competitive position, and more.",
        "Format the questions as a numbered list (e.g., '1. Question').",
    ]
)
@cl.on_chat_start
async def on_chat_start():
    """Run the start-of-chat workflow for a new session.

    Asks the user for a ticker symbol, fetches the matching 10-K text with
    ``fetch_annual_report_10k``, builds a vector index over it and stores
    that index in the user session under ``"index"``. It then posts a
    summary, followed by generated strategic questions with an answer for
    each one.

    Blocking work (the HTTP fetch, index construction and every query) is
    dispatched through ``cl.make_async`` so the event loop is not blocked
    while it runs. Any exception raised during processing is reported to the
    user as a chat message instead of propagating.
    """
    ticker_response = await cl.AskUserMessage(
        content=(
            "This tool is designed to analyze 10-K annual reports for publicly traded companies. "
            "Provide the company's ticker symbol, and the tool will fetch the latest available 10-K report "
            "from the last few years. It generates summaries and strategic due diligence. "
            "Ask your own questions afterwards.\n\n"
            "Please enter the ticker symbol for the company you want to analyze (e.g. MSFT):"
        )
    ).send()

    # Treat a missing reply, a reply without 'content', and a blank or
    # whitespace-only reply the same way: there is no ticker to work with.
    raw_ticker = ""
    if ticker_response and 'content' in ticker_response:
        raw_ticker = ticker_response['content'] or ""
    ticker_symbol = raw_ticker.strip().upper()
    if not ticker_symbol:
        await cl.Message(content="No ticker symbol provided. Please enter a valid ticker symbol to proceed.").send()
        return

    msg = cl.Message(content=f"Retrieving the latest 10-K report for {ticker_symbol}...")
    await msg.send()

    # Prefixes that mark the fetch result as an error message rather than a
    # report. The fetch helper visible in this file only produces the first
    # one; the others are kept so the previous checks remain in force.
    failure_prefixes = (
        "No 10-K data found",
        "HTTP error",
        "Request error",
        "An unexpected error occurred",
    )

    def build_index(report_text):
        # Wrap the report in a single Document and index it (synchronous).
        document = Document(text=report_text, metadata={"company": ticker_symbol})
        return VectorStoreIndex.from_documents([document], service_context=service_context)

    try:
        # The fetch uses blocking HTTP calls, so run it off the event loop.
        annual_report_text = await cl.make_async(fetch_annual_report_10k)(ticker_symbol)

        if annual_report_text.startswith(failure_prefixes):
            await cl.Message(content=annual_report_text).send()
            return

        # Index construction is synchronous as well; keep it off the event loop.
        index = await cl.make_async(build_index)(annual_report_text)
        cl.user_session.set("index", index)

        query_engine = index.as_query_engine()
        run_query = cl.make_async(query_engine.query)

        summary_response = await run_query(summary_prompt)
        await cl.Message(content=f"**Summary:**\n{summary_response}").send()

        questions_response = await run_query(question_prompt)
        # Keep only the lines that look like numbered list items ("1. ...").
        stripped_lines = (line.strip() for line in str(questions_response).split('\n'))
        relevant_questions = [
            line for line in stripped_lines if line and line[0].isdigit()
        ]

        await cl.Message(content="Generated strategic questions and answers:").send()

        for question in relevant_questions:
            await cl.Message(content=f"**{question}**").send()
            answer = await run_query(question)
            await cl.Message(content=f"**Answer:**\n{answer}").send()

        # Reuse the initial status message to signal completion.
        msg.content = "Processing done. You can now ask more questions about the 10-K report!"
        await msg.update()

    except Exception as e:
        # Top-level boundary of the handler: report the failure in the chat.
        await cl.Message(content=f"An error occurred during processing: {str(e)}").send()
@cl.on_message
async def main(message: cl.Message):
    """Answer a follow-up chat message using the index stored in the session.

    If no index has been stored under ``"index"`` in the user session, the
    user is asked to provide a ticker symbol first. Otherwise the message
    text is run through a query engine built from that index, and the answer
    is streamed into a new chat message one character at a time before the
    message is sent.
    """
    session_index = cl.user_session.get("index")

    if session_index is None:
        reminder = cl.Message(
            content="Please provide a ticker symbol first before asking questions."
        )
        await reminder.send()
        return

    # The query call is synchronous, so it is wrapped with cl.make_async.
    engine = session_index.as_query_engine()
    ask = cl.make_async(engine.query)
    result = await ask(message.content)

    reply = cl.Message(content="")
    # Iterating a str yields single characters, each streamed as one token.
    for character in str(result):
        await reply.stream_token(token=character)

    await reply.send()