Entreprenerdly committed on
Commit
3616a04
·
verified ·
1 Parent(s): fd69599

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chainlit as cl
3
+ from llama_index.core import VectorStoreIndex, Document
4
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
+ from llama_index.llms.groq import Groq
6
+ from llama_index.core import ServiceContext
7
+ from llama_index.core.node_parser import SentenceSplitter
8
+ from PyPDF2 import PdfReader
9
+ import tempfile
10
+
11
# SECURITY: the Groq API key must never be committed to source control.
# It is read from the environment instead (e.g. a Hugging Face Space secret
# or a local shell export). Any key that was previously committed to this
# file stays in the repository history and should be revoked and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast at start-up rather than on the first LLM call.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it or configure it as a secret before starting the app."
    )

# Initialize models
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)

# Create service context shared by every index built in this app.
# NOTE(review): newer llama-index releases deprecate ServiceContext in favour
# of the global `Settings` object -- confirm against the pinned version.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    node_parser=SentenceSplitter(chunk_size=1000, chunk_overlap=200),
)

# Prompt sent once per upload to produce the initial summary message.
summary_prompt = (
    "You are a world-class financial analyst with extensive experience analyzing quarterly reports. "
    "Give me a comprehensive summary of the earnings report. Focus on the Strategic Insights and Key Financial Figures. "
    "Answer in extensive bullet points please."
)

# Prompt asking the model for a numbered list of follow-up questions; the
# numbered format matters because on_chat_start keeps only lines that start
# with a digit.
question_prompt = (
    "You are a financial analyst with extensive experience analyzing quarterly reports. "
    "Read the earnings call transcript and earnings presentation report and generate 10 questions focusing on the strategic insights and financial figures. "
    "Ask questions that require precise answers and provide strategic insight into the company's financial and strategic performance, such as revenue growth, market trends, profit margins, and more. "
    "Only ask questions that can be answered using the provided document, without making any assumptions or inferences beyond the text. "
    "Please format the questions as a list with a simple '1. Question 1', '2. Question 2', etc. structure. "
    "Unless retrievable from the documents, don't ask questions which cannot be compared to previous periods."
)
38
+
39
def read_file_content(file):
    """Return the text of an uploaded PDF or TXT file.

    Args:
        file: Upload object with a ``name`` attribute and either a
            ``content`` attribute (raw bytes) or a ``path`` attribute
            (location of the uploaded file on disk).

    Returns:
        The decoded text for ``.txt`` files, or the concatenated text of
        every page for ``.pdf`` files.

    Raises:
        ValueError: If the file name ends with neither ``.pdf`` nor ``.txt``.
    """
    import io  # function-scope import keeps the file's import block untouched

    lowered_name = file.name.lower()
    if not lowered_name.endswith((".pdf", ".txt")):
        raise ValueError("Unsupported file type")

    # Prefer in-memory bytes when the upload object carries them; otherwise
    # read the bytes from the on-disk path the upload object points to.
    raw = getattr(file, "content", None)
    if raw is None:
        with open(file.path, "rb") as handle:
            raw = handle.read()

    if lowered_name.endswith(".txt"):
        return raw.decode("utf-8")

    # PdfReader accepts a binary stream, so no temporary file is needed.
    pdf_reader = PdfReader(io.BytesIO(raw))
    # `or ""` guards against pages that yield no text (e.g. scanned images),
    # which would otherwise break string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
58
+
59
@cl.on_chat_start
async def on_chat_start():
    """Ask for documents, index them, then post a summary and a Q&A round.

    Flow: prompt the user for up to five PDF/TXT files, extract their text,
    build a vector index (stored in the user session under ``"index"``),
    send a summary, ask the model for numbered questions, and answer each
    one. Any exception raised during processing is reported to the user as
    a chat message instead of propagating.
    """
    files = await cl.AskFileMessage(
        content="Please upload PDF or TXT files to begin!",
        accept=["application/pdf", "text/plain"],
        max_files=5,
        max_size_mb=20,
    ).send()

    if not files:
        await cl.Message(content="No files were uploaded. Please try again.").send()
        return

    msg = cl.Message(content="Processing files...")
    await msg.send()

    try:
        # PDF parsing is synchronous and can be slow; run it off the event
        # loop so the server keeps serving other sessions.
        extract_text = cl.make_async(read_file_content)
        documents = []
        for file in files:
            text = await extract_text(file)
            documents.append(Document(text=text, metadata={"filename": file.name}))

        # Create index. Embedding every chunk is blocking work as well, so
        # it is offloaded the same way as the queries below.
        index = await cl.make_async(VectorStoreIndex.from_documents)(
            documents, service_context=service_context
        )

        # Store the index in the user session so on_message can reuse it.
        cl.user_session.set("index", index)

        query_engine = index.as_query_engine()
        run_query = cl.make_async(query_engine.query)

        # Generate summary
        summary_response = await run_query(summary_prompt)
        await cl.Message(content=f"**Summary:**\n{summary_response}").send()

        # Generate questions; keep only lines that start with a digit,
        # i.e. the "1. ..." items the prompt asks for.
        questions_response = await run_query(question_prompt)
        stripped_lines = (line.strip() for line in str(questions_response).split('\n'))
        relevant_questions = [
            line for line in stripped_lines if line and line[0].isdigit()
        ]

        # Answer generated questions
        await cl.Message(content="Generated questions and answers:").send()
        for question in relevant_questions:
            response = await run_query(question)
            await cl.Message(content=f"**{question}**\n{response}").send()

        msg.content = "Processing done. You can now ask more questions!"
        await msg.update()

    except Exception as e:
        # Top-level boundary for the handler: surface the failure in the chat.
        await cl.Message(content=f"An error occurred during processing: {str(e)}").send()
110
+
111
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message against the index stored in the session.

    If no index has been stored under ``"index"`` yet, the user is asked to
    upload files first. Otherwise the message text is used as the query and
    the answer is streamed back one character at a time.
    """
    stored_index = cl.user_session.get("index")
    if stored_index is None:
        notice = cl.Message(content="Please upload files first before asking questions.")
        await notice.send()
        return

    # The query call is synchronous, so it is wrapped with make_async.
    run_query = cl.make_async(stored_index.as_query_engine().query)
    answer_text = str(await run_query(message.content))

    # Start from an empty message and append the answer character by character.
    reply = cl.Message(content="")
    for character in answer_text:
        await reply.stream_token(token=character)

    await reply.send()