Space25

Sleeping

App Files Files Community

Entreprenerdly committed on Aug 11, 2024

Commit

3616a04

verified ·

1 Parent(s): fd69599

Create app.py

Browse files

Files changed (1) hide show

app.py +127 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import os
+import chainlit as cl
+from llama_index.core import VectorStoreIndex, Document
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.groq import Groq
+from llama_index.core import ServiceContext
+from llama_index.core.node_parser import SentenceSplitter
+from PyPDF2 import PdfReader
+import tempfile
+GROQ_API_KEY = "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"
+# Initialize models
+embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
+llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
+# Create service context
+service_context = ServiceContext.from_defaults(
+    llm=llm,
+    embed_model=embed_model,
+    node_parser=SentenceSplitter(chunk_size=1000, chunk_overlap=200)
+)
+# Prompt sent to the query engine to produce the initial bullet-point summary
+# of the uploaded report(s).
+summary_prompt = (
+    "You are a world-class financial analyst with extensive experience analyzing quarterly reports. "
+    "Give me a comprehensive summary of the earnings report. Focus on the Strategic Insights and Key Financial Figures. "
+    "Answer in extensive bullet points please."
+)
+# Prompt asking the model for 10 numbered questions about the documents.
+# The numbered-list format requested here matters: on_chat_start keeps only
+# the response lines whose first character is a digit.
+question_prompt = (
+    "You are a financial analyst with extensive experience analyzing quarterly reports. "
+    "Read the earnings call transcript and earnings presentation report and generate 10 questions focusing on the strategic insights and financial figures. "
+    "Ask questions that require precise answers and provide strategic insight into the company's financial and strategic performance, such as revenue growth, market trends, profit margins, and more. "
+    "Only ask questions that can be answered using the provided document, without making any assumptions or inferences beyond the text. "
+    "Please format the questions as a list with a simple '1. Question 1', '2. Question 2', etc. structure. "
+    "Unless retrievable from the documents, don't ask questions which cannot be compared to previous periods."
+)
+def read_file_content(file):
+    if file.name.lower().endswith('.pdf'):
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            temp_file.write(file.content)
+            temp_file_path = temp_file.name
+        try:
+            pdf_reader = PdfReader(temp_file_path)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+        finally:
+            os.unlink(temp_file_path)
+    elif file.name.lower().endswith('.txt'):
+        text = file.content.decode('utf-8')
+    else:
+        raise ValueError("Unsupported file type")
+    return text
+@cl.on_chat_start
+async def on_chat_start():
+    files = await cl.AskFileMessage(
+        content="Please upload PDF or TXT files to begin!",
+        accept=["application/pdf", "text/plain"],
+        max_files=5,
+        max_size_mb=20,
+    ).send()
+    if not files:
+        await cl.Message(content="No files were uploaded. Please try again.").send()
+        return
+    msg = cl.Message(content="Processing files...")
+    await msg.send()
+    try:
+        documents = []
+        for file in files:
+            text = read_file_content(file)
+            documents.append(Document(text=text, metadata={"filename": file.name}))
+        # Create index
+        index = VectorStoreIndex.from_documents(
+            documents, service_context=service_context
+        )
+        # Store the index in the user session
+        cl.user_session.set("index", index)
+        # Generate summary
+        query_engine = index.as_query_engine()
+        summary_response = await cl.make_async(query_engine.query)(summary_prompt)
+        await cl.Message(content=f"**Summary:**\n{summary_response}").send()
+        # Generate questions
+        questions_response = await cl.make_async(query_engine.query)(question_prompt)
+        questions_format = str(questions_response).split('\n')
+        relevant_questions = [question.strip() for question in questions_format if question.strip() and question.strip()[0].isdigit()]
+        # Answer generated questions
+        await cl.Message(content="Generated questions and answers:").send()
+        for question in relevant_questions:
+            response = await cl.make_async(query_engine.query)(question)
+            await cl.Message(content=f"**{question}**\n{response}").send()
+        msg.content = "Processing done. You can now ask more questions!"
+        await msg.update()
+    except Exception as e:
+        await cl.Message(content=f"An error occurred during processing: {str(e)}").send()
+@cl.on_message
+async def main(message: cl.Message):
+    index = cl.user_session.get("index")
+    if index is None:
+        await cl.Message(content="Please upload files first before asking questions.").send()
+        return
+    query_engine = index.as_query_engine()
+    response = await cl.make_async(query_engine.query)(message.content)
+    response_message = cl.Message(content="")
+    for token in str(response):
+        await response_message.stream_token(token=token)
+    await response_message.send()