Spaces: No application file
Create app.py #1
by kristada673 - opened

app.py ADDED
@@ -0,0 +1,46 @@
import os, gradio
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from vectorstore import VectorstoreIndexCreator  # local module (not part of LangChain) expected to provide from_persistent_index(); it overrides the import above

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

# One loader per PDF in the reports folder
text_folder = '10K_Annual_Reports'
loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]

# Create the index if it does not exist yet, and save it to disk
if not os.path.isfile('VectorStoreIndex/chroma-embeddings.parquet'):
    from langchain.vectorstores import Chroma
    index = VectorstoreIndexCreator(vectorstore_cls=Chroma, vectorstore_kwargs={"persist_directory": "VectorStoreIndex/"}).from_loaders(loaders)
    index.vectorstore.persist()

# Load the saved index
index_saved = VectorstoreIndexCreator().from_persistent_index("VectorStoreIndex/")

description = """This is an AI conversational agent where you provide it with the annual reports of companies, and it studies them and answers any questions
you have about them. Currently, the LLM has been trained on the following companies' 10-K reports: Amazon, Apple, Alphabet (Google), Meta (Facebook), Microsoft,
Netflix and Tesla. I plan to include more companies' 10-K reports in the future.

Once the LLM is trained on a new 10-K report, it stores the vector embeddings of the document locally using ChromaDB to make querying faster and also to
save time and money on creating the vector embeddings for the same document in the future.

The LLM's universe is only the 10-K reports it has been trained on; it cannot pull information from the internet. So, you can ask it about anything that's
contained in those 10-K reports. If it cannot find an answer to your query within the 10-K reports, it will reply with "I don't know". Some examples of questions
you can ask are:

- What are the risks for Tesla?
- What were Google's earnings for the last fiscal year?
- Who are the competitors of Apple?

An example of querying about something the LLM's training did not include:

- Query: "What is Tesco?"
- Response: " Tesco is not mentioned in the context, so I don't know."
"""

def chat_response(query):
    return index_saved.query(query)

interface = gradio.Interface(fn=chat_response, inputs="text", outputs="text", title='Annual Reports GPT', description=description)

interface.launch()  # server_name="0.0.0.0", server_port=8080, share=True
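A note on the indexing call: from_persistent_index() is not a method of LangChain's own VectorstoreIndexCreator, so app.py relies on the local vectorstore module imported at the top. The sketch below shows one way such a module could be written against the same LangChain 0.0.x API the app already uses; the module name vectorstore.py matches the import, but the class body is an assumption of mine, not the helper actually shipped with the Space.

# vectorstore.py -- hypothetical sketch, not the module committed alongside this PR
from langchain.indexes import VectorstoreIndexCreator as _LangChainIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper


class VectorstoreIndexCreator(_LangChainIndexCreator):
    """Stock index creator plus reloading of a previously persisted vector store."""

    def from_persistent_index(self, path: str) -> VectorStoreIndexWrapper:
        # vectorstore_cls defaults to Chroma and embedding to OpenAIEmbeddings(),
        # so this re-opens the embeddings persisted under `path` instead of re-embedding the PDFs
        vectorstore = self.vectorstore_cls(persist_directory=path, embedding_function=self.embedding)
        return VectorStoreIndexWrapper(vectorstore=vectorstore)

With a helper along these lines, index_saved.query(query) in chat_response runs a retrieval-QA chain over the ChromaDB embeddings stored in VectorStoreIndex/, which is what lets the Space skip re-embedding the 10-K PDFs on every restart.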