Spaces:

whymath
/

Teach2LearnVirtualStudent

Sleeping

App Files Community

whymath committed on May 15, 2024

Commit

08f0208

1 Parent(s): c1a97fb

Removing openai from requirements

Browse files

Files changed (2) hide show

requirements.txt +0 -1
utils.py +20 -3

requirements.txt CHANGED Viewed

@@ -11,4 +11,3 @@ pymupdf
 wandb
 chainlit
 huggingface_hub
-openai

 wandb
 chainlit
 huggingface_hub

utils.py CHANGED Viewed

@@ -23,8 +23,11 @@ def chunk_documents(docs, tiktoken_len):
         chunk_overlap = 0,
         length_function = tiktoken_len,
     )
     split_chunks = text_splitter.split_documents(docs)
     print('len(split_chunks) =', len(split_chunks))
     return split_chunks
@@ -32,22 +35,31 @@ def create_raqa_chain_from_docs():
     # Load the documents from a PDF file using PyMuPDFLoader
     # docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
     docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
     print("Loaded", len(docs), "documents")
     print(docs[0])
-    # Chunk documents, load embedding model, create vectorstore and retriever
     split_chunks = chunk_documents(docs, tiktoken_len)
     embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
     qdrant_vectorstore = Qdrant.from_documents(
         split_chunks,
         embedding_model,
         location=":memory:",
         collection_name="Meta 10-k Filings",
     )
     qdrant_retriever = qdrant_vectorstore.as_retriever()
     # Define the RAG prompt template
-    openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
     RAG_PROMPT = """
     CONTEXT:
     {context}
@@ -57,9 +69,14 @@ def create_raqa_chain_from_docs():
     Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
     """
     rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
-    # Define the RAQA chain
     retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
         | RunnablePassthrough.assign(context=itemgetter("context"))

         chunk_overlap = 0,
         length_function = tiktoken_len,
     )
     split_chunks = text_splitter.split_documents(docs)
     print('len(split_chunks) =', len(split_chunks))
     return split_chunks
     # Load the documents from a PDF file using PyMuPDFLoader
     # docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
     docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
+    # Print the number of loaded documents
     print("Loaded", len(docs), "documents")
+    # Print the first document
     print(docs[0])
+    # Split the documents into chunks based on their length
     split_chunks = chunk_documents(docs, tiktoken_len)
+    # Create an instance of the OpenAIEmbeddings model for text embeddings
     embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+    # Create a Qdrant vector store from the split chunks
     qdrant_vectorstore = Qdrant.from_documents(
         split_chunks,
         embedding_model,
         location=":memory:",
         collection_name="Meta 10-k Filings",
     )
+    # Create a retriever from the Qdrant vector store
     qdrant_retriever = qdrant_vectorstore.as_retriever()
     # Define the RAG prompt template
     RAG_PROMPT = """
     CONTEXT:
     {context}
     Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
     """
+    # Create a ChatPromptTemplate instance from the RAG prompt template
     rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+    # Create an instance of the ChatOpenAI model
+    openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
+    # Define the retrieval augmented QA chain
     retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
         | RunnablePassthrough.assign(context=itemgetter("context"))