whymath committed on
Commit
08f0208
·
1 Parent(s): c1a97fb

Removing openai from requirements

Browse files
Files changed (2) hide show
  1. requirements.txt +0 -1
  2. utils.py +20 -3
requirements.txt CHANGED
@@ -11,4 +11,3 @@ pymupdf
11
  wandb
12
  chainlit
13
  huggingface_hub
14
- openai
 
11
  wandb
12
  chainlit
13
  huggingface_hub
 
utils.py CHANGED
@@ -23,8 +23,11 @@ def chunk_documents(docs, tiktoken_len):
23
  chunk_overlap = 0,
24
  length_function = tiktoken_len,
25
  )
 
26
  split_chunks = text_splitter.split_documents(docs)
 
27
  print('len(split_chunks) =', len(split_chunks))
 
28
  return split_chunks
29
 
30
 
@@ -32,22 +35,31 @@ def create_raqa_chain_from_docs():
32
  # Load the documents from a PDF file using PyMuPDFLoader
33
  # docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
34
  docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
 
 
35
  print("Loaded", len(docs), "documents")
 
 
36
  print(docs[0])
37
 
38
- # Chunk documents, load embedding model, create vectorstore and retriever
39
  split_chunks = chunk_documents(docs, tiktoken_len)
 
 
40
  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
 
 
41
  qdrant_vectorstore = Qdrant.from_documents(
42
  split_chunks,
43
  embedding_model,
44
  location=":memory:",
45
  collection_name="Meta 10-k Filings",
46
  )
 
 
47
  qdrant_retriever = qdrant_vectorstore.as_retriever()
48
 
49
  # Define the RAG prompt template
50
- openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
51
  RAG_PROMPT = """
52
  CONTEXT:
53
  {context}
@@ -57,9 +69,14 @@ def create_raqa_chain_from_docs():
57
 
58
  Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
59
  """
 
 
60
  rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
61
 
62
- # Define the RAQA chain
 
 
 
63
  retrieval_augmented_qa_chain = (
64
  {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
65
  | RunnablePassthrough.assign(context=itemgetter("context"))
 
23
  chunk_overlap = 0,
24
  length_function = tiktoken_len,
25
  )
26
+
27
  split_chunks = text_splitter.split_documents(docs)
28
+
29
  print('len(split_chunks) =', len(split_chunks))
30
+
31
  return split_chunks
32
 
33
 
 
35
  # Load the documents from a PDF file using PyMuPDFLoader
36
  # docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
37
  docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
38
+
39
+ # Print the number of loaded documents
40
  print("Loaded", len(docs), "documents")
41
+
42
+ # Print the first document
43
  print(docs[0])
44
 
45
+ # Split the documents into chunks based on their length
46
  split_chunks = chunk_documents(docs, tiktoken_len)
47
+
48
+ # Create an instance of the OpenAIEmbeddings model for text embeddings
49
  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
50
+
51
+ # Create a Qdrant vector store from the split chunks
52
  qdrant_vectorstore = Qdrant.from_documents(
53
  split_chunks,
54
  embedding_model,
55
  location=":memory:",
56
  collection_name="Meta 10-k Filings",
57
  )
58
+
59
+ # Create a retriever from the Qdrant vector store
60
  qdrant_retriever = qdrant_vectorstore.as_retriever()
61
 
62
  # Define the RAG prompt template
 
63
  RAG_PROMPT = """
64
  CONTEXT:
65
  {context}
 
69
 
70
  Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
71
  """
72
+
73
+ # Create a ChatPromptTemplate instance from the RAG prompt template
74
  rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
75
 
76
+ # Create an instance of the ChatOpenAI model
77
+ openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
78
+
79
+ # Define the retrieval augmented QA chain
80
  retrieval_augmented_qa_chain = (
81
  {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
82
  | RunnablePassthrough.assign(context=itemgetter("context"))