sanjam99 committed on
Commit
e110be9
·
1 Parent(s): 293f100
Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +6 -2
__pycache__/app.cpython-310.pyc ADDED
Binary file (2.08 kB). View file
 
app.py CHANGED
@@ -16,8 +16,12 @@ def load_and_retrieve_docs(url):
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
17
  splits = text_splitter.split_documents(docs)
18
  embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
19
- embeddings = [embedding_model.encode(doc.page_content) for doc in splits]
20
- vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
 
 
 
 
21
  return vectorstore.as_retriever()
22
 
23
  # Function to format documents
 
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
17
  splits = text_splitter.split_documents(docs)
18
  embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
19
+
20
+ # Create a custom embedding function that uses the embedding model's encode method
21
+ def embed_func(texts):
22
+ return embedding_model.encode(texts, convert_to_tensor=True)
23
+
24
+ vectorstore = Chroma.from_documents(documents=splits, embedding=embed_func)
25
  return vectorstore.as_retriever()
26
 
27
  # Function to format documents