Update app.py
Browse files
app.py
CHANGED
|
@@ -21,27 +21,11 @@ db = Chroma(persist_directory="./chroma_langchain_db")
|
|
| 21 |
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
|
| 22 |
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
| 23 |
def embed_fn(inp):
|
| 24 |
-
print("Try Embeddings")
|
| 25 |
-
print(inp)
|
| 26 |
-
print("End Embeddings")
|
| 27 |
-
#for eaa in inp:
|
| 28 |
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
|
| 29 |
-
#documents = text_splitter.split_documents([eaa])
|
| 30 |
documents = text_splitter.split_text(inp)
|
| 31 |
-
print("documents")
|
| 32 |
-
print(documents)
|
| 33 |
-
print("end documents")
|
| 34 |
out_emb= hf.embed_documents(documents)
|
| 35 |
-
#chain = history[:-1]
|
| 36 |
string_representation = dumps(out_emb, pretty=True)
|
| 37 |
-
print(string_representation)
|
| 38 |
-
#db = Chroma(collection_name="test1", embedding_function=HuggingFaceEmbeddings())
|
| 39 |
db.from_texts(documents,HuggingFaceEmbeddings(model_name=emb))
|
| 40 |
-
#from_documents(documents, HuggingFaceEmbeddings)
|
| 41 |
-
print("DB")
|
| 42 |
-
print(db)
|
| 43 |
-
print("end DB")
|
| 44 |
-
#return db
|
| 45 |
def proc_doc(doc_in):
|
| 46 |
for doc in doc_in:
|
| 47 |
if doc.endswith(".txt"):
|
|
|
|
| 21 |
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
|
| 22 |
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
| 23 |
def embed_fn(inp):
    """Split *inp* into chunks and hand the chunks to the Chroma store.

    The text is cut with ``CharacterTextSplitter(chunk_size=200,
    chunk_overlap=10)`` and the resulting chunks are passed to
    ``db.from_texts`` together with a ``HuggingFaceEmbeddings`` instance
    built from ``emb`` (a model name defined outside this function).

    Args:
        inp: Raw text to index; passed unchanged to ``split_text``.

    Returns:
        None. The function is called for its effect on the vector store.
    """
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
    documents = text_splitter.split_text(inp)
    if not documents:
        # No chunks were produced, so there is nothing to embed or store.
        return

    # No separate ``hf.embed_documents(documents)`` / ``dumps(...)`` pass is
    # made here: their results were never read in this function, so they only
    # cost an extra embedding run over every chunk.

    # NOTE(review): the value returned by ``from_texts`` is not kept.
    # Presumably ``from_texts`` builds and returns a *new* store rather than
    # updating ``db`` in place -- confirm against the LangChain Chroma docs.
    # If so, these chunks never reach the persisted ``db`` and this call
    # should become ``db.add_texts(...)`` or rebind ``db``.
    db.from_texts(documents, HuggingFaceEmbeddings(model_name=emb))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def proc_doc(doc_in):
|
| 30 |
for doc in doc_in:
|
| 31 |
if doc.endswith(".txt"):
|