Spaces:

tree3po
/

RAG-EZ

Sleeping

tree3po committed on Nov 4, 2024

Commit

733b136

verified ·

1 Parent(s): 1b2534d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,17 +17,17 @@ token=""
 repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 emb = "sentence-transformers/all-mpnet-base-v2"
 hf = HuggingFaceEmbeddings(model_name=emb)
-#db = Chroma()
 #db.persist()
 # Load the document, split it into chunks, embed each chunk and load it into the vector store.
 #raw_documents = TextLoader('state_of_the_union.txt').load()
 def embed_fn(inp):
-    db=Chroma()
     text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
     documents = text_splitter.split_text(inp)
     out_emb= hf.embed_documents(documents)
     string_representation = dumps(out_emb, pretty=True)
-    db.from_texts(documents,persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
 def proc_doc(doc_in):
     for doc in doc_in:
@@ -59,11 +59,10 @@ def read_pdf(pdf_path):
         text = f'{text}\n{page.extract_text()}'
     return text
 def run_llm(input_text,history):
-    db=Chroma()
     MAX_TOKENS=20000
     try:
         qur= hf.embed_query(input_text)
-        docs = db.similarity_search_by_vector(qur, k=3,persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
         print(docs)
     except Exception as e:

 repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 emb = "sentence-transformers/all-mpnet-base-v2"
 hf = HuggingFaceEmbeddings(model_name=emb)
+db = Chroma(persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
 #db.persist()
 # Load the document, split it into chunks, embed each chunk and load it into the vector store.
 #raw_documents = TextLoader('state_of_the_union.txt').load()
 def embed_fn(inp):
+    #db=Chroma()
     text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
     documents = text_splitter.split_text(inp)
     out_emb= hf.embed_documents(documents)
     string_representation = dumps(out_emb, pretty=True)
+    db.from_texts(documents)
 def proc_doc(doc_in):
     for doc in doc_in:
         text = f'{text}\n{page.extract_text()}'
     return text
 def run_llm(input_text,history):
     MAX_TOKENS=20000
     try:
         qur= hf.embed_query(input_text)
+        docs = db.similarity_search_by_vector(qur, k=3)
         print(docs)
     except Exception as e: