Update app.py
Browse files
app.py
CHANGED
|
@@ -17,17 +17,17 @@ token=""
|
|
| 17 |
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 18 |
emb = "sentence-transformers/all-mpnet-base-v2"
|
| 19 |
hf = HuggingFaceEmbeddings(model_name=emb)
|
| 20 |
-
|
| 21 |
#db.persist()
|
| 22 |
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
|
| 23 |
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
| 24 |
def embed_fn(inp):
|
| 25 |
-
db=Chroma()
|
| 26 |
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
|
| 27 |
documents = text_splitter.split_text(inp)
|
| 28 |
out_emb= hf.embed_documents(documents)
|
| 29 |
string_representation = dumps(out_emb, pretty=True)
|
| 30 |
-
db.from_texts(documents
|
| 31 |
|
| 32 |
def proc_doc(doc_in):
|
| 33 |
for doc in doc_in:
|
|
@@ -59,11 +59,10 @@ def read_pdf(pdf_path):
|
|
| 59 |
text = f'{text}\n{page.extract_text()}'
|
| 60 |
return text
|
| 61 |
def run_llm(input_text,history):
|
| 62 |
-
db=Chroma()
|
| 63 |
MAX_TOKENS=20000
|
| 64 |
try:
|
| 65 |
qur= hf.embed_query(input_text)
|
| 66 |
-
docs = db.similarity_search_by_vector(qur, k=3
|
| 67 |
|
| 68 |
print(docs)
|
| 69 |
except Exception as e:
|
|
|
|
| 17 |
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 18 |
emb = "sentence-transformers/all-mpnet-base-v2"
|
| 19 |
hf = HuggingFaceEmbeddings(model_name=emb)
|
| 20 |
+
db = Chroma(persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
|
| 21 |
#db.persist()
|
| 22 |
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
|
| 23 |
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
| 24 |
def embed_fn(inp):
|
| 25 |
+
#db=Chroma()
|
| 26 |
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
|
| 27 |
documents = text_splitter.split_text(inp)
|
| 28 |
out_emb= hf.embed_documents(documents)
|
| 29 |
string_representation = dumps(out_emb, pretty=True)
|
| 30 |
+
db.from_texts(documents)
|
| 31 |
|
| 32 |
def proc_doc(doc_in):
|
| 33 |
for doc in doc_in:
|
|
|
|
| 59 |
text = f'{text}\n{page.extract_text()}'
|
| 60 |
return text
|
| 61 |
def run_llm(input_text,history):
|
|
|
|
| 62 |
MAX_TOKENS=20000
|
| 63 |
try:
|
| 64 |
qur= hf.embed_query(input_text)
|
| 65 |
+
docs = db.similarity_search_by_vector(qur, k=3)
|
| 66 |
|
| 67 |
print(docs)
|
| 68 |
except Exception as e:
|