Update llm.py
Browse files
llm.py
CHANGED
|
@@ -6,9 +6,10 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 6 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 7 |
from langchain.chains import create_retrieval_chain
|
| 8 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
| 12 |
splits = []
|
| 13 |
for i, row in df.iterrows():
|
| 14 |
# Create a Document object for each row, including page_content and metadata
|
|
@@ -21,17 +22,17 @@ def load_and_process_pdf(pdf_path):
|
|
| 21 |
}
|
| 22 |
)
|
| 23 |
# Append the Document object to the splits list
|
| 24 |
-
|
| 25 |
return splits
|
| 26 |
|
| 27 |
-
def create_vectorstore(splits):
|
| 28 |
model_name = "nomic-ai/nomic-embed-text-v1"
|
| 29 |
model_kwargs = {
|
| 30 |
'device': 'cpu',
|
| 31 |
'trust_remote_code':True
|
| 32 |
}
|
| 33 |
encode_kwargs = {'normalize_embeddings': True}
|
| 34 |
-
|
| 35 |
model_name=model_name,
|
| 36 |
model_kwargs=model_kwargs,
|
| 37 |
encode_kwargs=encode_kwargs)
|
|
|
|
| 6 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 7 |
from langchain.chains import create_retrieval_chain
|
| 8 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from langchain.schema import Document
|
| 11 |
+
def load_and_process_pdf():
|
| 12 |
+
df = pd.read_excel("content/chunk_metadata_template.xlsx")
|
| 13 |
splits = []
|
| 14 |
for i, row in df.iterrows():
|
| 15 |
# Create a Document object for each row, including page_content and metadata
|
|
|
|
| 22 |
}
|
| 23 |
)
|
| 24 |
# Append the Document object to the splits list
|
| 25 |
+
splits.append(chunk_with_metadata)
|
| 26 |
return splits
|
| 27 |
|
| 28 |
+
def create_vectorstore(splits = load_and_process_pdf()):
|
| 29 |
model_name = "nomic-ai/nomic-embed-text-v1"
|
| 30 |
model_kwargs = {
|
| 31 |
'device': 'cpu',
|
| 32 |
'trust_remote_code':True
|
| 33 |
}
|
| 34 |
encode_kwargs = {'normalize_embeddings': True}
|
| 35 |
+
embeddings = HuggingFaceBgeEmbeddings(
|
| 36 |
model_name=model_name,
|
| 37 |
model_kwargs=model_kwargs,
|
| 38 |
encode_kwargs=encode_kwargs)
|