userlele committed on
Commit
b65d2b8
·
verified ·
1 Parent(s): 9e514c3

Update llm.py

Browse files
Files changed (1) hide show
  1. llm.py +7 -6
llm.py CHANGED
@@ -6,9 +6,10 @@ from langchain_core.prompts import ChatPromptTemplate
6
  from langchain.chains.combine_documents import create_stuff_documents_chain
7
  from langchain.chains import create_retrieval_chain
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
-
10
- def load_and_process_pdf(pdf_path):
11
- df = df.read_excel("chunk_metadata_template.xlsx", index=False)
 
12
  splits = []
13
  for i, row in df.iterrows():
14
  # Create a Document object for each row, including page_content and metadata
@@ -21,17 +22,17 @@ def load_and_process_pdf(pdf_path):
21
  }
22
  )
23
  # Append the Document object to the chunks list
24
- chunks.append(chunk_with_metadata)
25
  return splits
26
 
27
- def create_vectorstore(splits):
28
  model_name = "nomic-ai/nomic-embed-text-v1"
29
  model_kwargs = {
30
  'device': 'cpu',
31
  'trust_remote_code':True
32
  }
33
  encode_kwargs = {'normalize_embeddings': True}
34
- hf = HuggingFaceBgeEmbeddings(
35
  model_name=model_name,
36
  model_kwargs=model_kwargs,
37
  encode_kwargs=encode_kwargs)
 
6
  from langchain.chains.combine_documents import create_stuff_documents_chain
7
  from langchain.chains import create_retrieval_chain
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
+ import pandas as pd
10
+ from langchain.schema import Document
11
+ def load_and_process_pdf():
12
+ df = pd.read_excel("content/chunk_metadata_template.xlsx")
13
  splits = []
14
  for i, row in df.iterrows():
15
  # Create a Document object for each row, including page_content and metadata
 
22
  }
23
  )
24
  # Append the Document object to the chunks list
25
+ splits.append(chunk_with_metadata)
26
  return splits
27
 
28
+ def create_vectorstore(splits = load_and_process_pdf()):
29
  model_name = "nomic-ai/nomic-embed-text-v1"
30
  model_kwargs = {
31
  'device': 'cpu',
32
  'trust_remote_code':True
33
  }
34
  encode_kwargs = {'normalize_embeddings': True}
35
+ embeddings = HuggingFaceBgeEmbeddings(
36
  model_name=model_name,
37
  model_kwargs=model_kwargs,
38
  encode_kwargs=encode_kwargs)