dashrendar committed on
Commit
998bbdc
·
1 Parent(s): aa55748

Update app.py

Browse files

Changed app.py to load files from the kellogg folder.

Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -14,6 +14,15 @@ from langchain.prompts import MessagesPlaceholder
14
  from langchain.agents import AgentExecutor
15
  from langchain.agents.agent_toolkits import create_retriever_tool
16
 
 
 
 
 
 
 
 
 
 
17
  print("CHECK - Pinecone vector db setup")
18
 
19
  # set up OpenAI environment vars and embeddings
@@ -24,12 +33,21 @@ embeddings = OpenAIEmbeddings()
24
  index_name = "kellogg-course-assistant"
25
 
26
  pinecone.init(
27
- api_key=os.getenv("PINECONE_API_KEY"), # find at app.pinecone.io
28
- environment=os.getenv("PINECONE_ENV"), # next to api key in console
29
  )
30
 
31
- # load existing index
 
 
 
 
 
 
32
  vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
 
 
 
33
  retriever = vectorsearch.as_retriever()
34
 
35
  print("CHECK - setting up conversational retrieval agent")
 
14
  from langchain.agents import AgentExecutor
15
  from langchain.agents.agent_toolkits import create_retriever_tool
16
 
17
# Function to read files from a folder
def read_files_from_folder(folder_path):
    """Read every ``.txt`` file located directly inside *folder_path*.

    Args:
        folder_path: Path of the directory to scan. Sub-directories are not
            descended into.

    Returns:
        A dict mapping each file name (not the full path) to the file's text
        content. Keys are inserted in sorted file-name order so the result is
        reproducible across runs and platforms. An empty folder yields ``{}``.

    Raises:
        FileNotFoundError: If *folder_path* does not exist.
        NotADirectoryError: If *folder_path* is not a directory.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    file_data = {}
    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        # Skip non-text entries, and directories that merely end in ".txt"
        # (opening those would raise IsADirectoryError).
        if not filename.endswith(".txt") or not os.path.isfile(file_path):
            continue
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale-dependent default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            file_data[filename] = f.read()
    return file_data
25
+
26
  print("CHECK - Pinecone vector db setup")
27
 
28
  # set up OpenAI environment vars and embeddings
 
33
  index_name = "kellogg-course-assistant"
34
 
35
  pinecone.init(
36
+ api_key=os.getenv("PINECONE_API_KEY"),
37
+ environment=os.getenv("PINECONE_ENV"),
38
  )
39
 
40
# Read every .txt file in the "kellogg" folder into {filename: text}.
kellogg_data = read_files_from_folder("kellogg")

# Connect to the existing Pinecone index through LangChain's vector store
# wrapper; `embeddings` is the embedding function it uses for both uploads
# and queries.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)

# Embed and upload the documents. add_texts() runs each text through the
# store's embedding function and upserts the resulting vectors; the LangChain
# wrapper has no upsert() method and OpenAIEmbeddings has no transform()
# method. The file name is used as the vector id so that re-running the app
# overwrites the same entries instead of inserting duplicates.
# NOTE(review): each file is embedded as one document; very long files may
# exceed the embedding model's input limit - consider chunking. Also assumes
# file names are acceptable Pinecone ids - confirm.
if kellogg_data:
    kellogg_filenames = list(kellogg_data)
    vectorsearch.add_texts(
        texts=[kellogg_data[name] for name in kellogg_filenames],
        metadatas=[{"source": name} for name in kellogg_filenames],
        ids=kellogg_filenames,
    )

# Expose the index as a retriever for the conversational agent.
retriever = vectorsearch.as_retriever()
52
 
53
  print("CHECK - setting up conversational retrieval agent")