Spaces:
Runtime error
Runtime error
Commit ·
998bbdc
1
Parent(s): aa55748
Update app.py
Browse files
changed to load in files from kellogg folder
app.py
CHANGED
|
@@ -14,6 +14,15 @@ from langchain.prompts import MessagesPlaceholder
|
|
| 14 |
from langchain.agents import AgentExecutor
|
| 15 |
from langchain.agents.agent_toolkits import create_retriever_tool
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
print("CHECK - Pinecone vector db setup")
|
| 18 |
|
| 19 |
# set up OpenAI environment vars and embeddings
|
|
@@ -24,12 +33,21 @@ embeddings = OpenAIEmbeddings()
|
|
| 24 |
index_name = "kellogg-course-assistant"
|
| 25 |
|
| 26 |
pinecone.init(
|
| 27 |
-
api_key=os.getenv("PINECONE_API_KEY"),
|
| 28 |
-
|
| 29 |
)
|
| 30 |
|
| 31 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
|
|
|
|
|
|
|
|
|
|
| 33 |
retriever = vectorsearch.as_retriever()
|
| 34 |
|
| 35 |
print("CHECK - setting up conversational retrieval agent")
|
|
|
|
| 14 |
from langchain.agents import AgentExecutor
|
| 15 |
from langchain.agents.agent_toolkits import create_retriever_tool
|
| 16 |
|
| 17 |
+
# Function to read files from a folder
|
| 18 |
+
def read_files_from_folder(folder_path):
    """Read every ``.txt`` file located directly inside *folder_path*.

    Args:
        folder_path: Path of the directory to scan. Sub-directories are not
            descended into.

    Returns:
        A dict mapping each file name (not the full path) to the file's text
        content. Keys are inserted in sorted file-name order.

    Raises:
        OSError: If *folder_path* cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    file_data = {}
    # os.listdir returns entries in arbitrary order; sort so the result (and
    # anything derived from it) is deterministic across runs and platforms.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):  # Assuming text files
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory whose name happens to end in ".txt" must not be opened.
        if not os.path.isfile(file_path):
            continue
        # Decode explicitly as UTF-8 instead of relying on the host locale.
        with open(file_path, "r", encoding="utf-8") as f:
            file_data[filename] = f.read()
    return file_data
|
| 25 |
+
|
| 26 |
print("CHECK - Pinecone vector db setup")
|
| 27 |
|
| 28 |
# set up OpenAI environment vars and embeddings
|
|
|
|
| 33 |
index_name = "kellogg-course-assistant"
|
| 34 |
|
| 35 |
pinecone.init(
|
| 36 |
+
api_key=os.getenv("PINECONE_API_KEY"),
|
| 37 |
+
environment=os.getenv("PINECONE_ENV"),
|
| 38 |
)
|
| 39 |
|
| 40 |
+
# Read files from the "kellogg" folder into a dictionary
|
| 41 |
+
kellogg_data = read_files_from_folder("kellogg")
|
| 42 |
+
|
| 43 |
+
# Embed the text content and upload it to Pinecone.
# `OpenAIEmbeddings` exposes `embed_documents` / `embed_query` (there is no
# `transform`), and the LangChain `Pinecone` vector store exposes `add_texts`
# (there is no `upsert`). `add_texts` embeds each text with the store's
# embedding object and writes the resulting vectors to the index in one step.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
if kellogg_data:  # nothing to embed or upload when no .txt files were found
    vectorsearch.add_texts(
        texts=list(kellogg_data.values()),
        # Keep the originating file name so retrieved text can be traced back.
        metadatas=[{"source": filename} for filename in kellogg_data],
        # Use the file name as the vector id so that restarting the app
        # overwrites the same vectors instead of inserting duplicates.
        ids=list(kellogg_data),
    )
|
| 49 |
+
|
| 50 |
+
# load existing index
|
| 51 |
retriever = vectorsearch.as_retriever()
|
| 52 |
|
| 53 |
print("CHECK - setting up conversational retrieval agent")
|