juliaturc committed on
Commit
df1b188
·
1 Parent(s): 2730f0a

Split --index-name flag into --pinecone-index-name and --index-namespace.

Browse files
Files changed (2) hide show
  1. sage/index.py +12 -12
  2. sage/vector_store.py +16 -11
sage/index.py CHANGED
@@ -71,10 +71,15 @@ def main():
71
  help="Maximum chunks per batch. We recommend 2000 for the OpenAI embedder. Marqo enforces a limit of 64.",
72
  )
73
  parser.add_argument(
74
- "--index-name",
75
  default=None,
76
- help="Vector store index name. For Marqo, we default it to the repository name. Required for Pinecone. "
77
- "In Pinecone terminology, this is *not* the namespace (which we default to the repo ID).",
 
 
 
 
 
78
  )
79
  parser.add_argument(
80
  "--include",
@@ -133,15 +138,10 @@ def main():
133
  if args.embedder_type == "marqo" and args.vector_store_type != "marqo":
134
  parser.error("When using the marqo embedder, the vector store type must also be marqo.")
135
  if args.vector_store_type == "marqo":
136
- if not args.index_name:
137
- args.index_name = args.repo_id.split("/")[1]
138
- if "/" in args.index_name:
139
- parser.error("The index name cannot contain slashes when using Marqo as the vector store.")
140
- elif args.vector_store_type == "pinecone" and not args.index_name:
141
- parser.error(
142
- "When using Pinecone as the vector store, you must specify an index name. You can create one on "
143
- "the Pinecone website. Make sure to set it the right --embedding-size."
144
- )
145
 
146
  # Validate embedder parameters.
147
  if args.embedder_type == "marqo":
 
71
  help="Maximum chunks per batch. We recommend 2000 for the OpenAI embedder. Marqo enforces a limit of 64.",
72
  )
73
  parser.add_argument(
74
+ "--pinecone-index-name",
75
  default=None,
76
+ help="Pinecone index name. Required if using Pinecone as the vector store. If the index doesn't exist already, "
77
+ "we will create it.",
78
+ )
79
+ parser.add_argument(
80
+ "--index-namespace",
81
+ default=None,
82
+ help="Index namespace for this repo. When not specified, we default it to a derivative of the repo name."
83
  )
84
  parser.add_argument(
85
  "--include",
 
138
  if args.embedder_type == "marqo" and args.vector_store_type != "marqo":
139
  parser.error("When using the marqo embedder, the vector store type must also be marqo.")
140
  if args.vector_store_type == "marqo":
141
+ if "/" in args.index_namespace:
142
+ parser.error("The index namespace cannot contain slashes when using Marqo as the vector store.")
143
+ elif args.vector_store_type == "pinecone" and not args.pinecone_index_name:
144
+ parser.error("When using Pinecone as the vector store, you must specify --pinecone-index-name")
 
 
 
 
 
145
 
146
  # Validate embedder parameters.
147
  if args.embedder_type == "marqo":
sage/vector_store.py CHANGED
@@ -149,27 +149,32 @@ class MarqoVectorStore(VectorStore):
149
  def build_from_args(args: dict) -> VectorStore:
150
  """Builds a vector store from the given command-line arguments."""
151
  if args.vector_store_type == "pinecone":
152
- if not args.index_name:
153
- raise ValueError("Please specify --index-name for Pinecone.")
154
  dimension = args.embedding_size if "embedding_size" in args else None
155
 
156
- namespace = args.repo_id
157
- if args.commit_hash:
158
- namespace += "/" + args.commit_hash
 
 
159
 
160
  return PineconeVectorStore(
161
- index_name=args.index_name, namespace=namespace, dimension=dimension, hybrid=args.hybrid_retrieval
 
 
 
162
  )
163
  elif args.vector_store_type == "marqo":
164
  marqo_url = args.marqo_url or "http://localhost:8882"
165
 
166
- index_name = args.index_name
167
- if not index_name:
168
  # Marqo doesn't allow slashes in the index name.
169
- index_name = args.repo_id.split("/")[1]
170
  if args.commit_hash:
171
- index_name += "_" + args.commit_hash
172
 
173
- return MarqoVectorStore(url=marqo_url, index_name=index_name)
174
  else:
175
  raise ValueError(f"Unrecognized vector store type {args.vector_store_type}")
 
149
def build_from_args(args: dict) -> VectorStore:
    """Builds a vector store from the given command-line arguments.

    Args:
        args: Parsed command-line arguments, read via attribute access. Always reads `vector_store_type`
            and `index_namespace`; reads `repo_id` and `commit_hash` when no namespace was given. The
            Pinecone branch also reads `pinecone_index_name`, `hybrid_retrieval` and (if present)
            `embedding_size`; the Marqo branch also reads `marqo_url`.

    Returns:
        A `PineconeVectorStore` or a `MarqoVectorStore`, depending on `args.vector_store_type`.

    Raises:
        ValueError: If the vector store type is unrecognized, or if Pinecone is selected without
            `--pinecone-index-name`.
    """
    if args.vector_store_type == "pinecone":
        if not args.pinecone_index_name:
            raise ValueError("Please specify --pinecone-index-name for Pinecone.")
        # `embedding_size` may be absent from the parsed arguments; in that case no dimension is passed.
        dimension = args.embedding_size if "embedding_size" in args else None

        index_namespace = args.index_namespace
        if not index_namespace:
            # Default the namespace to the repo ID, optionally suffixed with the commit hash.
            # NOTE(review): the suffix is assumed to apply only to the defaulted namespace (same shape as
            # the Marqo branch below) -- confirm against the original indentation.
            index_namespace = args.repo_id
            if args.commit_hash:
                # Must append to `index_namespace`: the old `namespace` variable no longer exists, so
                # `namespace += ...` raised NameError and the commit hash was never applied.
                index_namespace += "/" + args.commit_hash

        return PineconeVectorStore(
            index_name=args.pinecone_index_name,
            namespace=index_namespace,
            dimension=dimension,
            hybrid=args.hybrid_retrieval,
        )
    elif args.vector_store_type == "marqo":
        marqo_url = args.marqo_url or "http://localhost:8882"

        index_namespace = args.index_namespace
        if not index_namespace:
            # Marqo doesn't allow slashes in the index name, so keep only the part of the repo ID after
            # the slash and join the commit hash with an underscore.
            index_namespace = args.repo_id.split("/")[1]
            if args.commit_hash:
                index_namespace += "_" + args.commit_hash

        # For Marqo, the namespace is used directly as the index name.
        return MarqoVectorStore(url=marqo_url, index_name=index_namespace)
    else:
        raise ValueError(f"Unrecognized vector store type {args.vector_store_type}")