Spaces:
Running
Running
Split --index-name flag into --pinecone-index-name and --index-namespace.
Browse files
- sage/index.py +12 -12
- sage/vector_store.py +16 -11
sage/index.py
CHANGED
|
@@ -71,10 +71,15 @@ def main():
|
|
| 71 |
help="Maximum chunks per batch. We recommend 2000 for the OpenAI embedder. Marqo enforces a limit of 64.",
|
| 72 |
)
|
| 73 |
parser.add_argument(
|
| 74 |
-
"--index-name",
|
| 75 |
default=None,
|
| 76 |
-
help="
|
| 77 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
)
|
| 79 |
parser.add_argument(
|
| 80 |
"--include",
|
|
@@ -133,15 +138,10 @@ def main():
|
|
| 133 |
if args.embedder_type == "marqo" and args.vector_store_type != "marqo":
|
| 134 |
parser.error("When using the marqo embedder, the vector store type must also be marqo.")
|
| 135 |
if args.vector_store_type == "marqo":
|
| 136 |
-
if
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
elif args.vector_store_type == "pinecone" and not args.index_name:
|
| 141 |
-
parser.error(
|
| 142 |
-
"When using Pinecone as the vector store, you must specify an index name. You can create one on "
|
| 143 |
-
"the Pinecone website. Make sure to set it the right --embedding-size."
|
| 144 |
-
)
|
| 145 |
|
| 146 |
# Validate embedder parameters.
|
| 147 |
if args.embedder_type == "marqo":
|
|
|
|
| 71 |
help="Maximum chunks per batch. We recommend 2000 for the OpenAI embedder. Marqo enforces a limit of 64.",
|
| 72 |
)
|
| 73 |
parser.add_argument(
|
| 74 |
+
"--pinecone-index-name",
|
| 75 |
default=None,
|
| 76 |
+
help="Pinecone index name. Required if using Pinecone as the vector store. If the index doesn't exist already, "
|
| 77 |
+
"we will create it.",
|
| 78 |
+
)
|
| 79 |
+
parser.add_argument(
|
| 80 |
+
"--index-namespace",
|
| 81 |
+
default=None,
|
| 82 |
+
help="Index namespace for this repo. When not specified, we default it to a derivative of the repo name."
|
| 83 |
)
|
| 84 |
parser.add_argument(
|
| 85 |
"--include",
|
|
|
|
| 138 |
if args.embedder_type == "marqo" and args.vector_store_type != "marqo":
|
| 139 |
parser.error("When using the marqo embedder, the vector store type must also be marqo.")
|
| 140 |
if args.vector_store_type == "marqo":
|
| 141 |
+
if "/" in args.index_namespace:
|
| 142 |
+
parser.error("The index namespace cannot contain slashes when using Marqo as the vector store.")
|
| 143 |
+
elif args.vector_store_type == "pinecone" and not args.pinecone_index_name:
|
| 144 |
+
parser.error("When using Pinecone as the vector store, you must specify --pinecone-index-name")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
# Validate embedder parameters.
|
| 147 |
if args.embedder_type == "marqo":
|
sage/vector_store.py
CHANGED
|
@@ -149,27 +149,32 @@ class MarqoVectorStore(VectorStore):
|
|
| 149 |
def build_from_args(args: dict) -> VectorStore:
|
| 150 |
"""Builds a vector store from the given command-line arguments."""
|
| 151 |
if args.vector_store_type == "pinecone":
|
| 152 |
-
if not args.
|
| 153 |
-
raise ValueError("Please specify --index-name for Pinecone.")
|
| 154 |
dimension = args.embedding_size if "embedding_size" in args else None
|
| 155 |
|
| 156 |
-
|
| 157 |
-
if
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
return PineconeVectorStore(
|
| 161 |
-
index_name=args.
|
|
|
|
|
|
|
|
|
|
| 162 |
)
|
| 163 |
elif args.vector_store_type == "marqo":
|
| 164 |
marqo_url = args.marqo_url or "http://localhost:8882"
|
| 165 |
|
| 166 |
-
|
| 167 |
-
if not
|
| 168 |
# Marqo doesn't allow slashes in the index name.
|
| 169 |
-
|
| 170 |
if args.commit_hash:
|
| 171 |
-
|
| 172 |
|
| 173 |
-
return MarqoVectorStore(url=marqo_url, index_name=
|
| 174 |
else:
|
| 175 |
raise ValueError(f"Unrecognized vector store type {args.vector_store_type}")
|
|
|
|
| 149 |
def build_from_args(args: dict) -> VectorStore:
    """Builds a vector store from the given command-line arguments.

    Args:
        args: Parsed command-line arguments, read by attribute access (the `dict` annotation is
            kept from the original signature). Always reads `vector_store_type` and
            `index_namespace`; reads `repo_id` and `commit_hash` only when no namespace was given.
            Pinecone additionally reads `pinecone_index_name`, `embedding_size` (optional) and
            `hybrid_retrieval`; Marqo additionally reads `marqo_url`.

    Returns:
        A `PineconeVectorStore` or a `MarqoVectorStore`, depending on `args.vector_store_type`.

    Raises:
        ValueError: If Pinecone is selected without `pinecone_index_name`, or if
            `vector_store_type` is neither "pinecone" nor "marqo".
    """
    if args.vector_store_type == "pinecone":
        if not args.pinecone_index_name:
            raise ValueError("Please specify --pinecone-index-name for Pinecone.")
        # `embedding_size` may be absent from the parsed arguments; fall back to None in that case.
        # Assumes `args` supports the `in` operator (argparse.Namespace does).
        dimension = args.embedding_size if "embedding_size" in args else None

        index_namespace = args.index_namespace
        if not index_namespace:
            # Default namespace: the repo id, suffixed with the commit hash when one is given.
            # NOTE(review): the suffix is applied only to the default namespace, never to an
            # explicitly provided one -- confirm this nesting against the original file.
            index_namespace = args.repo_id
            if args.commit_hash:
                # Fixed: this previously appended to an undefined `namespace` variable,
                # which raised NameError.
                index_namespace += "/" + args.commit_hash

        return PineconeVectorStore(
            index_name=args.pinecone_index_name,
            namespace=index_namespace,
            dimension=dimension,
            hybrid=args.hybrid_retrieval,
        )
    elif args.vector_store_type == "marqo":
        marqo_url = args.marqo_url or "http://localhost:8882"

        index_namespace = args.index_namespace
        if not index_namespace:
            # Marqo doesn't allow slashes in the index name, so keep only the second
            # "/"-separated component of the repo id and join the commit hash with "_".
            index_namespace = args.repo_id.split("/")[1]
            if args.commit_hash:
                index_namespace += "_" + args.commit_hash

        return MarqoVectorStore(url=marqo_url, index_name=index_namespace)
    else:
        raise ValueError(f"Unrecognized vector store type {args.vector_store_type}")
|