Commit ·
bcc6c02
1
Parent(s): badf4f1
Clarify --ckpt help: retriever weights only, not full DSv4 model
Browse files
- demo.py +2 -1
- toy_flashmemory_inference.py +2 -1
demo.py
CHANGED
|
@@ -54,7 +54,8 @@ def make_mock_compressed_k(
|
|
| 54 |
|
| 55 |
def main():
|
| 56 |
ap = argparse.ArgumentParser(description="FlashMemory DS-V4 Retriever demo")
|
| 57 |
-
ap.add_argument("--ckpt", required=True,
|
|
|
|
| 58 |
ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
|
| 59 |
ap.add_argument("--batch", type=int, default=2, help="number of decode tokens")
|
| 60 |
ap.add_argument("--n-chunks", type=int, default=64, help="number of compressed-K chunks")
|
|
|
|
| 54 |
|
| 55 |
def main():
|
| 56 |
ap = argparse.ArgumentParser(description="FlashMemory DS-V4 Retriever demo")
|
| 57 |
+
ap.add_argument("--ckpt", required=True,
|
| 58 |
+
help="path to retriever checkpoint (flashmemory_ds_v4.safetensors from HuggingFace, NOT a full DSv4 model)")
|
| 59 |
ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
|
| 60 |
ap.add_argument("--batch", type=int, default=2, help="number of decode tokens")
|
| 61 |
ap.add_argument("--n-chunks", type=int, default=64, help="number of compressed-K chunks")
|
toy_flashmemory_inference.py
CHANGED
|
@@ -214,7 +214,8 @@ def main():
|
|
| 214 |
ap = argparse.ArgumentParser(
|
| 215 |
description="Toy DeepSeek-V4-FlashMemory sparse-decode loop driven by the FlashMemory Retriever"
|
| 216 |
)
|
| 217 |
-
ap.add_argument("--ckpt", required=True,
|
|
|
|
| 218 |
ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
|
| 219 |
ap.add_argument("--batch", type=int, default=1, help="number of parallel decode sequences")
|
| 220 |
ap.add_argument("--n-chunks", type=int, default=256, help="number of CSA memory chunks (the long history)")
|
|
|
|
| 214 |
ap = argparse.ArgumentParser(
|
| 215 |
description="Toy DeepSeek-V4-FlashMemory sparse-decode loop driven by the FlashMemory Retriever"
|
| 216 |
)
|
| 217 |
+
ap.add_argument("--ckpt", required=True,
|
| 218 |
+
help="path to the retriever checkpoint (flashmemory_ds_v4.safetensors from HuggingFace, NOT a full DSv4 model)")
|
| 219 |
ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
|
| 220 |
ap.add_argument("--batch", type=int, default=1, help="number of parallel decode sequences")
|
| 221 |
ap.add_argument("--n-chunks", type=int, default=256, help="number of CSA memory chunks (the long history)")
|