libertywing committed on
Commit
bcc6c02
·
1 Parent(s): badf4f1

Clarify --ckpt help: retriever weights only, not full DSv4 model

Browse files
Files changed (2) hide show
  1. demo.py +2 -1
  2. toy_flashmemory_inference.py +2 -1
demo.py CHANGED
@@ -54,7 +54,8 @@ def make_mock_compressed_k(
54
 
55
  def main():
56
  ap = argparse.ArgumentParser(description="FlashMemory DS-V4 Retriever demo")
57
- ap.add_argument("--ckpt", required=True, help="path to joint checkpoint (.pt)")
 
58
  ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
59
  ap.add_argument("--batch", type=int, default=2, help="number of decode tokens")
60
  ap.add_argument("--n-chunks", type=int, default=64, help="number of compressed-K chunks")
 
54
 
55
  def main():
56
  ap = argparse.ArgumentParser(description="FlashMemory DS-V4 Retriever demo")
57
+ ap.add_argument("--ckpt", required=True,
58
+ help="path to retriever checkpoint (flashmemory_ds_v4.safetensors from HuggingFace, NOT a full DSv4 model)")
59
  ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
60
  ap.add_argument("--batch", type=int, default=2, help="number of decode tokens")
61
  ap.add_argument("--n-chunks", type=int, default=64, help="number of compressed-K chunks")
toy_flashmemory_inference.py CHANGED
@@ -214,7 +214,8 @@ def main():
214
  ap = argparse.ArgumentParser(
215
  description="Toy DeepSeek-V4-FlashMemory sparse-decode loop driven by the FlashMemory Retriever"
216
  )
217
- ap.add_argument("--ckpt", required=True, help="path to the FlashMemory DS-V4 joint checkpoint (.pt)")
 
218
  ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
219
  ap.add_argument("--batch", type=int, default=1, help="number of parallel decode sequences")
220
  ap.add_argument("--n-chunks", type=int, default=256, help="number of CSA memory chunks (the long history)")
 
214
  ap = argparse.ArgumentParser(
215
  description="Toy DeepSeek-V4-FlashMemory sparse-decode loop driven by the FlashMemory Retriever"
216
  )
217
+ ap.add_argument("--ckpt", required=True,
218
+ help="path to the retriever checkpoint (flashmemory_ds_v4.safetensors from HuggingFace, NOT a full DSv4 model)")
219
  ap.add_argument("--device", default="cpu", help="cpu or cuda (default: cpu)")
220
  ap.add_argument("--batch", type=int, default=1, help="number of parallel decode sequences")
221
  ap.add_argument("--n-chunks", type=int, default=256, help="number of CSA memory chunks (the long history)")