Fix generate.py: correct repo name and add HF usage example
generate.py (+11, -3)
@@ -3,10 +3,18 @@ Eve-2-MoE Inference
 ===================
 Quick generation script. Works with local weights or HuggingFace download.
 
-Usage:
+Usage (standalone):
     python generate.py --prompt "The future of AI is"
     python generate.py --prompt "The future of AI is" --model_path ./model_final/pytorch_model.bin
-    python generate.py --prompt "The future of AI is" --hf_repo anthonym21/Eve-2-MoE-
+    python generate.py --prompt "The future of AI is" --hf_repo anthonym21/Eve-2-MoE-272M
+
+Usage (HuggingFace):
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    model = AutoModelForCausalLM.from_pretrained("anthonym21/Eve-2-MoE-272M", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    inputs = tokenizer("The future of AI is", return_tensors="pt")
+    output = model.generate(**inputs, max_new_tokens=100)
+    print(tokenizer.decode(output[0]))
 """
 
 import argparse
@@ -71,7 +79,7 @@ def main():
     args = p.parse_args()
 
     if not args.model_path and not args.hf_repo:
-        args.hf_repo = "anthonym21/Eve-2-MoE-
+        args.hf_repo = "anthonym21/Eve-2-MoE-272M"
 
     print(f"Loading model on {args.device}...")
     model = load_model(args.model_path, args.hf_repo, args.device)