LH-Tech-AI committed on
Commit
07c7da0
·
verified ·
1 Parent(s): b54e272

Create inference.py

Browse files
Files changed (1) hide show
  1. inference.py +33 -0
inference.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Run a single sampled text generation with the local Quark v2 checkpoint.

Loads the tokenizer and the Llama-architecture causal LM stored under
``model_path``, generates a continuation for a fixed prompt, and prints the
decoded result. Progress messages are printed before each slow step.

Run as a plain script: ``python inference.py``. (The ``%%writefile`` Jupyter
cell magic is not valid Python and must not be part of this file.)
"""

# Printed before the heavy imports on purpose: importing torch/transformers
# can take a while, so the user gets immediate feedback.
print("[*] Loading libraries...")
import torch
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast

model_path = "./quark-v2-final"

print("[*] Loading tokenizer...")
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_path)

print("[*] Loading model...")
model = LlamaForCausalLM.from_pretrained(model_path)
model.eval()  # inference only: switch off training-mode layers such as dropout

prompt = "Albert Einstein was "
print(f"[*] Prompt: {prompt!r}")

inputs = tokenizer(prompt, return_tensors="pt")

# Fall back to the EOS id when the tokenizer defines no pad token, so that
# generate() always receives an explicit padding id instead of None.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# No gradients are needed for generation; disabling them saves memory.
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=150,
        do_sample=True,
        temperature=0.35,
        top_p=0.85,
        repetition_penalty=1.2,
        pad_token_id=pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# outputs[0] is the first (and only) generated sequence in the batch.
print("[*] Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))