CreitinGameplays committed on
Commit
f83ba19
·
verified ·
1 Parent(s): aa267cd

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +84 -0
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - CreitinGameplays/gemma-r1-test
4
+ language:
5
+ - en
6
+ base_model:
7
+ - google/gemma-2-2b-it
8
+ pipeline_tag: text-generation
9
+ library_name: transformers
10
+ ---
11
+
12
+ Chat template:
13
+ ```
14
+ <start_of_turn>user
15
+ {user_prompt}<end_of_turn>
16
+ <start_of_turn>model
17
+ <think>
18
+ ```
19
+
20
+ Code:
21
+ ```python
22
+ # test the model
23
+ import torch
24
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
25
+
26
def main():
    """Interactive console chat with the gemma-2-2b-it-R1-exp model.

    Loads the tokenizer and model (4-bit quantized via bitsandbytes when CUDA
    is available, full precision on CPU), then loops reading prompts from
    stdin and streaming generated tokens to stdout until the user types
    'exit' or 'quit'.
    """
    model_id = "CreitinGameplays/gemma-2-2b-it-R1-exp"

    # Load the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Load the model with bitsandbytes 4-bit quantization when CUDA is
    # available. (An earlier comment said "8-bit", but load_in_4bit=True is
    # what the code actually requests.)
    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            load_in_4bit=True,
            device_map="auto",
        )
        device = torch.device("cuda")
    else:
        model = AutoModelForCausalLM.from_pretrained(model_id)
        device = torch.device("cpu")

    # Generation parameters reused across turns.
    generation_kwargs = {
        "max_new_tokens": 4096,
        "do_sample": True,
        "temperature": 0.6,
        "top_k": 40,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "num_return_sequences": 1,
        "pad_token_id": tokenizer.eos_token_id,
    }

    print("Enter your prompt (type 'exit' to quit):")
    while True:
        # Get user input.
        user_input = input("Input> ")
        if user_input.lower().strip() in ("exit", "quit"):
            break

        # Build the prompt exactly as the chat template documented above:
        # the original triple-quoted f-string prepended a newline (and could
        # leak source indentation) into the prompt, deviating from the
        # declared template format.
        prompt = (
            "<start_of_turn>user\n"
            f"{user_input}<end_of_turn>\n"
            "<start_of_turn>model\n"
            "<think>\n"
        )

        # Tokenize the prompt and move it to the selected device.
        input_ids = tokenizer.encode(
            prompt, return_tensors="pt", add_special_tokens=True
        ).to(device)

        # Fresh TextStreamer per turn so each response streams to stdout.
        streamer = TextStreamer(tokenizer)
        generation_kwargs["streamer"] = streamer

        print("\nAssistant Response:")
        # Tokens stream to stdout via the streamer; the returned tensor of
        # token ids is not needed, so it is deliberately discarded.
        model.generate(input_ids, **generation_kwargs)
81
+
82
# Run the interactive chat loop only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
84
+ ```