Bertug1911 commited on
Commit
e8e5abe
·
verified ·
1 Parent(s): bdc5a31

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -90
README.md CHANGED
@@ -69,98 +69,36 @@ def generate_random_number(num):
69
 
70
 
71
  ## How to use?
72
- NOTE: Model ***DON'T*** supports "Auto-model". (If you use with auto model, it generates random outputs!!!)
73
- You can run this code to use:
74
 
75
  ```
76
- import torch
77
- from transformers import PreTrainedTokenizerFast, GPT2LMHeadModel
78
-
79
- def extract_response_between_tokens(text: str) -> str:
80
-
81
- start_token = "<|im_start|>assistant<|im_sep|>"
82
- end_token = "<|im_end|>"
83
- try:
84
- start_idx = text.index(start_token) + len(start_token)
85
- end_idx = text.index(end_token, start_idx)
86
- return text[start_idx:end_idx]
87
- except ValueError:
88
- # Tokenlar bulunamazsa orijinal metni döndür
89
- return text
90
-
91
- if __name__ == "__main__":
92
- model_name_or_path = "Bertug1911/BrtGPT-1-Pre"
93
-
94
- tokenizer = PreTrainedTokenizerFast.from_pretrained(model_name_or_path)
95
- model = GPT2LMHeadModel.from_pretrained(model_name_or_path)
96
-
97
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
98
- model.to(device)
99
- model.eval()
100
-
101
- user_input = input("Enter something to ask model: ")
102
-
103
-
104
- messages = [{"role": "user", "content": user_input}]
105
-
106
-
107
- formatted_prompt = tokenizer.apply_chat_template(
108
- messages,
109
- tokenize=False,
110
- add_generation_prompt=True
111
- )
112
-
113
-
114
- inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
115
- generated = inputs["input_ids"]
116
-
117
- # Generate config
118
- max_new_tokens = 128
119
- do_sample = True
120
- top_k = 40
121
- temperature = 0.8
122
-
123
- im_end_token_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
124
-
125
- with torch.no_grad():
126
- for i in range(max_new_tokens):
127
- outputs = model(generated)
128
- logits = outputs.logits[:, -1, :]
129
- logits = logits / temperature
130
-
131
- if top_k > 0:
132
- top_k_values, top_k_indices = torch.topk(logits, top_k)
133
- logits_filtered = torch.full_like(logits, float('-inf'))
134
- logits_filtered.scatter_(1, top_k_indices, top_k_values)
135
- logits = logits_filtered
136
-
137
- probs = torch.softmax(logits, dim=-1)
138
-
139
- if do_sample:
140
- next_token = torch.multinomial(probs, num_samples=1)
141
- else:
142
- next_token = torch.argmax(probs, dim=-1, keepdim=True)
143
-
144
- generated = torch.cat([generated, next_token], dim=1)
145
-
146
- if next_token.item() == im_end_token_id:
147
- break
148
-
149
-
150
-
151
- output = tokenizer.decode(generated[0], skip_special_tokens=False)
152
-
153
- # Special token conversions
154
- no_spaces = output.replace(" ", "")
155
- step2 = no_spaces.replace("Ġ", " ")
156
- formatted_output = step2.replace("Ċ", "\n")
157
-
158
- if not formatted_output.strip().endswith("<|im_end|>"):
159
- formatted_output += "<|im_end|>"
160
-
161
-
162
- assistant_response = extract_response_between_tokens(formatted_output)
163
- print("\nModel output:\n", assistant_response)
164
 
165
  ```
166
  ## Evaluation
 
69
 
70
 
71
  ## How to use?
72
+ NOTE: The model now ***supports*** the Auto-model library!
73
+ You can run this code to use it (Auto-model / Hugging Face transformers):
74
 
75
  ```
76
+ from transformers import pipeline
77
+
78
+ # Pipeline
79
+ pipe = pipeline(
80
+ "text-generation",
81
+ model="Bertug1911/BrtGPT-1-Pre",
82
+ trust_remote_code=True,
83
+ top_k=40, # example top_k value
84
+ temperature=0.8, # example temperature value
85
+ max_new_tokens=50 # maximum number of new tokens to generate
86
+ )
87
+
88
+ # Messages
89
+ messages = [
90
+ {"role": "user", "content": "What is the capital of France?"},
91
+ ]
92
+
93
+ # Take out
94
+ output = pipe(messages)
95
+
96
+ # Print only the assistant's (model output) answer
97
+ assistant_response = output[0]["generated_text"][-1]["content"].strip()
98
+ # Special token conversions
99
+ formatted_out = assistant_response.replace(" ", "").replace("Ġ", " ").replace("Ċ", "\n")
100
+
101
+ print(formatted_out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  ```
104
  ## Evaluation