Commit 63cd48d
Parent(s): dd3ad97

Update README.md and config.

Files changed:
- README.md (+19, -6)
- config.json (+2, -2)
- modeling_pharia.py (+0, -1)
README.md CHANGED

````diff
@@ -12,17 +12,30 @@ We provide a joint model card for `Pharia-1-LLM-7B-control` and `Pharia-1-LLM-co
 # Usage
 
 ```python
+import torch
+
 from transformers import AutoModelForCausalLM, PreTrainedTokenizerFast
 
-
-
+
+INPUT = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+You are a helpful assistant. You give engaging, well-structured answers to user inquiries.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+When was Rome founded?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+
+"""
+
+MODEL_ID = "Aleph-Alpha/Pharia-1-LLM-7B-control-aligned-hf"
 
 tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True, torch_dtype=torch.bfloat16)
 
-
-
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
 
-
+inputs = tokenizer(INPUT, return_token_type_ids=False, return_tensors="pt").to(device)
+outputs = model.generate(**inputs, max_new_tokens=50)
+generated_text = tokenizer.decode(outputs[0])
 print(generated_text)
 ```
````
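The rewritten snippet now loads the weights in bfloat16 and moves the model to a GPU when one is available. As a quick check that the change behaves as intended, a minimal sketch (the `MODEL_ID` and loading arguments come from the README above; the dtype/device checks are illustrative, not part of the repo):

```python
import torch
from transformers import AutoModelForCausalLM

MODEL_ID = "Aleph-Alpha/Pharia-1-LLM-7B-control-aligned-hf"

# Load in bfloat16, as the updated README does; trust_remote_code is needed
# because Pharia ships its own modeling code (modeling_pharia.py).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, trust_remote_code=True, torch_dtype=torch.bfloat16
)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Illustrative sanity checks: parameters should now report bfloat16
# and live on the chosen device.
param = next(model.parameters())
print(param.dtype, param.device)  # torch.bfloat16, cuda:0 (or cpu)
```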
config.json CHANGED

```diff
@@ -24,8 +24,8 @@
   "rope_scaling": null,
   "rope_theta": 1000000,
   "tie_word_embeddings": false,
-  "torch_dtype": "
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
   "use_cache": true,
   "vocab_size": 128000
-}
+}
```
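For context, the `torch_dtype` field in `config.json` records the dtype the checkpoint was saved in, and `from_pretrained` consults it when asked to pick a dtype automatically. A small sketch of that interaction, assuming the weights are stored in bfloat16 as the new field says:

```python
from transformers import AutoModelForCausalLM

MODEL_ID = "Aleph-Alpha/Pharia-1-LLM-7B-control-aligned-hf"

# torch_dtype="auto" tells transformers to take the dtype from the
# checkpoint/config -- now "bfloat16" -- instead of upcasting everything
# to the float32 default.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, trust_remote_code=True, torch_dtype="auto"
)
print(next(model.parameters()).dtype)  # expected: torch.bfloat16
```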
modeling_pharia.py CHANGED

```diff
@@ -764,7 +764,6 @@ class PhariaForCausalLM(PhariaPreTrainedModel):
 
         hidden_states = outputs[0]
         logits = self.lm_head(hidden_states)
-        logits = logits.float()
 
         return CausalLMOutputWithPast(
             loss=0.0,
```
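Dropping the `logits = logits.float()` upcast keeps the logits in the model's compute dtype (bfloat16 after this commit) instead of materialising a float32 copy of the full `[batch, seq, vocab]` tensor. With the 128,000-entry vocabulary from `config.json`, that halves the memory the logits occupy; a back-of-the-envelope sketch (the sequence length is a made-up example):

```python
# Rough memory arithmetic for the logits tensor, using the vocab size
# from config.json and an assumed 2048-token sequence.
batch, seq, vocab = 1, 2048, 128000
elements = batch * seq * vocab

print(f"bfloat16: {elements * 2 / 2**20:.0f} MiB")  # 2 bytes/element -> 500 MiB
print(f"float32:  {elements * 4 / 2**20:.0f} MiB")  # 4 bytes/element -> 1000 MiB
```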