Update README.md
Browse files
README.md
CHANGED
|
@@ -16,6 +16,16 @@ pipeline_tag: summarization
|
|
| 16 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 17 |
should probably proofread and complete it, then remove this comment. -->
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
## Model description
|
| 21 |
|
|
@@ -23,210 +33,56 @@ More information needed
|
|
| 23 |
|
| 24 |
## Intended uses & limitations
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
-
##
|
| 29 |
-
Tesla M60 16GB VRAM
|
| 30 |
|
|
|
|
| 31 |
|
|
|
|
| 32 |
|
| 33 |
### Training hyperparameters
|
| 34 |
|
| 35 |
The following hyperparameters were used during training:
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
max_seq_length: int = 1024
|
| 46 |
-
num_train_epochs: int = 1
|
| 47 |
-
max_grad_norm: float = 0.3
|
| 48 |
-
num_epochs: 5
|
| 49 |
|
| 50 |
### Training results
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
250 1.357100 1.235908 1.209817 830792.000000 0.703880
|
| 58 |
-
300 1.341700 1.226673 1.196961 995955.000000 0.705412
|
| 59 |
-
350 1.211000 1.223105 1.219540 1161755.000000 0.705137
|
| 60 |
-
400 1.414100 1.219148 1.218188 1330892.000000 0.706035
|
| 61 |
-
450 1.088200 1.214209 1.244467 1494009.000000 0.707179
|
| 62 |
-
500 1.302800 1.210984 1.203838 1659876.000000 0.707986
|
| 63 |
-
550 1.192800 1.208378 1.201593 1828355.000000 0.708459
|
| 64 |
-
600 1.302300 1.206382 1.212914 1989352.000000 0.708516
|
| 65 |
-
650 1.177800 1.205050 1.245975 2155580.000000 0.708198
|
| 66 |
-
700 1.156600 1.201754 1.201212 2323534.000000 0.709032
|
| 67 |
-
750 1.271000 1.201216 1.218800 2488415.000000 0.708988
|
| 68 |
-
800 1.264100 1.198175 1.182730 2655756.000000 0.710219
|
| 69 |
-
850 1.324600 1.196617 1.189218 2822068.000000 0.710231
|
| 70 |
-
900 1.159400 1.198235 1.207774 2988438.000000 0.708831
|
| 71 |
-
950 1.294200 1.194295 1.211270 3153113.000000 0.709955
|
| 72 |
-
1000 1.370000 1.192295 1.215226 3321550.000000 0.710322
|
| 73 |
-
1050 1.157300 1.190316 1.214881 3485313.000000 0.710768
|
| 74 |
-
1100 1.124000 1.189019 1.210650 3651712.000000 0.711739
|
| 75 |
-
1150 1.139700 1.188874 1.209716 3815535.000000 0.711151
|
| 76 |
-
1200 1.293600 1.187840 1.198137 3980373.000000 0.710808
|
| 77 |
-
1250 1.199800 1.186739 1.226214 4146077.000000 0.711442
|
| 78 |
-
...
|
| 79 |
-
XXXX 7700 steps XXXX
|
| 80 |
|
| 81 |
### How to use
|
| 82 |
|
| 83 |
Here is how to use this model with the 🤗 Transformers [pipeline API](https://huggingface.co/docs/transformers/main_classes/pipelines):
|
| 84 |
|
| 85 |
```python
|
| 86 |
-
import
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
def create_alpaca_prompt(instruction: str, inp: str = "") -> str:
|
| 98 |
-
"""Format prompt in Alpaca style."""
|
| 99 |
-
if inp.strip():
|
| 100 |
-
prompt = (
|
| 101 |
-
"Below is an instruction that describes a task, paired with an input that provides further context. "
|
| 102 |
-
"Write a response that appropriately completes the request.\n\n"
|
| 103 |
-
f"### Instruction:\n{instruction.strip()}\n\n"
|
| 104 |
-
f"### Input:\n{inp.strip()}\n\n"
|
| 105 |
-
"### Response:\n"
|
| 106 |
-
)
|
| 107 |
-
else:
|
| 108 |
-
prompt = (
|
| 109 |
-
"Below is an instruction that describes a task. "
|
| 110 |
-
"Write a response that appropriately completes the request.\n\n"
|
| 111 |
-
f"### Instruction:\n{instruction.strip()}\n\n"
|
| 112 |
-
"### Response:\n"
|
| 113 |
-
)
|
| 114 |
-
return prompt
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def load_model_and_tokenizer():
|
| 118 |
-
print(f"Loading base model: {BASE_MODEL}")
|
| 119 |
-
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
|
| 120 |
-
if tokenizer.pad_token is None:
|
| 121 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 122 |
-
tokenizer.padding_side = "left"
|
| 123 |
-
base_model = AutoModelForCausalLM.from_pretrained(
|
| 124 |
-
BASE_MODEL,
|
| 125 |
-
torch_dtype=DTYPE,
|
| 126 |
-
device_map=DEVICE_MAP,
|
| 127 |
-
)
|
| 128 |
-
print(f"Loading LoRA adapter from {ADAPTER_PATH}")
|
| 129 |
-
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
|
| 130 |
-
print("Merging LoRA weights into base model for speed...")
|
| 131 |
-
model = model.merge_and_unload()
|
| 132 |
-
model.eval()
|
| 133 |
-
# For safety with older GPUs
|
| 134 |
-
torch.backends.cuda.matmul.allow_tf32 = False
|
| 135 |
-
#torch.backends.cudnn.allow_tf32 = False
|
| 136 |
-
return model, tokenizer
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
@torch.no_grad()
|
| 140 |
-
def generate_single(
|
| 141 |
-
model,
|
| 142 |
-
tokenizer,
|
| 143 |
-
instruction: str,
|
| 144 |
-
inp: str = "",
|
| 145 |
-
max_new_tokens: int = 256,
|
| 146 |
-
temperature: float = 0.7,
|
| 147 |
-
top_p: float = 0.9,
|
| 148 |
-
):
|
| 149 |
-
prompt = create_alpaca_prompt(instruction, inp)
|
| 150 |
-
inputs = tokenizer(
|
| 151 |
-
prompt,
|
| 152 |
-
return_tensors="pt",
|
| 153 |
-
).to(model.device)
|
| 154 |
-
output_ids = model.generate(
|
| 155 |
-
**inputs,
|
| 156 |
-
max_new_tokens=max_new_tokens,
|
| 157 |
-
do_sample=False, # ✅ no sampling → no multinomial
|
| 158 |
-
temperature=None, # ignored when do_sample=False
|
| 159 |
-
top_p=None,
|
| 160 |
-
pad_token_id=tokenizer.eos_token_id,
|
| 161 |
-
use_cache=True,
|
| 162 |
-
)
|
| 163 |
-
|
| 164 |
-
full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 165 |
-
# Strip the prompt part to keep only the response
|
| 166 |
-
if "### Response:" in full_text:
|
| 167 |
-
response = full_text.split("### Response:")[-1].strip()
|
| 168 |
-
else:
|
| 169 |
-
response = full_text.strip()
|
| 170 |
-
return response
|
| 171 |
-
|
| 172 |
-
@torch.no_grad()
|
| 173 |
-
def generate_batch(
|
| 174 |
-
model,
|
| 175 |
-
tokenizer,
|
| 176 |
-
instructions,
|
| 177 |
-
inputs=None,
|
| 178 |
-
max_new_tokens: int = 256,
|
| 179 |
-
temperature: float = 0.7,
|
| 180 |
-
top_p: float = 0.9,
|
| 181 |
-
):
|
| 182 |
-
if inputs is None:
|
| 183 |
-
inputs = [""] * len(instructions)
|
| 184 |
-
prompts = [
|
| 185 |
-
create_alpaca_prompt(inst, inp)
|
| 186 |
-
for inst, inp in zip(instructions, inputs)
|
| 187 |
-
]
|
| 188 |
-
tokenized = tokenizer(
|
| 189 |
-
prompts,
|
| 190 |
-
return_tensors="pt",
|
| 191 |
-
#padding=True,
|
| 192 |
-
#truncation=True,
|
| 193 |
-
).to(model.device)
|
| 194 |
-
|
| 195 |
-
output_ids = model.generate(
|
| 196 |
-
**tokenized,
|
| 197 |
-
max_new_tokens=max_new_tokens,
|
| 198 |
-
do_sample=False, # ✅ no sampling → no multinomial
|
| 199 |
-
temperature=None, # ignored when do_sample=False
|
| 200 |
-
top_p=None,
|
| 201 |
-
# do_sample=True,
|
| 202 |
-
# temperature=temperature,
|
| 203 |
-
# top_p=top_p,
|
| 204 |
-
pad_token_id=tokenizer.eos_token_id,
|
| 205 |
-
)
|
| 206 |
-
outputs = []
|
| 207 |
-
for i in range(len(prompts)):
|
| 208 |
-
full_text = tokenizer.decode(output_ids[i], skip_special_tokens=True)
|
| 209 |
-
if "### Response:" in full_text:
|
| 210 |
-
response = full_text.split("### Response:")[-1].strip()
|
| 211 |
-
else:
|
| 212 |
-
response = full_text.strip()
|
| 213 |
-
outputs.append(response)
|
| 214 |
-
return outputs
|
| 215 |
-
|
| 216 |
-
model, tokenizer = load_model_and_tokenizer()
|
| 217 |
-
|
| 218 |
-
t1 = time.time() # ⏱ start
|
| 219 |
-
# ---------- Example: single prediction ----------
|
| 220 |
-
instruction = "Explain what a GPU is to a 15 year old."
|
| 221 |
-
inp = ""
|
| 222 |
-
response = generate_single(model, tokenizer, instruction, inp, max_new_tokens=512)
|
| 223 |
-
t2 = time.time()
|
| 224 |
-
print(f"Total time: {t2 - t1:.2f} seconds")
|
| 225 |
-
print("=== Single prediction ===")
|
| 226 |
-
print(response)
|
| 227 |
-
|
| 228 |
-
>>> Total time: 4.42 seconds
|
| 229 |
-
=== Single prediction ===
|
| 230 |
-
A GPU (Graphics Processing Unit) is a type of computer processor used to generate images and videos. It is used in computers and other devices to create visual content, such as games and movies. It is much faster than a CPU (Central Processing Unit) and can process more data in less time.
|
| 231 |
```
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 17 |
should probably proofread and complete it, then remove this comment. -->
|
| 18 |
|
| 19 |
+
# InstructTweetSummarizer
|
| 20 |
+
|
| 21 |
+
This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
|
| 22 |
+
It achieves the following results on the evaluation set:
|
| 23 |
+
- Loss: 0.3548
|
| 24 |
+
- Rouge1: 47.5134
|
| 25 |
+
- Rouge2: 24.7121
|
| 26 |
+
- Rougel: 35.7366
|
| 27 |
+
- Rougelsum: 35.6499
|
| 28 |
+
- Gen Len: 111.96
|
| 29 |
|
| 30 |
## Model description
|
| 31 |
|
|
|
|
| 33 |
|
| 34 |
## Intended uses & limitations
|
| 35 |
|
| 36 |
+
More information needed
|
| 37 |
|
| 38 |
+
## Training and evaluation data
|
|
|
|
| 39 |
|
| 40 |
+
More information needed
|
| 41 |
|
| 42 |
+
## Training procedure
|
| 43 |
|
| 44 |
### Training hyperparameters
|
| 45 |
|
| 46 |
The following hyperparameters were used during training:
|
| 47 |
+
- learning_rate: 2e-05
|
| 48 |
+
- train_batch_size: 6
|
| 49 |
+
- eval_batch_size: 4
|
| 50 |
+
- seed: 42
|
| 51 |
+
- gradient_accumulation_steps: 2
|
| 52 |
+
- total_train_batch_size: 12
|
| 53 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 54 |
+
- lr_scheduler_type: linear
|
| 55 |
+
- num_epochs: 3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
### Training results
|
| 58 |
|
| 59 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
|
| 60 |
+
|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
|
| 61 |
+
| No log | 1.0 | 417 | 0.3468 | 44.9326 | 22.3736 | 33.008 | 32.9247 | 116.43 |
|
| 62 |
+
| 0.5244 | 2.0 | 834 | 0.3440 | 46.9139 | 24.683 | 35.3699 | 35.333 | 119.65 |
|
| 63 |
+
| 0.2061 | 3.0 | 1251 | 0.3548 | 47.5134 | 24.7121 | 35.7366 | 35.6499 | 111.96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
### How to use
|
| 66 |
|
| 67 |
Here is how to use this model with the 🤗 Transformers [pipeline API](https://huggingface.co/docs/transformers/main_classes/pipelines):
|
| 68 |
|
| 69 |
```python
|
| 70 |
+
from transformers import pipeline
|
| 71 |
+
summarizer = pipeline("summarization", model="Sidharthkr/InstructTweetSummarizer")
|
| 72 |
+
def summarymaker(instruction = "", tweets = ""):
|
| 73 |
+
ARTICLE = f"""[INST] {instruction} [/INST] \\n [TWEETS] {tweets} [/TWEETS]"""
|
| 74 |
+
out = summarizer(ARTICLE, max_length=130, min_length=10, do_sample=False)
|
| 75 |
+
out = out[0]['summary_text'].split("[SUMMARY]")[-1].split("[/")[0].split("[via")[0].strip()
|
| 76 |
+
return out
|
| 77 |
+
|
| 78 |
+
summarymaker(instruction = "Summarize the tweets for Stellantis in 100 words",
|
| 79 |
+
tweets = """Stellantis - arch critic of Chinese EVs coming to Europe - is in talks with CATL to build a European plant. \n\nIt has concluded that cutting the price of EVs by using Chinese LFP batteries is more important.\n\n@FT story: \nhttps://t.co/l7nGggRFxH. State-of-the-art North America Battery Technology Centre begins to take shape at Stellantis' Automotive Research and Development Centre (ARDC) in Windsor, Ontario.\n\nhttps://t.co/04RO7CL1O5. RT @UAW: 🧵After the historic Stand Up Strike, UAW members at Ford, General Motors and Stellantis have voted to ratify their new contracts,…. RT @atorsoli: Stellantis and CATL are set to supply lower-cost EV batteries together for Europe, signaling automaker's efforts to tighten t…. RT @atorsoli: Stellantis and CATL are set to supply lower-cost EV batteries together for Europe, signaling automaker's efforts to tighten""")
|
| 80 |
+
>>> 'Stellantis is in talks with CATL to build a European plant, with a focus on cutting the price of EVs by using Chinese LFP batteries. The company is also developing a state-of-the-art North America Battery Technology Centre in Windsor, Ontario, and has ratified its new contracts with the UAW.'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
```
|
| 82 |
|
| 83 |
+
### Framework versions
|
| 84 |
+
|
| 85 |
+
- Transformers 4.34.1
|
| 86 |
+
- PyTorch 2.1.0
|
| 87 |
+
- Datasets 2.14.7
|
| 88 |
+
- Tokenizers 0.14.1
|