End of training
Browse files- README.md +18 -93
- model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 18 |
|
| 19 |
This model is a fine-tuned version of [paust/pko-t5-base](https://huggingface.co/paust/pko-t5-base) on an unknown dataset.
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
-
- Loss: 0.
|
| 22 |
-
- Rouge1: 0.
|
| 23 |
-
- Rouge2: 0.
|
| 24 |
-
- Rougel: 0.
|
| 25 |
-
- Rougelsum: 0.
|
| 26 |
|
| 27 |
## Model description
|
| 28 |
|
|
@@ -47,98 +47,23 @@ The following hyperparameters were used during training:
|
|
| 47 |
- seed: 42
|
| 48 |
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 49 |
- lr_scheduler_type: linear
|
| 50 |
-
-
|
| 51 |
- mixed_precision_training: Native AMP
|
| 52 |
|
| 53 |
### Training results
|
| 54 |
|
| 55 |
-
| Training Loss | Epoch
|
| 56 |
-
|
| 57 |
-
|
|
| 58 |
-
| 0.
|
| 59 |
-
| 0.
|
| 60 |
-
| 0.
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
# 1. 모델 및 토크나이저 로드
|
| 69 |
-
model_id = "seungbo7747/summarization_model"
|
| 70 |
-
tokenizer = T5TokenizerFast.from_pretrained(model_id)
|
| 71 |
-
model = T5ForConditionalGeneration.from_pretrained(model_id)
|
| 72 |
-
|
| 73 |
-
# 2. GPU 설정 (가능한 경우)
|
| 74 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 75 |
-
model.to(device)
|
| 76 |
-
print(f"Using device: {device}")
|
| 77 |
-
if torch.cuda.is_available():
|
| 78 |
-
print(f"GPU name: {torch.cuda.get_device_name(0)}")
|
| 79 |
-
|
| 80 |
-
# 3. 요약 함수 정의
|
| 81 |
-
def summarize_text(texts, max_input_length=512, max_output_length=150, num_beams=4):
|
| 82 |
-
"""
|
| 83 |
-
주어진 텍스트 리스트를 요약하는 함수.
|
| 84 |
-
|
| 85 |
-
Args:
|
| 86 |
-
texts (list[str]): 요약할 텍스트 리스트 (각 텍스트는 'summarize: ' 접두사 포함 가능).
|
| 87 |
-
max_input_length (int): 입력 텍스트 최대 길이.
|
| 88 |
-
max_output_length (int): 출력 요약 최대 길이.
|
| 89 |
-
num_beams (int): 빔 서치에서 사용할 빔 수.
|
| 90 |
-
|
| 91 |
-
Returns:
|
| 92 |
-
list[str]: 요약된 텍스트 리스트.
|
| 93 |
-
"""
|
| 94 |
-
# 입력 텍스트에 'summarize: ' 접두사 추가 (없는 경우)
|
| 95 |
-
inputs = [f"summarize: {text}" if not text.startswith("summarize: ") else text for text in texts]
|
| 96 |
-
|
| 97 |
-
# 토큰화
|
| 98 |
-
tokenized_inputs = tokenizer(
|
| 99 |
-
inputs,
|
| 100 |
-
max_length=max_input_length,
|
| 101 |
-
truncation=True,
|
| 102 |
-
padding=True,
|
| 103 |
-
return_tensors="pt"
|
| 104 |
-
)
|
| 105 |
-
|
| 106 |
-
# GPU로 입력 이동
|
| 107 |
-
tokenized_inputs = {k: v.to(device) for k, v in tokenized_inputs.items()}
|
| 108 |
-
|
| 109 |
-
# 요약 생성
|
| 110 |
-
summary_ids = model.generate(
|
| 111 |
-
tokenized_inputs["input_ids"],
|
| 112 |
-
attention_mask=tokenized_inputs["attention_mask"],
|
| 113 |
-
max_length=max_output_length,
|
| 114 |
-
num_beams=num_beams,
|
| 115 |
-
early_stopping=True
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
# 디코딩
|
| 119 |
-
summaries = tokenizer.batch_decode(summary_ids, skip_special_tokens=True)
|
| 120 |
-
return summaries
|
| 121 |
-
|
| 122 |
-
# 4. 테스트 입력 예시
|
| 123 |
-
test_texts = [
|
| 124 |
-
"summarize: νκ΅μ μλλ μμΈμ
λλ€. μμΈμ νλ°λ μ€λΆμ μμΉνλ©°, μΈκ΅¬λ μ½ 970λ§ λͺ
μ
λλ€. μμΈμ κ²½μ , λ¬Έν, μ μΉμ μ€μ¬μ§λ‘, νκ°μ΄ λμλ₯Ό κ°λ‘μ§λ₯΄λ©° λ§μ μμ¬μ μ μ°κ³Ό νλμ 건μΆλ¬Όμ΄ 곡쑴ν©λλ€.",
|
| 125 |
-
"summarize: μΈκ³΅μ§λ₯(AI)μ μ»΄ν¨ν° μμ€ν
μ΄ μΈκ°μ μ§λ₯μ λͺ¨λ°©νκ±°λ μ΄μνλλ‘ λ§λλ κΈ°μ μ
λλ€. AIλ λ¨Έμ λ¬λ, λ₯λ¬λ, μμ°μ΄ μ²λ¦¬ λ±μ λΆμΌλ‘ λλλ©°, μλ£, κΈμ΅, μ μ‘° λ± λ€μν μ°μ
μμ νμ©λκ³ μμ΅λλ€. κ·Έλ¬λ AIμ μ€λ¦¬μ λ¬Έμ μ μΌμ리 λ체 μ°λ €λ μ κΈ°λκ³ μμ΅λλ€.",
|
| 126 |
-
"summarize: κΈ°ν λ³νλ μ§κ΅¬ μ¨λν, ν΄μλ©΄ μμΉ, κ·Ήλ¨μ κΈ°μ νμμ μ΄λνλ κΈλ‘λ² λ¬Έμ μ
λλ€. μ΄μ°ννμ λ°°μΆ κ°μμ μ¬μ κ°λ₯ μλμ§ μ¬μ©μ΄ ν΄κ²°μ±
μΌλ‘ μ μλμ§λ§, κ΅μ μ νλ ₯μ΄ λΆμ‘±ν μν©μ
λλ€."
|
| 127 |
-
]
|
| 128 |
-
|
| 129 |
-
# 5. 요약 실행 및 결과 출력
|
| 130 |
-
summaries = summarize_text(test_texts)
|
| 131 |
-
for i, (input_text, summary) in enumerate(zip(test_texts, summaries)):
|
| 132 |
-
print(f"\nInput {i+1}: {input_text}")
|
| 133 |
-
print(f"Summary {i+1}: {summary}")
|
| 134 |
-
|
| 135 |
-
# 6. 단일 텍스트 요약 예시 (간단한 사용)
|
| 136 |
-
single_text = "summarize: λΈλ‘체μΈμ λΆμ°λ λμ§νΈ μ₯λΆλ‘, κ±°λ λ°μ΄ν°λ₯Ό μνΈννμ¬ λ³΄μμ±κ³Ό ν¬λͺ
μ±μ μ 곡ν©λλ€. λΉνΈμ½μΈκ³Ό κ°μ μνΈννλΏλ§ μλλΌ κ³΅κΈλ§ κ΄λ¦¬, μλ£ κΈ°λ‘ λ± λ€μν λΆμΌμμ νμ©λκ³ μμ΅λλ€."
|
| 137 |
-
summary = summarize_text([single_text])[0]
|
| 138 |
-
print(f"\nSingle Input: {single_text}")
|
| 139 |
-
print(f"Single Summary: {summary}")
|
| 140 |
-
```
|
| 141 |
-
|
| 142 |
|
| 143 |
|
| 144 |
### Framework versions
|
|
|
|
| 18 |
|
| 19 |
This model is a fine-tuned version of [paust/pko-t5-base](https://huggingface.co/paust/pko-t5-base) on an unknown dataset.
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
+
- Loss: 0.7192
|
| 22 |
+
- Rouge1: 0.0663
|
| 23 |
+
- Rouge2: 0.0167
|
| 24 |
+
- Rougel: 0.0663
|
| 25 |
+
- Rougelsum: 0.0663
|
| 26 |
|
| 27 |
## Model description
|
| 28 |
|
|
|
|
| 47 |
- seed: 42
|
| 48 |
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 49 |
- lr_scheduler_type: linear
|
| 50 |
+
- training_steps: 5000
|
| 51 |
- mixed_precision_training: Native AMP
|
| 52 |
|
| 53 |
### Training results
|
| 54 |
|
| 55 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
|
| 56 |
+
|:-------------:|:------:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
|
| 57 |
+
| 1.1412 | 0.0111 | 500 | 0.8112 | 0.0612 | 0.0136 | 0.0611 | 0.0611 |
|
| 58 |
+
| 0.8494 | 0.0222 | 1000 | 0.7681 | 0.0651 | 0.0150 | 0.0651 | 0.0650 |
|
| 59 |
+
| 0.8299 | 0.0333 | 1500 | 0.7493 | 0.0659 | 0.0155 | 0.0658 | 0.0658 |
|
| 60 |
+
| 0.7919 | 0.0444 | 2000 | 0.7379 | 0.0663 | 0.0158 | 0.0662 | 0.0662 |
|
| 61 |
+
| 0.7858 | 0.0555 | 2500 | 0.7339 | 0.0667 | 0.0163 | 0.0667 | 0.0667 |
|
| 62 |
+
| 0.7953 | 0.0666 | 3000 | 0.7330 | 0.0674 | 0.0164 | 0.0674 | 0.0674 |
|
| 63 |
+
| 0.7769 | 0.0777 | 3500 | 0.7261 | 0.0679 | 0.0163 | 0.0679 | 0.0678 |
|
| 64 |
+
| 0.7752 | 0.0888 | 4000 | 0.7182 | 0.0683 | 0.0163 | 0.0683 | 0.0683 |
|
| 65 |
+
| 0.7743 | 0.0998 | 4500 | 0.7203 | 0.0682 | 0.0164 | 0.0681 | 0.0681 |
|
| 66 |
+
| 0.7851 | 0.1109 | 5000 | 0.7179 | 0.0684 | 0.0165 | 0.0683 | 0.0683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
### Framework versions
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1102350184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7695f6cb2eb7b97708cd8464d7223ea0bc2a0fe8846cedc9124c35f4723564b
|
| 3 |
size 1102350184
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5368
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d7873673bc720e72537bf4e33a6337b67a6fa36414171e3c9ebda81c392dc99
|
| 3 |
size 5368
|