Update README.md
Browse files
README.md
CHANGED
|
@@ -45,6 +45,42 @@ Evaluated on the held‑out test split of MeetingBank (≈ 600 transcripts), us
|
|
| 45 |
| **ROUGE‑Lsum** | 48.0142 |
|
| 46 |
|
| 47 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
## Training Data
|
| 49 |
Dataset: MeetingBank
|
| 50 |
Splits: Train (5000+), Validation (600+), Test (600+)
|
|
|
|
| 45 |
| **ROUGE‑Lsum** | 48.0142 |
|
| 46 |
|
| 47 |
---
|
| 48 |
+
|
| 49 |
+
## Usage
|
| 50 |
+
|
| 51 |
+
```python
|
| 52 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 53 |
+
import torch
|
| 54 |
+
|
| 55 |
+
# 1) Load from the Hub
|
| 56 |
+
tokenizer = AutoTokenizer.from_pretrained("Shaelois/MeetingScript")
|
| 57 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Shaelois/MeetingScript")
|
| 58 |
+
|
| 59 |
+
# 2) Summarize a long transcript
|
| 60 |
+
transcript = """
|
| 61 |
+
Alice: Good morning everyone, let’s get started…
|
| 62 |
+
Bob: I updated the design mockups…
|
| 63 |
+
… (thousands of words) …
|
| 64 |
+
"""
|
| 65 |
+
inputs = tokenizer(
|
| 66 |
+
transcript,
|
| 67 |
+
max_length=4096,
|
| 68 |
+
truncation=True,
|
| 69 |
+
return_tensors="pt"
|
| 70 |
+
).to("cuda")
|
| 71 |
+
|
| 72 |
+
summary_ids = model.generate(
|
| 73 |
+
**inputs,
|
| 74 |
+
num_beams=4,
|
| 75 |
+
max_length=150,
|
| 76 |
+
early_stopping=True
|
| 77 |
+
)
|
| 78 |
+
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 79 |
+
print("📝 Summary:", summary)
```
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
|
| 84 |
## Training Data
|
| 85 |
Dataset: MeetingBank
|
| 86 |
Splits: Train (5000+), Validation (600+), Test (600+)
|