Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,19 +37,15 @@ def load_or_finetune_pegasus():
|
|
| 37 |
tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
|
| 38 |
model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
| 42 |
combined_dataset = concatenate_datasets([cnn_dm, xsum])
|
| 43 |
|
| 44 |
def preprocess_function(examples):
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
# Tokenize inputs and targets
|
| 50 |
-
inputs = tokenizer(texts, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
|
| 51 |
-
targets = tokenizer(summaries, max_length=400, truncation=True, padding="max_length", return_tensors="pt")
|
| 52 |
-
|
| 53 |
inputs["labels"] = targets["input_ids"]
|
| 54 |
return inputs
|
| 55 |
|
|
|
|
| 37 |
tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
|
| 38 |
model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
|
| 39 |
|
| 40 |
+
# Load and normalize datasets
|
| 41 |
+
cnn_dm = load_dataset("cnn_dailymail", "3.0.0", split="train[:5000]").rename_column("article", "text").rename_column("highlights", "summary")
|
| 42 |
+
xsum = load_dataset("xsum", split="train[:5000]", trust_remote_code=True).rename_column("document", "text")
|
| 43 |
combined_dataset = concatenate_datasets([cnn_dm, xsum])
|
| 44 |
|
| 45 |
def preprocess_function(examples):
|
| 46 |
+
# Directly use normalized 'text' and 'summary' fields
|
| 47 |
+
inputs = tokenizer(examples["text"], max_length=512, truncation=True, padding="max_length", return_tensors="pt")
|
| 48 |
+
targets = tokenizer(examples["summary"], max_length=400, truncation=True, padding="max_length", return_tensors="pt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
inputs["labels"] = targets["input_ids"]
|
| 50 |
return inputs
|
| 51 |
|