Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,12 +7,12 @@ model_name = "allenai/scibert_scivocab_uncased"
|
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 8 |
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3) # z.B. für 3 Kategorien
|
| 9 |
|
| 10 |
-
# 2️⃣ Dataset laden (
|
| 11 |
-
dataset = load_dataset("scientific_papers"
|
| 12 |
|
| 13 |
# 3️⃣ Tokenisierung der Texte
|
| 14 |
def tokenize_function(examples):
|
| 15 |
-
return tokenizer(examples["
|
| 16 |
|
| 17 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
| 18 |
|
|
|
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 8 |
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3) # z.B. für 3 Kategorien
|
| 9 |
|
| 10 |
# 2️⃣ Load the dataset (armanc/scientific_papers)
# NOTE(review): this dataset ships two configs ("arxiv" and "pubmed");
# calling load_dataset without a config name raises
# ValueError("Config name is missing...") at startup — the likely cause of
# the Space's "Runtime error". Pick one explicitly.
dataset = load_dataset("armanc/scientific_papers", "arxiv")
|
| 12 |
|
| 13 |
# 3️⃣ Tokenisierung der Texte
|
| 14 |
def tokenize_function(examples):
    """Tokenize a batch of dataset rows for SciBERT.

    Pads every sequence to the model's max length and truncates longer
    ones so all examples share a fixed shape.

    Args:
        examples: a batched dict of columns as passed by ``dataset.map``.

    Returns:
        The tokenizer's encoding dict (input_ids, attention_mask, ...).
    """
    # BUG FIX: scientific_papers rows have "article", "abstract" and
    # "section_names" columns — there is no "text" column, so
    # examples["text"] raised KeyError inside dataset.map.
    # Tokenize the article body instead.
    return tokenizer(examples["article"], padding="max_length", truncation=True)
|
| 16 |
|
| 17 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
| 18 |
|