Spaces:

Oranblock
/

marblex

Sleeping

Oranblock committed on Jul 25, 2024

Commit

e7e56a6

verified ·

1 Parent(s): 513e2df

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,7 +36,7 @@ def setup_training():
     # Load your dataset with the specified configuration
     logging.info("Loading the MarbleX dataset")
     dataset = load_dataset("Oranblock/marblex_dataset", "config1")  # Replace "config1" with the appropriate config name
-    logging.info(f"Dataset loaded. Train size: {len(dataset['train'])}, Validation size: {len(dataset['validation'])}")
     # Load tokenizer and model
     logging.info(f"Loading tokenizer and model: {config['model_name']}")
@@ -80,7 +80,7 @@ def setup_training():
         model=model,
         args=training_args,
         train_dataset=tokenized_datasets["train"],
-        eval_dataset=tokenized_datasets["validation"],
         tokenizer=tokenizer,
         compute_metrics=compute_metrics
     )

     # Load your dataset with the specified configuration
     logging.info("Loading the MarbleX dataset")
     dataset = load_dataset("Oranblock/marblex_dataset", "config1")  # Replace "config1" with the appropriate config name
+    logging.info(f"Dataset loaded. Train size: {len(dataset['train'])}, Test size: {len(dataset['test'])}")
     # Load tokenizer and model
     logging.info(f"Loading tokenizer and model: {config['model_name']}")
         model=model,
         args=training_args,
         train_dataset=tokenized_datasets["train"],
+        eval_dataset=tokenized_datasets["test"],  # Use 'test' split for evaluation
         tokenizer=tokenizer,
         compute_metrics=compute_metrics
     )