Spaces:
Runtime error
Runtime error
Gagan Bhatia committed on
Commit ·
d17a51b
1
Parent(s): fbbe319
Update process_data.py
Browse files
- src/data/process_data.py +1 -0
src/data/process_data.py
CHANGED
|
@@ -10,6 +10,7 @@ def process_data(split="train"):
|
|
| 10 |
|
| 11 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
| 12 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
|
|
|
| 13 |
if os.path.exists("data/raw/{}.csv".format(split)):
|
| 14 |
os.remove("data/raw/{}.csv".format(split))
|
| 15 |
df.to_csv('data/processed/{}.csv'.format(split))
|
|
|
|
| 10 |
|
| 11 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
| 12 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
| 13 |
+
df = df.sample(frac=params["split"], replace=True, random_state=1)
|
| 14 |
if os.path.exists("data/raw/{}.csv".format(split)):
|
| 15 |
os.remove("data/raw/{}.csv".format(split))
|
| 16 |
df.to_csv('data/processed/{}.csv'.format(split))
|