Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,6 @@ Original file is located at
|
|
| 7 |
https://colab.research.google.com/drive/1nLqIbyBDiBI96gDZ0TziLNX8I4uWnl9G
|
| 8 |
"""
|
| 9 |
|
| 10 |
-
pip install datasets
|
| 11 |
-
|
| 12 |
"""Picking subreddits, split=sub as the data on huggingface datasets is split w.r.t subreddits and not train/test/validation.
|
| 13 |
|
| 14 |
streaming=True is used because we don't want to load all the data into local memory.
|
|
@@ -17,7 +15,8 @@ loading and combining all the iterables together.
|
|
| 17 |
|
| 18 |
"""
|
| 19 |
|
| 20 |
-
from
|
|
|
|
| 21 |
|
| 22 |
target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
|
| 23 |
|
|
|
|
| 7 |
https://colab.research.google.com/drive/1nLqIbyBDiBI96gDZ0TziLNX8I4uWnl9G
|
| 8 |
"""
|
| 9 |
|
|
|
|
|
|
|
| 10 |
"""Picking subreddits, split=sub as the data on huggingface datasets is split w.r.t subreddits and not train/test/validation.
|
| 11 |
|
| 12 |
streaming=True is used because we don't want to load all the data into local memory.
|
|
|
|
| 15 |
|
| 16 |
"""
|
| 17 |
|
| 18 |
+
from huggingface_hub import hf_hub_url, cached_download
|
| 19 |
+
import json
|
| 20 |
|
| 21 |
target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
|
| 22 |
|