Spaces:
Running
Running
Update rss_processor.py
Browse files- rss_processor.py +11 -4
rss_processor.py
CHANGED
|
@@ -302,11 +302,18 @@ def upload_to_hf_hub():
|
|
| 302 |
with open('local_rss_store.json','r') as f:
|
| 303 |
json_data=f.read()
|
| 304 |
f.close()
|
| 305 |
-
|
|
|
|
|
|
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
logger.info(f"Raw feeds folder 'local' uploaded to: {DATASET_REPO_ID}")
|
| 312 |
except Exception as e:
|
|
|
|
| 302 |
with open('local_rss_store.json','r') as f:
|
| 303 |
json_data=f.read()
|
| 304 |
f.close()
|
| 305 |
+
json_list = json.loads(json_data) # json_data is your JSON string
|
| 306 |
+
timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
| 307 |
+
local_filename = f'{timestamp}.parquet'
|
| 308 |
|
| 309 |
+
Dataset.from_list(json_list).to_parquet(local_filename)
|
| 310 |
+
|
| 311 |
+
api.upload_file(
|
| 312 |
+
path_or_fileobj=local_filename,
|
| 313 |
+
path_in_repo=f"data/{f"{timestamp}.parquet"}", # Recommended to keep in a 'data/' folder
|
| 314 |
+
repo_id=DATASET_REPO_ID,
|
| 315 |
+
repo_type="dataset"
|
| 316 |
+
)
|
| 317 |
|
| 318 |
logger.info(f"Raw feeds folder 'local' uploaded to: {DATASET_REPO_ID}")
|
| 319 |
except Exception as e:
|