broadfield-dev committed on
Commit
0901fdc
·
verified ·
1 Parent(s): a8dac1e

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +11 -4
rss_processor.py CHANGED
@@ -302,11 +302,18 @@ def upload_to_hf_hub():
302
  with open('local_rss_store.json','r') as f:
303
  json_data=f.read()
304
  f.close()
305
- dataset = Dataset.from_list(json.loads(json_data)).to_parquet(f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}')
 
 
306
 
307
- # 3. Create (if needed) and upload as Parquet
308
- # This will create 'data/train-00000-of-00001.parquet' on the Hub
309
- dataset.push_to_hub(DATASET_REPO_ID)
 
 
 
 
 
310
 
311
  logger.info(f"Raw feeds folder 'local' uploaded to: {DATASET_REPO_ID}")
312
  except Exception as e:
 
302
  with open('local_rss_store.json','r') as f:
303
  json_data=f.read()
304
  f.close()
305
+ json_list = json.loads(json_data) # json_data is your JSON string
306
+ timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
307
+ local_filename = f'{timestamp}.parquet'
308
 
309
+ Dataset.from_list(json_list).to_parquet(local_filename)
310
+
311
+ api.upload_file(
312
+ path_or_fileobj=local_filename,
313
+ path_in_repo=f"data/{f"{timestamp}.parquet"}", # Recommended to keep in a 'data/' folder
314
+ repo_id=DATASET_REPO_ID,
315
+ repo_type="dataset"
316
+ )
317
 
318
  logger.info(f"Raw feeds folder 'local' uploaded to: {DATASET_REPO_ID}")
319
  except Exception as e: