Fred808 committed on
Commit
1c27bb8
·
verified ·
1 Parent(s): 389bd6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -15
app.py CHANGED
@@ -1,25 +1,36 @@
 
1
  from huggingface_hub import list_repo_files
2
- import time
 
3
  import os
 
4
 
5
  # === Configuration ===
6
  HF_TOKEN = os.environ.get("HF_TOKEN")
7
  REPO_ID = "Fred808/BG1"
8
- SLEEP_INTERVAL = 60 # seconds
9
 
10
def count_files():
    """Print how many files the dataset repo REPO_ID currently contains.

    The repo listing is fetched with list_repo_files; entries whose path
    ends in '/' are left out of the tally. Any exception raised along the
    way is caught and printed instead of being propagated.
    """
    try:
        listing = list_repo_files(repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN)
        # Tally every entry that does not end with a slash.
        total = sum(1 for path in listing if not path.endswith('/'))
        print(f"[✓] Total files in '{REPO_ID}': {total}")
    except Exception as err:
        print(f"[!] Error: {err}")
17
 
18
def main_loop():
    """Announce the target dataset, then count its files forever.

    After each call to count_files the loop pauses for SLEEP_INTERVAL
    seconds. The function never returns.
    """
    banner = f"🔁 Starting file counter for dataset: {REPO_ID}"
    print(banner)
    while True:
        count_files()
        time.sleep(SLEEP_INTERVAL)
 
 
 
 
 
 
 
 
23
 
24
- if __name__ == "__main__":
25
- main_loop()
 
 
 
 
1
+ from fastapi import FastAPI
2
  from huggingface_hub import list_repo_files
3
+ from collections import Counter
4
+ import asyncio
5
  import os
6
+ import logging
7
 
8
  # === Configuration ===
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
10
  REPO_ID = "Fred808/BG1"
11
+ SLEEP_INTERVAL = 60 # in seconds
12
 
13
+ # === Logging setup ===
14
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 
 
 
 
 
15
 
16
+ app = FastAPI()
17
+
18
+ # === Core Logic ===
19
async def dataset_counter():
    """Periodically log the file count and per-extension breakdown of REPO_ID.

    Runs forever. Each cycle lists the files of the dataset repo, drops
    entries ending in '/', tallies the rest by file extension (files without
    one are grouped under "no_ext"), logs the result, and then sleeps for
    SLEEP_INTERVAL seconds. A failing cycle is logged and the loop carries
    on, so one bad request never stops the counter.
    """
    loop = asyncio.get_running_loop()
    while True:
        try:
            # list_repo_files is a synchronous call; invoking it directly in
            # this coroutine would stall the event loop (and every request
            # the app is serving) until it returns. Run it in the default
            # thread pool instead.
            files = await loop.run_in_executor(
                None,
                lambda: list_repo_files(
                    repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN
                ),
            )
            data_files = [f for f in files if not f.endswith('/')]
            ext_count = Counter(os.path.splitext(f)[1] or "no_ext" for f in data_files)

            logging.info(f"🧮 Total files: {len(data_files)} | File types: {dict(ext_count)}")

        except Exception as e:
            # Boundary of a long-lived background task: record the failure
            # with its traceback and retry on the next cycle.
            logging.exception(f"❌ Error counting dataset files: {e}")
        await asyncio.sleep(SLEEP_INTERVAL)
31
 
32
+ # === Startup Event ===
33
@app.on_event("startup")
async def startup_event():
    """Start the background dataset counter when the application starts."""
    logging.info(f"🚀 Dataset counter started for repo: {REPO_ID}")
    # The event loop only keeps a weak reference to tasks, so a task whose
    # handle is discarded can be garbage-collected mid-run. Keep a strong
    # reference on the application state for the lifetime of the app.
    app.state.counter_task = asyncio.create_task(dataset_counter())