blackhole1218 committed on
Commit
7b01a99
·
1 Parent(s): d88128c

feat: use HuggingFace persistent storage (/data) for database persistence

Browse files

- Use /data directory for persistent database storage in HF Spaces
- Unify download/upload repo to channelcorp/ko-tts-arena-db
- Database survives Space restarts and code pushes
- Fall back to instance/ if persistent storage unavailable

Files changed (1) hide show
  1. app.py +50 -32
app.py CHANGED
@@ -11,25 +11,44 @@ month = datetime.now().month
11
 
12
  # Check if running in a Hugging Face Space
13
  IS_SPACES = False
 
 
 
14
  if os.getenv("SPACE_REPO_NAME"):
15
  print("Running in a Hugging Face Space 🤗")
16
  IS_SPACES = True
17
 
18
- # Setup database sync for HF Spaces
19
- if not os.path.exists("instance/tts_arena.db"):
20
- os.makedirs("instance", exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  try:
22
- print("Database not found, downloading from HF dataset...")
23
  hf_hub_download(
24
- repo_id="channelcorp/ko-tts-arena-db",
25
  filename="tts_arena.db",
26
  repo_type="dataset",
27
- local_dir="instance",
28
  token=os.getenv("HF_TOKEN"),
29
  )
30
- print("Database downloaded successfully ✅")
31
  except Exception as e:
32
  print(f"Error downloading database from HF dataset: {str(e)} ⚠️")
 
 
 
33
 
34
  from flask import (
35
  Flask,
@@ -117,9 +136,17 @@ if not IS_SPACES:
117
 
118
  app = Flask(__name__)
119
  app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", os.urandom(24))
120
- app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv(
121
- "DATABASE_URI", "sqlite:///tts_arena.db"
122
- )
 
 
 
 
 
 
 
 
123
  app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
124
  app.config["SESSION_COOKIE_SECURE"] = True
125
  app.config["SESSION_COOKIE_SAMESITE"] = (
@@ -959,13 +986,15 @@ def setup_periodic_tasks():
959
  if not IS_SPACES:
960
  return
961
 
962
- db_path = app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "instance/") # Get relative path
 
963
  preferences_repo_id = "TTS-AGI/arena-v2-preferences"
964
- database_repo_id = "TTS-AGI/database-arena-v2"
 
965
  votes_dir = "./votes"
966
 
967
  def sync_database():
968
- """Uploads the database to HF dataset"""
969
  with app.app_context(): # Ensure app context for logging
970
  try:
971
  if not os.path.exists(db_path):
@@ -979,7 +1008,7 @@ def setup_periodic_tasks():
979
  repo_id=database_repo_id,
980
  repo_type="dataset",
981
  )
982
- app.logger.info(f"Database uploaded to {database_repo_id} at {datetime.utcnow()}")
983
  except Exception as e:
984
  app.logger.error(f"Error uploading database to {database_repo_id}: {str(e)}")
985
 
@@ -1282,8 +1311,11 @@ def check_for_coordinated_campaigns():
1282
 
1283
  if __name__ == "__main__":
1284
  with app.app_context():
1285
- # Ensure ./instance and ./votes directories exist
1286
- os.makedirs("instance", exist_ok=True)
 
 
 
1287
  os.makedirs("./votes", exist_ok=True) # Create votes directory if it doesn't exist
1288
  os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache audio dir exists
1289
 
@@ -1302,22 +1334,8 @@ if __name__ == "__main__":
1302
  except Exception as e:
1303
  app.logger.error(f"Error clearing cache directory {CACHE_AUDIO_DIR}: {e}")
1304
 
1305
-
1306
- # Download database if it doesn't exist (only on initial space start)
1307
- if IS_SPACES and not os.path.exists(app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "")):
1308
- try:
1309
- print("Database not found, downloading from HF dataset...")
1310
- hf_hub_download(
1311
- repo_id="TTS-AGI/database-arena-v2",
1312
- filename="tts_arena.db",
1313
- repo_type="dataset",
1314
- local_dir="instance", # download to instance/
1315
- token=os.getenv("HF_TOKEN"),
1316
- )
1317
- print("Database downloaded successfully ✅")
1318
- except Exception as e:
1319
- print(f"Error downloading database from HF dataset: {str(e)} ⚠️")
1320
-
1321
 
1322
  db.create_all() # Create tables if they don't exist
1323
  insert_initial_models()
 
11
 
12
  # Check if running in a Hugging Face Space
13
  IS_SPACES = False
14
+ PERSISTENT_DATA_DIR = None # Will be set if persistent storage is available
15
+ DATABASE_REPO_ID = "channelcorp/ko-tts-arena-db" # Single source for DB
16
+
17
  if os.getenv("SPACE_REPO_NAME"):
18
  print("Running in a Hugging Face Space 🤗")
19
  IS_SPACES = True
20
 
21
+ # Check for persistent storage availability (/data directory)
22
+ # HuggingFace Spaces provides /data as persistent storage
23
+ if os.path.exists("/data") and os.access("/data", os.W_OK):
24
+ PERSISTENT_DATA_DIR = "/data"
25
+ print("Persistent storage available at /data ✅")
26
+ else:
27
+ # Fallback to instance directory (non-persistent)
28
+ PERSISTENT_DATA_DIR = "instance"
29
+ print("⚠️ Warning: Persistent storage (/data) not available. Using 'instance/' (data may be lost on restart)")
30
+
31
+ # Define database path
32
+ db_path = os.path.join(PERSISTENT_DATA_DIR, "tts_arena.db")
33
+
34
+ # Setup database - download only if it doesn't exist in persistent storage
35
+ if not os.path.exists(db_path):
36
+ os.makedirs(PERSISTENT_DATA_DIR, exist_ok=True)
37
  try:
38
+ print(f"Database not found at {db_path}, downloading from HF dataset ({DATABASE_REPO_ID})...")
39
  hf_hub_download(
40
+ repo_id=DATABASE_REPO_ID,
41
  filename="tts_arena.db",
42
  repo_type="dataset",
43
+ local_dir=PERSISTENT_DATA_DIR,
44
  token=os.getenv("HF_TOKEN"),
45
  )
46
+ print(f"Database downloaded successfully to {db_path} ✅")
47
  except Exception as e:
48
  print(f"Error downloading database from HF dataset: {str(e)} ⚠️")
49
+ print("A new database will be created.")
50
+ else:
51
+ print(f"Database found at {db_path} (persistent storage) ✅")
52
 
53
  from flask import (
54
  Flask,
 
136
 
137
  app = Flask(__name__)
138
  app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", os.urandom(24))
139
+
140
+ # Configure database path - use persistent storage in HF Spaces
141
+ if IS_SPACES and PERSISTENT_DATA_DIR:
142
+ # Use persistent storage path for HF Spaces
143
+ app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{PERSISTENT_DATA_DIR}/tts_arena.db"
144
+ print(f"Using database at: {PERSISTENT_DATA_DIR}/tts_arena.db")
145
+ else:
146
+ app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv(
147
+ "DATABASE_URI", "sqlite:///tts_arena.db"
148
+ )
149
+
150
  app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
151
  app.config["SESSION_COOKIE_SECURE"] = True
152
  app.config["SESSION_COOKIE_SAMESITE"] = (
 
986
  if not IS_SPACES:
987
  return
988
 
989
+ # Get database path from config (handles both persistent storage and fallback)
990
+ db_path = app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "")
991
  preferences_repo_id = "TTS-AGI/arena-v2-preferences"
992
+ # Use the same repo for download and upload (consistency)
993
+ database_repo_id = DATABASE_REPO_ID
994
  votes_dir = "./votes"
995
 
996
  def sync_database():
997
+ """Uploads the database to HF dataset (backup to cloud)"""
998
  with app.app_context(): # Ensure app context for logging
999
  try:
1000
  if not os.path.exists(db_path):
 
1008
  repo_id=database_repo_id,
1009
  repo_type="dataset",
1010
  )
1011
+ app.logger.info(f"Database backed up to {database_repo_id} at {datetime.utcnow()}")
1012
  except Exception as e:
1013
  app.logger.error(f"Error uploading database to {database_repo_id}: {str(e)}")
1014
 
 
1311
 
1312
  if __name__ == "__main__":
1313
  with app.app_context():
1314
+ # Ensure directories exist
1315
+ if IS_SPACES and PERSISTENT_DATA_DIR:
1316
+ os.makedirs(PERSISTENT_DATA_DIR, exist_ok=True)
1317
+ else:
1318
+ os.makedirs("instance", exist_ok=True)
1319
  os.makedirs("./votes", exist_ok=True) # Create votes directory if it doesn't exist
1320
  os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache audio dir exists
1321
 
 
1334
  except Exception as e:
1335
  app.logger.error(f"Error clearing cache directory {CACHE_AUDIO_DIR}: {e}")
1336
 
1337
+ # Note: Database download is handled at module load time for HF Spaces
1338
+ # This ensures DB is ready before app initialization
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1339
 
1340
  db.create_all() # Create tables if they don't exist
1341
  insert_initial_models()