Samfredoly committed on
Commit
8d9bf3d
·
verified ·
1 Parent(s): 7c7b8a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -54
app.py CHANGED
@@ -30,6 +30,12 @@ app_state: Dict[str, Any] = {
30
  "files_since_last_state_upload": 0,
31
  }
32
 
 
 
 
 
 
 
33
  # --- Hugging Face Utility Functions ---
34
 
35
  def get_hf_api() -> HfApi:
@@ -88,15 +94,14 @@ def upload_file_to_huggingface(local_path: str, path_in_repo: str):
88
 
89
  def load_state():
90
  """Loads application state from the local state file."""
91
- global app_state
92
  if os.path.exists(STATE_FILE_PATH):
93
  try:
94
  with open(STATE_FILE_PATH, "r") as f:
95
- # Load the state file content, which contains the file_states and next_download_index
96
  state_content = json.load(f)
97
 
98
- # The app_state itself only tracks counters, not the file_states
99
- # We only update the counters here if they exist in the state file
100
  if "total_files_uploaded" in state_content:
101
  app_state["total_files_uploaded"] = state_content["total_files_uploaded"]
102
  if "current_zip_version" in state_content:
@@ -105,21 +110,32 @@ def load_state():
105
  app_state["files_since_last_zip"] = state_content["files_since_last_zip"]
106
  if "files_since_last_state_upload" in state_content:
107
  app_state["files_since_last_state_upload"] = state_content["files_since_last_state_upload"]
 
 
 
 
 
 
108
 
109
- print(f"State counters loaded successfully from {STATE_FILE_PATH}.")
110
  except Exception as e:
111
  print(f"Error loading state file: {e}. Using default state.")
112
  else:
113
  print("Local state file not found. Using default state.")
114
 
115
  def save_state():
116
- """Saves application state (counters) to the local state file."""
117
- # This function is now only for saving the internal app_state counters,
118
- # not the file_states, which are managed by the client.
119
  try:
 
 
 
 
 
 
120
  with open(STATE_FILE_PATH, "w") as f:
121
- json.dump(app_state, f, indent=4)
122
- print(f"State counters saved successfully to {STATE_FILE_PATH}.")
123
  except Exception as e:
124
  print(f"Error saving state file: {e}")
125
 
@@ -228,7 +244,7 @@ app = FastAPI(
228
  @app.post("/upload/")
229
  async def upload_file(file: UploadFile = File(...)):
230
  """Upload a file to the server and trigger periodic tasks."""
231
- global app_state
232
 
233
  # 1. Save the file
234
  try:
@@ -249,23 +265,38 @@ async def upload_file(file: UploadFile = File(...)):
249
 
250
  # --- LOGIC FOR STATE FILE HANDLING ---
251
  if file.filename == STATE_FILE_NAME:
252
- print(f"Intercepted state file upload: {STATE_FILE_NAME}. Saving locally only.")
253
- # The client's lock/unlock is now persisted locally on the server.
254
- # It will only be uploaded to HF when the STATE_UPLOAD_THRESHOLD is met.
255
- # We return success immediately.
256
- return {"filename": file.filename, "message": "State file successfully saved locally", "state": app_state}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  # --- END LOGIC FOR STATE FILE HANDLING ---
258
 
259
  # 2. Update counters only after successful write of a non-state file (transcription JSON)
260
  app_state["total_files_uploaded"] += 1
261
  app_state["files_since_last_zip"] += 1
262
  app_state["files_since_last_state_upload"] += 1
263
- save_state() # Save internal counters locally
 
 
264
 
265
  # 3. Check for state file upload threshold (100 non-state file uploads)
266
  if app_state["files_since_last_state_upload"] >= STATE_UPLOAD_THRESHOLD:
267
  print(f"State upload threshold ({STATE_UPLOAD_THRESHOLD}) reached. Uploading state file...")
268
- # This uploads the local state file (which includes the latest file_states) to HF
269
  upload_state_file_to_hf()
270
 
271
  # 4. Check for zip upload threshold (100 non-state file uploads)
@@ -275,6 +306,7 @@ async def upload_file(file: UploadFile = File(...)):
275
  if zip_path:
276
  upload_zip_to_hf(zip_path)
277
  os.remove(zip_path)
 
278
 
279
  return {"filename": file.filename, "message": "File successfully uploaded", "state": app_state}
280
  except HTTPException:
@@ -330,45 +362,18 @@ async def list_files():
330
  async def get_state():
331
  """
332
  Get the current application state (including file processing status).
333
- This endpoint now ensures it fetches the latest state from the local file.
334
  """
335
- # 1. Always read the latest state from the local file (which gets updated by client uploads)
336
- if os.path.exists(STATE_FILE_PATH):
337
- try:
338
- with open(STATE_FILE_PATH, "r") as f:
339
- state_content = json.load(f)
340
-
341
- # Ensure the required keys for the client are present
342
- if "file_states" not in state_content:
343
- state_content["file_states"] = {}
344
- if "next_download_index" not in state_content:
345
- state_content["next_download_index"] = 0
346
-
347
- print(f"✅ Read state from file: next_download_index={state_content.get('next_download_index')}, file_states={len(state_content.get('file_states', {}))}")
348
- return {"state": state_content}
349
-
350
- except Exception as e:
351
- print(f"Error reading local state file for /state/ endpoint: {e}")
352
- raise HTTPException(status_code=500, detail="Error processing state file.")
353
 
354
- # 2. If file doesn't exist, try to download from HF as fallback
355
- download_state_file_from_hf()
356
- if os.path.exists(STATE_FILE_PATH):
357
- try:
358
- with open(STATE_FILE_PATH, "r") as f:
359
- state_content = json.load(f)
360
-
361
- if "file_states" not in state_content:
362
- state_content["file_states"] = {}
363
- if "next_download_index" not in state_content:
364
- state_content["next_download_index"] = 0
365
-
366
- return {"state": state_content}
367
- except Exception as e:
368
- print(f"Error reading downloaded state file: {e}")
369
 
370
- # 3. Return default state if file still not found
371
- return {"state": {"next_download_index": 0, "file_states": {}}}
372
 
373
  # --- Main execution block for testing/running ---
374
  if __name__ == "__main__":
 
30
  "files_since_last_state_upload": 0,
31
  }
32
 
33
+ # Global file states (client-managed processing state)
34
+ file_states_cache: Dict[str, Any] = {
35
+ "next_download_index": 0,
36
+ "file_states": {}
37
+ }
38
+
39
  # --- Hugging Face Utility Functions ---
40
 
41
  def get_hf_api() -> HfApi:
 
94
 
95
  def load_state():
96
  """Loads application state from the local state file."""
97
+ global app_state, file_states_cache
98
  if os.path.exists(STATE_FILE_PATH):
99
  try:
100
  with open(STATE_FILE_PATH, "r") as f:
101
+ # Load the complete state file content
102
  state_content = json.load(f)
103
 
104
+ # Load internal counters
 
105
  if "total_files_uploaded" in state_content:
106
  app_state["total_files_uploaded"] = state_content["total_files_uploaded"]
107
  if "current_zip_version" in state_content:
 
110
  app_state["files_since_last_zip"] = state_content["files_since_last_zip"]
111
  if "files_since_last_state_upload" in state_content:
112
  app_state["files_since_last_state_upload"] = state_content["files_since_last_state_upload"]
113
+
114
+ # Load file_states (client-managed processing state)
115
+ if "file_states" in state_content:
116
+ file_states_cache["file_states"] = state_content["file_states"]
117
+ if "next_download_index" in state_content:
118
+ file_states_cache["next_download_index"] = state_content["next_download_index"]
119
 
120
+ print(f"State loaded successfully: app_state={app_state}, file_states_cache={file_states_cache}")
121
  except Exception as e:
122
  print(f"Error loading state file: {e}. Using default state.")
123
  else:
124
  print("Local state file not found. Using default state.")
125
 
126
  def save_state():
127
+ """Saves both application state counters and file processing states to the local state file."""
128
+ global app_state, file_states_cache
 
129
  try:
130
+ # Combine both app_state counters and file_states into one JSON
131
+ complete_state = {
132
+ **app_state, # Include all counters
133
+ **file_states_cache # Include file_states and next_download_index
134
+ }
135
+
136
  with open(STATE_FILE_PATH, "w") as f:
137
+ json.dump(complete_state, f, indent=4)
138
+ print(f"Complete state saved to {STATE_FILE_PATH}.")
139
  except Exception as e:
140
  print(f"Error saving state file: {e}")
141
 
 
244
  @app.post("/upload/")
245
  async def upload_file(file: UploadFile = File(...)):
246
  """Upload a file to the server and trigger periodic tasks."""
247
+ global app_state, file_states_cache
248
 
249
  # 1. Save the file
250
  try:
 
265
 
266
  # --- LOGIC FOR STATE FILE HANDLING ---
267
  if file.filename == STATE_FILE_NAME:
268
+ print(f"Intercepted state file upload: {STATE_FILE_NAME}. Merging with cache...")
269
+ # Read the uploaded state file and merge it with our cache
270
+ try:
271
+ with open(file_path, "r") as f:
272
+ uploaded_state = json.load(f)
273
+
274
+ # Merge the uploaded state into our cache
275
+ if "file_states" in uploaded_state:
276
+ file_states_cache["file_states"] = uploaded_state["file_states"]
277
+ if "next_download_index" in uploaded_state:
278
+ file_states_cache["next_download_index"] = uploaded_state["next_download_index"]
279
+
280
+ # Save the merged state back to disk
281
+ save_state()
282
+ print(f"✅ State file merged: next_download_index={file_states_cache['next_download_index']}, file_states count={len(file_states_cache['file_states'])}")
283
+ except Exception as e:
284
+ print(f"Error processing uploaded state file: {e}")
285
+
286
+ return {"filename": file.filename, "message": "State file successfully merged", "state": file_states_cache}
287
  # --- END LOGIC FOR STATE FILE HANDLING ---
288
 
289
  # 2. Update counters only after successful write of a non-state file (transcription JSON)
290
  app_state["total_files_uploaded"] += 1
291
  app_state["files_since_last_zip"] += 1
292
  app_state["files_since_last_state_upload"] += 1
293
+ save_state() # Save both app_state and file_states
294
+
295
+ print(f"✅ File uploaded: {file.filename} | files_since_last_zip={app_state['files_since_last_zip']}")
296
 
297
  # 3. Check for state file upload threshold (100 non-state file uploads)
298
  if app_state["files_since_last_state_upload"] >= STATE_UPLOAD_THRESHOLD:
299
  print(f"State upload threshold ({STATE_UPLOAD_THRESHOLD}) reached. Uploading state file...")
 
300
  upload_state_file_to_hf()
301
 
302
  # 4. Check for zip upload threshold (100 non-state file uploads)
 
306
  if zip_path:
307
  upload_zip_to_hf(zip_path)
308
  os.remove(zip_path)
309
+ cleanup_upload_dir()
310
 
311
  return {"filename": file.filename, "message": "File successfully uploaded", "state": app_state}
312
  except HTTPException:
 
362
  async def get_state():
363
  """
364
  Get the current application state (including file processing status).
365
+ Returns both internal counters and client-managed file_states.
366
  """
367
+ global file_states_cache
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
+ # Return the merged state: app_state counters + file_states cache
370
+ return_state = {
371
+ **app_state,
372
+ **file_states_cache
373
+ }
 
 
 
 
 
 
 
 
 
 
374
 
375
+ print(f"✅ Returning state: next_download_index={file_states_cache.get('next_download_index')}, file_states count={len(file_states_cache.get('file_states', {}))}")
376
+ return {"state": return_state}
377
 
378
  # --- Main execution block for testing/running ---
379
  if __name__ == "__main__":