Samfredoly committed on
Commit
fef919a
·
verified ·
1 Parent(s): e4ba65c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -57
app.py CHANGED
@@ -1,99 +1,204 @@
1
  import os
2
  import shutil
3
  import zipfile
4
- import asyncio
 
5
  from contextlib import asynccontextmanager
6
- from typing import List
7
 
8
  from fastapi import FastAPI, UploadFile, File, HTTPException
9
  from fastapi.responses import FileResponse
10
- from huggingface_hub import HfApi
11
 
12
  # --- Configuration ---
13
  UPLOAD_DIR = "uploaded_files"
 
14
  HF_DATASET_REPO = "samfred2/A_Text"
15
  HF_TOKEN = os.getenv("HF_TOKEN")
16
 
17
- # --- Utility Functions ---
 
 
18
 
19
- def get_uploaded_files() -> List[str]:
20
- """Returns a list of all files in the upload directory."""
21
- if not os.path.exists(UPLOAD_DIR):
22
- return []
23
- return [os.path.join(UPLOAD_DIR, f) for f in os.listdir(UPLOAD_DIR) if os.path.isfile(os.path.join(UPLOAD_DIR, f))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def zip_uploaded_files(zip_filename: str = "uploaded_files.zip") -> str:
26
- """Zips all files in the upload directory into a single zip file."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  if not os.path.exists(UPLOAD_DIR) or not os.listdir(UPLOAD_DIR):
28
  print("No files to zip.")
29
  return None
30
 
 
 
 
 
31
  zip_path = os.path.join(os.getcwd(), zip_filename)
 
 
 
32
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
33
  for root, _, files in os.walk(UPLOAD_DIR):
34
  for file in files:
35
  file_path = os.path.join(root, file)
36
- # Add file to zip, preserving directory structure relative to UPLOAD_DIR
37
- zipf.write(file_path, os.path.relpath(file_path, UPLOAD_DIR))
 
38
 
39
- print(f"Successfully created zip file at: {zip_path}")
40
  return zip_path
41
 
42
- def upload_to_huggingface(zip_path: str):
43
- """Uploads the zip file to the specified Hugging Face dataset."""
44
- if not HF_TOKEN:
45
- print("HF_TOKEN not found in environment variables. Skipping upload.")
46
- return
47
-
48
  if not zip_path or not os.path.exists(zip_path):
49
  print("Zip file not found. Skipping upload.")
50
  return
51
 
52
- try:
53
- api = HfApi()
54
-
55
- # Upload the zip file to the root of the dataset repository
56
- api.upload_file(
57
- path_or_fileobj=zip_path,
58
- path_in_repo=os.path.basename(zip_path),
59
- repo_id=HF_DATASET_REPO,
60
- repo_type="dataset",
61
- token=HF_TOKEN
62
- )
63
- print(f"Successfully uploaded {os.path.basename(zip_path)} to {HF_DATASET_REPO}")
64
- except Exception as e:
65
- print(f"Hugging Face upload failed: {e}")
66
 
67
  def cleanup_upload_dir():
68
- """Removes the upload directory and its contents."""
69
  if os.path.exists(UPLOAD_DIR):
70
- shutil.rmtree(UPLOAD_DIR)
71
- print(f"Cleaned up {UPLOAD_DIR} directory.")
 
 
 
 
 
72
 
73
  # --- Application Lifespan ---
74
 
75
  @asynccontextmanager
76
  async def lifespan(app: FastAPI):
77
- # Startup: Ensure upload directory exists
78
  os.makedirs(UPLOAD_DIR, exist_ok=True)
79
  print(f"Application starting. Upload directory: {UPLOAD_DIR}")
 
 
 
 
 
 
80
  yield
81
- # Shutdown: Zip and upload files
82
- print("Application shutting down. Initiating final upload...")
83
- zip_path = zip_uploaded_files()
 
 
 
 
 
84
  if zip_path:
85
- upload_to_huggingface(zip_path)
86
  # Clean up the created zip file after upload
87
  os.remove(zip_path)
 
 
88
  cleanup_upload_dir()
89
  print("Shutdown complete.")
90
 
91
  # --- FastAPI App Initialization ---
92
 
93
  app = FastAPI(
94
- title="File Uploader and Downloader Service",
95
- description="A simple service for file management and Hugging Face dataset synchronization.",
96
- version="1.0.0",
97
  lifespan=lifespan
98
  )
99
 
@@ -101,18 +206,41 @@ app = FastAPI(
101
 
102
  @app.post("/upload/")
103
  async def upload_file(file: UploadFile = File(...)):
104
- """Upload a file to the server."""
 
 
 
105
  try:
106
  file_path = os.path.join(UPLOAD_DIR, file.filename)
107
  # Check if file already exists to prevent overwriting without warning
108
  if os.path.exists(file_path):
109
  raise HTTPException(status_code=409, detail=f"File '{file.filename}' already exists.")
110
-
111
- # Write the file content to disk
112
- with open(file_path, "wb") as buffer:
113
- shutil.copyfileobj(file.file, buffer)
 
 
114
 
115
- return {"filename": file.filename, "message": "File successfully uploaded"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  except HTTPException:
117
  raise
118
  except Exception as e:
@@ -136,23 +264,36 @@ async def download_file(filename: str):
136
  async def sync_dataset():
137
  """Manually trigger zipping of all uploaded files and uploading to the Hugging Face dataset."""
138
  print("Manual dataset sync triggered.")
139
- zip_path = zip_uploaded_files()
 
 
 
 
 
140
  if not zip_path:
141
- return {"message": "No files to sync. Upload directory is empty."}
142
 
143
- upload_to_huggingface(zip_path)
144
 
145
  # Clean up the created zip file after upload
146
  os.remove(zip_path)
147
 
148
- return {"message": "Files zipped and upload to Hugging Face dataset initiated."}
149
 
150
  @app.get("/files/")
151
  async def list_files():
152
  """List all files currently available for download."""
153
  if not os.path.exists(UPLOAD_DIR):
154
  return {"files": []}
155
- return {"files": os.listdir(UPLOAD_DIR)}
 
 
 
 
 
 
 
 
156
 
157
  # --- Main execution block for testing/running ---
158
  if __name__ == "__main__":
@@ -178,4 +319,4 @@ if __name__ == "__main__":
178
  finally:
179
  # Simulate cleanup that happens in the lifespan context manager
180
  # when running with uvicorn in a real environment.
181
- pass
 
1
  import os
2
  import shutil
3
  import zipfile
4
+ import json
5
+ import time
6
  from contextlib import asynccontextmanager
7
+ from typing import List, Dict, Any
8
 
9
  from fastapi import FastAPI, UploadFile, File, HTTPException
10
  from fastapi.responses import FileResponse
11
+ from huggingface_hub import HfApi, hf_hub_download, HfFileSystem
12
 
13
  # --- Configuration ---
14
  UPLOAD_DIR = "uploaded_files"
15
+ STATE_FILE_PATH = os.path.join(os.getcwd(), "processing_audio_state.json")
16
  HF_DATASET_REPO = "samfred2/A_Text"
17
  HF_TOKEN = os.getenv("HF_TOKEN")
18
 
19
+ # Hardcoded variables for automation
20
+ ZIP_UPLOAD_THRESHOLD = 200
21
+ STATE_UPLOAD_THRESHOLD = 100
22
 
23
+ # Global state object (will be loaded from/saved to STATE_FILE_PATH)
24
+ app_state: Dict[str, Any] = {
25
+ "total_files_uploaded": 0,
26
+ "current_zip_version": 0,
27
+ "files_since_last_zip": 0,
28
+ "files_since_last_state_upload": 0,
29
+ }
30
+
31
+ # --- Hugging Face Utility Functions ---
32
+
33
def get_hf_api() -> HfApi:
    """Build a Hugging Face API client authenticated with ``HF_TOKEN``.

    Returns:
        An ``HfApi`` instance constructed with the module-level token.

    Raises:
        ValueError: If ``HF_TOKEN`` is empty or unset.
    """
    if HF_TOKEN:
        return HfApi(token=HF_TOKEN)
    raise ValueError("HF_TOKEN not found in environment variables.")
38
+
39
def download_state_file() -> bool:
    """Fetch the persisted state file from the Hugging Face dataset repo.

    The file is written into the directory of ``STATE_FILE_PATH`` under the
    same base name, so a subsequent local load can read it.

    Returns:
        True when the file was downloaded. False when ``HF_TOKEN`` is
        missing, the file is not on the Hub yet, or the download failed.
    """
    # Checked explicitly instead of via ``except ValueError`` so that an
    # unrelated ValueError raised during the download (for example a
    # repo-id validation error) is not misreported as a missing token.
    if not HF_TOKEN:
        print("HF_TOKEN missing. Cannot download state file.")
        return False

    try:
        downloaded_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=os.path.basename(STATE_FILE_PATH),
            repo_type="dataset",
            local_dir=os.path.dirname(STATE_FILE_PATH),
            local_dir_use_symlinks=False,
            # Authenticate with the configured token; without it the request
            # only uses whatever credentials happen to be cached locally.
            token=HF_TOKEN,
        )
    except Exception as e:
        # A missing file is the expected case on the very first run.
        if "404" in str(e):
            print("State file not found on Hugging Face. Will start with default state.")
        else:
            print(f"Error downloading state file: {e}")
        return False

    print(f"Successfully downloaded state file to: {downloaded_path}")
    return True
63
 
64
def upload_file_to_huggingface(local_path: str, path_in_repo: str) -> bool:
    """Upload a single local file to the configured Hugging Face dataset.

    Args:
        local_path: Path of the file on the local file system.
        path_in_repo: Destination path inside the dataset repository.

    Returns:
        True when the upload succeeded, False when the token is missing,
        the local file does not exist, or the upload raised.
    """
    # Explicit pre-checks: the upload call itself can raise ValueError for
    # reasons unrelated to the token (e.g. a path that is not a file), and
    # catching ValueError wholesale would print a misleading message.
    if not HF_TOKEN:
        print("HF_TOKEN missing. Skipping upload.")
        return False
    if not os.path.isfile(local_path):
        print(f"Hugging Face upload failed for {local_path}: local file does not exist")
        return False

    try:
        api = get_hf_api()
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=path_in_repo,
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Best-effort by design: report and signal failure to the caller.
        print(f"Hugging Face upload failed for {local_path}: {e}")
        return False

    print(f"Successfully uploaded {os.path.basename(local_path)} to {HF_DATASET_REPO} as {path_in_repo}")
    return True
83
+
84
+ # --- State Management Functions ---
85
+
86
def load_state():
    """Merge the local state file into the module-level ``app_state``.

    Keys already present in ``app_state`` with an integer default are only
    overwritten by integer values; anything else for those keys is ignored
    so later ``+= 1`` updates on the counters cannot fail. Any read or parse
    error leaves ``app_state`` untouched (default state).
    """
    if not os.path.exists(STATE_FILE_PATH):
        print("Local state file not found. Using default state.")
        return

    try:
        with open(STATE_FILE_PATH, "r", encoding="utf-8") as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise ValueError(f"expected a JSON object, got {type(loaded).__name__}")
    except Exception as e:
        print(f"Error loading state file: {e}. Using default state.")
        return

    for key, value in loaded.items():
        default = app_state.get(key)
        # bool is a subclass of int, so it is rejected explicitly.
        value_is_int = isinstance(value, int) and not isinstance(value, bool)
        if isinstance(default, int) and not value_is_int:
            print(f"Ignoring invalid value for '{key}' in state file: {value!r}")
            continue
        app_state[key] = value
    print(f"State loaded successfully from {STATE_FILE_PATH}.")
98
+
99
def save_state():
    """Persist ``app_state`` to ``STATE_FILE_PATH`` as indented JSON.

    The data is written to a sibling temporary file first and then moved
    over the real file with ``os.replace``, so an interrupted write cannot
    leave a truncated state file behind. Failures are reported and
    swallowed (best-effort, as before).
    """
    tmp_path = f"{STATE_FILE_PATH}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(app_state, f, indent=4)
        os.replace(tmp_path, STATE_FILE_PATH)
        print(f"State saved successfully to {STATE_FILE_PATH}.")
    except Exception as e:
        print(f"Error saving state file: {e}")
        # Do not leave a half-written temporary file around; it may not
        # exist at all if open() itself failed, hence the tolerant removal.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
107
+
108
def upload_state_file_to_hf() -> bool:
    """Save the current state and upload the state file to Hugging Face.

    The ``files_since_last_state_upload`` counter is zeroed *before* the
    file is saved, so the uploaded snapshot already reflects the reset.
    If the upload fails the previous counter value is restored (and saved
    again) so the threshold check keeps firing until an upload succeeds.

    Returns:
        True when the state file was uploaded, False otherwise.
    """
    counter_key = "files_since_last_state_upload"
    pending = app_state.get(counter_key, 0)

    app_state[counter_key] = 0
    save_state()

    uploaded = bool(
        upload_file_to_huggingface(STATE_FILE_PATH, os.path.basename(STATE_FILE_PATH))
    )
    if not uploaded:
        # Roll back so the local state does not claim an upload that never happened.
        app_state[counter_key] = pending
        save_state()
    return uploaded
114
+
115
+ # --- File Processing Functions ---
116
+
117
def zip_uploaded_files_versioned() -> "str | None":
    """Zip the contents of ``UPLOAD_DIR`` into a new versioned archive.

    The archive is named ``uploaded_files_<version>.zip`` and created in the
    current working directory. Files whose name equals the state file's base
    name are left out. ``app_state["current_zip_version"]`` is advanced only
    after the archive has been written completely.

    Returns:
        The path of the created archive, or None when the upload directory
        is missing or empty.

    Raises:
        Any exception raised while writing the archive is propagated after
        the partial archive has been removed.
    """
    if not os.path.exists(UPLOAD_DIR) or not os.listdir(UPLOAD_DIR):
        print("No files to zip.")
        return None

    next_version = app_state["current_zip_version"] + 1
    zip_filename = f"uploaded_files_{next_version}.zip"
    zip_path = os.path.join(os.getcwd(), zip_filename)
    exclude_file = os.path.basename(STATE_FILE_PATH)

    try:
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _, files in os.walk(UPLOAD_DIR):
                for file in files:
                    if file == exclude_file:
                        continue
                    file_path = os.path.join(root, file)
                    # Archive names are relative to UPLOAD_DIR to preserve its layout.
                    zipf.write(file_path, os.path.relpath(file_path, UPLOAD_DIR))
    except Exception:
        # Do not leave a truncated archive behind or consume a version number.
        if os.path.exists(zip_path):
            os.remove(zip_path)
        raise

    # Commit the version only once the archive exists on disk.
    app_state["current_zip_version"] = next_version
    print(f"Successfully created versioned zip file at: {zip_path}")
    return zip_path
141
 
142
def upload_zip_to_hf(zip_path: str) -> bool:
    """Upload a versioned zip archive to the Hugging Face dataset.

    Args:
        zip_path: Local path of the archive; falsy or non-existent paths
            are skipped.

    Returns:
        True when the archive was uploaded (and the
        ``files_since_last_zip`` counter was reset), False otherwise.
    """
    if not zip_path or not os.path.exists(zip_path):
        print("Zip file not found. Skipping upload.")
        return False

    if not upload_file_to_huggingface(zip_path, os.path.basename(zip_path)):
        # Keep the counter untouched so the threshold check retries later.
        return False

    # Reset counter only after a successful upload.
    app_state["files_since_last_zip"] = 0
    return True
 
 
 
 
 
 
 
 
 
153
 
def cleanup_upload_dir():
    """Empty ``UPLOAD_DIR`` while keeping the directory itself.

    Sub-directories are removed recursively; regular files and symlinks are
    unlinked, except for an entry named like the state file, which is kept.
    Does nothing when the directory does not exist.
    """
    if not os.path.exists(UPLOAD_DIR):
        return

    state_name = os.path.basename(STATE_FILE_PATH)
    for item in os.listdir(UPLOAD_DIR):
        item_path = os.path.join(UPLOAD_DIR, item)
        # A symlink to a directory passes isdir(), but rmtree() refuses to
        # operate on symlinks, so such entries are unlinked like files.
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        elif item != state_name:
            os.remove(item_path)
    print(f"Cleaned up files in {UPLOAD_DIR} directory (state file preserved).")
164
 
165
  # --- Application Lifespan ---
166
 
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: restore state on startup, flush everything on shutdown.

    Startup creates the upload directory, downloads the state file from
    Hugging Face and loads it. Shutdown zips and uploads any remaining
    files, then uploads the state file, then empties the upload directory.
    """
    # Startup: 1. Ensure upload directory exists
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    print(f"Application starting. Upload directory: {UPLOAD_DIR}")

    # Startup: 2. Download and load state
    download_state_file()
    load_state()
    print(f"Initial state: {app_state}")

    yield

    # Shutdown: 1. Final zip and upload of any remaining files.
    # This must run BEFORE the state upload: zipping advances
    # current_zip_version, and if the persisted state still carried the old
    # version the next run would reuse the number and overwrite this archive.
    print("Performing final zip and upload...")
    zip_path = zip_uploaded_files_versioned()
    if zip_path:
        upload_zip_to_hf(zip_path)
        # Clean up the created zip file after upload
        os.remove(zip_path)

    # Shutdown: 2. Upload the state file, now including the final zip version
    print("Application shutting down. Uploading final state...")
    upload_state_file_to_hf()

    # Shutdown: 3. Cleanup
    cleanup_upload_dir()
    print("Shutdown complete.")
195
 
196
  # --- FastAPI App Initialization ---
197
 
198
  app = FastAPI(
199
+ title="Versioned File Uploader and Dataset Sync Service",
200
+ description="A service for file management with versioned zipping and state persistence to Hugging Face.",
201
+ version="2.0.0",
202
  lifespan=lifespan
203
  )
204
 
 
206
 
207
  @app.post("/upload/")
208
  async def upload_file(file: UploadFile = File(...)):
209
+ """Upload a file to the server and trigger periodic tasks."""
210
+ global app_state
211
+
212
+ # 1. Save the file
213
  try:
214
  file_path = os.path.join(UPLOAD_DIR, file.filename)
215
  # Check if file already exists to prevent overwriting without warning
216
  if os.path.exists(file_path):
217
  raise HTTPException(status_code=409, detail=f"File '{file.filename}' already exists.")
218
+
219
+ try:
220
+ with open(file_path, "wb") as buffer:
221
+ shutil.copyfileobj(file.file, buffer)
222
+ except Exception as e:
223
+ raise HTTPException(status_code=500, detail=f"Could not write file to disk: {e}")
224
 
225
+ # 2. Update counters only after successful write
226
+ app_state["total_files_uploaded"] += 1
227
+ app_state["files_since_last_zip"] += 1
228
+ app_state["files_since_last_state_upload"] += 1
229
+ save_state() # Save state after every upload
230
+ # 3. Check for state file upload threshold (100 uploads)
231
+ if app_state["files_since_last_state_upload"] >= STATE_UPLOAD_THRESHOLD:
232
+ print(f"State upload threshold ({STATE_UPLOAD_THRESHOLD}) reached. Uploading state file...")
233
+ upload_state_file_to_hf()
234
+
235
+ # 4. Check for zip upload threshold (200 uploads)
236
+ if app_state["files_since_last_zip"] >= ZIP_UPLOAD_THRESHOLD:
237
+ print(f"Zip upload threshold ({ZIP_UPLOAD_THRESHOLD}) reached. Zipping and uploading files...")
238
+ zip_path = zip_uploaded_files_versioned()
239
+ if zip_path:
240
+ upload_zip_to_hf(zip_path)
241
+ os.remove(zip_path)
242
+
243
+ return {"filename": file.filename, "message": "File successfully uploaded", "state": app_state}
244
  except HTTPException:
245
  raise
246
  except Exception as e:
 
async def sync_dataset():
    """Manually trigger zipping of all uploaded files and uploading to the Hugging Face dataset.

    Returns:
        A dict with a human-readable ``message`` and the current ``state``.
    """
    print("Manual dataset sync triggered.")

    # 1. Zip and upload files first: zipping advances current_zip_version,
    # and the state uploaded below has to include that new version so a
    # restart does not reuse the number and overwrite the archive.
    zip_path = zip_uploaded_files_versioned()
    if zip_path:
        upload_zip_to_hf(zip_path)
        # Clean up the created zip file after upload
        os.remove(zip_path)

    # 2. Upload state file (done whether or not there was anything to zip)
    upload_state_file_to_hf()

    if not zip_path:
        return {"message": "No files to sync. Upload directory is empty.", "state": app_state}
    return {"message": "Files zipped and upload to Hugging Face dataset initiated.", "state": app_state}
282
 
@app.get("/files/")
async def list_files():
    """Return the names of entries in the upload directory plus the current state.

    An entry named like the state file is left out of the listing. When the
    upload directory does not exist, only an empty ``files`` list is returned.
    """
    state_name = os.path.basename(STATE_FILE_PATH)
    if os.path.exists(UPLOAD_DIR):
        visible = []
        for entry in os.listdir(UPLOAD_DIR):
            if entry != state_name:
                visible.append(entry)
        return {"files": visible, "state": app_state}
    return {"files": []}
292
+
293
@app.get("/state/")
async def get_state():
    """Return the module-level ``app_state`` dictionary under a ``state`` key."""
    payload = {"state": app_state}
    return payload
297
 
298
  # --- Main execution block for testing/running ---
299
  if __name__ == "__main__":
 
319
  finally:
320
  # Simulate cleanup that happens in the lifespan context manager
321
  # when running with uvicorn in a real environment.
322
+ pass