Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,8 @@ import asyncio
|
|
| 7 |
import threading
|
| 8 |
import time
|
| 9 |
import hashlib
|
| 10 |
-
from typing import Dict, List, Set
|
| 11 |
-
from fastapi import FastAPI, BackgroundTasks, HTTPException
|
| 12 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
| 14 |
from huggingface_hub import HfApi, list_repo_files
|
|
@@ -101,7 +101,7 @@ def load_download_state() -> int:
|
|
| 101 |
if os.path.exists(DOWNLOAD_STATE_FILE):
|
| 102 |
try:
|
| 103 |
with open(DOWNLOAD_STATE_FILE, "r") as f:
|
| 104 |
-
return json.load(f).get("next_download_index",
|
| 105 |
except json.JSONDecodeError:
|
| 106 |
log_message(f"β οΈ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
|
| 107 |
return 0
|
|
@@ -129,7 +129,7 @@ def save_processed_files_state(processed_set: set):
|
|
| 129 |
with open(PROCESS_STATE_FILE, "w") as f:
|
| 130 |
json.dump({"processed_rars": list(processed_set)}, f)
|
| 131 |
|
| 132 |
-
def download_rar_files(start_index:
|
| 133 |
"""Downloads a batch of RAR files from the source dataset"""
|
| 134 |
try:
|
| 135 |
all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
|
|
@@ -143,7 +143,7 @@ def download_rar_files(start_index: 4, chunk_size: int) -> tuple:
|
|
| 143 |
log_message("β
No more RAR files to download.")
|
| 144 |
return [], start_index
|
| 145 |
|
| 146 |
-
log_message(f"π₯ Downloading RAR files {start_index +
|
| 147 |
|
| 148 |
downloaded_paths = []
|
| 149 |
for file_path_in_repo in files_to_download_metadata:
|
|
@@ -190,7 +190,7 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
|
|
| 190 |
|
| 191 |
# Check if this folder content has already been uploaded
|
| 192 |
if folder_hash in uploaded_folders_set:
|
| 193 |
-
log_message(f"π Folder '{folder_name}' already uploaded (hash: {folder_hash[:8]}...), skipping.")
|
| 194 |
processed_rars_set.add(filename)
|
| 195 |
save_processed_files_state(processed_rars_set)
|
| 196 |
return True
|
|
@@ -251,7 +251,7 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
|
|
| 251 |
save_uploaded_folders(uploaded_folders_set)
|
| 252 |
processing_status["uploaded_folders"] = len(uploaded_folders_set)
|
| 253 |
|
| 254 |
-
log_message(f"π Folder '{folder_name}' locked in BG2 repo (hash: {folder_hash[:8]}...)")
|
| 255 |
|
| 256 |
return True
|
| 257 |
|
|
@@ -275,8 +275,8 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
|
|
| 275 |
except Exception as e:
|
| 276 |
log_message(f"β οΈ Could not clean up extraction folder {current_extract_folder}: {e}")
|
| 277 |
|
| 278 |
-
def continuous_processing():
|
| 279 |
-
"""Main processing loop that runs continuously"""
|
| 280 |
processing_status["is_running"] = True
|
| 281 |
log_message("π Starting continuous RAR processing...")
|
| 282 |
|
|
@@ -285,6 +285,12 @@ def continuous_processing():
|
|
| 285 |
uploaded_folders = load_uploaded_folders()
|
| 286 |
processing_status["uploaded_folders"] = len(uploaded_folders)
|
| 287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
while processing_status["is_running"]:
|
| 289 |
# 1. Download a batch of RAR files
|
| 290 |
download_start_index = load_download_state()
|
|
@@ -298,7 +304,7 @@ def continuous_processing():
|
|
| 298 |
unprocessed_rars = [rar for rar in all_local_rars if os.path.basename(rar) not in processed_rars]
|
| 299 |
|
| 300 |
if not unprocessed_rars:
|
| 301 |
-
log_message("β
|
| 302 |
break
|
| 303 |
else:
|
| 304 |
log_message(f"π Found {len(unprocessed_rars)} unprocessed local RAR files")
|
|
@@ -365,6 +371,10 @@ async def root():
|
|
| 365 |
.stop-button:hover { background: #d32f2f; }
|
| 366 |
.stats { display: flex; gap: 20px; margin: 20px 0; }
|
| 367 |
.stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
</style>
|
| 369 |
</head>
|
| 370 |
<body>
|
|
@@ -397,8 +407,14 @@ async def root():
|
|
| 397 |
</div>
|
| 398 |
</div>
|
| 399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
<div>
|
| 401 |
-
<button class="button" onclick="startProcessing()" id="start-btn">Start Processing</button>
|
| 402 |
<button class="button stop-button" onclick="stopProcessing()" id="stop-btn" disabled>Stop Processing</button>
|
| 403 |
<button class="button" onclick="refreshStatus()">Refresh Status</button>
|
| 404 |
</div>
|
|
@@ -410,47 +426,67 @@ async def root():
|
|
| 410 |
<script>
|
| 411 |
async function startProcessing() {
|
| 412 |
try {
|
| 413 |
-
const response = await fetch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
const result = await response.json();
|
| 415 |
alert(result.message);
|
| 416 |
refreshStatus();
|
| 417 |
} catch (error) {
|
| 418 |
-
alert(
|
| 419 |
}
|
| 420 |
}
|
| 421 |
|
| 422 |
async function stopProcessing() {
|
| 423 |
try {
|
| 424 |
-
const response = await fetch(
|
| 425 |
const result = await response.json();
|
| 426 |
alert(result.message);
|
| 427 |
refreshStatus();
|
| 428 |
} catch (error) {
|
| 429 |
-
alert(
|
| 430 |
}
|
| 431 |
}
|
| 432 |
|
| 433 |
async function refreshStatus() {
|
| 434 |
try {
|
| 435 |
-
const response = await fetch(
|
| 436 |
const status = await response.json();
|
| 437 |
|
| 438 |
-
document.getElementById(
|
| 439 |
-
document.getElementById(
|
| 440 |
-
document.getElementById(
|
| 441 |
-
document.getElementById(
|
| 442 |
-
document.getElementById(
|
| 443 |
-
document.getElementById(
|
| 444 |
-
document.getElementById(
|
| 445 |
|
| 446 |
-
document.getElementById(
|
| 447 |
-
document.getElementById(
|
| 448 |
|
| 449 |
-
const logsDiv = document.getElementById(
|
| 450 |
-
logsDiv.innerHTML = status.logs.join(
|
| 451 |
logsDiv.scrollTop = logsDiv.scrollHeight;
|
| 452 |
} catch (error) {
|
| 453 |
-
console.error(
|
| 454 |
}
|
| 455 |
}
|
| 456 |
|
|
@@ -479,6 +515,18 @@ async def start_processing(background_tasks: BackgroundTasks):
|
|
| 479 |
background_tasks.add_task(continuous_processing)
|
| 480 |
return {"message": "Processing started"}
|
| 481 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
@app.post("/stop")
|
| 483 |
async def stop_processing():
|
| 484 |
"""Stop the processing"""
|
|
@@ -502,3 +550,4 @@ async def get_uploaded_folders():
|
|
| 502 |
if __name__ == "__main__":
|
| 503 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 504 |
|
|
|
|
|
|
| 7 |
import threading
|
| 8 |
import time
|
| 9 |
import hashlib
|
| 10 |
+
from typing import Dict, List, Set, Optional
|
| 11 |
+
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
|
| 12 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
| 14 |
from huggingface_hub import HfApi, list_repo_files
|
|
|
|
| 101 |
if os.path.exists(DOWNLOAD_STATE_FILE):
|
| 102 |
try:
|
| 103 |
with open(DOWNLOAD_STATE_FILE, "r") as f:
|
| 104 |
+
return json.load(f).get("next_download_index", 0)
|
| 105 |
except json.JSONDecodeError:
|
| 106 |
log_message(f"β οΈ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
|
| 107 |
return 0
|
|
|
|
| 129 |
with open(PROCESS_STATE_FILE, "w") as f:
|
| 130 |
json.dump({"processed_rars": list(processed_set)}, f)
|
| 131 |
|
| 132 |
+
def download_rar_files(start_index: int, chunk_size: int) -> tuple:
|
| 133 |
"""Downloads a batch of RAR files from the source dataset"""
|
| 134 |
try:
|
| 135 |
all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
|
|
|
|
| 143 |
log_message("β
No more RAR files to download.")
|
| 144 |
return [], start_index
|
| 145 |
|
| 146 |
+
log_message(f"π₯ Downloading RAR files {start_index + 1} to {end_index} from {SOURCE_REPO_ID}")
|
| 147 |
|
| 148 |
downloaded_paths = []
|
| 149 |
for file_path_in_repo in files_to_download_metadata:
|
|
|
|
| 190 |
|
| 191 |
# Check if this folder content has already been uploaded
|
| 192 |
if folder_hash in uploaded_folders_set:
|
| 193 |
+
log_message(f"π Folder \'{folder_name}\' already uploaded (hash: {folder_hash[:8]}...), skipping.")
|
| 194 |
processed_rars_set.add(filename)
|
| 195 |
save_processed_files_state(processed_rars_set)
|
| 196 |
return True
|
|
|
|
| 251 |
save_uploaded_folders(uploaded_folders_set)
|
| 252 |
processing_status["uploaded_folders"] = len(uploaded_folders_set)
|
| 253 |
|
| 254 |
+
log_message(f"π Folder \'{folder_name}\' locked in BG2 repo (hash: {folder_hash[:8]}...)")
|
| 255 |
|
| 256 |
return True
|
| 257 |
|
|
|
|
| 275 |
except Exception as e:
|
| 276 |
log_message(f"β οΈ Could not clean up extraction folder {current_extract_folder}: {e}")
|
| 277 |
|
| 278 |
+
def continuous_processing(start_download_index: Optional[int] = None):
|
| 279 |
+
"""Main processing loop that runs continuously, with an optional starting download index"""
|
| 280 |
processing_status["is_running"] = True
|
| 281 |
log_message("π Starting continuous RAR processing...")
|
| 282 |
|
|
|
|
| 285 |
uploaded_folders = load_uploaded_folders()
|
| 286 |
processing_status["uploaded_folders"] = len(uploaded_folders)
|
| 287 |
|
| 288 |
+
if start_download_index is not None:
|
| 289 |
+
log_message(f"Starting download from index: {start_download_index}")
|
| 290 |
+
save_download_state(start_download_index) # Set download state to start from this index
|
| 291 |
+
else:
|
| 292 |
+
log_message("Starting download from saved state or beginning.")
|
| 293 |
+
|
| 294 |
while processing_status["is_running"]:
|
| 295 |
# 1. Download a batch of RAR files
|
| 296 |
download_start_index = load_download_state()
|
|
|
|
| 304 |
unprocessed_rars = [rar for rar in all_local_rars if os.path.basename(rar) not in processed_rars]
|
| 305 |
|
| 306 |
if not unprocessed_rars:
|
| 307 |
+
log_message("β
No more RAR files to download. Stopping...")
|
| 308 |
break
|
| 309 |
else:
|
| 310 |
log_message(f"π Found {len(unprocessed_rars)} unprocessed local RAR files")
|
|
|
|
| 371 |
.stop-button:hover { background: #d32f2f; }
|
| 372 |
.stats { display: flex; gap: 20px; margin: 20px 0; }
|
| 373 |
.stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
|
| 374 |
+
.start-form { margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
|
| 375 |
+
.start-form input[type="number"] { width: calc(100% - 120px); padding: 8px; margin-right: 10px; border: 1px solid #ccc; border-radius: 4px; }
|
| 376 |
+
.start-form button { padding: 8px 15px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; }
|
| 377 |
+
.start-form button:hover { background: #45a049; }
|
| 378 |
</style>
|
| 379 |
</head>
|
| 380 |
<body>
|
|
|
|
| 407 |
</div>
|
| 408 |
</div>
|
| 409 |
|
| 410 |
+
<div class="start-form">
|
| 411 |
+
<h3>Start Processing from Specific Download Index</h3>
|
| 412 |
+
<input type="number" id="start-index-input" placeholder="Enter start index (e.g., 0)" value="0">
|
| 413 |
+
<button onclick="startProcessingWithIndex()">Start from Index</button>
|
| 414 |
+
</div>
|
| 415 |
+
|
| 416 |
<div>
|
| 417 |
+
<button class="button" onclick="startProcessing()" id="start-btn">Start Processing (from last saved index)</button>
|
| 418 |
<button class="button stop-button" onclick="stopProcessing()" id="stop-btn" disabled>Stop Processing</button>
|
| 419 |
<button class="button" onclick="refreshStatus()">Refresh Status</button>
|
| 420 |
</div>
|
|
|
|
| 426 |
<script>
|
| 427 |
async function startProcessing() {
|
| 428 |
try {
|
| 429 |
+
const response = await fetch(\"/start\", { method: \"POST\" });
|
| 430 |
+
const result = await response.json();
|
| 431 |
+
alert(result.message);
|
| 432 |
+
refreshStatus();
|
| 433 |
+
} catch (error) {
|
| 434 |
+
alert(\"Error starting processing: \" + error.message);
|
| 435 |
+
}
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
async function startProcessingWithIndex() {
|
| 439 |
+
const index = document.getElementById(\"start-index-input\").value;
|
| 440 |
+
if (index === "" || isNaN(index)) {
|
| 441 |
+
alert(\"Please enter a valid number for the start index.\");
|
| 442 |
+
return;
|
| 443 |
+
}
|
| 444 |
+
try {
|
| 445 |
+
const response = await fetch(\"/start_from_index\", {
|
| 446 |
+
method: \"POST\",
|
| 447 |
+
headers: { \"Content-Type\": \"application/x-www-form-urlencoded\" },
|
| 448 |
+
body: `start_index=${parseInt(index)}`
|
| 449 |
+
});
|
| 450 |
const result = await response.json();
|
| 451 |
alert(result.message);
|
| 452 |
refreshStatus();
|
| 453 |
} catch (error) {
|
| 454 |
+
alert(\"Error starting processing from index: \" + error.message);
|
| 455 |
}
|
| 456 |
}
|
| 457 |
|
| 458 |
async function stopProcessing() {
|
| 459 |
try {
|
| 460 |
+
const response = await fetch(\"/stop\", { method: \"POST\" });
|
| 461 |
const result = await response.json();
|
| 462 |
alert(result.message);
|
| 463 |
refreshStatus();
|
| 464 |
} catch (error) {
|
| 465 |
+
alert(\"Error stopping processing: \" + error.message);
|
| 466 |
}
|
| 467 |
}
|
| 468 |
|
| 469 |
async function refreshStatus() {
|
| 470 |
try {
|
| 471 |
+
const response = await fetch(\"/status\");
|
| 472 |
const status = await response.json();
|
| 473 |
|
| 474 |
+
document.getElementById(\"status\").textContent = status.is_running ? \"Running\" : \"Stopped\";
|
| 475 |
+
document.getElementById(\"current-file\").textContent = status.current_file || \"None\";
|
| 476 |
+
document.getElementById(\"last-update\").textContent = status.last_update || \"Never\";
|
| 477 |
+
document.getElementById(\"total-files\").textContent = status.total_files;
|
| 478 |
+
document.getElementById(\"processed-files\").textContent = status.processed_files;
|
| 479 |
+
document.getElementById(\"uploaded-folders\").textContent = status.uploaded_folders;
|
| 480 |
+
document.getElementById(\"failed-files\").textContent = status.failed_files;
|
| 481 |
|
| 482 |
+
document.getElementById(\"start-btn\").disabled = status.is_running;
|
| 483 |
+
document.getElementById(\"stop-btn\").disabled = !status.is_running;
|
| 484 |
|
| 485 |
+
const logsDiv = document.getElementById(\"logs\");
|
| 486 |
+
logsDiv.innerHTML = status.logs.join(\"<br>\");
|
| 487 |
logsDiv.scrollTop = logsDiv.scrollHeight;
|
| 488 |
} catch (error) {
|
| 489 |
+
console.error(\"Error refreshing status:\", error);
|
| 490 |
}
|
| 491 |
}
|
| 492 |
|
|
|
|
| 515 |
background_tasks.add_task(continuous_processing)
|
| 516 |
return {"message": "Processing started"}
|
| 517 |
|
| 518 |
+
@app.post("/start_from_index")
|
| 519 |
+
async def start_processing_from_index(background_tasks: BackgroundTasks, start_index: int = Form(...)):
|
| 520 |
+
"""Start the processing from a specific download index in background"""
|
| 521 |
+
if processing_status["is_running"]:
|
| 522 |
+
return {"message": "Processing is already running"}
|
| 523 |
+
|
| 524 |
+
if start_index < 0:
|
| 525 |
+
return {"message": "Start index cannot be negative."}
|
| 526 |
+
|
| 527 |
+
background_tasks.add_task(continuous_processing, start_download_index=start_index)
|
| 528 |
+
return {"message": f"Processing started from download index: {start_index}"}
|
| 529 |
+
|
| 530 |
@app.post("/stop")
|
| 531 |
async def stop_processing():
|
| 532 |
"""Stop the processing"""
|
|
|
|
| 550 |
if __name__ == "__main__":
|
| 551 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 552 |
|
| 553 |
+
|