Update api_server.py
Browse files- api_server.py +67 -10
api_server.py
CHANGED
|
@@ -3,7 +3,6 @@ from fastapi.responses import JSONResponse
|
|
| 3 |
import asyncio
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
-
import json
|
| 7 |
from typing import Optional, Dict, Any, List
|
| 8 |
from enum import Enum
|
| 9 |
from pydantic import BaseModel
|
|
@@ -63,6 +62,41 @@ class DownloadStatus(BaseModel):
|
|
| 63 |
downloading: Optional[str] = None
|
| 64 |
error: Optional[str] = None
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def download_state_from_hf(token: str) -> DownloadState:
|
| 67 |
"""Try to download the state file from the HF dataset. Returns state dict or creates new."""
|
| 68 |
if not token:
|
|
@@ -79,8 +113,13 @@ def download_state_from_hf(token: str) -> DownloadState:
|
|
| 79 |
data = json.load(f)
|
| 80 |
return DownloadState(**data)
|
| 81 |
except Exception as e:
|
| 82 |
-
console.print(f"[yellow]No existing state found, creating new:[/yellow] {str(e)}")
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
async def clean_downloaded_file(file_path: str):
|
| 86 |
"""Remove local file after successful upload"""
|
|
@@ -361,13 +400,31 @@ async def run_download(channel: Optional[str], message_limit: Optional[int], tas
|
|
| 361 |
async def start_initial_download():
|
| 362 |
"""Start the download process automatically when the server starts"""
|
| 363 |
task_id = "initial_download"
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
@app.post("/download", response_model=Dict[str, str])
|
| 373 |
async def start_download(request: DownloadRequest, background_tasks: BackgroundTasks):
|
|
|
|
| 3 |
import asyncio
|
| 4 |
import os
|
| 5 |
import time
|
|
|
|
| 6 |
from typing import Optional, Dict, Any, List
|
| 7 |
from enum import Enum
|
| 8 |
from pydantic import BaseModel
|
|
|
|
| 62 |
downloading: Optional[str] = None
|
| 63 |
error: Optional[str] = None
|
| 64 |
|
| 65 |
+
def create_hf_dataset(token: str) -> bool:
    """Create the Hugging Face dataset repo and seed it with an empty state file.

    The repository is created with ``exist_ok=True``, so calling this for a
    repository that already exists is not an error. A fresh ``DownloadState``
    is then written to ``download_channel.STATE_FILE`` locally and handed to
    ``download_channel.upload_file_to_hf``.

    Args:
        token: Hugging Face access token, passed to ``create_repo`` and to
            the upload helper.

    Returns:
        ``False`` if ``huggingface_hub`` cannot be imported or if any step
        raises; ``True`` otherwise. Note that a falsy result from the upload
        helper does not turn the return value into ``False``.
    """
    # Imported locally because the file-level ``import json`` was removed in
    # this change; relying on the global name would raise NameError, which the
    # broad handler below would misreport as a dataset-creation failure.
    import json

    try:
        # Only ``create_repo`` is needed. Importing extra, unused names here
        # risks a spurious ImportError if they are not exported at top level.
        from huggingface_hub import create_repo
    except ImportError:
        console.print("[red]huggingface_hub not properly installed[/red]")
        return False

    try:
        # Create the dataset repository (no-op if it already exists).
        create_repo(
            repo_id=download_channel.HF_REPO_ID,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        console.print(f"[green]Created or verified dataset:[/green] {download_channel.HF_REPO_ID}")

        # Write the initial (empty) state file locally.
        initial_state = DownloadState(channel=download_channel.CHANNEL)
        with open(download_channel.STATE_FILE, "w", encoding="utf-8") as f:
            json.dump(initial_state.dict(), f, indent=2, ensure_ascii=False)

        # Upload the initial state; the same path is used locally and remotely.
        # NOTE(review): this replaces any state file already in the repo if the
        # upload helper overwrites - confirm callers only reach here when no
        # remote state exists.
        if download_channel.upload_file_to_hf(
            download_channel.STATE_FILE,
            download_channel.STATE_FILE,
            token,
        ):
            console.print("[green]Initialized dataset with empty state file[/green]")
    except Exception as e:
        console.print(f"[red]Failed to create dataset:[/red] {str(e)}")
        return False

    return True
|
| 99 |
+
|
| 100 |
def download_state_from_hf(token: str) -> DownloadState:
|
| 101 |
"""Try to download the state file from the HF dataset. Returns state dict or creates new."""
|
| 102 |
if not token:
|
|
|
|
| 113 |
data = json.load(f)
|
| 114 |
return DownloadState(**data)
|
| 115 |
except Exception as e:
|
| 116 |
+
console.print(f"[yellow]No existing state found, creating new dataset:[/yellow] {str(e)}")
|
| 117 |
+
if create_hf_dataset(token):
|
| 118 |
+
console.print("[green]Dataset created successfully![/green]")
|
| 119 |
+
return DownloadState(channel=download_channel.CHANNEL)
|
| 120 |
+
else:
|
| 121 |
+
console.print("[red]Failed to create dataset, using local state only[/red]")
|
| 122 |
+
return DownloadState(channel=download_channel.CHANNEL)
|
| 123 |
|
| 124 |
async def clean_downloaded_file(file_path: str):
|
| 125 |
"""Remove local file after successful upload"""
|
|
|
|
| 400 |
# Strong references to fire-and-forget tasks started by start_initial_download.
# The event loop only holds weak references to tasks, so a task whose result
# is discarded may be garbage-collected before it finishes.
_initial_download_tasks = set()


async def start_initial_download():
    """Start the download process automatically when the server starts.

    Does nothing (beyond printing an error) when ``download_channel.HF_TOKEN``
    is not set. Otherwise loads the state via ``download_state_from_hf``,
    makes sure the local output directory exists, and schedules
    ``run_download`` as a background task with default channel and limit.
    Any exception raised during this setup is printed, not propagated.
    """
    task_id = "initial_download"

    # Verify HF token is set
    if not download_channel.HF_TOKEN:
        console.print("[red]ERROR: HF_TOKEN not set. Please set your Hugging Face token.[/red]")
        return

    # Create dataset structure if needed
    console.print("[yellow]Checking Hugging Face dataset...[/yellow]")
    try:
        state = download_state_from_hf(download_channel.HF_TOKEN)
        console.print(f"[green]Using channel:[/green] {state.channel}")

        # Make sure the local output directory exists before downloading.
        os.makedirs(download_channel.OUTPUT_DIR, exist_ok=True)

        # Start the download process with default settings
        task = asyncio.create_task(run_download(
            channel=None,  # Use default from download_channel.py
            message_limit=None,  # Use default
            task_id=task_id,
        ))
        # Keep the task alive until it completes, then drop the reference.
        _initial_download_tasks.add(task)
        task.add_done_callback(_initial_download_tasks.discard)
        console.print(f"[green]Started initial download task:[/green] {task_id}")

    except Exception as e:
        console.print(f"[red]Failed to initialize:[/red] {str(e)}")
|
| 428 |
|
| 429 |
@app.post("/download", response_model=Dict[str, str])
|
| 430 |
async def start_download(request: DownloadRequest, background_tasks: BackgroundTasks):
|