Fred808 committed on
Commit
f77e180
·
verified ·
1 Parent(s): 55f649b

Update api_server.py

Browse files
Files changed (1) hide show
  1. api_server.py +67 -10
api_server.py CHANGED
@@ -3,7 +3,6 @@ from fastapi.responses import JSONResponse
3
  import asyncio
4
  import os
5
  import time
6
- import json
7
  from typing import Optional, Dict, Any, List
8
  from enum import Enum
9
  from pydantic import BaseModel
@@ -63,6 +62,41 @@ class DownloadStatus(BaseModel):
63
  downloading: Optional[str] = None
64
  error: Optional[str] = None
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def download_state_from_hf(token: str) -> DownloadState:
67
  """Try to download the state file from the HF dataset. Returns state dict or creates new."""
68
  if not token:
@@ -79,8 +113,13 @@ def download_state_from_hf(token: str) -> DownloadState:
79
  data = json.load(f)
80
  return DownloadState(**data)
81
  except Exception as e:
82
- console.print(f"[yellow]No existing state found, creating new:[/yellow] {str(e)}")
83
- return DownloadState(channel=download_channel.CHANNEL)
 
 
 
 
 
84
 
85
  async def clean_downloaded_file(file_path: str):
86
  """Remove local file after successful upload"""
@@ -361,13 +400,31 @@ async def run_download(channel: Optional[str], message_limit: Optional[int], tas
361
  async def start_initial_download():
362
  """Start the download process automatically when the server starts"""
363
  task_id = "initial_download"
364
- # Start the download process with default settings
365
- asyncio.create_task(run_download(
366
- channel=None, # Use default from download_channel.py
367
- message_limit=None, # Use default
368
- task_id=task_id
369
- ))
370
- console.print(f"[green]Started initial download task:[/green] {task_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
  @app.post("/download", response_model=Dict[str, str])
373
  async def start_download(request: DownloadRequest, background_tasks: BackgroundTasks):
 
3
  import asyncio
4
  import os
5
  import time
 
6
  from typing import Optional, Dict, Any, List
7
  from enum import Enum
8
  from pydantic import BaseModel
 
62
  downloading: Optional[str] = None
63
  error: Optional[str] = None
64
 
65
def create_hf_dataset(token: str) -> bool:
    """Create the Hugging Face dataset repo and seed it with an empty state file.

    The repository ``download_channel.HF_REPO_ID`` is created (or verified,
    via ``exist_ok=True``) as a dataset repo, a fresh ``DownloadState`` for
    ``download_channel.CHANNEL`` is written to ``download_channel.STATE_FILE``
    and that file is uploaded with ``download_channel.upload_file_to_hf``.

    Args:
        token: Hugging Face access token passed to ``create_repo`` and to the
            upload helper.

    Returns:
        True only when the repo was created/verified AND the initial state
        file upload reported success; False if ``huggingface_hub`` cannot be
        imported, if any step raises, or if the upload helper returns a falsy
        result.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import json`` being present in the file.
    import json

    # Only the import is guarded by ImportError: importing an unused or
    # unavailable name here would otherwise be misreported as
    # "huggingface_hub not properly installed".
    try:
        from huggingface_hub import create_repo
    except ImportError:
        console.print("[red]huggingface_hub not properly installed[/red]")
        return False

    try:
        # exist_ok=True makes this a no-op when the dataset already exists.
        create_repo(
            repo_id=download_channel.HF_REPO_ID,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        console.print(f"[green]Created or verified dataset:[/green] {download_channel.HF_REPO_ID}")

        # Write the initial (empty) state locally so it can be uploaded.
        initial_state = DownloadState(channel=download_channel.CHANNEL)
        with open(download_channel.STATE_FILE, "w", encoding="utf-8") as f:
            json.dump(initial_state.dict(), f, indent=2, ensure_ascii=False)

        # Same path is used locally and inside the dataset repo.
        uploaded = download_channel.upload_file_to_hf(
            download_channel.STATE_FILE,
            download_channel.STATE_FILE,
            token,
        )
    except Exception as e:
        console.print(f"[red]Failed to create dataset:[/red] {str(e)}")
        return False

    if not uploaded:
        # Previously this case fell through and reported success.
        console.print("[red]Failed to upload initial state file to dataset[/red]")
        return False

    console.print("[green]Initialized dataset with empty state file[/green]")
    return True
99
+
100
  def download_state_from_hf(token: str) -> DownloadState:
101
  """Try to download the state file from the HF dataset. Returns state dict or creates new."""
102
  if not token:
 
113
  data = json.load(f)
114
  return DownloadState(**data)
115
  except Exception as e:
116
+ console.print(f"[yellow]No existing state found, creating new dataset:[/yellow] {str(e)}")
117
+ if create_hf_dataset(token):
118
+ console.print("[green]Dataset created successfully![/green]")
119
+ return DownloadState(channel=download_channel.CHANNEL)
120
+ else:
121
+ console.print("[red]Failed to create dataset, using local state only[/red]")
122
+ return DownloadState(channel=download_channel.CHANNEL)
123
 
124
  async def clean_downloaded_file(file_path: str):
125
  """Remove local file after successful upload"""
 
400
  async def start_initial_download():
401
  """Start the download process automatically when the server starts"""
402
  task_id = "initial_download"
403
+
404
+ # Verify HF token is set
405
+ if not download_channel.HF_TOKEN:
406
+ console.print("[red]ERROR: HF_TOKEN not set. Please set your Hugging Face token.[/red]")
407
+ return
408
+
409
+ # Create dataset structure if needed
410
+ console.print("[yellow]Checking Hugging Face dataset...[/yellow]")
411
+ try:
412
+ state = download_state_from_hf(download_channel.HF_TOKEN)
413
+ console.print(f"[green]Using channel:[/green] {state.channel}")
414
+
415
+ # Create files directory in dataset if it doesn't exist
416
+ os.makedirs(download_channel.OUTPUT_DIR, exist_ok=True)
417
+
418
+ # Start the download process with default settings
419
+ asyncio.create_task(run_download(
420
+ channel=None, # Use default from download_channel.py
421
+ message_limit=None, # Use default
422
+ task_id=task_id
423
+ ))
424
+ console.print(f"[green]Started initial download task:[/green] {task_id}")
425
+
426
+ except Exception as e:
427
+ console.print(f"[red]Failed to initialize:[/red] {str(e)}")
428
 
429
  @app.post("/download", response_model=Dict[str, str])
430
  async def start_download(request: DownloadRequest, background_tasks: BackgroundTasks):