|
|
import httpx |
|
|
import asyncio |
|
|
import os |
|
|
import uuid |
|
|
from pathlib import Path |
|
|
from typing import Optional, Dict, Any |
|
|
import json |
|
|
from datetime import datetime |
|
|
import aiofiles |
|
|
|
|
|
class MiddlewareClient:
    """Async client that pulls courses and their image files via the middleware API.

    Every middleware request carries a ``requester_id`` (a random UUID generated
    once per client instance). Downloaded files are written below
    ``self.download_dir`` and simple counters are kept in ``self.stats``.
    """

    def __init__(self, base_url: str = "https://fred808-vssee.hf.space"):
        """Create the HTTP client, the requester id and the download directory.

        Args:
            base_url: Root URL of the service; a trailing slash is stripped.
        """
        self.base_url = base_url.rstrip('/')
        self.client = httpx.AsyncClient(timeout=30.0)
        self.requester_id = str(uuid.uuid4())
        self.download_dir = Path("downloads")
        self.download_dir.mkdir(exist_ok=True)

        # Identifiers of the course / image most recently handed out to us
        self.current_course: Optional[str] = None
        self.current_image: Optional[str] = None

        # Running counters; written to disk by save_stats()
        self.stats: Dict[str, Any] = {
            "downloads_started": 0,
            "downloads_completed": 0,
            "bytes_downloaded": 0,
            "start_time": datetime.now().isoformat()
        }

    async def close(self):
        """Close the HTTP client"""
        await self.client.aclose()

    async def _request_next(self, path: str) -> Optional[Dict[str, Any]]:
        """GET ``base_url + path`` with our requester id.

        Returns:
            The decoded JSON body, or None when the server answers HTTP 404.

        Raises:
            httpx.HTTPStatusError: for any other error status.
            httpx.RequestError: for transport-level failures.
        """
        response = await self.client.get(
            f"{self.base_url}{path}",
            params={"requester_id": self.requester_id}
        )
        # Inspect the status directly instead of reading `.response` off a
        # caught exception: transport errors do not carry a response object.
        if response.status_code == 404:
            return None
        response.raise_for_status()
        return response.json()

    async def get_next_course(self) -> Optional[Dict[str, Any]]:
        """Get next available course

        Returns:
            The course payload (its "course_id" is remembered in
            ``self.current_course``), or None when the server answers 404.
        """
        course_data = await self._request_next("/middleware/next/course")
        if course_data is None:
            print("No more courses available")
            return None
        self.current_course = course_data["course_id"]
        return course_data

    async def get_next_image(self, course_id: str) -> Optional[Dict[str, Any]]:
        """Get next available image from a course

        Returns:
            The image payload (its "file_id" is remembered in
            ``self.current_image``), or None when the server answers 404.
        """
        image_data = await self._request_next(f"/middleware/next/image/{course_id}")
        if image_data is None:
            print(f"No more images available in course {course_id}")
            return None
        self.current_image = image_data["file_id"]
        return image_data

    async def release_course(self, course_id: str):
        """Release lock on a course

        Best effort: HTTP errors are printed, not raised. ``current_course`` is
        cleared only when the release request succeeded.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/course/{course_id}",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
        except httpx.HTTPError as e:
            print(f"Error releasing course {course_id}: {e}")
        else:
            self.current_course = None

    async def release_image(self, course_id: str, file_id: str):
        """Release lock on an image

        Best effort: HTTP errors are printed, not raised. ``current_image`` is
        cleared only when the release request succeeded.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/image/{course_id}/{file_id}",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
        except httpx.HTTPError as e:
            print(f"Error releasing image {file_id}: {e}")
        else:
            self.current_image = None

    async def download_file(self, course: str, file_id: str) -> bool:
        """Download one file to ``download_dir / course / file_id``.

        The body is streamed to a ``.part`` file next to the target and moved
        into place only after the transfer finished, so a failed download does
        not leave a truncated file under the final name.

        Returns:
            True when the file was fully written, False on any failure
            (the error is printed).
        """
        save_path = self.download_dir / course / file_id

        # The names come from the server; refuse anything that would resolve
        # to a location outside the download directory.
        try:
            save_path.resolve().relative_to(self.download_dir.resolve())
        except ValueError:
            print(f"Error downloading {file_id}: path escapes {self.download_dir}")
            return False

        save_path.parent.mkdir(parents=True, exist_ok=True)
        partial_path = save_path.parent / (save_path.name + ".part")

        try:
            # stream() avoids holding the whole response body in memory
            async with self.client.stream(
                "GET",
                f"{self.base_url}/download",
                params={"course": course, "file": file_id}
            ) as response:
                response.raise_for_status()

                self.stats["downloads_started"] += 1

                async with aiofiles.open(partial_path, 'wb') as f:
                    async for chunk in response.aiter_bytes():
                        await f.write(chunk)
                        self.stats["bytes_downloaded"] += len(chunk)

            partial_path.replace(save_path)
            self.stats["downloads_completed"] += 1
            return True

        except Exception as e:
            # Deliberate best-effort boundary: report and let the caller go on
            print(f"Error downloading {file_id}: {e}")
            try:
                partial_path.unlink()
            except OSError:
                pass  # nothing was written, or it cannot be removed
            return False

    def save_stats(self):
        """Save download statistics

        Adds an "end_time" timestamp and writes ``self.stats`` as JSON to
        ``download_dir / "download_stats.json"``.
        """
        self.stats["end_time"] = datetime.now().isoformat()
        stats_file = self.download_dir / "download_stats.json"
        with open(stats_file, 'w', encoding="utf-8") as f:
            json.dump(self.stats, f, indent=2)

    async def download_all(self, max_courses: Optional[int] = None, max_files: Optional[int] = None):
        """Download all available files with optional limits

        Args:
            max_courses: Stop after this many courses; None or 0 means no limit.
            max_files: Stop after this many successful downloads in total;
                None or 0 means no limit.

        Statistics are saved and the HTTP client is closed when this returns
        or raises.
        """
        try:
            courses_processed = 0
            files_downloaded = 0
            files_limit_reached = False

            while True:
                if max_courses and courses_processed >= max_courses:
                    print(f"Reached maximum courses limit ({max_courses})")
                    break

                course_data = await self.get_next_course()
                if not course_data:
                    print("No more courses available")
                    break

                course_id = course_data["course_id"]
                print(f"\nProcessing course: {course_id}")
                courses_processed += 1

                course_files = 0
                try:
                    while True:
                        if max_files and files_downloaded >= max_files:
                            print(f"Reached maximum files limit ({max_files})")
                            files_limit_reached = True
                            break

                        image_data = await self.get_next_image(course_id)
                        if not image_data:
                            break

                        file_id = image_data["file_id"]
                        print(f"Downloading: {file_id}")

                        try:
                            if await self.download_file(course_id, file_id):
                                files_downloaded += 1
                                course_files += 1
                                print(f"Successfully downloaded: {file_id}")
                        finally:
                            # Release the image even if the download raised
                            await self.release_image(course_id, file_id)

                    print(f"Completed course {course_id} - Downloaded {course_files} files")
                finally:
                    # Release the course even if an error interrupted it
                    await self.release_course(course_id)

                # Once the file limit is hit there is nothing left to do;
                # do not keep acquiring further courses only to release them.
                if files_limit_reached:
                    break

            print("\nDownload complete!")
            print(f"Processed {courses_processed} courses")
            print(f"Downloaded {files_downloaded} files")
            print(f"Total bytes: {self.stats['bytes_downloaded']:,}")

        finally:
            self.save_stats()
            await self.close()
|
|
|
|
|
async def main():
    """Run a small demo download: at most 2 courses and 10 files.

    Makes sure the "downloads" directory exists, builds a client with the
    default base URL, and always closes the client on the way out.
    """
    downloads_root = Path("downloads")
    downloads_root.mkdir(exist_ok=True)

    downloader = MiddlewareClient()
    limits = {"max_courses": 2, "max_files": 10}

    try:
        try:
            await downloader.download_all(**limits)
        except KeyboardInterrupt:
            print("\nDownload interrupted by user")
    finally:
        await downloader.close()
|
|
|
|
|
if __name__ == "__main__":
    # asyncio.run() lets KeyboardInterrupt propagate to its caller, so Ctrl-C
    # is handled here to end with a short message instead of a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nDownload interrupted by user")
        # 130 is the conventional exit status for termination by SIGINT
        raise SystemExit(130)