"""Download course images through a middleware service that hands out locks."""

import asyncio
import json
import os
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional

import aiofiles
import httpx


class MiddlewareClient:
    """Async client that claims courses/images from the middleware and downloads them.

    Each instance identifies itself to the server with a random
    ``requester_id`` and writes files below ``downloads/`` in the current
    working directory.
    """

    def __init__(self, base_url: str = "https://fred808-vssee.hf.space"):
        """Create the HTTP client, the download directory and the stats record.

        Args:
            base_url: Root URL of the middleware service; a trailing ``/`` is
                stripped so endpoint paths can be appended directly.
        """
        self.base_url = base_url.rstrip('/')
        self.client = httpx.AsyncClient(timeout=30.0)  # 30 second timeout
        self.requester_id = str(uuid.uuid4())  # Unique ID for this client
        self.download_dir = Path("downloads")
        self.download_dir.mkdir(exist_ok=True)

        # Keep track of our current locks
        self.current_course: Optional[str] = None
        self.current_image: Optional[str] = None

        # Statistics
        self.stats = {
            "downloads_started": 0,
            "downloads_completed": 0,
            "bytes_downloaded": 0,
            "start_time": datetime.now().isoformat(),
        }

    async def close(self):
        """Close the HTTP client."""
        await self.client.aclose()

    async def get_next_course(self) -> Optional[Dict[str, Any]]:
        """Ask the middleware for the next available course.

        Returns:
            The decoded JSON payload (which must contain ``"course_id"``), or
            ``None`` when the server answers 404.

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures (connect, timeout, ...).
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/course",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
        # Only HTTPStatusError carries a ``response``; transport errors do
        # not, so they are left to propagate unchanged.
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                print("No more courses available")
                return None
            raise

        course_data = response.json()
        self.current_course = course_data["course_id"]
        return course_data

    async def get_next_image(self, course_id: str) -> Optional[Dict[str, Any]]:
        """Ask the middleware for the next available image of a course.

        Args:
            course_id: Identifier of the course to pull an image from.

        Returns:
            The decoded JSON payload (which must contain ``"file_id"``), or
            ``None`` when the server answers 404.

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures (connect, timeout, ...).
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/image/{course_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
        # See get_next_course: transport errors have no ``response``.
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                print(f"No more images available in course {course_id}")
                return None
            raise

        image_data = response.json()
        self.current_image = image_data["file_id"]
        return image_data

    async def release_course(self, course_id: str):
        """Release the lock on a course.

        Best effort: HTTP errors are printed, not raised, and in that case
        ``current_course`` is left untouched.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/course/{course_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
            self.current_course = None
        except httpx.HTTPError as e:
            print(f"Error releasing course {course_id}: {e}")

    async def release_image(self, course_id: str, file_id: str):
        """Release the lock on an image.

        Best effort: HTTP errors are printed, not raised, and in that case
        ``current_image`` is left untouched.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/image/{course_id}/{file_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
            self.current_image = None
        except httpx.HTTPError as e:
            print(f"Error releasing image {file_id}: {e}")

    async def download_file(self, course: str, file_id: str) -> bool:
        """Stream one file to ``<download_dir>/<course>/<file_id>``.

        Args:
            course: Course identifier, used as the sub-directory name and as
                the ``course`` query parameter.
            file_id: File identifier, used as the file name and as the
                ``file`` query parameter.

        Returns:
            ``True`` when the file was written completely, ``False`` on any
            failure (the error is printed).
        """
        save_path = self.download_dir / course / file_id

        # ``course`` and ``file_id`` come from the server; refuse anything
        # that would resolve to a location outside the download directory.
        root = self.download_dir.resolve()
        if root not in save_path.resolve().parents:
            print(
                f"Error downloading {file_id}: "
                f"refusing to write outside {self.download_dir}"
            )
            return False

        try:
            save_path.parent.mkdir(parents=True, exist_ok=True)

            # Stream the body so large files are never held in memory whole.
            async with self.client.stream(
                "GET",
                f"{self.base_url}/download",
                params={"course": course, "file": file_id},
            ) as response:
                response.raise_for_status()
                self.stats["downloads_started"] += 1

                async with aiofiles.open(save_path, 'wb') as f:
                    async for chunk in response.aiter_bytes():
                        await f.write(chunk)
                        self.stats["bytes_downloaded"] += len(chunk)

            self.stats["downloads_completed"] += 1
            return True

        # Deliberately broad: one failed file must not abort the whole run.
        except Exception as e:
            print(f"Error downloading {file_id}: {e}")
            return False

    def save_stats(self):
        """Write the statistics, stamped with ``end_time``, to ``download_stats.json``."""
        self.stats["end_time"] = datetime.now().isoformat()
        stats_file = self.download_dir / "download_stats.json"
        with open(stats_file, 'w', encoding="utf-8") as f:
            json.dump(self.stats, f, indent=2)

    async def download_all(
        self,
        max_courses: Optional[int] = None,
        max_files: Optional[int] = None,
    ):
        """Download all available files with optional limits.

        Args:
            max_courses: Maximum number of courses to claim; ``None`` means
                no limit and ``0`` means claim none.
            max_files: Maximum number of files to download in total, across
                all courses; ``None`` means no limit and ``0`` means
                download none.

        The statistics are saved and the HTTP client is closed when this
        method returns or raises, so the instance cannot be reused afterwards.
        """
        try:
            courses_processed = 0
            files_downloaded = 0

            while True:
                # Check both limits before claiming a course, so no further
                # course is locked once a limit has been reached.
                if max_files is not None and files_downloaded >= max_files:
                    print(f"Reached maximum files limit ({max_files})")
                    break
                if max_courses is not None and courses_processed >= max_courses:
                    print(f"Reached maximum courses limit ({max_courses})")
                    break

                course_data = await self.get_next_course()
                if not course_data:
                    print("No more courses available")
                    break

                course_id = course_data["course_id"]
                print(f"\nProcessing course: {course_id}")
                courses_processed += 1
                course_files = 0

                try:
                    while True:
                        if max_files is not None and files_downloaded >= max_files:
                            # Reported by the outer loop on its next pass.
                            break

                        image_data = await self.get_next_image(course_id)
                        if not image_data:
                            break

                        file_id = image_data["file_id"]
                        print(f"Downloading: {file_id}")
                        try:
                            if await self.download_file(course_id, file_id):
                                files_downloaded += 1
                                course_files += 1
                                print(f"Successfully downloaded: {file_id}")
                        finally:
                            # Release the image whether or not it downloaded.
                            await self.release_image(course_id, file_id)

                    print(
                        f"Completed course {course_id} - "
                        f"Downloaded {course_files} files"
                    )
                finally:
                    # Release the course even if an error escapes mid-course.
                    await self.release_course(course_id)

            print("\nDownload complete!")
            print(f"Processed {courses_processed} courses")
            print(f"Downloaded {files_downloaded} files")
            print(f"Total bytes: {self.stats['bytes_downloaded']:,}")

        finally:
            self.save_stats()
            await self.close()


async def main():
    """Run an example download: at most 2 courses and 10 files in total."""
    # The constructor creates the downloads directory.
    client = MiddlewareClient()
    try:
        await client.download_all(max_courses=2, max_files=10)
    finally:
        await client.close()


if __name__ == "__main__":
    # Ctrl-C reaches the coroutine as a cancellation; KeyboardInterrupt is
    # raised from asyncio.run, so it is handled here.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nDownload interrupted by user")