# appt / app.py
# Fred808 - "Update app.py" (commit 252c8e0, verified)
import httpx
import asyncio
import os
import uuid
from pathlib import Path
from typing import Optional, Dict, Any
import json
from datetime import datetime
import aiofiles
class MiddlewareClient:
    """Async client for a lock-based course/image download middleware.

    The client asks the middleware for the next available course, then for
    the next available image inside that course, downloads the image to
    ``downloads/<course>/<file_id>`` and finally releases the locks it was
    given.  Every request carries ``requester_id`` so the server can tell
    clients apart.

    Attributes:
        base_url: Server root URL without a trailing slash.
        client: Shared ``httpx.AsyncClient`` (30 second timeout).
        requester_id: Random UUID string identifying this client instance.
        download_dir: Directory that receives downloaded files and the
            ``download_stats.json`` report.
        current_course: ID of the course most recently handed to us, or
            ``None`` once it has been released successfully.
        current_image: ID of the image most recently handed to us, or
            ``None`` once it has been released successfully.
        stats: Running counters plus start/end timestamps (ISO strings).
    """

    def __init__(self, base_url: str = "https://fred808-vssee.hf.space"):
        """Create the HTTP client and make sure the download directory exists.

        Args:
            base_url: Root URL of the middleware server; a trailing ``/`` is
                stripped so endpoint paths can be appended directly.
        """
        self.base_url = base_url.rstrip('/')
        self.client = httpx.AsyncClient(timeout=30.0)  # 30 second timeout
        self.requester_id = str(uuid.uuid4())  # Unique ID for this client
        self.download_dir = Path("downloads")
        self.download_dir.mkdir(exist_ok=True)

        # Keep track of our current locks
        self.current_course: Optional[str] = None
        self.current_image: Optional[str] = None

        # Statistics
        self.stats = {
            "downloads_started": 0,
            "downloads_completed": 0,
            "bytes_downloaded": 0,
            "start_time": datetime.now().isoformat(),
        }

    async def close(self):
        """Close the HTTP client."""
        await self.client.aclose()

    async def get_next_course(self) -> Optional[Dict[str, Any]]:
        """Ask the server for the next available course.

        Returns:
            The decoded JSON payload (it must contain ``"course_id"``), or
            ``None`` when the server answers 404, i.e. nothing is left.

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures (connect, timeout...).
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/course",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            # Only status errors carry ``.response``; transport errors are
            # deliberately not caught here so they surface unchanged instead
            # of failing with an AttributeError inside this handler.
            if e.response.status_code == 404:
                print("No more courses available")
                return None
            raise

        course_data = response.json()
        self.current_course = course_data["course_id"]
        return course_data

    async def get_next_image(self, course_id: str) -> Optional[Dict[str, Any]]:
        """Ask the server for the next available image of ``course_id``.

        Returns:
            The decoded JSON payload (it must contain ``"file_id"``), or
            ``None`` when the server answers 404 for this course.

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures (connect, timeout...).
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/image/{course_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            # See get_next_course: transport errors have no ``.response``.
            if e.response.status_code == 404:
                print(f"No more images available in course {course_id}")
                return None
            raise

        image_data = response.json()
        self.current_image = image_data["file_id"]
        return image_data

    async def release_course(self, course_id: str):
        """Release the lock on a course (best effort).

        HTTP failures are reported on stdout and swallowed so that cleanup
        never aborts a run; ``current_course`` is cleared only on success.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/course/{course_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
            self.current_course = None
        except httpx.HTTPError as e:
            print(f"Error releasing course {course_id}: {e}")

    async def release_image(self, course_id: str, file_id: str):
        """Release the lock on an image (best effort).

        HTTP failures are reported on stdout and swallowed so that cleanup
        never aborts a run; ``current_image`` is cleared only on success.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/image/{course_id}/{file_id}",
                params={"requester_id": self.requester_id},
            )
            response.raise_for_status()
            self.current_image = None
        except httpx.HTTPError as e:
            print(f"Error releasing image {file_id}: {e}")

    async def download_file(self, course: str, file_id: str) -> bool:
        """Download one file to ``download_dir / course / file_id``.

        The body is streamed to a ``.part`` file next to the target and only
        renamed into place once it is complete, so an interrupted transfer
        never leaves a truncated file under the final name.

        Args:
            course: Course identifier; also used as the sub-directory name.
            file_id: File identifier; also used as the local file name.

        Returns:
            ``True`` when the file was fully written, ``False`` on any error
            (the error is printed, not raised).
        """
        save_path = self.download_dir / course / file_id
        save_path.parent.mkdir(parents=True, exist_ok=True)
        partial_path = save_path.with_name(save_path.name + ".part")

        try:
            # Stream the response so large files are not held in memory.
            async with self.client.stream(
                "GET",
                f"{self.base_url}/download",
                params={"course": course, "file": file_id},
            ) as response:
                response.raise_for_status()
                self.stats["downloads_started"] += 1

                async with aiofiles.open(partial_path, 'wb') as f:
                    async for chunk in response.aiter_bytes():
                        await f.write(chunk)
                        self.stats["bytes_downloaded"] += len(chunk)

            os.replace(partial_path, save_path)
            self.stats["downloads_completed"] += 1
            return True
        except Exception as e:
            print(f"Error downloading {file_id}: {e}")
            try:
                partial_path.unlink()
            except OSError:
                # Nothing was written yet, or it is already gone.
                pass
            return False

    def save_stats(self):
        """Stamp ``end_time`` and write the stats to ``download_stats.json``."""
        self.stats["end_time"] = datetime.now().isoformat()
        # The directory may have been removed since __init__ created it.
        self.download_dir.mkdir(parents=True, exist_ok=True)
        stats_file = self.download_dir / "download_stats.json"
        with open(stats_file, 'w', encoding="utf-8") as f:
            json.dump(self.stats, f, indent=2)

    async def download_all(
        self,
        max_courses: Optional[int] = None,
        max_files: Optional[int] = None,
    ):
        """Download all available files with optional limits.

        Args:
            max_courses: Stop after this many courses have been started.
                ``None`` or ``0`` means no limit.
            max_files: Stop after this many files have been downloaded in
                total (across all courses).  ``None`` or ``0`` means no limit.

        Locks are released even when an error interrupts the run, and the
        statistics file is written and the HTTP client closed in every case.
        """
        try:
            courses_processed = 0
            files_downloaded = 0
            file_limit_reached = False

            # Once the file limit is hit there is no point in claiming (and
            # immediately releasing) further courses, so the outer loop stops.
            while not file_limit_reached:
                if max_courses and courses_processed >= max_courses:
                    print(f"Reached maximum courses limit ({max_courses})")
                    break

                course_data = await self.get_next_course()
                if not course_data:
                    print("No more courses available")
                    break

                course_id = course_data["course_id"]
                print(f"\nProcessing course: {course_id}")
                courses_processed += 1
                course_files = 0

                try:
                    while True:
                        if max_files and files_downloaded >= max_files:
                            print(f"Reached maximum files limit ({max_files})")
                            file_limit_reached = True
                            break

                        image_data = await self.get_next_image(course_id)
                        if not image_data:
                            break

                        file_id = image_data["file_id"]
                        print(f"Downloading: {file_id}")
                        try:
                            if await self.download_file(course_id, file_id):
                                files_downloaded += 1
                                course_files += 1
                                print(f"Successfully downloaded: {file_id}")
                        finally:
                            # NOTE(review): the image is released whether or
                            # not the download succeeded; confirm the server
                            # does not hand a failed image straight back.
                            await self.release_image(course_id, file_id)

                    print(f"Completed course {course_id} - Downloaded {course_files} files")
                finally:
                    # Never leave the course locked, even on an error.
                    await self.release_course(course_id)

            print(f"\nDownload complete!")
            print(f"Processed {courses_processed} courses")
            print(f"Downloaded {files_downloaded} files")
            print(f"Total bytes: {self.stats['bytes_downloaded']:,}")
        finally:
            try:
                self.save_stats()
            finally:
                # Close the client even if writing the stats file failed.
                await self.close()
async def main():
    """Run a small example download and always close the client afterwards."""
    # Make sure the target directory exists before anything is downloaded.
    Path("downloads").mkdir(exist_ok=True)

    downloader = MiddlewareClient()
    try:
        # Example limits: at most 2 courses, and at most 10 files in total
        # (max_files counts files across all courses, not per course).
        await downloader.download_all(max_courses=2, max_files=10)
    except KeyboardInterrupt:
        print("\nDownload interrupted by user")
    finally:
        await downloader.close()
# Script entry point: run the example download only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())