File size: 7,201 Bytes
2de604e
 
c32cd59
2de604e
 
 
c32cd59
2de604e
252c8e0
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
 
 
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
 
252c8e0
c32cd59
 
2de604e
 
 
 
 
c32cd59
2de604e
c32cd59
252c8e0
2de604e
 
 
c32cd59
2de604e
 
252c8e0
c32cd59
 
2de604e
 
c32cd59
2de604e
 
 
 
 
 
c32cd59
2de604e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c32cd59
2de604e
c32cd59
2de604e
 
 
 
 
 
 
c32cd59
 
2de604e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import httpx
import asyncio
import os
import uuid
from pathlib import Path
from typing import Optional, Dict, Any
import json
from datetime import datetime
import aiofiles

class MiddlewareClient:
    """Async client that fetches courses and their images from the middleware.

    The client asks the service for the next available course, then for each
    image in that course, downloads the image into ``downloads/<course>/`` and
    releases the image/course lock afterwards. Every request that acquires or
    releases a lock carries ``requester_id`` so the service can tell clients
    apart.
    """

    def __init__(self, base_url: str = "https://fred808-vssee.hf.space"):
        """Create the HTTP client, the download directory and the counters.

        Args:
            base_url: Root URL of the middleware service; a trailing slash is
                stripped so endpoint paths can be appended directly.
        """
        self.base_url = base_url.rstrip('/')
        self.client = httpx.AsyncClient(timeout=30.0)  # 30 second timeout
        self.requester_id = str(uuid.uuid4())  # Unique ID for this client
        self.download_dir = Path("downloads")
        self.download_dir.mkdir(exist_ok=True)

        # Keep track of our current locks
        self.current_course: Optional[str] = None
        self.current_image: Optional[str] = None

        # Statistics
        self.stats = {
            "downloads_started": 0,
            "downloads_completed": 0,
            "bytes_downloaded": 0,
            "start_time": datetime.now().isoformat()
        }

    async def close(self):
        """Close the HTTP client"""
        await self.client.aclose()

    async def get_next_course(self) -> Optional[Dict[str, Any]]:
        """Get next available course.

        Returns:
            The decoded JSON payload of the course, or ``None`` when the
            service answers 404 (nothing left to hand out).

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures (timeouts, connection
                errors); these are deliberately not intercepted here.
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/course",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
            course_data = response.json()
            self.current_course = course_data["course_id"]
            return course_data
        # Only HTTPStatusError carries a ``response``; catching the broader
        # HTTPError here would turn a timeout into an AttributeError.
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                print("No more courses available")
                return None
            raise

    async def get_next_image(self, course_id: str) -> Optional[Dict[str, Any]]:
        """Get next available image from a course.

        Args:
            course_id: Course to request the next image from.

        Returns:
            The decoded JSON payload of the image, or ``None`` when the
            service answers 404 for this course.

        Raises:
            httpx.HTTPStatusError: For any non-404 error status.
            httpx.RequestError: For transport failures.
        """
        try:
            response = await self.client.get(
                f"{self.base_url}/middleware/next/image/{course_id}",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
            image_data = response.json()
            self.current_image = image_data["file_id"]
            return image_data
        # See get_next_course: only status errors have a response to inspect.
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                print(f"No more images available in course {course_id}")
                return None
            raise

    async def release_course(self, course_id: str):
        """Release lock on a course.

        Best effort: HTTP failures are printed, not raised, and in that case
        ``current_course`` is left unchanged.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/course/{course_id}",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
            self.current_course = None
        except httpx.HTTPError as e:
            print(f"Error releasing course {course_id}: {e}")

    async def release_image(self, course_id: str, file_id: str):
        """Release lock on an image.

        Best effort: HTTP failures are printed, not raised, and in that case
        ``current_image`` is left unchanged.
        """
        try:
            response = await self.client.post(
                f"{self.base_url}/middleware/release/image/{course_id}/{file_id}",
                params={"requester_id": self.requester_id}
            )
            response.raise_for_status()
            self.current_image = None
        except httpx.HTTPError as e:
            print(f"Error releasing image {file_id}: {e}")

    def _resolve_save_path(self, course: str, file_id: str) -> Optional[Path]:
        """Return the absolute target path for a download, or ``None`` if unsafe.

        ``course`` and ``file_id`` come from the remote service, so a value
        such as ``../../x`` or an absolute path must not be allowed to place
        the file outside ``download_dir``.
        """
        root = self.download_dir.resolve()
        target = (root / course / file_id).resolve()
        if root not in target.parents:
            return None
        return target

    async def download_file(self, course: str, file_id: str) -> bool:
        """Download one file into ``download_dir/<course>/<file_id>``.

        The response body is streamed to disk chunk by chunk, and the
        statistics counters are updated as data arrives.

        Args:
            course: Course identifier, used as the sub-directory name.
            file_id: File identifier, used as the file name.

        Returns:
            ``True`` when the file was written completely, ``False`` when the
            target path is unsafe or any error occurred (the error is printed).
        """
        save_path = self._resolve_save_path(course, file_id)
        if save_path is None:
            print(f"Error downloading {file_id}: unsafe path outside {self.download_dir}")
            return False

        try:
            save_path.parent.mkdir(parents=True, exist_ok=True)

            # stream() avoids buffering the whole body in memory before the
            # first byte is written to disk.
            async with self.client.stream(
                "GET",
                f"{self.base_url}/download",
                params={"course": course, "file": file_id}
            ) as response:
                response.raise_for_status()

                self.stats["downloads_started"] += 1

                async with aiofiles.open(save_path, 'wb') as f:
                    async for chunk in response.aiter_bytes():
                        await f.write(chunk)
                        self.stats["bytes_downloaded"] += len(chunk)

            self.stats["downloads_completed"] += 1
            return True

        # Deliberately broad: a single failed file must not abort the batch.
        except Exception as e:
            print(f"Error downloading {file_id}: {e}")
            return False

    def save_stats(self):
        """Save download statistics to ``download_dir/download_stats.json``.

        Also stamps ``end_time`` into the statistics before writing.
        """
        self.stats["end_time"] = datetime.now().isoformat()
        stats_file = self.download_dir / "download_stats.json"
        with open(stats_file, 'w', encoding="utf-8") as f:
            json.dump(self.stats, f, indent=2)

    async def download_all(self, max_courses: Optional[int] = None,
                           max_files: Optional[int] = None):
        """Download all available files with optional limits.

        Args:
            max_courses: Stop after this many courses have been started.
                ``None`` or ``0`` means no limit.
            max_files: Stop after this many files have been downloaded in
                total, across all courses. ``None`` or ``0`` means no limit.

        Statistics are saved and the HTTP client is closed when this method
        finishes, whether it succeeds or raises; the instance cannot issue
        further requests afterwards.
        """
        try:
            courses_processed = 0
            files_downloaded = 0
            file_limit_reached = False

            # Once the file limit is hit there is no point in acquiring (and
            # immediately releasing) further courses, so the outer loop stops.
            while not file_limit_reached:
                if max_courses and courses_processed >= max_courses:
                    print(f"Reached maximum courses limit ({max_courses})")
                    break

                course_data = await self.get_next_course()
                if not course_data:
                    # get_next_course() has already reported the 404.
                    break

                course_id = course_data["course_id"]
                print(f"\nProcessing course: {course_id}")
                courses_processed += 1

                course_files = 0
                try:
                    while True:
                        if max_files and files_downloaded >= max_files:
                            print(f"Reached maximum files limit ({max_files})")
                            file_limit_reached = True
                            break

                        image_data = await self.get_next_image(course_id)
                        if not image_data:
                            break

                        file_id = image_data["file_id"]
                        print(f"Downloading: {file_id}")

                        try:
                            if await self.download_file(course_id, file_id):
                                files_downloaded += 1
                                course_files += 1
                                print(f"Successfully downloaded: {file_id}")
                        finally:
                            # Release the image lock even if we are unwinding.
                            await self.release_image(course_id, file_id)

                    print(f"Completed course {course_id} - Downloaded {course_files} files")
                finally:
                    # Release the course lock even when an image request fails.
                    await self.release_course(course_id)

            print(f"\nDownload complete!")
            print(f"Processed {courses_processed} courses")
            print(f"Downloaded {files_downloaded} files")
            print(f"Total bytes: {self.stats['bytes_downloaded']:,}")

        finally:
            try:
                self.save_stats()
            finally:
                # Close the client even if writing the stats file failed.
                await self.close()

async def main():
    """Run a small example download: at most 2 courses and 10 files overall."""
    # Make sure the target directory exists before the client is built.
    downloads_root = Path("downloads")
    downloads_root.mkdir(exist_ok=True)

    downloader = MiddlewareClient()

    try:
        # max_files is a total across all courses, not a per-course quota.
        await downloader.download_all(max_courses=2, max_files=10)
    except KeyboardInterrupt:
        print("\nDownload interrupted by user")
    finally:
        await downloader.close()

# Run the async entry point only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())