factorstudios committed on
Commit
058701f
·
verified ·
1 Parent(s): c5d6c28

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +32 -0
  2. main.py +142 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python base image
FROM python:3.10-slim

# Do not write .pyc files, and flush stdout/stderr immediately so logs show up live
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# All following relative paths (COPY targets, the app's working dir) resolve to /app
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional extras out of the image; ca-certificates
# is listed explicitly so curl can still verify HTTPS endpoints.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is reused when only code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code
COPY . .

# The service creates its download directory under /app at runtime, so the tree
# must be writable by whichever user the container runs as.
# NOTE(review): a dedicated user plus chown would be tighter than 777 — confirm
# the runtime UID before changing this.
RUN chmod -R 777 /app

# Keep /tmp world-writable but preserve the sticky bit (mode 1777); plain 777
# would allow any user to delete other users' temporary files.
RUN chmod 1777 /tmp

# Port the API server listens on
EXPOSE 8000

# Default command: serve the FastAPI app defined in main.py with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
main.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import requests
4
+ import zipfile
5
+ import shutil
6
+ from urllib.parse import urlparse
7
+ from huggingface_hub import upload_file
8
+ from fastapi import FastAPI
9
+ from contextlib import asynccontextmanager
10
+ import asyncio
11
+ import logging
12
+
13
# === Setup Logging ===
# Configured before anything else so the configuration check below is reported
# with the same format as the rest of the service.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# === CONFIGURATION ===
HF_TOKEN = os.environ.get("HF_TOKEN")  # access token passed to upload_file; None when unset
REPO_ID = "factorstudios/Pipeline"  # target dataset repository
DATA_PATH = "Blenders"  # folder inside the repository that receives uploads
OUTPUT_DIR = "batch_downloads"  # local scratch directory for downloads
DOWNLOAD_URLS = [
    "https://ww6.zeroupload.xyz/a852d5fdd63835ba196a48462b2759af/Coloso_BlChScul_DownloadPirate.com.rar?download_token=ca9428d28e5ea75bf0a412ea2b50ce3410d2df8fdaf9073b555704f4b42ba98f"
]
DELAY_BETWEEN_DOWNLOADS = 12  # seconds

# Surface a missing token at startup instead of at the first failed upload.
if not HF_TOKEN:
    logging.warning("HF_TOKEN is not set; uploads to %s may fail with an authentication error.", REPO_ID)

# === Prepare output folder ===
os.makedirs(OUTPUT_DIR, exist_ok=True)

app = FastAPI()
30
+
31
# === DUMMY ROUTE TO KEEP SERVER HEALTHY ===
@app.get("/")
def keep_alive():
    """Answer GET / with a static payload showing the service is up."""
    payload = {"status": "running"}
    return payload
35
+
36
# === Upload Function ===
def upload_to_dataset(filepath):
    """Upload one local file to the dataset repository.

    The file is stored as ``DATA_PATH/<basename of filepath>`` in ``REPO_ID``.

    Args:
        filepath: Path of the local file to upload.

    Returns:
        True if the upload succeeded, False if it failed. Failures are logged
        (with traceback) and never raised, so callers may ignore the result.
    """
    try:
        upload_file(
            path_or_fileobj=filepath,
            path_in_repo=f"{DATA_PATH}/{os.path.basename(filepath)}",
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Best-effort by design: report the failure and let the caller carry on.
        logging.exception(f"[!] Upload failed: {filepath} — {e}")
        return False
    logging.info(f"[↑] Uploaded: {filepath}")
    return True
49
+
50
# === Upload Directory Contents ===
def upload_directory_contents(directory):
    """Upload every file under ``directory`` to the dataset repository.

    Each file is stored as ``DATA_PATH/<path relative to directory>``. A file
    that fails to upload is logged and skipped, so one bad file does not stop
    the remaining uploads.

    Args:
        directory: Root of the local tree to upload.

    Returns:
        The number of files uploaded successfully. Errors are logged, never
        raised.
    """
    uploaded = 0
    try:
        for root, _dirs, files in os.walk(directory):
            for name in files:
                filepath = os.path.join(root, name)
                # Repository paths use "/" whatever the local separator is.
                relative_path = os.path.relpath(filepath, directory).replace(os.sep, "/")
                try:
                    upload_file(
                        path_or_fileobj=filepath,
                        path_in_repo=f"{DATA_PATH}/{relative_path}",
                        repo_id=REPO_ID,
                        repo_type="dataset",
                        token=HF_TOKEN,
                    )
                except Exception as e:
                    logging.error(f"[!] Upload failed: {relative_path} — {e}")
                    continue
                uploaded += 1
                logging.info(f"[↑] Uploaded: {relative_path}")
    except Exception as e:
        # Anything raised by the walk itself (e.g. an unusable `directory` value).
        logging.error(f"[!] Upload directory failed: {directory} — {e}")
    return uploaded
67
+
68
# === Background Worker ===
# (connect, read) timeouts in seconds, so a stalled server cannot hang the worker forever.
_REQUEST_TIMEOUT = (10, 120)


def _filename_from_url(url):
    """Return the last path segment of ``url``, or a timestamped fallback name."""
    filename = os.path.basename(urlparse(url).path)
    if not filename or "." not in filename:
        filename = "downloaded_file_" + str(int(time.time()))
    return filename


def _download_to(url, local_path):
    """Stream ``url`` into ``local_path``; raises on HTTP or network errors."""
    with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as r:
        r.raise_for_status()
        with open(local_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    f.write(chunk)


def _process_download(url):
    """Download one URL, upload it (extracting .zip archives first), then clean up.

    This function blocks; the async worker runs it in a thread. Local files
    are removed in ``finally`` so nothing is left behind on failure.
    """
    logging.info(f"[*] Downloading from: {url}")
    filename = _filename_from_url(url)
    local_path = os.path.join(OUTPUT_DIR, filename)
    logging.info(f"[*] Saving to: {local_path}")

    extract_dir = None
    try:
        _download_to(url, local_path)
        logging.info(f"[✓] Downloaded: {filename}")

        if not filename.lower().endswith(".zip"):
            # Not a zip: upload the file as-is.
            upload_to_dataset(local_path)
            return

        logging.info(f"[*] Extracting zip file: {filename}")
        extract_dir = os.path.join(OUTPUT_DIR, os.path.splitext(filename)[0])
        os.makedirs(extract_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(local_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)
        except zipfile.BadZipFile:
            logging.error(f"[!] Invalid zip file: {filename}")
            return
        logging.info(f"[✓] Extracted to: {extract_dir}")

        # Upload all extracted contents
        upload_directory_contents(extract_dir)
    finally:
        if extract_dir is not None:
            shutil.rmtree(extract_dir, ignore_errors=True)
        try:
            os.remove(local_path)
        except FileNotFoundError:
            pass  # the download never created the file
        if extract_dir is not None:
            logging.info("[✓] Cleaned up extracted files and zip")


async def downloader_worker():
    """Process every URL in DOWNLOAD_URLS, one at a time.

    Waits DELAY_BETWEEN_DOWNLOADS seconds before each URL, then downloads,
    uploads and cleans up. The blocking work runs in the default thread pool
    so the event loop stays free to serve HTTP requests. An error on one URL
    is logged and the loop moves on to the next.
    """
    loop = asyncio.get_running_loop()
    for direct_download_link in DOWNLOAD_URLS:
        logging.info("[*] Waiting before next download...")
        await asyncio.sleep(DELAY_BETWEEN_DOWNLOADS)

        try:
            await loop.run_in_executor(None, _process_download, direct_download_link)
        except Exception as e:
            logging.error(f"[!] Error with {direct_download_link}: {e}")

    logging.info("✅ All files processed.")
121
+
122
@app.get("/")
def stay_alive():
    """Answer GET / with a static "Running" message."""
    # NOTE(review): "/" is also registered for keep_alive earlier in this
    # module — confirm which handler is meant to serve it.
    body = {"msg": "Running"}
    return body
125
+
126
@app.get("/health")
def healthcheck():
    """Answer GET /health with a static healthy flag."""
    report = {"healthy": True}
    return report
129
+
130
# === FastAPI Lifespan ===
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the download worker in the background for the lifetime of the app.

    On startup the worker is scheduled as an asyncio task. On shutdown, or if
    the application body raises, the task is cancelled and awaited so it does
    not outlive the context.
    """
    logging.info("🚀 Starting FastAPI download-uploader microservice...")
    task = asyncio.create_task(downloader_worker())
    try:
        yield
    finally:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass  # expected: the worker was cancelled just above
        except Exception:
            logging.exception("[!] Download worker ended with an error")
        logging.info("🛑 Shutting down microservice.")
138
+
139
# === FastAPI App ===
# Attach the lifespan handler to the existing app rather than building a new
# FastAPI instance: rebinding `app` here would discard every route registered
# above with @app.get, leaving "/" and "/health" unrouted.
app.router.lifespan_context = lifespan
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
requests
huggingface_hub
# "bs4" on PyPI is only a shim that pulls in beautifulsoup4; depend on the real package.
beautifulsoup4
fastapi
uvicorn