Ark-kun committed on
Commit
6439a6b
·
1 Parent(s): 37e8b85

Added Dockerfile and start_HuggingFace.py with launcher=None

Browse files
Files changed (2) hide show
  1. Dockerfile +76 -0
  2. start_HuggingFace.py +285 -0
Dockerfile ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a Python image with uv pre-installed
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

# Create the /data directory while still running as root and give every user
# full access to it. Creating it after switching to the non-root user fails with:
# --> RUN mkdir -p /data
# mkdir: cannot create directory ‘/data’: Permission denied
RUN mkdir -p /data && chmod 777 /data

# # Setup a non-root user
# RUN groupadd --system --gid 999 nonroot \
#  && useradd --system --gid 999 --uid 999 --create-home nonroot

# The two following lines are requirements for the Dev Mode to be functional
# Learn more about the Dev Mode at https://huggingface.co/dev-mode-explorers
RUN useradd -m -u 1000 user
USER user

# Install the project into `/app/backend`
WORKDIR /app/backend

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

# Ensure installed tools can be executed out of the box
ENV UV_TOOL_BIN_DIR=/usr/local/bin

# Install the project's dependencies using the lockfile and settings
# NOTE(review): the cache mount targets root's cache directory, but this command
# runs as `user`; confirm that uv actually uses this mount.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=backend/uv.lock,target=uv.lock \
    --mount=type=bind,source=backend/pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project --no-dev

# Then, add the rest of the project source code and install it
# Installing separately from its dependencies allows optimal layer caching
# The files are owned by `user` so the non-root process can modify them.
COPY --chown=user:user backend /app/backend
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked --no-dev

# Place executables in the environment at the front of the path
ENV PATH="/app/backend/.venv/bin:$PATH"

# The start script lives next to the backend sources
COPY --chown=user:user start_HuggingFace.py /app/backend

# Copy frontend build
COPY --chown=user:user frontend_build /app/frontend_build

# Put Tangle data into persistent storage:
# /app/backend/data is a symlink to /data/tangle/data (/data was created above).
RUN ln -s /data/tangle/data /app/backend/data

# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []

# # Use the non-root user to run our application
# USER nonroot

# Run the FastAPI application by default
# Uses `fastapi dev` to enable hot-reloading when the `watch` sync occurs
# Uses `--host 0.0.0.0` to allow access from outside the container
# Note in production, you should use `fastapi run` instead

# WORKDIR /app
# CMD ["fastapi", "dev", "--host", "0.0.0.0", "/app/backend/start_local.py"]

# WORKDIR /app/backend
# CMD ["fastapi", "dev", "--host", "0.0.0.0", "/app/start_HuggingFace.py"]

WORKDIR /app/backend
CMD ["fastapi", "dev", "--host", "0.0.0.0", "--port", "7860", "/app/backend/start_HuggingFace.py"]
start_HuggingFace.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import pathlib
4
+
5
+ import fastapi
6
+
7
# region Paths configuration
# Everything the server persists lives under a single root directory.
# The relative root is resolved against the current working directory.
root_data_dir: str = "./data/"
root_data_dir_path = pathlib.Path(root_data_dir).resolve()
print(f"{root_data_dir_path=}")

artifacts_dir_path = root_data_dir_path.joinpath("artifacts")
logs_dir_path = root_data_dir_path.joinpath("logs")

# Make sure the root and both sub-directories exist before anything uses them.
for _required_dir in (root_data_dir_path, artifacts_dir_path, logs_dir_path):
    _required_dir.mkdir(parents=True, exist_ok=True)
# endregion

# region: DB Configuration
# The SQLite database file sits directly inside the data root.
database_path = root_data_dir_path.joinpath("db.sqlite")
database_uri = f"sqlite:///{database_path}"
print(f"{database_uri=}")
# endregion
28
+
29
# region: Storage configuration
from cloud_pipelines.orchestration.storage_providers import local_storage

# Storage provider instance that is passed to the orchestrator.
storage_provider = local_storage.LocalStorageProvider()

# Artifact and log roots, expressed as POSIX-style path strings.
artifacts_root_uri = artifacts_dir_path.as_posix()
logs_root_uri = logs_dir_path.as_posix()
# endregion

# region: Launcher configuration
# The Docker-based launcher setup below is disabled and kept for reference only.
# import docker
# from cloud_pipelines_backend.launchers import local_docker_launchers

# docker_client = docker.DockerClient.from_env(timeout=5)
# _ = docker_client.version()

# launcher = local_docker_launchers.DockerContainerLauncher(
#     client=docker_client,
# )
# No launcher is configured: `None` is passed to the orchestrator and to the API routes.
launcher = None
# endregion

# region: Orchestrator configuration
# Annotations passed to the orchestrator as `default_task_annotations` (none by default).
default_task_annotations = {}
# Passed to the orchestrator as `sleep_seconds_between_queue_sweeps`.
sleep_seconds_between_queue_sweeps: float = 5.0
# endregion

# region: Authentication configuration
import fastapi

# Name of the single built-in user; it also owns the default component library.
ADMIN_USER_NAME = "admin"
default_component_library_owner_username = ADMIN_USER_NAME
61
+
62
+
63
# ! Placeholder for user authentication and authorization: it exists only so that every request has a user name and permissions.
# ! It authenticates every caller as the user named "admin" with read/write/admin permissions.
# ! In a real multi-user deployment, this `get_user_details` function MUST be replaced with real authentication/authorization based on OAuth or another auth system.
def get_user_details(request: fastapi.Request):
    """Return the fixed admin identity for any incoming request.

    Args:
        request: The incoming request. It is not inspected.

    Returns:
        An `api_router.UserDetails` named `ADMIN_USER_NAME` whose read, write
        and admin permissions are all enabled.
    """
    full_access = api_router.Permissions(read=True, write=True, admin=True)
    return api_router.UserDetails(name=ADMIN_USER_NAME, permissions=full_access)
75
+
76
+
77
+ # endregion
78
+
79
+
80
# region: Logging configuration
import logging.config

# Every configured logger writes through the single "default" stderr handler.
# The root logger uses that handler too, so each named logger that attaches it
# directly must set "propagate": False; otherwise a record is emitted once by
# the named logger and once more by the root logger.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": True,
    "formatters": {
        "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
    },
    "handlers": {
        "default": {
            "level": "INFO",
            "formatter": "standard",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stderr",
        },
    },
    "loggers": {
        # root logger
        "": {
            "level": "INFO",
            "handlers": ["default"],
            "propagate": False,
        },
        "uvicorn.error": {
            "level": "DEBUG",
            "handlers": ["default"],
            # Fix triplicated log messages
            "propagate": False,
        },
        "uvicorn.access": {
            "level": "DEBUG",
            "handlers": ["default"],
            # Avoid a second copy of each record via the root handler
            "propagate": False,
        },
        "watchfiles.main": {
            "level": "WARNING",
            "handlers": ["default"],
            # Avoid a second copy of each record via the root handler
            "propagate": False,
        },
    },
}

logging.config.dictConfig(LOGGING_CONFIG)

# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
# endregion
125
+
126
# region: Database engine initialization
from cloud_pipelines_backend import api_router

# Engine for the database at `database_uri`; it is used by both the API routes
# and the orchestrator.
db_engine = api_router.create_db_engine(
    database_uri=database_uri,
)
# endregion
133
+
134
+
135
+ # region: Orchestrator initialization
136
+
137
+ import logging
138
+ import pathlib
139
+
140
+ import sqlalchemy
141
+ from sqlalchemy import orm
142
+
143
+ from cloud_pipelines.orchestration.storage_providers import (
144
+ interfaces as storage_interfaces,
145
+ )
146
+ from cloud_pipelines_backend import orchestrator_sql
147
+
148
+
149
def create_db_and_tables(db_engine: sqlalchemy.Engine):
    """Create all tables registered on `_TableBase.metadata`.

    Args:
        db_engine: Engine for the database in which the tables are created.
    """
    # The table definitions are imported at call time, not at module import time.
    from cloud_pipelines_backend.backend_types_sql import _TableBase

    _TableBase.metadata.create_all(db_engine)
153
+
154
+
155
def run_orchestrator(
    db_engine: sqlalchemy.Engine,
    storage_provider: storage_interfaces.StorageProvider,
    data_root_uri: str,
    logs_root_uri: str,
    sleep_seconds_between_queue_sweeps: float = 5.0,
):
    """Build the SQL orchestrator service and run its loop in the calling thread.

    The launcher and the default task annotations are not parameters: they are
    read from the module-level `launcher` and `default_task_annotations`.

    Args:
        db_engine: Engine the orchestrator's sessions are bound to.
        storage_provider: Storage provider handed to the orchestrator.
        data_root_uri: Root URI passed to the orchestrator as `data_root_uri`.
        logs_root_uri: Root URI passed to the orchestrator as `logs_root_uri`.
        sleep_seconds_between_queue_sweeps: Forwarded unchanged to the
            orchestrator service.
    """
    logger.info("Starting the orchestrator")

    # Sessions created by this factory neither auto-commit nor auto-flush.
    make_session = orm.sessionmaker(
        bind=db_engine,
        autoflush=False,
        autocommit=False,
    )

    service = orchestrator_sql.OrchestratorService_Sql(
        session_factory=make_session,
        launcher=launcher,
        storage_provider=storage_provider,
        data_root_uri=data_root_uri,
        logs_root_uri=logs_root_uri,
        default_task_annotations=default_task_annotations,
        sleep_seconds_between_queue_sweeps=sleep_seconds_between_queue_sweeps,
    )
    # This call only returns when the service's loop itself returns.
    service.run_loop()
193
+
194
+
195
def run_configured_orchestrator():
    """Run the orchestrator with this module's configured settings.

    Takes no arguments so that it can be used directly as a thread target.
    The module-level settings are looked up when the function is called,
    not when it is defined.
    """
    return run_orchestrator(
        db_engine=db_engine,
        storage_provider=storage_provider,
        data_root_uri=artifacts_root_uri,
        logs_root_uri=logs_root_uri,
        sleep_seconds_between_queue_sweeps=sleep_seconds_between_queue_sweeps,
    )


# endregion
203
+
204
+
205
+ # region: API Server initialization
206
+ import contextlib
207
+ import threading
208
+ import traceback
209
+
210
+ import fastapi
211
+ from fastapi import staticfiles
212
+
213
+ from cloud_pipelines_backend import api_router
214
+
215
+
216
@contextlib.asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    """Application lifespan hook: prepare the database, then start the orchestrator.

    Before yielding, the tables are created and the orchestrator is started on
    a background daemon thread. Nothing is done after the yield.

    Args:
        app: The application this lifespan is attached to. It is not used.
    """
    create_db_and_tables(db_engine=db_engine)

    orchestrator_thread = threading.Thread(
        target=run_configured_orchestrator,
        daemon=True,
    )
    orchestrator_thread.start()

    yield
224
+
225
+
226
# The FastAPI application object. Its `lifespan` hook creates the database
# tables and starts the orchestrator thread.
app = fastapi.FastAPI(
    title="Cloud Pipelines API",
    version="0.0.1",
    separate_input_output_schemas=False,
    lifespan=lifespan,
)
232
+
233
+
234
@app.exception_handler(Exception)
def handle_error(request: fastapi.Request, exc: BaseException):
    """Turn an exception into a JSON error response.

    Args:
        request: The request being handled. It is not used.
        exc: The exception to report.

    Returns:
        A JSON response with status 503 whose "exception" field holds the
        formatted traceback as a list of strings.
    """
    traceback_lines = traceback.format_exception(type(exc), exc, exc.__traceback__)
    error_payload = {"exception": traceback_lines}
    return fastapi.responses.JSONResponse(
        content=error_payload,
        status_code=503,
    )
241
+
242
+
243
# Register the API routes on the application. The placeholder
# `get_user_details` supplies the user for each request, and `launcher`
# (currently `None`) is passed as the launcher for log streaming.
api_router.setup_routes(
    app=app,
    db_engine=db_engine,
    user_details_getter=get_user_details,
    container_launcher_for_log_streaming=launcher,
    default_component_library_owner_username=default_component_library_owner_username,
)
250
+
251
+
252
# Health check needed by the Web app
@app.get("/services/ping")
def health_check():
    """Respond to `/services/ping` with an empty JSON object."""
    return dict()
256
+
257
+
258
# Mounting the web app if the files exist
# Candidate locations of the built Web app, in priority order.
this_dir = pathlib.Path(__file__).parent
web_app_search_dirs = [
    this_dir / ".." / "pipeline-studio-app" / "build",
    this_dir / ".." / "frontend" / "build",
    this_dir / ".." / "frontend_build",
    this_dir / "pipeline-studio-app" / "build",
]
found_frontend_build_files = False
for web_app_dir in web_app_search_dirs:
    # StaticFiles serves a directory, so a plain file at this path is not a usable build.
    if not web_app_dir.is_dir():
        continue
    found_frontend_build_files = True
    logger.info(f"Found the Web app static files at {str(web_app_dir)}. Mounting them.")
    # The Web app base URL is currently static and hardcoded.
    # TODO: Remove this mount once the base URL becomes relative.
    app.mount(
        "/pipeline-studio-app/",
        staticfiles.StaticFiles(directory=web_app_dir, html=True),
        name="static",
    )
    app.mount(
        "/",
        staticfiles.StaticFiles(directory=web_app_dir, html=True),
        name="static",
    )
    # Only the first existing candidate is mounted: mounting another directory
    # on the same two paths would never be reached.
    break
if not found_frontend_build_files:
    logger.warning("The Web app files were not found. Skipping.")
# endregion