Update Dockerfile
Browse files- Dockerfile +37 -53
Dockerfile
CHANGED
|
@@ -4,35 +4,24 @@ FROM jupyter/base-notebook:latest
|
|
| 4 |
# Jalankan sebagai root hanya untuk instalasi
|
| 5 |
USER root
|
| 6 |
|
| 7 |
-
# ===== Install Java 17
|
| 8 |
-
RUN apt-get update &&
|
| 9 |
-
apt-get install -y --no-install-recommends openjdk-17-jdk curl ca-certificates wget && \
|
| 10 |
-
apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
|
| 13 |
ENV PATH=$JAVA_HOME/bin:$PATH
|
| 14 |
|
| 15 |
# ===== Install Apache Spark 3.5.0 =====
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
ENV SPARK_URL=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz
|
| 19 |
-
|
| 20 |
-
RUN curl -L -o /tmp/spark.tgz "${SPARK_URL}" && \
|
| 21 |
-
tar -xzf /tmp/spark.tgz -C /usr/local/ && \
|
| 22 |
-
rm /tmp/spark.tgz && \
|
| 23 |
-
mv /usr/local/${SPARK_DIST} /usr/local/spark
|
| 24 |
|
| 25 |
# ===== Set environment Spark =====
|
| 26 |
ENV SPARK_HOME=/usr/local/spark
|
| 27 |
ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
|
| 28 |
ENV PYSPARK_PYTHON=python3
|
| 29 |
-
# Pastikan nama py4j tidak terputus; Spark 3.5.0 biasanya bundling py4j 0.10.9.7
|
| 30 |
ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH
|
| 31 |
|
| 32 |
# ===== Install library Python =====
|
| 33 |
-
|
| 34 |
-
# Saya sengaja tidak meng-install 'pyspark' lewat pip untuk menghindari mismatch dengan binary Spark yang diinstall.
|
| 35 |
-
RUN pip install --no-cache-dir pandas matplotlib findspark ipykernel jupyterlab
|
| 36 |
|
| 37 |
# ===== Perbaiki izin direktori home Jupyter =====
|
| 38 |
RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
|
|
@@ -46,41 +35,33 @@ WORKDIR /workspace
|
|
| 46 |
|
| 47 |
# ===== Buat dua kernel Spark terisolasi =====
|
| 48 |
RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel1 && \
|
| 49 |
-
mkdir -p /usr/local/share/jupyter/kernels/spark_kernel2
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
"SPARK_HOME": "/usr/local/spark",
|
| 77 |
-
"PYSPARK_PYTHON": "python3",
|
| 78 |
-
"SPARK_LOG_DIR": "/workspace/spark_logs/kernel2",
|
| 79 |
-
"SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel2",
|
| 80 |
-
"PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
|
| 81 |
-
}
|
| 82 |
-
}
|
| 83 |
-
JSON
|
| 84 |
|
| 85 |
# ===== Ubah kembali ke user default Jupyter =====
|
| 86 |
USER jovyan
|
|
@@ -89,5 +70,8 @@ USER jovyan
|
|
| 89 |
EXPOSE 7860
|
| 90 |
|
| 91 |
# ===== Jalankan JupyterLab di port 7860 =====
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
# Jalankan sebagai root hanya untuk instalasi
|
| 5 |
USER root
|
| 6 |
|
| 7 |
+
# ===== Install Java 17 =====
|
| 8 |
+
# Install the Java 17 JDK and curl (curl is used below to download Spark).
# --no-install-recommends keeps optional packages out of the image, so
# ca-certificates is listed explicitly for curl's HTTPS download. The apt
# package lists are deleted in this same layer so they do not persist.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        openjdk-17-jdk && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
| 9 |
|
| 10 |
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
|
| 11 |
ENV PATH=$JAVA_HOME/bin:$PATH
|
| 12 |
|
| 13 |
# ===== Install Apache Spark 3.5.0 =====
|
| 14 |
+
# Download the Spark 3.5.0 (Hadoop 3) distribution from the Apache archive and
# unpack it to /usr/local/spark. The archive is written to a temporary file
# first, and curl runs with -f so an HTTP error fails this step before any
# extraction happens, rather than depending on the exit status of a pipeline.
# The temporary archive is removed in the same layer.
RUN curl -fsSL -o /tmp/spark.tgz \
        https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz && \
    tar -xzf /tmp/spark.tgz -C /usr/local/ && \
    rm /tmp/spark.tgz && \
    mv /usr/local/spark-3.5.0-bin-hadoop3 /usr/local/spark
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# ===== Set environment Spark =====
|
| 18 |
ENV SPARK_HOME=/usr/local/spark
|
| 19 |
ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
|
| 20 |
ENV PYSPARK_PYTHON=python3
|
|
|
|
| 21 |
# Put Spark's bundled Python sources and its py4j archive on the import path.
# Any pre-existing PYTHONPATH is appended only when it is set and non-empty, so
# the value never ends in a bare ':' (which Python treats as an empty entry).
ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip${PYTHONPATH:+:$PYTHONPATH}
|
| 22 |
|
| 23 |
# ===== Install library Python =====
|
| 24 |
+
# Install the Python libraries used by the notebooks.
# pyspark is pinned to 3.5.0 so the pip package matches the Spark 3.5.0
# distribution unpacked under /usr/local/spark; an unpinned install would
# follow whatever PySpark release is newest at build time.
RUN pip install --no-cache-dir \
        pyspark==3.5.0 \
        pandas \
        matplotlib \
        findspark \
        ipykernel \
        jupyterlab
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# ===== Perbaiki izin direktori home Jupyter =====
|
| 27 |
RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
|
|
|
|
| 35 |
|
| 36 |
# ===== Buat dua kernel Spark terisolasi =====
|
| 37 |
# Register two Jupyter kernelspecs, spark_kernel1 and spark_kernel2. Both
# launch ipykernel with the same Java/Spark environment; they differ only in
# their display name and in the SPARK_LOG_DIR / SPARK_LOCAL_DIRS value, which
# points at /workspace/spark_logs/kernel<N>. The kernel number is filled into
# one shared JSON template through printf's three %s fields.
RUN for n in 1 2; do \
        kernel_dir="/usr/local/share/jupyter/kernels/spark_kernel${n}" && \
        mkdir -p "${kernel_dir}" && \
        printf '{ \
            "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"], \
            "display_name": "Spark Kernel %s", \
            "language": "python", \
            "env": { \
                "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64", \
                "SPARK_HOME": "/usr/local/spark", \
                "PYSPARK_PYTHON": "python3", \
                "SPARK_LOG_DIR": "/workspace/spark_logs/kernel%s", \
                "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel%s", \
                "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip" \
            } \
        }\n' "${n}" "${n}" "${n}" > "${kernel_dir}/kernel.json" || exit 1; \
    done
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# ===== Ubah kembali ke user default Jupyter =====
|
| 67 |
USER jovyan
|
|
|
|
| 70 |
EXPOSE 7860
|
| 71 |
|
| 72 |
# ===== Jalankan JupyterLab di port 7860 =====
|
| 73 |
+
# Print a startup banner, then start JupyterLab on port 7860 listening on all
# interfaces. `exec` replaces the bash wrapper with the Jupyter process so the
# server receives container stop signals directly.
# NOTE(review): the empty token and password leave the server without
# authentication — confirm this is intended for the deployment target.
CMD ["bash", "-c", "\
    echo '🚀 JupyterLab berjalan di Hugging Face Spaces pada port 7860' && \
    exec jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --allow-root \
    --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.default_url=/lab \
    "]
|