# Spaces: Sleeping  (page-status text captured together with this Dockerfile; not a build instruction)
# =============================================================================
# JupyterLab + Apache Spark 3.5.0 (Java 17) image that serves on port 7860.
#
# Build notes:
#   * The `RUN ... <<'JSON'` here-documents below require BuildKit
#     (Dockerfile frontend 1.4 or newer).
#   * Authentication is disabled in CMD; only run this where an outer layer
#     controls who can reach the port.
# =============================================================================

# ===== Use the official Jupyter base image =====
# NOTE(review): `latest` is not pinned, so rebuilds are not reproducible.
# Consider pinning a dated tag or digest once a known-good build exists.
FROM jupyter/base-notebook:latest

# Run as root only for the installation steps
USER root

# ===== Install Java 17 and utilities =====
RUN apt-get update && \
    apt-get install -y --no-install-recommends openjdk-17-jdk curl ca-certificates wget && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# NOTE(review): this path is specific to amd64 builds of the OpenJDK package;
# the same path is repeated in both kernel.json files below.
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
ENV PATH=$JAVA_HOME/bin:$PATH

# ===== Install Apache Spark 3.5.0 =====
# These three ENV instructions must stay separate: each one references the
# value defined by the previous instruction.
ENV SPARK_VERSION=3.5.0
ENV SPARK_DIST=spark-${SPARK_VERSION}-bin-hadoop3
ENV SPARK_URL=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz

# -f makes curl fail on HTTP errors instead of saving the error page as the
# tarball; --retry covers transient network failures.
RUN curl -fSL --retry 3 -o /tmp/spark.tgz "${SPARK_URL}" && \
    tar -xzf /tmp/spark.tgz -C /usr/local/ && \
    rm /tmp/spark.tgz && \
    mv "/usr/local/${SPARK_DIST}" /usr/local/spark

# ===== Spark environment =====
ENV SPARK_HOME=/usr/local/spark
# $JAVA_HOME/bin is already on PATH from the Java section above.
ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
ENV PYSPARK_PYTHON=python3
# The py4j archive name must match the one shipped inside the Spark
# distribution (py4j 0.10.9.7 for Spark 3.5.0).
# ${PYTHONPATH:+...} appends the previous value only when one exists, so no
# trailing ':' (an empty path entry) is left behind.
ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip${PYTHONPATH:+:$PYTHONPATH}

# ===== Install Python libraries =====
# NOTE: if you want to use pyspark via pip, use pyspark==3.5.0 so it matches
# the Spark binaries. 'pyspark' is intentionally NOT installed through pip to
# avoid a mismatch with the Spark distribution installed above.
# pip runs as root here, so fix-permissions restores the ownership and group
# permissions of the conda prefix for the notebook user afterwards.
RUN pip install --no-cache-dir pandas matplotlib findspark ipykernel jupyterlab && \
    fix-permissions "${CONDA_DIR}"

# ===== Fix permissions of the Jupyter home directory =====
# NOTE(review): mode 777 is world-writable; kept as in the original so the
# image keeps working when the runtime user differs from the build user.
RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
    chown -R 1000:100 /home/jovyan && \
    chmod -R 777 /home/jovyan

# ===== Prepare the working directory and per-kernel Spark log directories =====
RUN mkdir -p /workspace/spark_logs/kernel1 /workspace/spark_logs/kernel2 && \
    chmod -R 777 /workspace
WORKDIR /workspace

# ===== Create two isolated Spark kernels =====
# The kernels differ only in display name and in the directory used for
# SPARK_LOG_DIR / SPARK_LOCAL_DIRS. The here-document delimiter is quoted so
# the shell writes the JSON verbatim.

# kernel 1
RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel1 && \
    cat > /usr/local/share/jupyter/kernels/spark_kernel1/kernel.json <<'JSON'
{
  "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
  "display_name": "Spark Kernel 1",
  "language": "python",
  "env": {
    "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
    "SPARK_HOME": "/usr/local/spark",
    "PYSPARK_PYTHON": "python3",
    "SPARK_LOG_DIR": "/workspace/spark_logs/kernel1",
    "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel1",
    "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
  }
}
JSON

# kernel 2
RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel2 && \
    cat > /usr/local/share/jupyter/kernels/spark_kernel2/kernel.json <<'JSON'
{
  "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
  "display_name": "Spark Kernel 2",
  "language": "python",
  "env": {
    "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
    "SPARK_HOME": "/usr/local/spark",
    "PYSPARK_PYTHON": "python3",
    "SPARK_LOG_DIR": "/workspace/spark_logs/kernel2",
    "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel2",
    "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
  }
}
JSON

# ===== Switch back to the default Jupyter user =====
USER jovyan

# ===== Hugging Face port (7860) =====
EXPOSE 7860

# ===== Run JupyterLab on port 7860 =====
# The ServerApp flags are the current ones; the NotebookApp flags are the
# legacy spellings of the same settings and are kept for older servers.
# Token and password are both empty, i.e. authentication is disabled.
# `exec` replaces the wrapper shell with the Jupyter process so that stop
# signals reach it directly.
CMD ["bash", "-c", "echo 'JupyterLab berjalan di port 7860' && exec jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --ServerApp.token='' --ServerApp.password='' --NotebookApp.token='' --NotebookApp.password='' --LabApp.default_url=/lab"]