Mandr1 committed on
Commit
1d64ecc
·
verified ·
1 Parent(s): 42b3a03

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +37 -53
Dockerfile CHANGED
@@ -4,35 +4,24 @@ FROM jupyter/base-notebook:latest
4
  # Jalankan sebagai root hanya untuk instalasi
5
  USER root
6
 
7
- # ===== Install Java 17 dan utilitas =====
8
- RUN apt-get update && \
9
- apt-get install -y --no-install-recommends openjdk-17-jdk curl ca-certificates wget && \
10
- apt-get clean && rm -rf /var/lib/apt/lists/*
11
 
12
  ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
13
  ENV PATH=$JAVA_HOME/bin:$PATH
14
 
15
  # ===== Install Apache Spark 3.5.0 =====
16
- ENV SPARK_VERSION=3.5.0
17
- ENV SPARK_DIST=spark-${SPARK_VERSION}-bin-hadoop3
18
- ENV SPARK_URL=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz
19
-
20
- RUN curl -L -o /tmp/spark.tgz "${SPARK_URL}" && \
21
- tar -xzf /tmp/spark.tgz -C /usr/local/ && \
22
- rm /tmp/spark.tgz && \
23
- mv /usr/local/${SPARK_DIST} /usr/local/spark
24
 
25
  # ===== Set environment Spark =====
26
  ENV SPARK_HOME=/usr/local/spark
27
  ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
28
  ENV PYSPARK_PYTHON=python3
29
- # Pastikan nama py4j tidak terputus; Spark 3.5.0 biasanya bundling py4j 0.10.9.7
30
  ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH
31
 
32
  # ===== Install library Python =====
33
- # NOTE: jika kamu ingin menggunakan pyspark via pip, gunakan pyspark==3.5.0 untuk cocok dengan Spark.
34
- # Saya sengaja tidak meng-install 'pyspark' lewat pip untuk menghindari mismatch dengan binary Spark yang diinstall.
35
- RUN pip install --no-cache-dir pandas matplotlib findspark ipykernel jupyterlab
36
 
37
  # ===== Perbaiki izin direktori home Jupyter =====
38
  RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
@@ -46,41 +35,33 @@ WORKDIR /workspace
46
 
47
  # ===== Buat dua kernel Spark terisolasi =====
48
  RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel1 && \
49
- mkdir -p /usr/local/share/jupyter/kernels/spark_kernel2
50
-
51
- # kernel 1
52
- RUN cat > /usr/local/share/jupyter/kernels/spark_kernel1/kernel.json <<'JSON'
53
- {
54
- "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
55
- "display_name": "Spark Kernel 1",
56
- "language": "python",
57
- "env": {
58
- "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
59
- "SPARK_HOME": "/usr/local/spark",
60
- "PYSPARK_PYTHON": "python3",
61
- "SPARK_LOG_DIR": "/workspace/spark_logs/kernel1",
62
- "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel1",
63
- "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
64
- }
65
- }
66
- JSON
67
-
68
- # kernel 2
69
- RUN cat > /usr/local/share/jupyter/kernels/spark_kernel2/kernel.json <<'JSON'
70
- {
71
- "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
72
- "display_name": "Spark Kernel 2",
73
- "language": "python",
74
- "env": {
75
- "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
76
- "SPARK_HOME": "/usr/local/spark",
77
- "PYSPARK_PYTHON": "python3",
78
- "SPARK_LOG_DIR": "/workspace/spark_logs/kernel2",
79
- "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel2",
80
- "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
81
- }
82
- }
83
- JSON
84
 
85
  # ===== Ubah kembali ke user default Jupyter =====
86
  USER jovyan
@@ -89,5 +70,8 @@ USER jovyan
89
  EXPOSE 7860
90
 
91
  # ===== Jalankan JupyterLab di port 7860 =====
92
- # Gunakan ServerApp flags (kompatibel dengan jupyter_server/jupyterlab terbaru)
93
- CMD ["bash", "-c", "echo 'JupyterLab berjalan di port 7860' && jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --ServerApp.token='' --ServerApp.password='' --NotebookApp.token='' --NotebookApp.password='' --LabApp.default_url=/lab"]
 
 
 
 
4
  # Jalankan sebagai root hanya untuk instalasi
5
  USER root
6
 
7
+ # ===== Install Java 17 (no recommended extras; ca-certificates kept for HTTPS downloads; apt lists removed in the same layer) =====
8
+ RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl openjdk-17-jdk && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 
9
 
10
  ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
11
  ENV PATH=$JAVA_HOME/bin:$PATH
12
 
13
  # ===== Install Apache Spark 3.5.0 =====
14
+ RUN curl -L https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
15
+ | tar -xz -C /usr/local/ && mv /usr/local/spark-3.5.0-bin-hadoop3 /usr/local/spark
 
 
 
 
 
 
16
 
17
  # ===== Set environment Spark =====
18
  ENV SPARK_HOME=/usr/local/spark
19
  ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
20
  ENV PYSPARK_PYTHON=python3
 
21
  ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH
22
 
23
  # ===== Install library Python =====
24
+ RUN pip install --no-cache-dir pyspark pandas matplotlib findspark ipykernel jupyterlab
 
 
25
 
26
  # ===== Perbaiki izin direktori home Jupyter =====
27
  RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
 
35
 
36
  # ===== Create two isolated Spark kernels =====
  # Creates the spark_kernel1 and spark_kernel2 directories under
  # /usr/local/share/jupyter/kernels and writes a kernel.json into each.
  # Both specs launch `python3 -m ipykernel_launcher` with the same JAVA_HOME,
  # SPARK_HOME, PYSPARK_PYTHON and PYTHONPATH; they differ only in display_name
  # and in the SPARK_LOG_DIR / SPARK_LOCAL_DIRS path (kernel1 vs kernel2).
  # The trailing backslashes are Dockerfile line continuations, so each JSON
  # document reaches `echo` as one single-quoted argument on a single line.
37
  RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel1 && \
38
+ mkdir -p /usr/local/share/jupyter/kernels/spark_kernel2 && \
39
+ echo '{ \
40
+ "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"], \
41
+ "display_name": "Spark Kernel 1", \
42
+ "language": "python", \
43
+ "env": { \
44
+ "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64", \
45
+ "SPARK_HOME": "/usr/local/spark", \
46
+ "PYSPARK_PYTHON": "python3", \
47
+ "SPARK_LOG_DIR": "/workspace/spark_logs/kernel1", \
48
+ "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel1", \
49
+ "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip" \
50
+ } \
51
+ }' > /usr/local/share/jupyter/kernels/spark_kernel1/kernel.json && \
52
+ echo '{ \
53
+ "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"], \
54
+ "display_name": "Spark Kernel 2", \
55
+ "language": "python", \
56
+ "env": { \
57
+ "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64", \
58
+ "SPARK_HOME": "/usr/local/spark", \
59
+ "PYSPARK_PYTHON": "python3", \
60
+ "SPARK_LOG_DIR": "/workspace/spark_logs/kernel2", \
61
+ "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel2", \
62
+ "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip" \
63
+ } \
64
+ }' > /usr/local/share/jupyter/kernels/spark_kernel2/kernel.json
 
 
 
 
 
 
 
 
65
 
66
  # ===== Ubah kembali ke user default Jupyter =====
67
  USER jovyan
 
70
  EXPOSE 7860
71
 
72
  # ===== Jalankan JupyterLab di port 7860 =====
73
+ CMD ["bash", "-c", "\
74
+ echo '🚀 JupyterLab berjalan di Hugging Face Spaces pada port 7860' && \
75
+ jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --allow-root \
76
+ --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.default_url=/lab \
77
+ "]