Mandr1 committed on
Commit
42b3a03
·
verified ·
1 Parent(s): 1ad8acc

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +78 -22
Dockerfile CHANGED
@@ -1,37 +1,93 @@
1
- # Gunakan base image resmi dari Jupyter
2
  FROM jupyter/base-notebook:latest
 
3
  # Jalankan sebagai root hanya untuk instalasi
4
  USER root
5
- # ===== Install Java 17 =====
6
- RUN apt-get update && apt-get install -y openjdk-17-jdk curl && apt-get clean
 
 
 
 
7
  ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
8
  ENV PATH=$JAVA_HOME/bin:$PATH
9
- # ===== Install Apache Spark =====
10
- RUN curl -L https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
11
- | tar -xz -C /usr/local/ && mv /usr/local/spark-3.5.0-bin-hadoop3 /usr/local/spark
 
 
 
 
 
 
 
 
 
12
  ENV SPARK_HOME=/usr/local/spark
13
- ENV PATH=$SPARK_HOME/bin:$PATH
14
  ENV PYSPARK_PYTHON=python3
15
- # ===== Install Python package =====
16
- RUN pip install --no-cache-dir pyspark pandas matplotlib findspark ipykernel
 
 
 
 
 
 
17
  # ===== Perbaiki izin direktori home Jupyter =====
18
  RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
19
- chown -R 1000:100 /home/jovyan && \
20
- chmod -R 777 /home/jovyan
21
- # ===== Buat dua kernel Spark =====
22
- RUN python -m ipykernel install --user --name spark_kernel1 --display-name "Spark Kernel 1" && \
23
- python -m ipykernel install --user --name spark_kernel2 --display-name "Spark Kernel 2"
24
- # ===== Direktori kerja =====
25
  WORKDIR /workspace
26
- RUN chmod -R 777 /workspace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # ===== Ubah kembali ke user default Jupyter =====
29
  USER jovyan
 
30
  # ===== Port Hugging Face (7860) =====
31
  EXPOSE 7860
32
- # ===== Jalankan Jupyter di port 7860 =====
33
- CMD ["bash", "-c", "\
34
- echo ' JupyterLab berjalan di Hugging Face Spaces pada port 7860' && \
35
- jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --allow-root \
36
- --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.default_url=/lab \
37
- "]
 
1
+ # ===== Gunakan base image resmi Jupyter =====
2
  FROM jupyter/base-notebook:latest
3
+
4
  # Jalankan sebagai root hanya untuk instalasi
5
  USER root
6
+
7
+ # ===== Install Java 17 dan utilitas =====
8
+ RUN apt-get update && \
9
+ apt-get install -y --no-install-recommends openjdk-17-jdk curl ca-certificates wget && \
10
+ apt-get clean && rm -rf /var/lib/apt/lists/*
11
+
12
  ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
13
  ENV PATH=$JAVA_HOME/bin:$PATH
14
+
15
+ # ===== Install Apache Spark 3.5.0 =====
16
+ ENV SPARK_VERSION=3.5.0
17
+ ENV SPARK_DIST=spark-${SPARK_VERSION}-bin-hadoop3
18
+ ENV SPARK_URL=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz
19
+
20
+ RUN curl -L -o /tmp/spark.tgz "${SPARK_URL}" && \
21
+ tar -xzf /tmp/spark.tgz -C /usr/local/ && \
22
+ rm /tmp/spark.tgz && \
23
+ mv /usr/local/${SPARK_DIST} /usr/local/spark
24
+
25
+ # ===== Set environment Spark =====
26
  ENV SPARK_HOME=/usr/local/spark
27
+ ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
28
  ENV PYSPARK_PYTHON=python3
29
+ # Pastikan nama py4j tidak terputus; Spark 3.5.0 biasanya bundling py4j 0.10.9.7
30
+ ENV PYTHONPATH=$SPARK_HOME/python/:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH
31
+
32
+ # ===== Install library Python =====
33
+ # NOTE: jika kamu ingin menggunakan pyspark via pip, gunakan pyspark==3.5.0 untuk cocok dengan Spark.
34
+ # Saya sengaja tidak meng-install 'pyspark' lewat pip untuk menghindari mismatch dengan binary Spark yang diinstall.
35
+ RUN pip install --no-cache-dir pandas matplotlib findspark ipykernel jupyterlab
36
+
37
  # ===== Perbaiki izin direktori home Jupyter =====
38
  RUN mkdir -p /home/jovyan/.local/share/jupyter/runtime && \
39
+ chown -R 1000:100 /home/jovyan && \
40
+ chmod -R 777 /home/jovyan
41
+
42
+ # ===== Siapkan direktori kerja dan log Spark =====
43
+ RUN mkdir -p /workspace/spark_logs/kernel1 /workspace/spark_logs/kernel2 && \
44
+ chmod -R 777 /workspace
45
  WORKDIR /workspace
46
+
47
+ # ===== Buat dua kernel Spark terisolasi =====
48
+ RUN mkdir -p /usr/local/share/jupyter/kernels/spark_kernel1 && \
49
+ mkdir -p /usr/local/share/jupyter/kernels/spark_kernel2
50
+
51
+ # kernel 1
52
+ RUN cat > /usr/local/share/jupyter/kernels/spark_kernel1/kernel.json <<'JSON'
53
+ {
54
+ "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
55
+ "display_name": "Spark Kernel 1",
56
+ "language": "python",
57
+ "env": {
58
+ "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
59
+ "SPARK_HOME": "/usr/local/spark",
60
+ "PYSPARK_PYTHON": "python3",
61
+ "SPARK_LOG_DIR": "/workspace/spark_logs/kernel1",
62
+ "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel1",
63
+ "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
64
+ }
65
+ }
66
+ JSON
67
+
68
+ # kernel 2
69
+ RUN cat > /usr/local/share/jupyter/kernels/spark_kernel2/kernel.json <<'JSON'
70
+ {
71
+ "argv": ["python3", "-m", "ipykernel_launcher", "-f", "{connection_file}"],
72
+ "display_name": "Spark Kernel 2",
73
+ "language": "python",
74
+ "env": {
75
+ "JAVA_HOME": "/usr/lib/jvm/java-17-openjdk-amd64",
76
+ "SPARK_HOME": "/usr/local/spark",
77
+ "PYSPARK_PYTHON": "python3",
78
+ "SPARK_LOG_DIR": "/workspace/spark_logs/kernel2",
79
+ "SPARK_LOCAL_DIRS": "/workspace/spark_logs/kernel2",
80
+ "PYTHONPATH": "/usr/local/spark/python/:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip"
81
+ }
82
+ }
83
+ JSON
84
 
85
  # ===== Ubah kembali ke user default Jupyter =====
86
  USER jovyan
87
+
88
  # ===== Port Hugging Face (7860) =====
89
  EXPOSE 7860
90
+
91
+ # ===== Jalankan JupyterLab di port 7860 =====
92
+ # Gunakan ServerApp flags (kompatibel dengan jupyter_server/jupyterlab terbaru)
93
+ CMD ["bash", "-c", "echo 'JupyterLab berjalan di port 7860' && jupyter lab --ip=0.0.0.0 --port=7860 --no-browser --ServerApp.token='' --ServerApp.password='' --NotebookApp.token='' --NotebookApp.password='' --LabApp.default_url=/lab"]