process_sci_data / Dockerfile
zhijian's picture
Update Dockerfile
e30f44e
raw
history blame
465 Bytes
# Image for the Data-Juicer "process_sci_data" Streamlit demo.
FROM python:3.8

# OS-level dependencies: a Java JDK/JRE and git (git is needed both for the
# pip VCS install and for the repository clone further down).
# apt-get is used instead of apt because it has a stable scripting interface;
# recommended packages are skipped and the package index is removed in the
# same layer so it never ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        default-jdk \
        default-jre \
        git \
    && rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/default-java

# Python dependencies, installed system-wide as root.
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
RUN pip install --no-cache-dir git+https://github.com/alibaba/data-juicer.git
# Installed in a separate, later step on purpose: the fsspec pin must win over
# whatever version the data-juicer install above may have pulled in.
RUN pip install --no-cache-dir fsspec==2023.3.0 jsonlines

# Create an unprivileged user with a fixed UID and drop root for everything
# that follows (clone and runtime).
RUN useradd -m -u 1000 dj
USER dj
ENV HOME=/home/dj

# Fetch the demo sources into the user's home directory. The clone runs as
# "dj", so the checkout is already owned by that user and no follow-up
# chown step is required.
WORKDIR $HOME
RUN git clone https://github.com/alibaba/data-juicer.git

# Start the Streamlit app from the demo directory inside the checkout.
WORKDIR $HOME/data-juicer/demos/process_sci_data
CMD ["streamlit", "run", "app.py"]