-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #57 from ministryofjustice/spark-update
Updated allspark-notebook to upstream spark-3.1.1 image
- Loading branch information
Showing
5 changed files
with
45 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,54 @@ | ||
FROM jupyter/all-spark-notebook:399cbb986c6b | ||
FROM jupyter/all-spark-notebook:spark-3.1.1@sha256:b73dad39ad5c469a92764e38d7cc4321040d3fedddcad7fcebc4ddc7f9c15ff2 | ||
|
||
LABEL [email protected] | ||
|
||
USER root | ||
ENV PATH=$PATH:$HOME/.local/bin | ||
|
||
# To match RStudio | ||
ENV NB_UID=1001 | ||
|
||
# Home directory contents are already owned by UID 1001 | ||
ENV CHOWN_HOME=no | ||
|
||
ENV PATH=$PATH:$HOME/.local/bin \ | ||
CHOWN_HOME=no \ | ||
PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.11.918,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" | ||
# `org.apache.hadoop:hadoop-aws` version must match `pyspark` version | ||
# NB these are sensible defaults but may need to be changed programmatically for | ||
# non local spark (ie. EMR etc.) | ||
ENV PYSPARK_SUBMIT_ARGS="--packages com.amazonaws:aws-java-sdk:1.11.918,org.apache.hadoop:hadoop-aws:3.0.1 pyspark-shell" | ||
|
||
RUN apt-get update && apt-get install -y \ | ||
ca-certificates-java \ | ||
openjdk-8-jdk \ | ||
openssh-client \ | ||
software-properties-common \ | ||
# Container must be run as root to use NB_UID | ||
USER root | ||
|
||
# Install OS packages | ||
# | ||
# The reason we have installed these has been lost. Including just in case. | ||
# | ||
# - gdal-bin | ||
# - libspatialindex-dev | ||
# - openssh-client | ||
# | ||
RUN apt-get update && \ | ||
apt-get install -y \ | ||
gdal-bin \ | ||
libspatialindex-dev \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
openssh-client && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
COPY files/pyspark-s3.py /tmp/pyspark-s3.py | ||
# I'm not sure this has any effect | ||
COPY files/hdfs-site.xml /usr/local/spark/conf/hdfs-site.xml | ||
|
||
RUN usermod -a -G "staff,users" "${NB_USER}" \ | ||
&& update-alternatives --set editor /bin/nano-tiny | ||
# add-user-to-group.sh adds the $NB_USER to group 50 (staff) used by RStudio | ||
COPY files/add-user-to-group.sh /usr/local/bin/before-notebook.d/ | ||
|
||
USER $NB_USER | ||
# Install Python packages | ||
# - pip - python package manager | ||
# - boto3 - python AWS library | ||
# - nbstripout - tool for stripping sensitive data out of notebooks | ||
# | ||
RUN pip install --upgrade \ | ||
pip \ | ||
boto3 \ | ||
pyspark==3.0.1 \ | ||
nbstripout \ | ||
etl-manager==7.3.0 \ | ||
gluejobutils==3.1.1 | ||
dataengineeringutils3==1.3.0 \ | ||
etl-manager==7.4.0 | ||
|
||
# Vi just doesn't cut it for some people | ||
RUN update-alternatives --set editor /bin/nano-tiny |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash | ||
|
||
usermod -a -G 50 "${NB_USER}" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters