From b2fb654371a07a77ba4a39f11395836c6b593527 Mon Sep 17 00:00:00 2001 From: MarkKoz Date: Sat, 28 Dec 2019 15:17:34 -0800 Subject: Mount only what's needed in the chroot jail devfs and sysfs were problematic since they were being mounted as tmpfs, which is r/w. For example, the Python process could write to cgroups. Now, only what is needed to run Python gets mounted. This boils down to the venv itself and some shared libraries Python needs. * Use a config file for NsJail instead of command-line options * Map 65534 (nobody) user & group inside the user namespace to 65534 outside the namespace rather than mapping to current uid/gid (which was 0 AKA root) --- .dockerignore | 2 +- README.md | 2 +- docker/venv.Dockerfile | 2 +- scripts/.profile | 18 +--------- snekbox.cfg | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++ snekbox/nsjail.py | 22 +++--------- 6 files changed, 99 insertions(+), 37 deletions(-) create mode 100644 snekbox.cfg diff --git a/.dockerignore b/.dockerignore index afc786a..4f43e08 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,8 +2,8 @@ * # Make exceptions for what's needed -!docker/.profile !snekbox +!snekbox.cfg !tests !Pipfile !Pipfile.lock diff --git a/README.md b/README.md index f1fcac5..d90609e 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ result <- | |<----------| | <----------+ The code is executed in a Python process that is launched through [NsJail](https://github.com/google/nsjail), which is responsible for sandboxing the Python process. 
NsJail is configured as follows: -* Root directory is mounted as read-only +* All mounts are read-only * Time limit of 2 seconds * Maximum of 1 PID * Maximum memory of 52428800 bytes diff --git a/docker/venv.Dockerfile b/docker/venv.Dockerfile index be15f08..b415430 100644 --- a/docker/venv.Dockerfile +++ b/docker/venv.Dockerfile @@ -7,7 +7,7 @@ ENV PIP_NO_CACHE_DIR=false \ PIPENV_NOSPIN=1 \ PIPENV_VENV_IN_PROJECT=1 -COPY Pipfile Pipfile.lock /snekbox/ +COPY Pipfile Pipfile.lock snekbox.cfg /snekbox/ WORKDIR /snekbox RUN if [ -n "${DEV}" ]; then pipenv sync --dev; else pipenv sync; fi diff --git a/scripts/.profile b/scripts/.profile index daaf1dd..47ee141 100644 --- a/scripts/.profile +++ b/scripts/.profile @@ -15,23 +15,7 @@ nsjpy() { echo "${MEM_MAX}" > /sys/fs/cgroup/memory/NSJAIL/memory.memsw.limit_in_bytes nsjail \ - -Mo \ - --rlimit_as 700 \ - --chroot / \ - -E LANG=en_US.UTF-8 \ - -E OMP_NUM_THREADS=1 \ - -E OPENBLAS_NUM_THREADS=1 \ - -E MKL_NUM_THREADS=1 \ - -E VECLIB_MAXIMUM_THREADS=1 \ - -E NUMEXPR_NUM_THREADS=1 \ - -R/usr -R/lib -R/lib64 \ - --user 65534 \ - --group 65534 \ - --time_limit 2 \ - --disable_proc \ - --iface_no_lo \ - --cgroup_pids_max=1 \ - --cgroup_mem_max="${MEM_MAX}" \ + --config "${NSJAIL_CFG:-/snekbox/snekbox.cfg}" \ $nsj_args -- \ /snekbox/.venv/bin/python3 -Iq -c "$@" } diff --git a/snekbox.cfg b/snekbox.cfg new file mode 100644 index 0000000..2f4a0e4 --- /dev/null +++ b/snekbox.cfg @@ -0,0 +1,90 @@ +name: "snekbox" +description: "Execute Python" + +mode: ONCE +hostname: "snekbox" +cwd: "/snekbox" + +time_limit: 2 + +keep_env: false +envar: "LANG=en_US.UTF-8" +envar: "OMP_NUM_THREADS=1" +envar: "OPENBLAS_NUM_THREADS=1" +envar: "MKL_NUM_THREADS=1" +envar: "VECLIB_MAXIMUM_THREADS=1" +envar: "NUMEXPR_NUM_THREADS=1" + +keep_caps: false + +rlimit_as: 700 + +clone_newnet: true +clone_newuser: true +clone_newns: true +clone_newpid: true +clone_newipc: true +clone_newuts: true +clone_newcgroup: true + +uidmap { + inside_id: "65534" + 
outside_id: "65534" +} + +gidmap { + inside_id: "65534" + outside_id: "65534" +} + +mount_proc: false + +mount { + src: "/etc/ld.so.cache" + dst: "/etc/ld.so.cache" + is_bind: true + rw: false +} + +mount { + src: "/lib" + dst: "/lib" + is_bind: true + rw: false +} + +mount { + src: "/lib64" + dst: "/lib64" + is_bind: true + rw: false +} + +mount { + src: "/snekbox" + dst: "/snekbox" + is_bind: true + rw: false +} + +mount { + src: "/usr/local/lib" + dst: "/usr/local/lib" + is_bind: true + rw: false +} + +cgroup_mem_max: 52428800 +cgroup_mem_mount: "/sys/fs/cgroup/memory" +cgroup_mem_parent: "NSJAIL" + +cgroup_pids_max: 1 +cgroup_pids_mount: "/sys/fs/cgroup/pids" +cgroup_pids_parent: "NSJAIL" + +iface_no_lo: true + +exec_bin { + path: "/snekbox/.venv/bin/python3" + arg: "-Iq" +} diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index f160aa8..83d3b8d 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -24,6 +24,7 @@ CGROUP_PIDS_PARENT = Path("/sys/fs/cgroup/pids/NSJAIL") CGROUP_MEMORY_PARENT = Path("/sys/fs/cgroup/memory/NSJAIL") NSJAIL_PATH = os.getenv("NSJAIL_PATH", "/usr/sbin/nsjail") +NSJAIL_CFG = os.getenv("NSJAIL_CFG", "./snekbox.cfg") MEM_MAX = 52428800 @@ -31,9 +32,9 @@ class NsJail: """ Core Snekbox functionality, providing safe execution of Python code. 
- NsJail configuration: + Default NsJail configuration (snekbox.cfg): - - Root directory is mounted as read-only + - All mounts are read-only - Time limit of 2 seconds - Maximum of 1 PID - Maximum memory of 52428800 bytes @@ -117,21 +118,8 @@ class NsJail: """Execute Python 3 code in an isolated environment and return the completed process.""" with NamedTemporaryFile() as nsj_log: args = ( - self.nsjail_binary, "-Mo", - "--rlimit_as", "700", - "--chroot", "/", - "-E", "LANG=en_US.UTF-8", - "-E", "OMP_NUM_THREADS=1", - "-E", "OPENBLAS_NUM_THREADS=1", - "-E", "MKL_NUM_THREADS=1", - "-E", "VECLIB_MAXIMUM_THREADS=1", - "-E", "NUMEXPR_NUM_THREADS=1", - "-R/usr", "-R/lib", "-R/lib64", - "--user", "65534", # nobody - "--group", "65534", # nobody/nogroup - "--time_limit", "2", - "--disable_proc", - "--iface_no_lo", + self.nsjail_binary, + "--config", NSJAIL_CFG, "--log", nsj_log.name, f"--cgroup_mem_max={MEM_MAX}", "--cgroup_mem_mount", str(CGROUP_MEMORY_PARENT.parent), -- cgit v1.2.3