diff options
author | 2023-08-30 13:51:15 +0100 | |
---|---|---|
committer | 2023-08-30 13:51:15 +0100 | |
commit | 08e7636601733a939e024962f6e2013311928d63 (patch) | |
tree | d503649f9fd4481c45398736d5098b563072ff69 | |
parent | Update tests to account for shared memory (diff) | |
parent | Merge pull request #185 from python-discord/jb3/refactor-eval-deps (diff) |
Merge branch 'main' into jb3/shared-mem
-rw-r--r-- | .dockerignore | 2 | ||||
-rw-r--r-- | .github/CONTRIBUTING.md | 8 | ||||
-rw-r--r-- | Dockerfile | 82 | ||||
-rw-r--r-- | LICENSE-THIRD-PARTY (renamed from NOTICE) | 39 | ||||
-rw-r--r-- | README.md | 12 | ||||
-rw-r--r-- | config/snekbox.cfg | 33 | ||||
-rw-r--r-- | deployment.yaml | 48 | ||||
-rw-r--r-- | requirements/eval-deps.pip | 24 | ||||
-rw-r--r-- | requirements/pip-tools.in | 4 | ||||
-rw-r--r-- | requirements/pip-tools.pip | 2 | ||||
-rwxr-xr-x | scripts/build_python.sh | 22 | ||||
-rw-r--r-- | snekbox/memfs.py | 18 | ||||
-rw-r--r-- | snekbox/nsjail.py | 4 | ||||
-rw-r--r-- | tests/test_nsjail.py | 63 |
14 files changed, 247 insertions, 114 deletions
diff --git a/.dockerignore b/.dockerignore index 6a360ff..ab815cd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,6 @@ !snekbox/ !tests/ !LICENSE -!NOTICE +!LICENSE-THIRD-PARTY !pyproject.toml !README.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index d0a6921..1124b8e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -62,6 +62,14 @@ Updating NsJail mainly involves two steps: Other things to look out for are breaking changes to NsJail's config format, its command-line interface, or its logging format. Additionally, dependencies may have to be adjusted in the Dockerfile to get a new version to build or run. +## Adding and Updating Python Interpreters + +Python interpreters are built using pyenv via the `scripts/build_python.sh` helper script. This script accepts a pyenv version specifier (`pyenv install --list`) and builds the interpreter in a version-specific directory under `/lang/python`. In the image, each minor version of a Python interpreter should have its own build stage and the resulting `/lang/python` directory can be copied from that stage into the `base` stage. + +When updating a patch version (e.g. 3.11.3 to 3.11.4), edit the existing build stage in the image for the minor version (3.11); do not add a new build stage. To have access to a new version, pyenv likely needs to be updated. To do so, change the tag in the `git clone` command in the image, but only for the build stage that needs access to the new version. Updating pyenv for all build stages will just cause unnecessary build cache invalidations. + +To change the default interpreter used by NsJail, update the target of the `/lang/python/default` symlink created in the `base` stage. + [readme]: ../README.md [Dockerfile]: ../Dockerfile [Compose v2]: https://docs.docker.com/compose/compose-v2/ @@ -1,52 +1,73 @@ -# syntax=docker/dockerfile:1 -FROM python:3.11-slim-buster as builder +# syntax=docker/dockerfile:1.4 +FROM buildpack-deps:buster as builder-nsjail WORKDIR /nsjail RUN apt-get -y update \ - && apt-get install -y \ - bison=2:3.3.* \ - flex=2.6.* \ - g++=4:8.3.* \ - gcc=4:8.3.* \ - git=1:2.20.* \ - libprotobuf-dev=3.6.* \ - libnl-route-3-dev=3.4.* \ - make=4.2.* \ - pkg-config=0.29-6 \ - protobuf-compiler=3.6.* + && apt-get install -y --no-install-recommends \ + bison\ + flex \ + libprotobuf-dev\ + libnl-route-3-dev \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + RUN git clone -b master --single-branch https://github.com/google/nsjail.git . \ && git checkout dccf911fd2659e7b08ce9507c25b2b38ec2c5800 RUN make # ------------------------------------------------------------------------------ +FROM buildpack-deps:buster as builder-py-base + +ENV PYENV_ROOT=/pyenv \ + PYTHON_CONFIGURE_OPTS='--disable-test-modules --enable-optimizations \ + --with-lto --with-system-expat --without-ensurepip' + +RUN apt-get -y update \ + && apt-get install -y --no-install-recommends \ + libxmlsec1-dev \ + tk-dev \ + && rm -rf /var/lib/apt/lists/* + +COPY --link scripts/build_python.sh / + +# ------------------------------------------------------------------------------ +FROM builder-py-base as builder-py-3_11 +RUN git clone -b v2.3.24 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \ + && /build_python.sh 3.11.4 + +# ------------------------------------------------------------------------------ +FROM builder-py-base as builder-py-3_12 +RUN git clone -b v2.3.24 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \ + && /build_python.sh 3.12.0rc1 + +# ------------------------------------------------------------------------------ FROM python:3.11-slim-buster as base -# Everything will be a user install to allow snekbox's dependencies to be kept -# separate from the packages exposed during eval. -ENV PATH=/root/.local/bin:$PATH \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - PIP_NO_CACHE_DIR=false \ - PIP_USER=1 +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=false RUN apt-get -y update \ - && apt-get install -y \ - gcc=4:8.3.* \ - git=1:2.20.* \ - libnl-route-3-200=3.4.* \ - libprotobuf17=3.6.* \ + && apt-get install -y --no-install-recommends \ + gcc \ + git \ + libnl-route-3-200 \ + libprotobuf17 \ && rm -rf /var/lib/apt/lists/* -COPY --from=builder /nsjail/nsjail /usr/sbin/ -RUN chmod +x /usr/sbin/nsjail +COPY --link --from=builder-nsjail /nsjail/nsjail /usr/sbin/ +COPY --link --from=builder-py-3_11 /lang/ /lang/ +COPY --link --from=builder-py-3_12 /lang/ /lang/ + +RUN chmod +x /usr/sbin/nsjail \ + && ln -s /lang/python/3.11/ /lang/python/default # ------------------------------------------------------------------------------ FROM base as venv -COPY requirements/ /snekbox/requirements/ +COPY --link requirements/ /snekbox/requirements/ WORKDIR /snekbox -# pip installs to the default user site since PIP_USER is set. RUN pip install -U -r requirements/requirements.pip # This must come after the first pip command! From the docs: @@ -58,11 +79,12 @@ ARG DEV RUN if [ -n "${DEV}" ]; \ then \ pip install -U -r requirements/coverage.pip \ - && PYTHONUSERBASE=/snekbox/user_base pip install numpy~=1.19; \ + && export PYTHONUSERBASE=/snekbox/user_base \ + && /lang/python/default/bin/python -m pip install --user numpy~=1.19; \ fi # At the end to avoid re-installing dependencies when only a config changes. -COPY config/ /snekbox/config/ +COPY --link config/ /snekbox/config/ ENTRYPOINT ["gunicorn"] CMD ["-c", "config/gunicorn.conf.py"] diff --git a/NOTICE b/LICENSE-THIRD-PARTY index b6e5fbc..684f2df 100644 --- a/NOTICE +++ b/LICENSE-THIRD-PARTY @@ -1,9 +1,36 @@ -The Python code at snekbox/config_pb2.py was generated from config.proto in nsjail -Copyright 2014 Google Inc. All Rights Reserved. -Copyright 2016 Sergiusz Bazanski. All Rights Reserved. - -------------------------------------------------------------------------------- - +-------------------------------------------------------------------------------- + MIT License +Applies to: + - Copyright (c) 2014 Docker, Inc. + - scripts/build_python.sh: find command for de-bloating Python install +-------------------------------------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + Apache License, Version 2.0 +Applies to: + - Copyright 2014 Google Inc. All Rights Reserved. + Copyright 2016 Sergiusz Bazanski. All Rights Reserved. + - snekbox/config_pb2.py: generated from config.proto in nsjail +-------------------------------------------------------------------------------- Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -7,8 +7,7 @@ Python sandbox runners for executing code in isolation aka snekbox. -Supports a memory [virtual read/write file system](#virtual-file-system) within the sandbox, -allowing text or binary files to be sent and returned. +Supports a memory [virtual read/write file system](#virtual-file-system) within the sandbox, allowing text or binary files to be sent and returned. A client sends Python code to a snekbox, the snekbox executes the code, and finally the results of the execution are returned to the client. @@ -100,22 +99,19 @@ Name | Description ## Third-party Packages -By default, the Python interpreter has no access to any packages besides the -standard library. Even snekbox's own dependencies like Falcon and Gunicorn are -not exposed. +By default, the Python interpreter has no access to any packages besides the standard library. Even snekbox's own dependencies like Falcon and Gunicorn are not exposed. To expose third-party Python packages during evaluation, install them to a custom user site: ```sh -docker exec snekbox /bin/sh -c 'PYTHONUSERBASE=/snekbox/user_base pip install numpy' +docker exec snekbox /bin/sh -c \ + 'PYTHONUSERBASE=/snekbox/user_base /lang/python/default/bin/python -m pip install --user numpy' ``` In the above command, `snekbox` is the name of the running container. The name may be different and can be checked with `docker ps`. The packages will be installed to the user site within `/snekbox/user_base`. To persist the installed packages, a volume for the directory can be created with Docker. For an example, see [`docker-compose.yml`]. -If `pip`, `setuptools`, or `wheel` are dependencies or need to be exposed, then use the `--ignore-installed` option with pip. However, note that this will also re-install packages present in the custom user site, effectively making caching it futile. Current limitations of pip don't allow it to ignore packages extant outside the installation destination. - ## Development Environment See [CONTRIBUTING.md](.github/CONTRIBUTING.md). diff --git a/config/snekbox.cfg b/config/snekbox.cfg index c5d7ae3..1bd2ab6 100644 --- a/config/snekbox.cfg +++ b/config/snekbox.cfg @@ -14,8 +14,10 @@ envar: "OPENBLAS_NUM_THREADS=5" envar: "MKL_NUM_THREADS=5" envar: "VECLIB_MAXIMUM_THREADS=5" envar: "NUMEXPR_NUM_THREADS=5" -envar: "PYTHONPATH=/snekbox/user_base/lib/python3.11/site-packages" +envar: "PYTHONDONTWRITEBYTECODE=true" envar: "PYTHONIOENCODING=utf-8:strict" +envar: "PYTHONUNBUFFERED=true" +envar: "PYTHONUSERBASE=/snekbox/user_base" envar: "HOME=home" keep_caps: false @@ -79,29 +81,8 @@ mount { } mount { - src: "/usr/local/lib" - dst: "/usr/local/lib" - is_bind: true - rw: false -} - -mount { - src: "/usr/local/bin/python" - dst: "/usr/local/bin/python" - is_bind: true - rw: false -} - -mount { - src: "/usr/local/bin/python3" - dst: "/usr/local/bin/python3" - is_bind: true - rw: false -} - -mount { - src: "/usr/local/bin/python3.11" - dst: "/usr/local/bin/python3.11" + src: "/lang" + dst: "/lang" is_bind: true rw: false } @@ -124,6 +105,6 @@ cgroup_pids_mount: "/sys/fs/cgroup/pids" iface_no_lo: true exec_bin { - path: "/usr/local/bin/python" - arg: "-BSqu" + path: "/lang/python/default/bin/python" + arg: "" } diff --git a/deployment.yaml b/deployment.yaml index b0856f3..3ce8d00 100644 --- a/deployment.yaml +++ b/deployment.yaml @@ -12,6 +12,22 @@ spec: labels: app: snekbox spec: + initContainers: + - name: deps-install + image: ghcr.io/python-discord/snekbox:latest + imagePullPolicy: Always + volumeMounts: + - name: snekbox-user-base-volume + mountPath: /snekbox/user_base + env: + - name: PYTHONUSERBASE + value: /snekbox/user_base + command: + - "/bin/sh" + - "-c" + - >- + find /lang/python -mindepth 1 -maxdepth 1 -type d -exec + {}/bin/python -m pip install --user -U -r requirements/eval-deps.pip \; containers: - name: snekbox image: ghcr.io/python-discord/snekbox:latest @@ -23,38 +39,6 @@ spec: volumeMounts: - name: snekbox-user-base-volume mountPath: /snekbox/user_base - lifecycle: - postStart: - exec: - command: - - "/bin/sh" - - "-c" - - >- - PYTHONUSERBASE=/snekbox/user_base - pip install --user --upgrade - anyio[trio]~=3.6 - arrow~=1.2 - attrs~=22.2 - beautifulsoup4~=4.11 - einspect~=0.5 - fishhook~=0.2 - forbiddenfruit~=0.1 - fuzzywuzzy~=0.18 - lark~=1.1 - matplotlib~=3.6 - more-itertools~=9.0 - networkx~=3.0 - numpy~=1.24 - pandas~=1.5 - pendulum~=2.1 - python-dateutil~=2.8 - pyyaml~=6.0 - scipy~=1.10 - sympy~=1.11 - toml~=0.10 - typing-extensions~=4.4 - tzdata~=2022.7 - yarl~=1.8 volumes: - name: snekbox-user-base-volume hostPath: diff --git a/requirements/eval-deps.pip b/requirements/eval-deps.pip new file mode 100644 index 0000000..79f8218 --- /dev/null +++ b/requirements/eval-deps.pip @@ -0,0 +1,24 @@ +anyio[trio]~=3.6 +arrow~=1.2 +attrs~=22.2 +beautifulsoup4~=4.11 +einspect~=0.5 +fishhook~=0.2 +forbiddenfruit~=0.1 +fuzzywuzzy~=0.18 +lark~=1.1 +matplotlib~=3.6 ; python_version == '3.11' +more-itertools~=9.0 +networkx~=3.0 +numpy~=1.24 ; python_version == '3.11' +numpy==1.26.0b1 ; python_version == '3.12' +pandas~=1.5 ; python_version == '3.11' +pendulum~=2.1 ; python_version == '3.11' +python-dateutil~=2.8 +pyyaml~=6.0 +scipy~=1.10 +sympy~=1.11 +toml~=0.10 +typing-extensions~=4.4 +tzdata~=2022.7 +yarl~=1.8 ; python_version == '3.11' diff --git a/requirements/pip-tools.in b/requirements/pip-tools.in index e459df9..29d8d31 100644 --- a/requirements/pip-tools.in +++ b/requirements/pip-tools.in @@ -2,5 +2,5 @@ -c lint.pip -c requirements.pip -# Minimum version which supports pip>=22.1 -pip-tools>=6.6.1 +# Minimum version which supports pip>=23.2 +pip-tools>=7.0.0 diff --git a/requirements/pip-tools.pip b/requirements/pip-tools.pip index d87f3d6..4793c2d 100644 --- a/requirements/pip-tools.pip +++ b/requirements/pip-tools.pip @@ -10,7 +10,7 @@ click==8.1.3 # via pip-tools packaging==23.0 # via build -pip-tools==6.12.3 +pip-tools==7.3.0 # via -r requirements/pip-tools.in pyproject-hooks==1.0.0 # via build diff --git a/scripts/build_python.sh b/scripts/build_python.sh new file mode 100755 index 0000000..da937c2 --- /dev/null +++ b/scripts/build_python.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euxo pipefail +shopt -s inherit_errexit + +py_version="${1}" + +# Install Python interpreter under e.g. /lang/python/3.11/ (no patch version). +"${PYENV_ROOT}/plugins/python-build/bin/python-build" \ + "${py_version}" \ + "/lang/python/${py_version%.*}" +"/lang/python/${py_version%.*}/bin/python" -m pip install -U pip + +# Clean up some unnecessary files to reduce image size bloat. +find /lang/python/ -depth \ +\( \ + \( -type d -a \( \ + -name test -o -name tests -o -name idle_test \ + \) \) \ + -o \( -type f -a \( \ + -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \ + \) \) \ +\) -exec rm -rf '{}' + diff --git a/snekbox/memfs.py b/snekbox/memfs.py index 991766b..40b57c4 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -144,6 +144,7 @@ class MemFS: """ start_time = time.monotonic() count = 0 + total_size = 0 files = glob.iglob(pattern, root_dir=str(self.output), recursive=True, include_hidden=False) for file in (Path(self.output, f) for f in files): if timeout and (time.monotonic() - start_time) > timeout: @@ -152,10 +153,15 @@ class MemFS: if not file.is_file(): continue + # file.is_file allows file to be a regular file OR a symlink pointing to a regular file. + # It is important that we follow symlinks here, so when we check st_size later it is the + # size of the underlying file rather than of the symlink. + stat = file.stat(follow_symlinks=True) + if exclude_files and (orig_time := exclude_files.get(file)): - new_time = file.stat().st_mtime + new_time = stat.st_mtime log.info(f"Checking {file.name} ({orig_time=}, {new_time=})") - if file.stat().st_mtime == orig_time: + if stat.st_mtime == orig_time: log.info(f"Skipping {file.name!r} as it has not been modified") continue @@ -163,6 +169,14 @@ class MemFS: log.info(f"Max attachments {limit} reached, skipping remaining files") break + # Due to sparse files and links the total size could end up being greater + # than the size limit of the tmpfs. Limit the total size to be read to + # prevent high memory usage / OOM when reading files. + total_size += stat.st_size + if total_size > self.instance_size: + log.info(f"Max file size {self.instance_size} reached, skipping remaining files") + break + count += 1 log.info(f"Found valid file for upload {file.name!r}") yield FileAttachment.from_path(file, relative_to=self.output) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index f64830a..1de7b1e 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -221,9 +221,9 @@ class NsJail: *nsjail_args, "--", self.config.exec_bin.path, - *self.config.exec_bin.arg, - # Filter out empty strings at start of py_args + # Filter out empty strings at start of Python args # (causes issues with python cli) + *iter_lstrip(self.config.exec_bin.arg), *iter_lstrip(py_args), ] diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index 61d0e8b..fe55290 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -82,7 +82,7 @@ class NsJailTests(unittest.TestCase): for _ in range({max_pids}): print(subprocess.Popen( [ - '/usr/local/bin/python3', + '/lang/python/default/bin/python', '-c', 'import time; time.sleep(1)' ], @@ -240,8 +240,9 @@ class NsJailTests(unittest.TestCase): os.symlink("file", f"file{i}") """ ).strip() - - nsjail = NsJail(memfs_instance_size=32 * Size.MiB, files_timeout=1) + # A value higher than the actual memory needed is used to avoid the limit + # on total file size being reached before the timeout when reading. + nsjail = NsJail(memfs_instance_size=512 * Size.MiB, files_timeout=1) result = nsjail.python3(["-c", code]) self.assertEqual(result.returncode, None) self.assertEqual( @@ -272,6 +273,60 @@ class NsJailTests(unittest.TestCase): ) self.assertEqual(result.stderr, None) + def test_file_parsing_size_limit_sparse_files(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + with open("test.txt", "w") as f: + os.truncate(f.fileno(), {tmpfs_size // 2 + 1}) + + with open("test2.txt", "w") as f: + os.truncate(f.fileno(), {tmpfs_size // 2 + 1}) + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 1) + + def test_file_parsing_size_limit_sparse_files_large(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + with open("test.txt", "w") as f: + # Use a very large value to ensure the test fails if the + # file is read even if would have been discarded later. + os.truncate(f.fileno(), {1024 * Size.TiB}) + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 0) + + def test_file_parsing_size_limit_symlinks(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + data = "a" * 1024 + size = {tmpfs_size // 8} + + with open("file", "w") as f: + for _ in range(size // 1024): + f.write(data) + + for i in range(20): + os.symlink("file", f"file{{i}}") + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 8) + def test_file_write_error(self): """Test errors during file write.""" result = self.nsjail.python3( @@ -478,7 +533,7 @@ class NsJailTests(unittest.TestCase): for args, expected in cases: with self.subTest(args=args): result = self.nsjail.python3(py_args=args) - idx = result.args.index("-BSqu") + idx = result.args.index(self.nsjail.config.exec_bin.path) self.assertEqual(result.args[idx + 1 :], expected) self.assertEqual(result.returncode, 0) |