aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.dockerignore2
-rw-r--r--.github/CONTRIBUTING.md8
-rw-r--r--Dockerfile82
-rw-r--r--LICENSE-THIRD-PARTY (renamed from NOTICE)39
-rw-r--r--README.md12
-rw-r--r--config/snekbox.cfg33
-rw-r--r--deployment.yaml48
-rw-r--r--requirements/eval-deps.pip24
-rw-r--r--requirements/pip-tools.in4
-rw-r--r--requirements/pip-tools.pip2
-rwxr-xr-xscripts/build_python.sh22
-rw-r--r--snekbox/memfs.py18
-rw-r--r--snekbox/nsjail.py4
-rw-r--r--tests/test_nsjail.py63
14 files changed, 247 insertions, 114 deletions
diff --git a/.dockerignore b/.dockerignore
index 6a360ff..ab815cd 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,6 +9,6 @@
!snekbox/
!tests/
!LICENSE
-!NOTICE
+!LICENSE-THIRD-PARTY
!pyproject.toml
!README.md
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index d0a6921..1124b8e 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -62,6 +62,14 @@ Updating NsJail mainly involves two steps:
Other things to look out for are breaking changes to NsJail's config format, its command-line interface, or its logging format. Additionally, dependencies may have to be adjusted in the Dockerfile to get a new version to build or run.
+## Adding and Updating Python Interpreters
+
+Python interpreters are built using pyenv via the `scripts/build_python.sh` helper script. This script accepts a pyenv version specifier (run `pyenv install --list` to see the available ones) and builds the interpreter in a directory named after its minor version under `/lang/python` (e.g. `/lang/python/3.11` for 3.11.4). In the Dockerfile, each minor version of a Python interpreter should have its own build stage, and the resulting `/lang/python` directory can be copied from that stage into the `base` stage.
+
+When updating a patch version (e.g. 3.11.3 to 3.11.4), edit the existing build stage in the Dockerfile for the minor version (3.11); do not add a new build stage. To have access to a new version, pyenv likely needs to be updated. To do so, change the tag in the `git clone` command in the Dockerfile, but only for the build stage that needs access to the new version. Updating pyenv for all build stages will just cause unnecessary build cache invalidations.
+
+To change the default interpreter used by NsJail, update the target of the `/lang/python/default` symlink created in the `base` stage.
+
[readme]: ../README.md
[Dockerfile]: ../Dockerfile
[Compose v2]: https://docs.docker.com/compose/compose-v2/
diff --git a/Dockerfile b/Dockerfile
index 6355dac..6b5e2f6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,52 +1,73 @@
-# syntax=docker/dockerfile:1
-FROM python:3.11-slim-buster as builder
+# syntax=docker/dockerfile:1.4
+FROM buildpack-deps:buster as builder-nsjail
WORKDIR /nsjail
RUN apt-get -y update \
- && apt-get install -y \
- bison=2:3.3.* \
- flex=2.6.* \
- g++=4:8.3.* \
- gcc=4:8.3.* \
- git=1:2.20.* \
- libprotobuf-dev=3.6.* \
- libnl-route-3-dev=3.4.* \
- make=4.2.* \
- pkg-config=0.29-6 \
- protobuf-compiler=3.6.*
+ && apt-get install -y --no-install-recommends \
+ bison\
+ flex \
+ libprotobuf-dev\
+ libnl-route-3-dev \
+ protobuf-compiler \
+ && rm -rf /var/lib/apt/lists/*
+
RUN git clone -b master --single-branch https://github.com/google/nsjail.git . \
&& git checkout dccf911fd2659e7b08ce9507c25b2b38ec2c5800
RUN make
# ------------------------------------------------------------------------------
+FROM buildpack-deps:buster as builder-py-base
+
+ENV PYENV_ROOT=/pyenv \
+ PYTHON_CONFIGURE_OPTS='--disable-test-modules --enable-optimizations \
+ --with-lto --with-system-expat --without-ensurepip'
+
+RUN apt-get -y update \
+ && apt-get install -y --no-install-recommends \
+ libxmlsec1-dev \
+ tk-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY --link scripts/build_python.sh /
+
+# ------------------------------------------------------------------------------
+FROM builder-py-base as builder-py-3_11
+RUN git clone -b v2.3.24 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \
+ && /build_python.sh 3.11.4
+
+# ------------------------------------------------------------------------------
+FROM builder-py-base as builder-py-3_12
+RUN git clone -b v2.3.24 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \
+ && /build_python.sh 3.12.0rc1
+
+# ------------------------------------------------------------------------------
FROM python:3.11-slim-buster as base
-# Everything will be a user install to allow snekbox's dependencies to be kept
-# separate from the packages exposed during eval.
-ENV PATH=/root/.local/bin:$PATH \
- PIP_DISABLE_PIP_VERSION_CHECK=1 \
- PIP_NO_CACHE_DIR=false \
- PIP_USER=1
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+ PIP_NO_CACHE_DIR=false
RUN apt-get -y update \
- && apt-get install -y \
- gcc=4:8.3.* \
- git=1:2.20.* \
- libnl-route-3-200=3.4.* \
- libprotobuf17=3.6.* \
+ && apt-get install -y --no-install-recommends \
+ gcc \
+ git \
+ libnl-route-3-200 \
+ libprotobuf17 \
&& rm -rf /var/lib/apt/lists/*
-COPY --from=builder /nsjail/nsjail /usr/sbin/
-RUN chmod +x /usr/sbin/nsjail
+COPY --link --from=builder-nsjail /nsjail/nsjail /usr/sbin/
+COPY --link --from=builder-py-3_11 /lang/ /lang/
+COPY --link --from=builder-py-3_12 /lang/ /lang/
+
+RUN chmod +x /usr/sbin/nsjail \
+ && ln -s /lang/python/3.11/ /lang/python/default
# ------------------------------------------------------------------------------
FROM base as venv
-COPY requirements/ /snekbox/requirements/
+COPY --link requirements/ /snekbox/requirements/
WORKDIR /snekbox
-# pip installs to the default user site since PIP_USER is set.
RUN pip install -U -r requirements/requirements.pip
# This must come after the first pip command! From the docs:
@@ -58,11 +79,12 @@ ARG DEV
RUN if [ -n "${DEV}" ]; \
then \
pip install -U -r requirements/coverage.pip \
- && PYTHONUSERBASE=/snekbox/user_base pip install numpy~=1.19; \
+ && export PYTHONUSERBASE=/snekbox/user_base \
+ && /lang/python/default/bin/python -m pip install --user numpy~=1.19; \
fi
# At the end to avoid re-installing dependencies when only a config changes.
-COPY config/ /snekbox/config/
+COPY --link config/ /snekbox/config/
ENTRYPOINT ["gunicorn"]
CMD ["-c", "config/gunicorn.conf.py"]
diff --git a/NOTICE b/LICENSE-THIRD-PARTY
index b6e5fbc..684f2df 100644
--- a/NOTICE
+++ b/LICENSE-THIRD-PARTY
@@ -1,9 +1,36 @@
-The Python code at snekbox/config_pb2.py was generated from config.proto in nsjail
-Copyright 2014 Google Inc. All Rights Reserved.
-Copyright 2016 Sergiusz Bazanski. All Rights Reserved.
-
--------------------------------------------------------------------------------
-
+--------------------------------------------------------------------------------
+ MIT License
+Applies to:
+ - Copyright (c) 2014 Docker, Inc.
+ - scripts/build_python.sh: find command for de-bloating Python install
+--------------------------------------------------------------------------------
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+ Apache License, Version 2.0
+Applies to:
+ - Copyright 2014 Google Inc. All Rights Reserved.
+ Copyright 2016 Sergiusz Bazanski. All Rights Reserved.
+ - snekbox/config_pb2.py: generated from config.proto in nsjail
+--------------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
diff --git a/README.md b/README.md
index 0b23848..6916090 100644
--- a/README.md
+++ b/README.md
@@ -7,8 +7,7 @@
Python sandbox runners for executing code in isolation aka snekbox.
-Supports a memory [virtual read/write file system](#virtual-file-system) within the sandbox,
-allowing text or binary files to be sent and returned.
+Supports a memory [virtual read/write file system](#virtual-file-system) within the sandbox, allowing text or binary files to be sent and returned.
A client sends Python code to a snekbox, the snekbox executes the code, and finally the results of the execution are returned to the client.
@@ -100,22 +99,19 @@ Name | Description
## Third-party Packages
-By default, the Python interpreter has no access to any packages besides the
-standard library. Even snekbox's own dependencies like Falcon and Gunicorn are
-not exposed.
+By default, the Python interpreter has no access to any packages besides the standard library. Even snekbox's own dependencies like Falcon and Gunicorn are not exposed.
To expose third-party Python packages during evaluation, install them to a custom user site:
```sh
-docker exec snekbox /bin/sh -c 'PYTHONUSERBASE=/snekbox/user_base pip install numpy'
+docker exec snekbox /bin/sh -c \
+ 'PYTHONUSERBASE=/snekbox/user_base /lang/python/default/bin/python -m pip install --user numpy'
```
In the above command, `snekbox` is the name of the running container. The name may be different and can be checked with `docker ps`.
The packages will be installed to the user site within `/snekbox/user_base`. To persist the installed packages, a volume for the directory can be created with Docker. For an example, see [`docker-compose.yml`].
-If `pip`, `setuptools`, or `wheel` are dependencies or need to be exposed, then use the `--ignore-installed` option with pip. However, note that this will also re-install packages present in the custom user site, effectively making caching it futile. Current limitations of pip don't allow it to ignore packages extant outside the installation destination.
-
## Development Environment
See [CONTRIBUTING.md](.github/CONTRIBUTING.md).
diff --git a/config/snekbox.cfg b/config/snekbox.cfg
index c5d7ae3..1bd2ab6 100644
--- a/config/snekbox.cfg
+++ b/config/snekbox.cfg
@@ -14,8 +14,10 @@ envar: "OPENBLAS_NUM_THREADS=5"
envar: "MKL_NUM_THREADS=5"
envar: "VECLIB_MAXIMUM_THREADS=5"
envar: "NUMEXPR_NUM_THREADS=5"
-envar: "PYTHONPATH=/snekbox/user_base/lib/python3.11/site-packages"
+envar: "PYTHONDONTWRITEBYTECODE=true"
envar: "PYTHONIOENCODING=utf-8:strict"
+envar: "PYTHONUNBUFFERED=true"
+envar: "PYTHONUSERBASE=/snekbox/user_base"
envar: "HOME=home"
keep_caps: false
@@ -79,29 +81,8 @@ mount {
}
mount {
- src: "/usr/local/lib"
- dst: "/usr/local/lib"
- is_bind: true
- rw: false
-}
-
-mount {
- src: "/usr/local/bin/python"
- dst: "/usr/local/bin/python"
- is_bind: true
- rw: false
-}
-
-mount {
- src: "/usr/local/bin/python3"
- dst: "/usr/local/bin/python3"
- is_bind: true
- rw: false
-}
-
-mount {
- src: "/usr/local/bin/python3.11"
- dst: "/usr/local/bin/python3.11"
+ src: "/lang"
+ dst: "/lang"
is_bind: true
rw: false
}
@@ -124,6 +105,6 @@ cgroup_pids_mount: "/sys/fs/cgroup/pids"
iface_no_lo: true
exec_bin {
- path: "/usr/local/bin/python"
- arg: "-BSqu"
+ path: "/lang/python/default/bin/python"
+ arg: ""
}
diff --git a/deployment.yaml b/deployment.yaml
index b0856f3..3ce8d00 100644
--- a/deployment.yaml
+++ b/deployment.yaml
@@ -12,6 +12,22 @@ spec:
labels:
app: snekbox
spec:
+ initContainers:
+ - name: deps-install
+ image: ghcr.io/python-discord/snekbox:latest
+ imagePullPolicy: Always
+ volumeMounts:
+ - name: snekbox-user-base-volume
+ mountPath: /snekbox/user_base
+ env:
+ - name: PYTHONUSERBASE
+ value: /snekbox/user_base
+ command:
+ - "/bin/sh"
+ - "-c"
+ - >-
+ find /lang/python -mindepth 1 -maxdepth 1 -type d -exec
+ {}/bin/python -m pip install --user -U -r requirements/eval-deps.pip \;
containers:
- name: snekbox
image: ghcr.io/python-discord/snekbox:latest
@@ -23,38 +39,6 @@ spec:
volumeMounts:
- name: snekbox-user-base-volume
mountPath: /snekbox/user_base
- lifecycle:
- postStart:
- exec:
- command:
- - "/bin/sh"
- - "-c"
- - >-
- PYTHONUSERBASE=/snekbox/user_base
- pip install --user --upgrade
- anyio[trio]~=3.6
- arrow~=1.2
- attrs~=22.2
- beautifulsoup4~=4.11
- einspect~=0.5
- fishhook~=0.2
- forbiddenfruit~=0.1
- fuzzywuzzy~=0.18
- lark~=1.1
- matplotlib~=3.6
- more-itertools~=9.0
- networkx~=3.0
- numpy~=1.24
- pandas~=1.5
- pendulum~=2.1
- python-dateutil~=2.8
- pyyaml~=6.0
- scipy~=1.10
- sympy~=1.11
- toml~=0.10
- typing-extensions~=4.4
- tzdata~=2022.7
- yarl~=1.8
volumes:
- name: snekbox-user-base-volume
hostPath:
diff --git a/requirements/eval-deps.pip b/requirements/eval-deps.pip
new file mode 100644
index 0000000..79f8218
--- /dev/null
+++ b/requirements/eval-deps.pip
@@ -0,0 +1,24 @@
+anyio[trio]~=3.6
+arrow~=1.2
+attrs~=22.2
+beautifulsoup4~=4.11
+einspect~=0.5
+fishhook~=0.2
+forbiddenfruit~=0.1
+fuzzywuzzy~=0.18
+lark~=1.1
+matplotlib~=3.6 ; python_version == '3.11'
+more-itertools~=9.0
+networkx~=3.0
+numpy~=1.24 ; python_version == '3.11'
+numpy==1.26.0b1 ; python_version == '3.12'
+pandas~=1.5 ; python_version == '3.11'
+pendulum~=2.1 ; python_version == '3.11'
+python-dateutil~=2.8
+pyyaml~=6.0
+scipy~=1.10
+sympy~=1.11
+toml~=0.10
+typing-extensions~=4.4
+tzdata~=2022.7
+yarl~=1.8 ; python_version == '3.11'
diff --git a/requirements/pip-tools.in b/requirements/pip-tools.in
index e459df9..29d8d31 100644
--- a/requirements/pip-tools.in
+++ b/requirements/pip-tools.in
@@ -2,5 +2,5 @@
-c lint.pip
-c requirements.pip
-# Minimum version which supports pip>=22.1
-pip-tools>=6.6.1
+# Minimum version which supports pip>=23.2
+pip-tools>=7.0.0
diff --git a/requirements/pip-tools.pip b/requirements/pip-tools.pip
index d87f3d6..4793c2d 100644
--- a/requirements/pip-tools.pip
+++ b/requirements/pip-tools.pip
@@ -10,7 +10,7 @@ click==8.1.3
# via pip-tools
packaging==23.0
# via build
-pip-tools==6.12.3
+pip-tools==7.3.0
# via -r requirements/pip-tools.in
pyproject-hooks==1.0.0
# via build
diff --git a/scripts/build_python.sh b/scripts/build_python.sh
new file mode 100755
index 0000000..da937c2
--- /dev/null
+++ b/scripts/build_python.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+shopt -s inherit_errexit
+
+py_version="${1}"
+
+# Install Python interpreter under e.g. /lang/python/3.11/ (no patch version).
+"${PYENV_ROOT}/plugins/python-build/bin/python-build" \
+ "${py_version}" \
+ "/lang/python/${py_version%.*}"
+"/lang/python/${py_version%.*}/bin/python" -m pip install -U pip
+
+# Clean up some unnecessary files to reduce image size bloat.
+find /lang/python/ -depth \
+\( \
+ \( -type d -a \( \
+ -name test -o -name tests -o -name idle_test \
+ \) \) \
+ -o \( -type f -a \( \
+ -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \
+ \) \) \
+\) -exec rm -rf '{}' +
diff --git a/snekbox/memfs.py b/snekbox/memfs.py
index 991766b..40b57c4 100644
--- a/snekbox/memfs.py
+++ b/snekbox/memfs.py
@@ -144,6 +144,7 @@ class MemFS:
"""
start_time = time.monotonic()
count = 0
+ total_size = 0
files = glob.iglob(pattern, root_dir=str(self.output), recursive=True, include_hidden=False)
for file in (Path(self.output, f) for f in files):
if timeout and (time.monotonic() - start_time) > timeout:
@@ -152,10 +153,15 @@ class MemFS:
if not file.is_file():
continue
+ # file.is_file allows file to be a regular file OR a symlink pointing to a regular file.
+ # It is important that we follow symlinks here, so when we check st_size later it is the
+ # size of the underlying file rather than of the symlink.
+ stat = file.stat(follow_symlinks=True)
+
if exclude_files and (orig_time := exclude_files.get(file)):
- new_time = file.stat().st_mtime
+ new_time = stat.st_mtime
log.info(f"Checking {file.name} ({orig_time=}, {new_time=})")
- if file.stat().st_mtime == orig_time:
+ if stat.st_mtime == orig_time:
log.info(f"Skipping {file.name!r} as it has not been modified")
continue
@@ -163,6 +169,14 @@ class MemFS:
log.info(f"Max attachments {limit} reached, skipping remaining files")
break
+ # Due to sparse files and links the total size could end up being greater
+ # than the size limit of the tmpfs. Limit the total size to be read to
+ # prevent high memory usage / OOM when reading files.
+ total_size += stat.st_size
+ if total_size > self.instance_size:
+ log.info(f"Max file size {self.instance_size} reached, skipping remaining files")
+ break
+
count += 1
log.info(f"Found valid file for upload {file.name!r}")
yield FileAttachment.from_path(file, relative_to=self.output)
diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py
index f64830a..1de7b1e 100644
--- a/snekbox/nsjail.py
+++ b/snekbox/nsjail.py
@@ -221,9 +221,9 @@ class NsJail:
*nsjail_args,
"--",
self.config.exec_bin.path,
- *self.config.exec_bin.arg,
- # Filter out empty strings at start of py_args
+ # Filter out empty strings at start of Python args
# (causes issues with python cli)
+ *iter_lstrip(self.config.exec_bin.arg),
*iter_lstrip(py_args),
]
diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py
index 61d0e8b..fe55290 100644
--- a/tests/test_nsjail.py
+++ b/tests/test_nsjail.py
@@ -82,7 +82,7 @@ class NsJailTests(unittest.TestCase):
for _ in range({max_pids}):
print(subprocess.Popen(
[
- '/usr/local/bin/python3',
+ '/lang/python/default/bin/python',
'-c',
'import time; time.sleep(1)'
],
@@ -240,8 +240,9 @@ class NsJailTests(unittest.TestCase):
os.symlink("file", f"file{i}")
"""
).strip()
-
- nsjail = NsJail(memfs_instance_size=32 * Size.MiB, files_timeout=1)
+ # A value higher than the actual memory needed is used to avoid the limit
+ # on total file size being reached before the timeout when reading.
+ nsjail = NsJail(memfs_instance_size=512 * Size.MiB, files_timeout=1)
result = nsjail.python3(["-c", code])
self.assertEqual(result.returncode, None)
self.assertEqual(
@@ -272,6 +273,60 @@ class NsJailTests(unittest.TestCase):
)
self.assertEqual(result.stderr, None)
+ def test_file_parsing_size_limit_sparse_files(self):
+ tmpfs_size = 8 * Size.MiB
+ code = dedent(
+ f"""
+ import os
+ with open("test.txt", "w") as f:
+ os.truncate(f.fileno(), {tmpfs_size // 2 + 1})
+
+ with open("test2.txt", "w") as f:
+ os.truncate(f.fileno(), {tmpfs_size // 2 + 1})
+ """
+ )
+ nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5)
+ result = nsjail.python3(["-c", code])
+ self.assertEqual(result.returncode, 0)
+ self.assertEqual(len(result.files), 1)
+
+ def test_file_parsing_size_limit_sparse_files_large(self):
+ tmpfs_size = 8 * Size.MiB
+ code = dedent(
+ f"""
+ import os
+ with open("test.txt", "w") as f:
+ # Use a very large value to ensure the test fails if the
+ # file is read even if would have been discarded later.
+ os.truncate(f.fileno(), {1024 * Size.TiB})
+ """
+ )
+ nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5)
+ result = nsjail.python3(["-c", code])
+ self.assertEqual(result.returncode, 0)
+ self.assertEqual(len(result.files), 0)
+
+ def test_file_parsing_size_limit_symlinks(self):
+ tmpfs_size = 8 * Size.MiB
+ code = dedent(
+ f"""
+ import os
+ data = "a" * 1024
+ size = {tmpfs_size // 8}
+
+ with open("file", "w") as f:
+ for _ in range(size // 1024):
+ f.write(data)
+
+ for i in range(20):
+ os.symlink("file", f"file{{i}}")
+ """
+ )
+ nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5)
+ result = nsjail.python3(["-c", code])
+ self.assertEqual(result.returncode, 0)
+ self.assertEqual(len(result.files), 8)
+
def test_file_write_error(self):
"""Test errors during file write."""
result = self.nsjail.python3(
@@ -478,7 +533,7 @@ class NsJailTests(unittest.TestCase):
for args, expected in cases:
with self.subTest(args=args):
result = self.nsjail.python3(py_args=args)
- idx = result.args.index("-BSqu")
+ idx = result.args.index(self.nsjail.config.exec_bin.path)
self.assertEqual(result.args[idx + 1 :], expected)
self.assertEqual(result.returncode, 0)