From 0db60fc25bb48d2c139b91e78b7bcfe7b98475b4 Mon Sep 17 00:00:00 2001 From: Mark <1515135+MarkKoz@users.noreply.github.com> Date: Sat, 19 Aug 2023 16:10:59 -0700 Subject: Install multiple Python versions in image Separate snekbox's Python interpreter from the interpreter used by NsJail. This allows for the interpreters to be updated on different cadences and provides better isolation of packages. Each Python interpreter adds about 70 MB to the built image. --- tests/test_nsjail.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index c701d3a..25c8354 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -79,7 +79,7 @@ class NsJailTests(unittest.TestCase): for _ in range({max_pids}): print(subprocess.Popen( [ - '/usr/local/bin/python3', + '/lang/python/default/bin/python', '-c', 'import time; time.sleep(1)' ], @@ -431,7 +431,7 @@ class NsJailTests(unittest.TestCase): for args, expected in cases: with self.subTest(args=args): result = self.nsjail.python3(py_args=args) - idx = result.args.index("-BSqu") + idx = result.args.index(self.nsjail.config.exec_bin.path) self.assertEqual(result.args[idx + 1 :], expected) self.assertEqual(result.returncode, 0) -- cgit v1.2.3 From 16b1a13e206fa34bfc7af05363d5e78742e26e40 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Tue, 29 Aug 2023 14:57:50 +0000 Subject: Limit total file size read from tmpfs to avoid high memory usage --- snekbox/memfs.py | 18 ++++++++++++++-- tests/test_nsjail.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/snekbox/memfs.py b/snekbox/memfs.py index 991766b..40b57c4 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -144,6 +144,7 @@ class MemFS: """ start_time = time.monotonic() count = 0 + total_size = 0 files = glob.iglob(pattern, root_dir=str(self.output), recursive=True, include_hidden=False) for file in (Path(self.output, f) for f in files): if timeout and (time.monotonic() - start_time) > timeout: @@ -152,10 +153,15 @@ class MemFS: if not file.is_file(): continue + # file.is_file allows file to be a regular file OR a symlink pointing to a regular file. + # It is important that we follow symlinks here, so when we check st_size later it is the + # size of the underlying file rather than of the symlink. + stat = file.stat(follow_symlinks=True) + if exclude_files and (orig_time := exclude_files.get(file)): - new_time = file.stat().st_mtime + new_time = stat.st_mtime log.info(f"Checking {file.name} ({orig_time=}, {new_time=})") - if file.stat().st_mtime == orig_time: + if stat.st_mtime == orig_time: log.info(f"Skipping {file.name!r} as it has not been modified") continue @@ -163,6 +169,14 @@ class MemFS: log.info(f"Max attachments {limit} reached, skipping remaining files") break + # Due to sparse files and links the total size could end up being greater + # than the size limit of the tmpfs. Limit the total size to be read to + # prevent high memory usage / OOM when reading files. + total_size += stat.st_size + if total_size > self.instance_size: + log.info(f"Max file size {self.instance_size} reached, skipping remaining files") + break + count += 1 log.info(f"Found valid file for upload {file.name!r}") yield FileAttachment.from_path(file, relative_to=self.output) diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index c701d3a..5b06534 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -218,8 +218,9 @@ class NsJailTests(unittest.TestCase): os.symlink("file", f"file{i}") """ ).strip() - - nsjail = NsJail(memfs_instance_size=32 * Size.MiB, files_timeout=1) + # A value higher than the actual memory needed is used to avoid the limit + # on total file size being reached before the timeout when reading. + nsjail = NsJail(memfs_instance_size=512 * Size.MiB, files_timeout=1) result = nsjail.python3(["-c", code]) self.assertEqual(result.returncode, None) self.assertEqual( @@ -250,6 +251,60 @@ class NsJailTests(unittest.TestCase): ) self.assertEqual(result.stderr, None) + def test_file_parsing_size_limit_sparse_files(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + with open("test.txt", "w") as f: + os.truncate(f.fileno(), {tmpfs_size // 2 + 1}) + + with open("test2.txt", "w") as f: + os.truncate(f.fileno(), {tmpfs_size // 2 + 1}) + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 1) + + def test_file_parsing_size_limit_sparse_files_large(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + with open("test.txt", "w") as f: + # Use a very large value to ensure the test fails if the + # file is read even if would have been discarded later. + os.truncate(f.fileno(), {1024 * Size.TiB}) + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 0) + + def test_file_parsing_size_limit_symlinks(self): + tmpfs_size = 8 * Size.MiB + code = dedent( + f""" + import os + data = "a" * 1024 + size = {tmpfs_size // 8} + + with open("file", "w") as f: + for _ in range(size // 1024): + f.write(data) + + for i in range(20): + os.symlink("file", f"file{{i}}") + """ + ) + nsjail = NsJail(memfs_instance_size=tmpfs_size, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, 0) + self.assertEqual(len(result.files), 8) + def test_file_write_error(self): """Test errors during file write.""" result = self.nsjail.python3( -- cgit v1.2.3