aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorGravatar Bast <[email protected]>2021-04-06 16:07:48 -0700
committerGravatar Bast <[email protected]>2021-04-06 16:16:47 -0700
commit3072fffec237b05bba469c346918e1dcd83e8521 (patch)
treebe101bdb6dd1f227940e71dc68879f979bc18506 /tests
parentBump urllib3 from 1.26.3 to 1.26.4 (diff)
Use PYTHONIOENCODING to enable utf-8 stdout for the nsjail pipe, and handle the potential case where this is bypassable
Since snekbox does not run with a tty, stdout is technically raw bytes, so incomplete surrogate pairs can be printed without the client application erroring; the failure instead occurs within _consume_stdout when we attempt to decode the output to a str. This commit sets the PYTHONIOENCODING environment variable to tell Python to open the pipe in utf-8 mode. However, clever use of execl and os.unsetenv() can unset this environment variable, so we add a safety check to _consume_stdout that aborts parsing the output if it contains invalid unicode. This should only happen in deliberate cases, or because of significant bugs in Python or a C library where output is printed to stdout ignoring the Python stdout encoding.
Diffstat (limited to 'tests')
-rw-r--r--tests/test_nsjail.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py
index 4d4676b..cab9344 100644
--- a/tests/test_nsjail.py
+++ b/tests/test_nsjail.py
@@ -100,6 +100,27 @@ class NsJailTests(unittest.TestCase):
self.assertEqual(result.stdout, "ValueError: embedded null byte")
self.assertEqual(result.stderr, None)
+ def test_print_bad_unicode_encode_error(self):
+ result = self.nsjail.python3("print(chr(56550))")
+ self.assertEqual(result.returncode, 1)
+ unicode_traceback = (
+ "Traceback (most recent call last):\n"
+ ' File "<string>", line 1, in <module>\n'
+ "UnicodeEncodeError: 'utf-8' codec can't encode character '\\udce6'"
+ " in position 0: surrogates not allowed\n"
+ )
+ self.assertEqual(result.stdout, unicode_traceback)
+ self.assertEqual(result.stderr, None)
+
+ def test_unicode_env_erase_escape_fails(self):
+ result = self.nsjail.python3(
+ "import os, sys\nos.unsetenv('PYTHONIOENCODING')\n"
+ "os.execl(sys.executable, 'python', '-c', 'print(chr(56550))')"
+ )
+ self.assertEqual(result.returncode, None)
+ self.assertEqual(result.stdout, "UnicodeDecodeError: invalid unicode in output pipe")
+ self.assertEqual(result.stderr, None)
+
@unittest.mock.patch("snekbox.nsjail.DEBUG", new=False)
def test_log_parser(self):
log_lines = (