diff options
author | 2021-04-06 16:07:48 -0700 | |
---|---|---|
committer | 2021-04-06 16:16:47 -0700 | |
commit | 3072fffec237b05bba469c346918e1dcd83e8521 (patch) | |
tree | be101bdb6dd1f227940e71dc68879f979bc18506 | |
parent | Bump urllib3 from 1.26.3 to 1.26.4 (diff) |
Use PYTHONIOENCODING to enable utf-8 stdout for the nsjail pipe, and handle the potential case where this is bypassable
Since snekbox does not run with a tty, stdout is technically raw bytes, and thus incomplete surrogate pairs can be printed without the client application erroring; the failure instead occurs within _consume_stdout when we attempt to decode the output to a str.
This commit sets the PYTHONIOENCODING environment variable to instruct Python to open the pipe in utf-8 mode.
However, clever use of execl and os.unsetenv() can unset this environment variable, so we add a safety check around the call to _consume_stdout to fail out of parsing output if it contains invalid unicode. This should only happen in deliberate cases, or when significant bugs in Python or a C library cause output to be printed to stdout ignoring the Python stdout encoding.
-rw-r--r-- | config/snekbox.cfg | 1 | ||||
-rw-r--r-- | snekbox/nsjail.py | 10 | ||||
-rw-r--r-- | tests/test_nsjail.py | 21 |
3 files changed, 31 insertions, 1 deletions
diff --git a/config/snekbox.cfg b/config/snekbox.cfg index 257b5ca..73e36e1 100644 --- a/config/snekbox.cfg +++ b/config/snekbox.cfg @@ -15,6 +15,7 @@ envar: "MKL_NUM_THREADS=1" envar: "VECLIB_MAXIMUM_THREADS=1" envar: "NUMEXPR_NUM_THREADS=1" envar: "PYTHONPATH=/snekbox/user_base/lib/python3.9/site-packages" +envar: "PYTHONIOENCODING=utf-8:strict" keep_caps: false diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 814b46c..a182406 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -208,7 +208,15 @@ class NsJail: except ValueError: return CompletedProcess(args, None, "ValueError: embedded null byte", None) - output = self._consume_stdout(nsjail) + try: + output = self._consume_stdout(nsjail) + except UnicodeDecodeError: + return CompletedProcess( + args, + None, + "UnicodeDecodeError: invalid unicode in output pipe", + None, + ) # When you send signal `N` to a subprocess to terminate it using Popen, it # will return `-N` as its exit code. As we normally get `N + 128` back, we diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index 4d4676b..cab9344 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -100,6 +100,27 @@ class NsJailTests(unittest.TestCase): self.assertEqual(result.stdout, "ValueError: embedded null byte") self.assertEqual(result.stderr, None) + def test_print_bad_unicode_encode_error(self): + result = self.nsjail.python3("print(chr(56550))") + self.assertEqual(result.returncode, 1) + unicode_traceback = ( + "Traceback (most recent call last):\n" + ' File "<string>", line 1, in <module>\n' + "UnicodeEncodeError: 'utf-8' codec can't encode character '\\udce6'" + " in position 0: surrogates not allowed\n" + ) + self.assertEqual(result.stdout, unicode_traceback) + self.assertEqual(result.stderr, None) + + def test_unicode_env_erase_escape_fails(self): + result = self.nsjail.python3( + "import os, sys\nos.unsetenv('PYTHONIOENCODING')\n" + "os.execl(sys.executable, 'python', '-c', 'print(chr(56550))')" + ) + 
self.assertEqual(result.returncode, None) + self.assertEqual(result.stdout, "UnicodeDecodeError: invalid unicode in output pipe") + self.assertEqual(result.stderr, None) + @unittest.mock.patch("snekbox.nsjail.DEBUG", new=False) def test_log_parser(self): log_lines = ( |