From 3072fffec237b05bba469c346918e1dcd83e8521 Mon Sep 17 00:00:00 2001 From: Bast Date: Tue, 6 Apr 2021 16:07:48 -0700 Subject: Use PYTHONIOENCODING to enable utf-8 stdout for the nsjail pipe, and handle the potential case where this is bypassable Since snekbox does not run with a tty, stdout is technically raw bytes, and thus incomplete surrogate pairs can be printed without the client application erroring; the failure instead occurs within _consume_stdout when we attempt to decode the output to a str. This commit sets the PYTHONIOENCODING environment variable to tell Python to open the pipe in UTF-8 mode. However, clever use of execl and os.unsetenv() can unset this environment variable, so we add a safety check to _consume_stdout to fail out of parsing output if it contains invalid Unicode. This should only happen in deliberate cases, or from significant bugs in Python or a C library where output is printed to stdout ignoring the Python stdout encoding. --- tests/test_nsjail.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'tests') diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index 4d4676b..cab9344 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -100,6 +100,27 @@ class NsJailTests(unittest.TestCase): self.assertEqual(result.stdout, "ValueError: embedded null byte") self.assertEqual(result.stderr, None) + def test_print_bad_unicode_encode_error(self): + result = self.nsjail.python3("print(chr(56550))") + self.assertEqual(result.returncode, 1) + unicode_traceback = ( + "Traceback (most recent call last):\n" + '  File "<string>", line 1, in <module>\n' + "UnicodeEncodeError: 'utf-8' codec can't encode character '\\udce6'" + " in position 0: surrogates not allowed\n" + ) + self.assertEqual(result.stdout, unicode_traceback) + self.assertEqual(result.stderr, None) + + def test_unicode_env_erase_escape_fails(self): + result = self.nsjail.python3( + "import os, sys\nos.unsetenv('PYTHONIOENCODING')\n" + "os.execl(sys.executable, 'python', '-c', 
'print(chr(56550))')" + ) + self.assertEqual(result.returncode, None) + self.assertEqual(result.stdout, "UnicodeDecodeError: invalid unicode in output pipe") + self.assertEqual(result.stderr, None) + @unittest.mock.patch("snekbox.nsjail.DEBUG", new=False) def test_log_parser(self): log_lines = ( -- cgit v1.2.3 From cf1cb44582ac165acf3113587a9ead2e4f964c26 Mon Sep 17 00:00:00 2001 From: Bast Date: Thu, 8 Apr 2021 14:31:26 -0700 Subject: Match new unicode eval tests and output to the format and functions of others --- snekbox/nsjail.py | 2 +- tests/test_nsjail.py | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) (limited to 'tests') diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index a182406..9367cb2 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -214,7 +214,7 @@ class NsJail: return CompletedProcess( args, None, - "UnicodeDecodeError: invalid unicode in output pipe", + "UnicodeDecodeError: invalid Unicode in output pipe", None, ) diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index cab9344..46193b2 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -103,22 +103,18 @@ class NsJailTests(unittest.TestCase): def test_print_bad_unicode_encode_error(self): result = self.nsjail.python3("print(chr(56550))") self.assertEqual(result.returncode, 1) - unicode_traceback = ( - "Traceback (most recent call last):\n" - '  File "<string>", line 1, in <module>\n' - "UnicodeEncodeError: 'utf-8' codec can't encode character '\\udce6'" - " in position 0: surrogates not allowed\n" - ) - self.assertEqual(result.stdout, unicode_traceback) + self.assertIn("UnicodeEncodeError", result.stdout) self.assertEqual(result.stderr, None) def test_unicode_env_erase_escape_fails(self): - result = self.nsjail.python3( - "import os, sys\nos.unsetenv('PYTHONIOENCODING')\n" - "os.execl(sys.executable, 'python', '-c', 'print(chr(56550))')" - ) + result = self.nsjail.python3(dedent(""" + import os + import sys + os.unsetenv('PYTHONIOENCODING') + 
os.execl(sys.executable, 'python', '-c', 'print(chr(56550))') + """).strip()) self.assertEqual(result.returncode, None) - self.assertEqual(result.stdout, "UnicodeDecodeError: invalid unicode in output pipe") + self.assertEqual(result.stdout, "UnicodeDecodeError: invalid Unicode in output pipe") self.assertEqual(result.stderr, None) @unittest.mock.patch("snekbox.nsjail.DEBUG", new=False) -- cgit v1.2.3