From 8d9a0029bd3c8a8629fbf8db3903b412bef538e1 Mon Sep 17 00:00:00 2001
From: MarkKoz <KozlovMark@gmail.com>
Date: Wed, 29 May 2019 03:12:47 -0700
Subject: Add API tests for eval resource

---
 tests/api/test_eval.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tests/api/test_eval.py

(limited to 'tests/api/test_eval.py')

diff --git a/tests/api/test_eval.py b/tests/api/test_eval.py
new file mode 100644
index 0000000..a5b83fd
--- /dev/null
+++ b/tests/api/test_eval.py
@@ -0,0 +1,49 @@
+from tests.api import SnekAPITestCase
+
+
+class TestEvalResource(SnekAPITestCase):
+    PATH = "/eval"
+
+    def test_post_valid_200(self):
+        body = {"input": "foo"}
+        result = self.simulate_post(self.PATH, json=body)
+
+        self.assertEqual(result.status_code, 200)
+        self.assertEqual(body["input"], result.json["input"])
+        self.assertEqual("test output", result.json["output"])
+
+    def test_post_invalid_schema_400(self):
+        body = {"stuff": "foo"}
+        result = self.simulate_post(self.PATH, json=body)
+
+        self.assertEqual(result.status_code, 400)
+
+        expected = {
+            "title": "Request data failed validation",
+            "description": "'input' is a required property"
+        }
+
+        self.assertEqual(expected, result.json)
+
+    def test_post_invalid_content_type_415(self):
+        body = "{\"input\": \"foo\"}"
+        headers = {"Content-Type": "application/xml"}
+        result = self.simulate_post(self.PATH, body=body, headers=headers)
+
+        self.assertEqual(result.status_code, 415)
+
+        expected = {
+            "title": "Unsupported media type",
+            "description": "application/xml is an unsupported media type."
+        }
+
+        self.assertEqual(expected, result.json)
+
+    def test_disallowed_method_405(self):
+        result = self.simulate_get(self.PATH)
+        self.assertEqual(result.status_code, 405)
+
+    def test_options_allow_post_only(self):
+        result = self.simulate_options(self.PATH)
+        self.assertEqual(result.status_code, 200)
+        self.assertEqual(result.headers.get("Allow"), "POST")
-- 
cgit v1.2.3


From 7916804de8176fa34e4dfc56c0543157e72985f1 Mon Sep 17 00:00:00 2001
From: MarkKoz <KozlovMark@gmail.com>
Date: Tue, 4 Jun 2019 22:29:08 -0700
Subject: Add logging for NsJail

NsJail's is configured to log to a temporary file rather than stderr.
The contents of the file are parsed using regex after the process exits.
When not debugging, some blacklisted messages and most info-level
messages are skipped.

* Add a snekbox logger
* Log the Python code being executed if debugging
* Use nested single quotes in a test to fix a linter error
---
 snekbox/__init__.py    |  10 +++++
 snekbox/nsjail.py      | 106 ++++++++++++++++++++++++++++++++++++-------------
 tests/api/test_eval.py |   2 +-
 3 files changed, 90 insertions(+), 28 deletions(-)

(limited to 'tests/api/test_eval.py')

diff --git a/snekbox/__init__.py b/snekbox/__init__.py
index af8429b..a48abd5 100644
--- a/snekbox/__init__.py
+++ b/snekbox/__init__.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import sys
 
 from gunicorn import glogging
 
@@ -22,3 +23,12 @@ class GunicornLogger(glogging.Logger):
             self.loglevel = self.LOG_LEVELS.get(cfg.loglevel.lower(), logging.INFO)
 
         self.error_log.setLevel(self.loglevel)
+
+
+log = logging.getLogger("snekbox")
+log.setLevel(logging.DEBUG if DEBUG else logging.INFO)
+log.propagate = True
+formatter = logging.Formatter(GunicornLogger.error_fmt, GunicornLogger.datefmt)
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(formatter)
+log.addHandler(handler)
diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py
index 2484ba2..0ebfe0c 100644
--- a/snekbox/nsjail.py
+++ b/snekbox/nsjail.py
@@ -1,6 +1,20 @@
+import logging
+import re
 import subprocess
 import sys
+import textwrap
 from pathlib import Path
+from tempfile import NamedTemporaryFile
+
+from snekbox import DEBUG
+
+log = logging.getLogger(__name__)
+
+# [level][timestamp][PID]? function_signature:line_no? message
+LOG_PATTERN = re.compile(
+    r"\[(?P<level>(I)|[WEF])\]\[.+?\](?(2)|(?P<func>\[\d+\] .+?:\d+ )) ?(?P<msg>.+)"
+)
+LOG_BLACKLIST = ("Process will be ",)
 
 # Explicitly define constants for NsJail's default values.
 CGROUP_PIDS_PARENT = Path("/sys/fs/cgroup/pids/NSJAIL")
@@ -56,39 +70,77 @@ class NsJail:
         pids.mkdir(parents=True, exist_ok=True)
         mem.mkdir(parents=True, exist_ok=True)
 
+    @staticmethod
+    def _parse_log(log_file):
+        """Parse and log NsJail's log messages."""
+        for line in log_file.read().decode("UTF-8").splitlines():
+            match = LOG_PATTERN.fullmatch(line)
+            if match is None:
+                log.warning(f"Failed to parse log line '{line}'")
+                continue
+
+            msg = match["msg"]
+            if not DEBUG and any(msg.startswith(s) for s in LOG_BLACKLIST):
+                # Skip blacklisted messages if not debugging.
+                continue
+
+            if DEBUG and match["func"]:
+                # Prepend PID, function signature, and line number if debugging.
+                msg = f"{match['func']}{msg}"
+
+            if match["level"] == "D":
+                log.debug(msg)
+            elif match["level"] == "I":
+                if DEBUG or msg.startswith("pid="):
+                    # Skip messages unrelated to process exit if not debugging.
+                    log.info(msg)
+            elif match["level"] == "W":
+                log.warning(msg)
+            else:
+                # Treat fatal as error.
+                log.error(msg)
+
     def python3(self, code: str) -> str:
         """Execute Python 3 code in an isolated environment and return stdout or an error."""
-        args = (
-            self.nsjail_binary, "-Mo",
-            "--rlimit_as", "700",
-            "--chroot", "/",
-            "-E", "LANG=en_US.UTF-8",
-            "-R/usr", "-R/lib", "-R/lib64",
-            "--user", "nobody",
-            "--group", "nogroup",
-            "--time_limit", "2",
-            "--disable_proc",
-            "--iface_no_lo",
-            "--cgroup_mem_max=52428800",
-            "--cgroup_mem_mount", str(CGROUP_MEMORY_PARENT.parent),
-            "--cgroup_mem_parent", CGROUP_MEMORY_PARENT.name,
-            "--cgroup_pids_max=1",
-            "--cgroup_pids_mount", str(CGROUP_PIDS_PARENT.parent),
-            "--cgroup_pids_parent", CGROUP_PIDS_PARENT.name,
-            "--quiet", "--",
-            self.python_binary, "-ISq", "-c", code
-        )
-
-        try:
-            proc = subprocess.run(args, capture_output=True, env=ENV, text=True)
-        except ValueError:
-            return "ValueError: embedded null byte"
+        with NamedTemporaryFile() as nsj_log:
+            args = (
+                self.nsjail_binary, "-Mo",
+                "--rlimit_as", "700",
+                "--chroot", "/",
+                "-E", "LANG=en_US.UTF-8",
+                "-R/usr", "-R/lib", "-R/lib64",
+                "--user", "nobody",
+                "--group", "nogroup",
+                "--time_limit", "2",
+                "--disable_proc",
+                "--iface_no_lo",
+                "--log", nsj_log.name,
+                "--cgroup_mem_max=52428800",
+                "--cgroup_mem_mount", str(CGROUP_MEMORY_PARENT.parent),
+                "--cgroup_mem_parent", CGROUP_MEMORY_PARENT.name,
+                "--cgroup_pids_max=1",
+                "--cgroup_pids_mount", str(CGROUP_PIDS_PARENT.parent),
+                "--cgroup_pids_parent", CGROUP_PIDS_PARENT.name,
+                "--",
+                self.python_binary, "-ISq", "-c", code
+            )
+
+            try:
+                msg = "Executing code..."
+                if DEBUG:
+                    msg = f"{msg[:-3]}:\n{textwrap.indent(code, '    ')}"
+                log.info(msg)
+
+                proc = subprocess.run(args, capture_output=True, env=ENV, text=True)
+            except ValueError:
+                return "ValueError: embedded null byte"
+
+            self._parse_log(nsj_log)
 
         if proc.returncode == 0:
             output = proc.stdout
         elif proc.returncode == 1:
-            filtered = (line for line in proc.stderr.split("\n") if not line.startswith("["))
-            output = "\n".join(filtered)
+            output = proc.stderr
         elif proc.returncode == 109:
             return "timed out or memory limit exceeded"
         elif proc.returncode == 255:
diff --git a/tests/api/test_eval.py b/tests/api/test_eval.py
index a5b83fd..bcd0ec4 100644
--- a/tests/api/test_eval.py
+++ b/tests/api/test_eval.py
@@ -26,7 +26,7 @@ class TestEvalResource(SnekAPITestCase):
         self.assertEqual(expected, result.json)
 
     def test_post_invalid_content_type_415(self):
-        body = "{\"input\": \"foo\"}"
+        body = "{'input': 'foo'}"
         headers = {"Content-Type": "application/xml"}
         result = self.simulate_post(self.PATH, body=body, headers=headers)
 
-- 
cgit v1.2.3


From 66d3836dd27f3b0e9f1cb780c7c37c8c0f081c70 Mon Sep 17 00:00:00 2001
From: MarkKoz <KozlovMark@gmail.com>
Date: Wed, 5 Jun 2019 00:46:04 -0700
Subject: Respond to eval with stdout, stderr, and the return code

The previous implementation limited the client's flexibility in
presenting the results of the process. A process can write to both
stdout and stderr and do so even when the return code is not 0 or 1.

* Return a CompletedProcess from NsJail
* Don't check the return code; this should be done client-side now
---
 snekbox/api/resources/eval.py | 23 +++++++++++++++++------
 snekbox/nsjail.py             | 33 ++++++++++-----------------------
 tests/api/__init__.py         |  8 +++++++-
 tests/api/test_eval.py        |  5 +++--
 4 files changed, 37 insertions(+), 32 deletions(-)

(limited to 'tests/api/test_eval.py')

diff --git a/snekbox/api/resources/eval.py b/snekbox/api/resources/eval.py
index b2f4260..4779557 100644
--- a/snekbox/api/resources/eval.py
+++ b/snekbox/api/resources/eval.py
@@ -36,7 +36,16 @@ class EvalResource:
     @validate(REQ_SCHEMA)
     def on_post(self, req, resp):
         """
-        Evaluate Python code and return the result.
+        Evaluate Python code and return stdout, stderr, and the return code.
+
+        The return codes mostly resemble those of a Unix shell. Some noteworthy cases:
+
+        - None
+            The NsJail process failed to launch
+        - 137 (SIGKILL)
+            Typically because NsJail killed the Python process due to time or memory constraints
+        - 255
+            NsJail encountered a fatal error
 
         Request body:
 
@@ -47,8 +56,9 @@ class EvalResource:
         Response format:
 
         >>> {
-        ...     "input": "print(1 + 1)",
-        ...     "output": "2\\n"
+        ...     "stdout": "2\\n",
+        ...     "stderr": "",
+        ...     "returncode": 0
         ... }
 
         Status codes:
@@ -63,12 +73,13 @@ class EvalResource:
         code = req.media["input"]
 
         try:
-            output = self.nsjail.python3(code)
+            result = self.nsjail.python3(code)
         except Exception:
             log.exception("An exception occurred while trying to process the request")
             raise falcon.HTTPInternalServerError
 
         resp.media = {
-            "input": code,
-            "output": output
+            "stdout": result.stdout,
+            "stderr": result.stderr,
+            "returncode": result.returncode
         }
diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py
index 0ebfe0c..ff12ec4 100644
--- a/snekbox/nsjail.py
+++ b/snekbox/nsjail.py
@@ -100,8 +100,8 @@ class NsJail:
                 # Treat fatal as error.
                 log.error(msg)
 
-    def python3(self, code: str) -> str:
-        """Execute Python 3 code in an isolated environment and return stdout or an error."""
+    def python3(self, code: str) -> subprocess.CompletedProcess:
+        """Execute Python 3 code in an isolated environment and return the completed process."""
         with NamedTemporaryFile() as nsj_log:
             args = (
                 self.nsjail_binary, "-Mo",
@@ -125,29 +125,16 @@ class NsJail:
                 self.python_binary, "-ISq", "-c", code
             )
 
-            try:
-                msg = "Executing code..."
-                if DEBUG:
-                    msg = f"{msg[:-3]}:\n{textwrap.indent(code, '    ')}"
-                log.info(msg)
+            msg = "Executing code..."
+            if DEBUG:
+                msg = f"{msg[:-3]}:\n{textwrap.indent(code, '    ')}"
+            log.info(msg)
 
-                proc = subprocess.run(args, capture_output=True, env=ENV, text=True)
+            try:
+                result = subprocess.run(args, capture_output=True, env=ENV, text=True)
             except ValueError:
-                return "ValueError: embedded null byte"
+                return subprocess.CompletedProcess(args, None, "", "ValueError: embedded null byte")
 
             self._parse_log(nsj_log)
 
-        if proc.returncode == 0:
-            output = proc.stdout
-        elif proc.returncode == 1:
-            output = proc.stderr
-        elif proc.returncode == 109:
-            return "timed out or memory limit exceeded"
-        elif proc.returncode == 255:
-            return "permission denied (root required)"
-        elif proc.returncode:
-            return f"unknown error, code: {proc.returncode}"
-        else:
-            return "unknown error, no error code"
-
-        return output
+        return result
diff --git a/tests/api/__init__.py b/tests/api/__init__.py
index fd4679a..dcee5b5 100644
--- a/tests/api/__init__.py
+++ b/tests/api/__init__.py
@@ -1,3 +1,4 @@
+from subprocess import CompletedProcess
 from unittest import mock
 
 from falcon import testing
@@ -11,7 +12,12 @@ class SnekAPITestCase(testing.TestCase):
 
         self.patcher = mock.patch("snekbox.api.resources.eval.NsJail", autospec=True)
         self.mock_nsjail = self.patcher.start()
-        self.mock_nsjail.return_value.python3.return_value = "test output"
+        self.mock_nsjail.return_value.python3.return_value = CompletedProcess(
+            args=[],
+            returncode=0,
+            stdout="output",
+            stderr="error"
+        )
         self.addCleanup(self.patcher.stop)
 
         self.app = SnekAPI()
diff --git a/tests/api/test_eval.py b/tests/api/test_eval.py
index bcd0ec4..03f0e39 100644
--- a/tests/api/test_eval.py
+++ b/tests/api/test_eval.py
@@ -9,8 +9,9 @@ class TestEvalResource(SnekAPITestCase):
         result = self.simulate_post(self.PATH, json=body)
 
         self.assertEqual(result.status_code, 200)
-        self.assertEqual(body["input"], result.json["input"])
-        self.assertEqual("test output", result.json["output"])
+        self.assertEqual("output", result.json["stdout"])
+        self.assertEqual("error", result.json["stderr"])
+        self.assertEqual(0, result.json["returncode"])
 
     def test_post_invalid_schema_400(self):
         body = {"stuff": "foo"}
-- 
cgit v1.2.3


From bc130a4d44f38824b6173c0babff4eefe18ac1db Mon Sep 17 00:00:00 2001
From: MarkKoz <KozlovMark@gmail.com>
Date: Sat, 15 Jun 2019 18:32:32 -0700
Subject: Merge stdout and stderr

Removes the need for redirecting stderr using contextlib in the input.
Furthermore, it captures errors which don't directly come from the
input, such as SyntaxErrors.
---
 snekbox/api/resources/eval.py |  2 --
 snekbox/nsjail.py             | 10 ++++++++--
 tests/api/test_eval.py        |  1 -
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'tests/api/test_eval.py')

diff --git a/snekbox/api/resources/eval.py b/snekbox/api/resources/eval.py
index 4779557..c4bd666 100644
--- a/snekbox/api/resources/eval.py
+++ b/snekbox/api/resources/eval.py
@@ -57,7 +57,6 @@ class EvalResource:
 
         >>> {
         ...     "stdout": "2\\n",
-        ...     "stderr": "",
         ...     "returncode": 0
         ... }
 
@@ -80,6 +79,5 @@ class EvalResource:
 
         resp.media = {
             "stdout": result.stdout,
-            "stderr": result.stderr,
             "returncode": result.returncode
         }
diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py
index ff12ec4..1675b3e 100644
--- a/snekbox/nsjail.py
+++ b/snekbox/nsjail.py
@@ -131,9 +131,15 @@ class NsJail:
             log.info(msg)
 
             try:
-                result = subprocess.run(args, capture_output=True, env=ENV, text=True)
+                result = subprocess.run(
+                    args,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    env=ENV,
+                    text=True
+                )
             except ValueError:
-                return subprocess.CompletedProcess(args, None, "", "ValueError: embedded null byte")
+                return subprocess.CompletedProcess(args, None, "ValueError: embedded null byte", "")
 
             self._parse_log(nsj_log)
 
diff --git a/tests/api/test_eval.py b/tests/api/test_eval.py
index 03f0e39..3350763 100644
--- a/tests/api/test_eval.py
+++ b/tests/api/test_eval.py
@@ -10,7 +10,6 @@ class TestEvalResource(SnekAPITestCase):
 
         self.assertEqual(result.status_code, 200)
         self.assertEqual("output", result.json["stdout"])
-        self.assertEqual("error", result.json["stderr"])
         self.assertEqual(0, result.json["returncode"])
 
     def test_post_invalid_schema_400(self):
-- 
cgit v1.2.3