diff options
author | 2023-03-02 19:36:02 -0500 | |
---|---|---|
committer | 2023-03-02 19:36:02 -0500 | |
commit | eb64d2aefae844513cb549b62228d11608280782 (patch) | |
tree | 38c2690bfc836caf91d9a370fe8eaf3c4ff5aee3 | |
parent | Allow uploaded files to be writeable in nsjail (diff) |
Add `config/.ignore` file, parsed to ignore file patterns for upload
Currently, it only includes patterns for Python bytecode and other compiled artifacts (`__pycache__/`, `*.py[cod]`, `*$py.class`, `*.so`).
-rw-r--r-- | config/.ignore | 4 | ||||
-rw-r--r-- | snekbox/memfs.py | 29 | ||||
-rw-r--r-- | snekbox/nsjail.py | 11 |
3 files changed, 37 insertions, 7 deletions
diff --git a/config/.ignore b/config/.ignore new file mode 100644 index 0000000..961321b --- /dev/null +++ b/config/.ignore @@ -0,0 +1,4 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so diff --git a/snekbox/memfs.py b/snekbox/memfs.py index 1a18e03..feba252 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -4,8 +4,9 @@ from __future__ import annotations import logging import warnings import weakref -from collections.abc import Generator +from collections.abc import Generator, Sequence from contextlib import suppress +from fnmatch import fnmatch from pathlib import Path from types import TracebackType from typing import Type @@ -124,6 +125,7 @@ class MemFS: self, limit: int, pattern: str = "**/*", + ignores: Sequence[str] = (), exclude_files: dict[Path, float] | None = None, ) -> Generator[FileAttachment, None, None]: """ @@ -132,32 +134,43 @@ class MemFS: Args: limit: The maximum number of files to parse. pattern: The glob pattern to match files against. + ignores: A sequence of fnmatch patterns to ignore. exclude_files: A dict of Paths and last modified times. Files will be excluded if their last modified time is equal to the provided value. 
""" count = 0 for file in self.output.rglob(pattern): + if any( + fnmatch(str(file.relative_to(self.home)), match_pattern := ignore_pattern) + for ignore_pattern in ignores + ): + log.info(f"Ignoring {file.name!r} as it matches {match_pattern!r}") + continue + if exclude_files and (orig_time := exclude_files.get(file)): new_time = file.stat().st_mtime log.info(f"Checking {file.name} ({orig_time=}, {new_time=})") if file.stat().st_mtime == orig_time: - log.info(f"Skipping {file.name} as it has not been modified") + log.info(f"Skipping {file.name!r} as it has not been modified") continue + if count > limit: log.info(f"Max attachments {limit} reached, skipping remaining files") break + if file.is_file(): count += 1 - log.info(f"Found file {file!s}") + log.info(f"Found valid file for upload {file.name!r}") yield FileAttachment.from_path(file, relative_to=self.output) def files_list( self, limit: int, pattern: str, - preload_dict: bool = False, + ignores: Sequence[str] = (), exclude_files: dict[Path, float] | None = None, + preload_dict: bool = False, ) -> list[FileAttachment]: """ Return a sorted list of file paths within the output directory. @@ -165,14 +178,18 @@ class MemFS: Args: limit: The maximum number of files to parse. pattern: The glob pattern to match files against. - preload_dict: Whether to preload as_dict property data. + ignores: A sequence of fnmatch patterns to ignore. exclude_files: A dict of Paths and last modified times. Files will be excluded if their last modified time is equal to the provided value. + preload_dict: Whether to preload as_dict property data. Returns: List of FileAttachments sorted lexically by path name. 
""" - res = sorted(self.files(limit, pattern, exclude_files), key=lambda f: f.path) + res = sorted( + self.files(limit=limit, pattern=pattern, ignores=ignores, exclude_files=exclude_files), + key=lambda f: f.path, + ) if preload_dict: for file in res: # Loads the cached property as attribute diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 3e0f742..2c691e4 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -58,6 +58,7 @@ class NsJail: files_limit: int | None = 100, files_timeout: float | None = 8, files_pattern: str = "**/[!_]*", + files_ignore_path: str = "./config/.ignore", ): """ Initialize NsJail. @@ -74,17 +75,21 @@ class NsJail: files_limit: Maximum number of output files to parse. files_timeout: Maximum time in seconds to wait for output files to be read. files_pattern: Pattern to match files to attach within the output directory. + files_ignore_path: Path to a file containing a gitignore-like list of file + patterns to ignore for upload. """ self.nsjail_path = nsjail_path self.config_path = config_path self.max_output_size = max_output_size self.read_chunk_size = read_chunk_size + self.memfs_instance_size = memfs_instance_size self.memfs_home = memfs_home self.memfs_output = memfs_output self.files_limit = files_limit self.files_timeout = files_timeout self.files_pattern = files_pattern + self.files_ignores = Path(files_ignore_path).read_text().splitlines() self.config = self._read_config(config_path) self.cgroup_version = utils.cgroup.init(self.config) @@ -269,7 +274,11 @@ class NsJail: attachments = timed( MemFS.files_list, (fs, self.files_limit, self.files_pattern), - {"preload_dict": True, "exclude_files": files_written}, + { + "ignores": self.files_ignores, + "preload_dict": True, + "exclude_files": files_written, + }, timeout=self.files_timeout, ) log.info(f"Found {len(attachments)} files.") |