aboutsummaryrefslogtreecommitdiffstats
path: root/poetry_restrict_plugin/plugin.py
blob: 998743fe8d3a31b8a4d23d8c1c939c2461902b14 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os.path
import pathlib
import sys
import traceback

import poetry as poetry_package
from cleo.io.io import IO
from landlock import FSAccess, Ruleset
from poetry.plugins.plugin import Plugin
from poetry.poetry import Poetry


def existing_paths(paths):
    """Lazily yield the entries of *paths* that exist on the filesystem.

    *paths* must be a list or tuple. Because this is a generator function,
    that check (and every ``os.path.exists`` call) only runs once the result
    is iterated. Entries are yielded unchanged and in their original order.
    """
    assert isinstance(paths, (list, tuple))
    yield from filter(os.path.exists, paths)


def ensure_paths(paths):
    """Yield every entry of *paths*, creating missing ones as directories first.

    *paths* must be a list or tuple. Because this is a generator function,
    that check and the directory creation only happen once the result is
    iterated. An entry that already exists (directory or file) is yielded
    untouched; a missing entry is created together with any missing parents.
    """
    assert isinstance(paths, (list, tuple))
    for path in paths:
        if not os.path.exists(path):
            # exist_ok=True tolerates another process creating the directory
            # between the existence check above and this call.
            os.makedirs(path, exist_ok=True)
        yield path


class RestrictPlugin(Plugin):
    """Poetry plugin that restricts filesystem access through a Landlock ruleset.

    On activation an allow-list of paths is assembled and applied with
    ``Ruleset.apply()``. Setting the environment variable
    ``POETRY_NO_RESTRICT=1`` skips the restriction entirely.
    """

    def landlock(self, poetry: Poetry):
        """Assemble the allow-list for *poetry* and apply it.

        Poetry's own virtualenv and cache directories are created when
        missing (``ensure_paths``). Every other system-dependent path is
        filtered through ``existing_paths`` so that a path absent on this
        machine is skipped instead of being handed to ``Ruleset.allow``.

        Args:
            poetry: The active Poetry instance; its ``config`` and
                ``pyproject_path`` determine the project-specific rules.
        """
        # /home/user/.local/pipx/venvs/poetry/lib/python3.11/site-packages
        # Iterate ``__path__`` instead of reading its private ``_path``
        # attribute, which is a CPython detail of namespace-package paths
        # and does not exist when ``__path__`` is a plain list.
        poetry_libs_path = pathlib.Path(next(iter(poetry_package.__path__))).parent
        # Needed, otherwise raises:
        #   Fatal Python error: init_import_site: Failed to import the site module
        # /home/user/.local/pipx/venvs/poetry/pyvenv.cfg
        poetry_pyvenv_cfg = poetry_libs_path.parent.parent.parent / "pyvenv.cfg"

        # Frequently used access combinations
        read_only = FSAccess.READ_FILE | FSAccess.READ_DIR
        read_execute = read_only | FSAccess.EXECUTE

        ruleset = Ruleset()

        # Rules for Poetry's virtual environment management
        ruleset.allow(
            *ensure_paths(
                (
                    # Storing the virtual environment
                    poetry.config.virtualenvs_path,
                    # Cached dependencies
                    poetry.config.artifacts_cache_directory,
                    poetry.config.repository_cache_directory,
                ),
            ),
            rules=FSAccess.all(),
        )
        #   Temporary storage
        ruleset.allow(
            *existing_paths(("/tmp",)),
            rules=FSAccess.all() & ~FSAccess.EXECUTE,
        )
        # Poetry may also want to late-import some of its dependencies, or built-in modules
        ruleset.allow(*existing_paths(sys.path), rules=read_only)

        # Finally, the Python executable may need to import some of its shared libraries
        ruleset.allow(*existing_paths(("/lib", "/lib64")), rules=read_execute)
        # and in poetry shell, we might want to run some system executables, too
        ruleset.allow(*existing_paths(("/usr/bin",)), rules=read_execute)

        # For compilation of C dependencies, we need to be able to find headers
        ruleset.allow(*existing_paths(("/usr/include",)), rules=read_only)

        # We allow read access here, later we might want to restrict the pid namespace though
        ruleset.allow(*existing_paths(("/proc",)), rules=read_only)
        # needed for /dev/tty and /dev/pty devices, see /usr/lib/python3.11/pty.py
        ruleset.allow(
            *existing_paths(("/dev",)),
            rules=read_only | FSAccess.WRITE_FILE,
        )

        # Python's `zoneinfo` module
        ruleset.allow(*existing_paths(("/usr/share/zoneinfo/",)), rules=read_only)

        ruleset.allow(
            # We need to know which DNS resolver to use, and any custom hosts
            *existing_paths(("/etc/resolv.conf", "/etc/hosts")),
            # pip reads this file in _vendor/distro/distro.py
            *existing_paths(("/etc/debian_version",)),
            # I'm not opposed to including things like this because I don't want to annoy people
            # when their tooling doesn't work. But we have to be conservative. I think shells
            # are fine, but if there was some further tooling (e.g. shell tools run at startup)
            # I don't think those should be included.
            *existing_paths(("/etc/bash.bashrc", os.path.expanduser("~/.bashrc"))),
            rules=FSAccess.READ_FILE,
        )
        # Certificate stores; their locations differ between distributions,
        # so only the ones present on this system are allowed.
        ruleset.allow(
            *existing_paths(("/etc/ssl/certs", "/usr/local/share/ca-certificates")),
            rules=read_only,
        )

        # Allow determining mime types. Used for ruamel.yaml installation.
        ruleset.allow(*existing_paths(("/etc/mime.types",)), rules=FSAccess.READ_FILE)

        # Allow working with shared memory
        ruleset.allow(*existing_paths(("/dev/shm",)))

        # Black cache access
        ruleset.allow(
            *existing_paths((os.path.expanduser("~/.cache/black"),)),
            rules=FSAccess.READ_FILE | FSAccess.WRITE_FILE | FSAccess.READ_DIR,
        )

        pre_commit_cache = os.path.expanduser("~/.cache/pre-commit")
        if os.path.exists(pre_commit_cache):
            ruleset.allow(pre_commit_cache)
            # pre-commit runs git to figure out the diff to lint, which will
            # be pretty noisy if we do not whitelist the gitconfig.
            ruleset.allow(
                *existing_paths(
                    (
                        os.path.expanduser("~/.gitconfig"),
                        os.path.expanduser("~/.config/git/config"),
                    )
                ),
                rules=FSAccess.READ_FILE,
            )

        # Allow manipulation of files in our projects, e.g. for linters.
        # We might need to check this more thoroughly. For instance, configuring custom
        # filter programs in gitattributes might allow a sandbox escape.
        ruleset.allow(os.path.dirname(poetry.pyproject_path))

        # => Rules for poetry-in-poetry
        #
        # This is suboptimal. It is needed for nested invocations of poetry, which
        # sometimes happen through a combination of tooling (e.g. script calling
        # command through poetry being run in poetry shell). However, the
        # poetry configuration directory contains a file named `auth.toml`, which
        # sounds like it makes sense to restrict. The cleaner solution here would be
        # to mount a tmpfs over here so it appears empty.
        ruleset.allow(
            *existing_paths((os.path.expanduser("~/.config/pypoetry"),)),
            rules=read_only,
        )
        # Python may need to read pyvenv.cfg. The file is only present when
        # Poetry lives in a virtual environment with the layout assumed above.
        ruleset.allow(*existing_paths((poetry_pyvenv_cfg,)), rules=FSAccess.READ_FILE)

        ruleset.apply()

    def activate(self, poetry: Poetry, io: IO):
        """Apply the Landlock rules when Poetry activates the plugin.

        Does nothing except print a notice when ``POETRY_NO_RESTRICT`` is set
        to ``"1"``.

        Args:
            poetry: The active Poetry instance, forwarded to ``landlock``.
            io: Poetry's IO object, used for status and error messages.

        Raises:
            Exception: Any error raised while building or applying the
                ruleset is reported and then re-raised unchanged.
        """
        if os.getenv("POETRY_NO_RESTRICT") == "1":
            io.write_line(
                "<info>poetry-restrict-plugin</info>: "
                "<comment>Disabled via POETRY_NO_RESTRICT environment variable!</comment>"
            )
            return

        try:
            self.landlock(poetry)
            io.write_line("<info>poetry-restrict-plugin</info>: Landlock engaged.")
        except Exception:
            io.write_line("<error>Fatal error trying to enforce Landlock rules:</error>")
            # print_exc() prints the exception currently being handled; unlike
            # the single-argument print_exception(err) it does not need 3.10+.
            traceback.print_exc()
            io.write_line("<error>This is an issue of the Poetry restrict plugin, not of Poetry itself.</error>")
            raise