NVIDIA-Container-Toolkit—源码阅读—tools

NVIDIA-Container-Toolkit—CodeReading—tools

Posted by PYQ on September 20, 2023

tools/container

tree

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
.
├── containerd
│   ├── config_v1_test.go
│   ├── config_v2_test.go
│   └── containerd.go
├── container.go
├── crio
│   └── crio.go
├── docker
│   ├── docker.go
│   └── docker_test.go
├── nvidia-toolkit
│   ├── run.go
│   └── run_test.go
├── operator
│   ├── operator.go
│   └── operator_test.go
├── README.md
└── toolkit
    ├── executable.go
    ├── executable_test.go
    ├── replacements.go
    ├── runtime.go
    ├── runtime_test.go
    └── toolkit.go

README.md

该文件夹包含一些工具,可用于将 docker、containerd 或 cri-o 配置为使用NVIDIA Container Toolkit。

用法如下:

1
2
3
docker setup \
    --runtime-name NAME \
        /run/nvidia/toolkit

container.go

定义了一个container包。

引入的两个外部模块:

  1. urfave/cli 框架
  2. logrus

一些常量和结构体定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Values accepted for Options.RestartMode; Options.Restart switches on them.
const (
	// restartModeNone skips the restart and only logs a warning.
	restartModeNone    = "none"
	// restartModeSignal hands the socket to the caller-supplied signal function.
	restartModeSignal  = "signal"
	// restartModeSystemd restarts the service with `systemctl restart`.
	restartModeSystemd = "systemd"
)
// Options defines the shared options for the CLIs to configure containers runtimes.
type Options struct {
	// Config is the path the runtime config is saved to by flush.
	Config        string
	// Socket is passed to the signal callback in Restart.
	Socket        string
	// RuntimeName is forwarded to operator.WithNvidiaRuntimeName.
	RuntimeName   string
	// RuntimeDir is forwarded to operator.WithRoot; ParseArgs fills it from the
	// positional argument when it is empty.
	RuntimeDir    string
	// SetAsDefault is forwarded to operator.WithSetAsDefault.
	SetAsDefault  bool
	// RestartMode selects the branch taken by Restart (see the constants above).
	RestartMode   string
	// HostRootMount, when non-empty, makes SystemdRestart run systemctl via chroot
	// into this directory.
	HostRootMount string
}

ParseArgs函数:提取出命令行参数的RuntimeDir赋值给Options结构体指针o中的RuntimeDir,这里的命令行参数指除了定义了的子命令以及flag外的参数,只接受一个。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// ParseArgs fills o.RuntimeDir from the positional arguments of the CLI call.
// If o.RuntimeDir is already set the arguments are ignored; otherwise exactly
// one positional argument is required and becomes the runtime directory.
func ParseArgs(c *cli.Context, o *Options) error {
	if len(o.RuntimeDir) > 0 {
		logrus.Debug("Runtime directory already set; ignoring arguments")
		return nil
	}

	positional := c.Args()
	logrus.Infof("Parsing arguments: %v", positional.Slice())

	if count := c.NArg(); count != 1 {
		return fmt.Errorf("incorrect number of arguments")
	}

	o.RuntimeDir = positional.Get(0)
	logrus.Infof("Successfully parsed arguments")
	return nil
}

因为cfg是interface,所以container.go中所有cfg的函数都和调用的运行时(docker、containerd和cri-o)有关。

Configure和UpdateConfig:根据options中的相关内容(RuntimeName、SetAsDefault和RuntimeDir)创建Runtimes,并更新到cfg。

Unconfigure和RevertConfig与Configure和UpdateConfig类似,不过是将Runtimes从cfg移除。

flush则将对cfg的更改写入到o.Config指定的文件中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Configure adds the nvidia runtimes described by the options to cfg and then
// saves cfg through flush.
func (o Options) Configure(cfg engine.Interface) error {
	if err := o.UpdateConfig(cfg); err != nil {
		return fmt.Errorf("unable to update config: %v", err)
	}
	return o.flush(cfg)
}
// Unconfigure removes the nvidia runtimes described by the options from cfg
// and then saves cfg through flush.
func (o Options) Unconfigure(cfg engine.Interface) error {
	if err := o.RevertConfig(cfg); err != nil {
		return fmt.Errorf("unable to update config: %v", err)
	}
	return o.flush(cfg)
}
// flush saves cfg to the path held in o.Config. When Save reports a count of
// zero, a message is logged saying the config file is empty and was removed.
func (o Options) flush(cfg engine.Interface) error {
	logrus.Infof("Flushing config to %v", o.Config)

	written, err := cfg.Save(o.Config)
	switch {
	case err != nil:
		return fmt.Errorf("unable to flush config: %v", err)
	case written == 0:
		logrus.Infof("Config file is empty, removed")
	}
	return nil
}
// UpdateConfig adds every runtime returned by operator.GetRuntimes (built from
// RuntimeName, SetAsDefault and RuntimeDir) to cfg. It stops at the first
// runtime that cannot be added.
func (o Options) UpdateConfig(cfg engine.Interface) error {
	opts := []operator.Option{
		operator.WithNvidiaRuntimeName(o.RuntimeName),
		operator.WithSetAsDefault(o.SetAsDefault),
		operator.WithRoot(o.RuntimeDir),
	}

	for runtimeName, rt := range operator.GetRuntimes(opts...) {
		if err := cfg.AddRuntime(runtimeName, rt.Path, rt.SetAsDefault); err != nil {
			return fmt.Errorf("failed to update runtime %q: %v", runtimeName, err)
		}
	}
	return nil
}
// RevertConfig removes every runtime returned by operator.GetRuntimes (built
// from RuntimeName, SetAsDefault and RuntimeDir) from cfg. It stops at the
// first runtime that cannot be removed.
func (o Options) RevertConfig(cfg engine.Interface) error {
	opts := []operator.Option{
		operator.WithNvidiaRuntimeName(o.RuntimeName),
		operator.WithSetAsDefault(o.SetAsDefault),
		operator.WithRoot(o.RuntimeDir),
	}

	for runtimeName := range operator.GetRuntimes(opts...) {
		if err := cfg.RemoveRuntime(runtimeName); err != nil {
			return fmt.Errorf("failed to remove runtime %q: %v", runtimeName, err)
		}
	}
	return nil
}

Restart根据o.RestartMode选择重启方式(none、signal或systemd);SystemdRestart即执行systemctl restart,若设置了o.HostRootMount,则先通过chroot切换到该宿主机根目录再执行命令。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Restart restarts service according to o.RestartMode:
//   - "none":    nothing is restarted, a warning is logged;
//   - "signal":  withSignal is called with o.Socket;
//   - "systemd": SystemdRestart is called with service.
//
// Any other mode yields an error.
func (o Options) Restart(service string, withSignal func(string) error) error {
	if o.RestartMode == restartModeNone {
		logrus.Warningf("Skipping restart of %v due to --restart-mode=%v", service, o.RestartMode)
		return nil
	}
	if o.RestartMode == restartModeSignal {
		return withSignal(o.Socket)
	}
	if o.RestartMode == restartModeSystemd {
		return o.SystemdRestart(service)
	}
	return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
}
// SystemdRestart runs `systemctl restart <service>`, forwarding the command's
// stdout and stderr to this process. When o.HostRootMount is set the command
// is prefixed with `chroot <HostRootMount>`.
func (o Options) SystemdRestart(service string) error {
	command := []string{"systemctl", "restart", service}
	location := ""
	if o.HostRootMount != "" {
		location = " on host"
		command = append([]string{"chroot", o.HostRootMount}, command...)
	}

	logrus.Infof("Restarting %v%v using systemd: %v", service, location, command)

	restart := exec.Command(command[0], command[1:]...)
	restart.Stdout, restart.Stderr = os.Stdout, os.Stderr
	if err := restart.Run(); err != nil {
		return fmt.Errorf("error restarting %v using systemd: %v", service, err)
	}
	return nil
}

operator

定义了一个operator包。

一些常量和结构体定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Fallbacks applied by GetRuntimes when the corresponding option is empty.
const (
	// defaultRuntimeName is the runtime name used when none is configured.
	defaultRuntimeName = "nvidia"
	// defaultRoot is the directory the runtime binaries are resolved against
	// when no root is configured.
	defaultRoot = "/usr/bin"
)
// Runtime defines a runtime to be configured.
// The path and whether the runtime is the default runtime can be specified.
// All three fields below are derived from config (see newRuntime).
type Runtime struct {
	// name is the key under which the runtime is stored in Runtimes.
	name         string
	// Path is the runtime binary path (config root joined with the binary name).
	Path         string
	// SetAsDefault reports whether this runtime should be the default one.
	SetAsDefault bool
}
// Runtimes defines a set of runtimes to be configure for use in the GPU Operator
type Runtimes map[string]Runtime
// config collects the values set through the functional options.
type config struct {
	// root is the directory containing the runtime binaries.
	root              string
	// nvidiaRuntimeName is the requested name of the nvidia runtime.
	nvidiaRuntimeName string
	// setAsDefault requests that the named nvidia runtime be the default.
	setAsDefault      bool
}
// Option is a functional option for configuring set of runtimes.
type Option func(*config)

GetRuntimes函数用于配置Runtime,并添加到Runtimes,并返回当前所有可用的运行时:包括一个Runtime、“nvidia-cdi”和“nvidia-legacy”相关的运行时。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// GetRuntimes builds the set of runtimes to configure for the GPU Operator.
// The options are applied to an empty config; an empty root or runtime name
// falls back to defaultRoot / defaultRuntimeName. The result holds the nvidia
// runtime plus the "cdi" and "legacy" mode runtimes.
func GetRuntimes(opts ...Option) Runtimes {
	cfg := new(config)
	for i := range opts {
		opts[i](cfg)
	}

	if cfg.root == "" {
		cfg.root = defaultRoot
	}
	if cfg.nvidiaRuntimeName == "" {
		cfg.nvidiaRuntimeName = defaultRuntimeName
	}

	result := Runtimes{}
	result.add(cfg.nvidiaRuntime())
	result.add(cfg.modeRuntime("cdi"))
	result.add(cfg.modeRuntime("legacy"))
	return result
}

DefaultRuntimeName函数返回Runtimes中默认Runtime的name,即SetAsDefault为true的那个Runtime。SetAsDefault的取值为c.setAsDefault && name == c.nvidiaRuntimeName,也就是config中开启了setAsDefault且名字等于config中的nvidiaRuntimeName(该值实际来自options中的RuntimeName)。

1
2
3
4
5
6
7
8
9
// DefaultRuntimeName returns the name of a runtime in the set whose
// SetAsDefault flag is true, or the empty string when there is none.
func (r Runtimes) DefaultRuntimeName() string {
	for key := range r {
		if candidate := r[key]; candidate.SetAsDefault {
			return candidate.name
		}
	}
	return ""
}

add函数没太多好说的,就是添加Runtimes[Runtime.name] = Runtime到Runtimes中。

1
2
3
4
// add stores runtime in the set under its name, replacing any entry that
// already uses that name.
func (r *Runtimes) add(runtime Runtime) {
	set := *r
	set[runtime.name] = runtime
}

nvidiaRuntime创建一个新的runtime,如果config中的nvidiaRuntimeName为“nvidia-cdi”或者“nvidia-legacy”,创建的新runtime的名字为默认的nvidia;否则沿用config中的名字。(这里是因为GetRuntimes中modeRuntime会对“nvidia-cdi”和“nvidia-legacy”进行配置)

modeRuntime和newRuntime是用来设置nvidia相关的runtime。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// nvidiaRuntime builds the runtime backed by the "nvidia-container-runtime"
// binary. The configured name is used unless it collides with one of the
// mode-specific names ("nvidia-cdi", "nvidia-legacy"), in which case the
// default name `nvidia` is used instead.
func (c config) nvidiaRuntime() Runtime {
	switch c.nvidiaRuntimeName {
	case "nvidia-cdi", "nvidia-legacy":
		return c.newRuntime(defaultRuntimeName, "nvidia-container-runtime")
	default:
		return c.newRuntime(c.nvidiaRuntimeName, "nvidia-container-runtime")
	}
}
// modeRuntime builds the runtime for mode: it is named "nvidia-<mode>" and
// backed by the "nvidia-container-runtime.<mode>" binary.
func (c config) modeRuntime(mode string) Runtime {
	runtimeName := "nvidia-" + mode
	binaryName := "nvidia-container-runtime." + mode
	return c.newRuntime(runtimeName, binaryName)
}
// newRuntime builds a Runtime called name whose path is binary resolved
// against the configured root. The runtime is flagged as default only when
// setAsDefault is enabled and name equals the configured nvidia runtime name.
func (c config) newRuntime(name string, binary string) Runtime {
	var rt Runtime
	rt.name = name
	rt.Path = filepath.Join(c.root, binary)
	rt.SetAsDefault = c.setAsDefault && name == c.nvidiaRuntimeName
	return rt
}

With函数系列通过选项函数用于设置config相关参数,调用的时候需要With…(name)(c),这也是GetRuntimes函数中使用opt(c)的原因。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// WithRoot returns an Option that records root as the directory holding the
// runtime binaries.
func WithRoot(root string) Option {
	setRoot := func(cfg *config) { cfg.root = root }
	return setRoot
}
// WithNvidiaRuntimeName returns an Option that records name as the name of the
// nvidia runtime.
func WithNvidiaRuntimeName(name string) Option {
	setName := func(cfg *config) { cfg.nvidiaRuntimeName = name }
	return setName
}
// WithSetAsDefault returns an Option that records whether the nvidia runtime
// should be made the default runtime.
func WithSetAsDefault(set bool) Option {
	setDefault := func(cfg *config) { cfg.setAsDefault = set }
	return setDefault
}

docker

是一个main执行程序。

定义的一些常量和结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Defaults and tuning values for the docker CLI. How they are wired to flags
// is not shown in this excerpt.
const (
	// defaultConfig is the default docker daemon config file path.
	defaultConfig       = "/etc/docker/daemon.json"
	// defaultSocket is the default docker socket path.
	defaultSocket       = "/var/run/docker.sock"
	defaultSetAsDefault = true
	// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
	defaultRuntimeName   = "nvidia"
	defaultRestartMode   = "signal"
	defaultHostRootMount = "/host"
	// NOTE(review): presumably the delay between, and the maximum number of,
	// reload attempts when signalling docker — confirm against SignalDocker.
	reloadBackoff     = 5 * time.Second
	maxReloadAttempts = 6
	// socketMessageToGetPID is an HTTP/1.0 GET /info request; by its name it is
	// sent over the socket to learn the daemon PID — TODO confirm.
	socketMessageToGetPID = "GET /info HTTP/1.0\r\n\r\n"
)
// options stores the configuration from the command line or environment variables
type options struct {
	// Options embeds the settings shared by all runtime CLIs.
	container.Options
}

main函数主要是通过urfave/cli包构建了一个cli程序,支持setup和cleanup两个子命令,并支持一系列flag(setup和cleanup都支持)。

1
2
3
4
5
6
7
--config
--socket
--restart-mode
--host-root
--runtime-name (或者--nvidia-runtime-name、--runtime-class)
--nvidia-runtime-dir 或者--runtime-dir
--set-as-default

Setup函数创建了一个新的docker config,并根据option进行更改和写入文件,最后重启docker服务。

Cleanup函数和Setup函数类似。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Setup loads the docker config at o.Config, applies the nvidia runtime
// options to it (which also saves it) and finally restarts docker. Each
// failing step aborts the setup with a wrapped error.
func Setup(c *cli.Context, o *options) error {
	appName := c.App.Name
	log.Infof("Starting 'setup' for %v", appName)

	cfg, err := docker.New(docker.WithPath(o.Config))
	if err != nil {
		return fmt.Errorf("unable to load config: %v", err)
	}

	if err := o.Configure(cfg); err != nil {
		return fmt.Errorf("unable to configure docker: %v", err)
	}

	if err := RestartDocker(o); err != nil {
		return fmt.Errorf("unable to restart docker: %v", err)
	}

	log.Infof("Completed 'setup' for %v", appName)
	return nil
}

RestartDocker函数调用option.Restart函数对docker进行重启,根据option中的RestartMode选择通过SignalDocker函数用发送信号进行重启,还是直接通过systemd进行重启,默认是signal。

SignalDocker函数发送一个SIGHUP信号给docker daemon来重启docker,最多尝试6次。

containerd

是一个main执行程序。

定义了一些常量和结构体,runc.v1和v2的区别见容器中的 Shim 到底是个什么鬼?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Defaults and tuning values for the containerd CLI. How they are wired to
// flags is not shown in this excerpt.
const (
	// defaultConfig is the default containerd config file path.
	defaultConfig        = "/etc/containerd/config.toml"
	// defaultSocket is the default containerd socket path.
	defaultSocket        = "/run/containerd/containerd.sock"
	defaultRuntimeClass  = "nvidia"
	// NOTE(review): the identifier is spelled "Runtme" (sic); it is left as-is
	// here because it may be referenced outside this excerpt.
	defaultRuntmeType    = "io.containerd.runc.v2"
	defaultSetAsDefault  = true
	defaultRestartMode   = "signal"
	defaultHostRootMount = "/host"
	// NOTE(review): presumably the delay between, and the maximum number of,
	// reload attempts when signalling containerd — confirm against the caller.
	reloadBackoff     = 5 * time.Second
	maxReloadAttempts = 6
	// socketMessageToGetPID is empty for containerd.
	socketMessageToGetPID = ""
)
// options stores the configuration from the command line or environment variables
type options struct {
	// Options embeds the settings shared by all runtime CLIs.
	container.Options
	// containerd-specific options
	// useLegacyConfig is passed to containerd.WithUseLegacyConfig in Setup.
	useLegacyConfig bool
	// runtimeType is passed to containerd.WithRuntimeType in Setup.
	runtimeType     string
	// ContainerRuntimeModesCDIAnnotationPrefixes holds CDI annotation prefixes;
	// presumably consumed by containerAnnotationsFromCDIPrefixes — TODO confirm.
	ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
}

main函数主要是通过urfave/cli包构建了一个cli程序,支持的子命令和flag和docker的一致,不再赘述。

有趣的是这里在运行命令时,如果出错用的是log.Fatal(),而docker中是log.Errorf,再exit(1),当然两者效果是一样的。

Setup函数创建了一个新的containerd config,并根据option进行更改和写入文件,最后重启containerd服务。新建config也和docker操作不一样。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Setup loads the containerd config at o.Config (honouring the runtime type,
// legacy-config flag and container annotations from the options), applies the
// nvidia runtime options to it (which also saves it) and finally restarts
// containerd. Each failing step aborts the setup with a wrapped error.
func Setup(c *cli.Context, o *options) error {
	appName := c.App.Name
	log.Infof("Starting 'setup' for %v", appName)

	cfg, err := containerd.New(
		containerd.WithPath(o.Config),
		containerd.WithRuntimeType(o.runtimeType),
		containerd.WithUseLegacyConfig(o.useLegacyConfig),
		containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
	)
	if err != nil {
		return fmt.Errorf("unable to load config: %v", err)
	}

	if err := o.Configure(cfg); err != nil {
		return fmt.Errorf("unable to configure containerd: %v", err)
	}

	if err := RestartContainerd(o); err != nil {
		return fmt.Errorf("unable to restart containerd: %v", err)
	}

	log.Infof("Completed 'setup' for %v", appName)
	return nil
}

Cleanup函数、RestartContainerd和SignalContainerd函数不再赘述,和docker中描述的类似。

containerAnnotationsFromCDIPrefixes函数应该是用于容器调用nvidia GPU的相关函数,CDI是容器设备接口,用于Linux容器的设备。

但是我想说containerd的两个config的测试写的是真的牛x

crio

是一个main执行程序。

定义了一些常量和结构体,不同于docker和containerd,cri-o的默认重启模式是systemd,且不支持signal方式:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Defaults for the cri-o CLI. How they are wired to flags is not shown in this
// excerpt.
const (
	// defaultConfigMode is the default config mode; Setup accepts "hook" and
	// "config".
	defaultConfigMode = "hook"
	// Hook-based settings
	defaultHooksDir     = "/usr/share/containers/oci/hooks.d"
	defaultHookFilename = "oci-nvidia-hook.json"
	// Config-based settings
	defaultConfig        = "/etc/crio/crio.conf"
	defaultSocket        = "/var/run/crio/crio.sock"
	defaultRuntimeClass  = "nvidia"
	defaultSetAsDefault  = true
	defaultRestartMode   = "systemd"
	defaultHostRootMount = "/host"
)
// options stores the configuration from the command line or environment variables
type options struct {
	// Options embeds the settings shared by all runtime CLIs.
	container.Options
	// configMode selects between setupHook ("hook") and setupConfig ("config").
	configMode string
	// hook-specific options
	// hooksDir and hookFilename are joined to form the hook file path.
	hooksDir     string
	hookFilename string
}

main函数主要是通过urfave/cli包构建了一个cli程序,支持的子命令和flag和docker、containerd的一致,不再赘述。

Setup、setupHook和setupConfig三个函数用于配置cri-o的config文件,默认模式是hook。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Setup configures cri-o according to o.configMode: "hook" installs the
// prestart hook, "config" updates the cri-o config file. Any other mode is an
// error.
func Setup(c *cli.Context, o *options) error {
	log.Infof("Starting 'setup' for %v", c.App.Name)

	if o.configMode == "hook" {
		return setupHook(o)
	}
	if o.configMode == "config" {
		return setupConfig(o)
	}
	return fmt.Errorf("invalid config-mode '%v'", o.configMode)
}
// setupHook creates the OCI hook file at <hooksDir>/<hookFilename>. The hook
// points at the NVIDIA container runtime hook executable inside o.RuntimeDir.
func setupHook(o *options) error {
	log.Infof("Installing prestart hook")

	hookFile := filepath.Join(o.hooksDir, o.hookFilename)
	hookExecutable := filepath.Join(o.RuntimeDir, config.NVIDIAContainerRuntimeHookExecutable)
	if err := ocihook.CreateHook(hookFile, hookExecutable); err != nil {
		return fmt.Errorf("error creating hook: %v", err)
	}
	return nil
}
// setupConfig loads the cri-o config at o.Config, applies the nvidia runtime
// options to it (which also saves it) and restarts cri-o. Each failing step
// aborts with a wrapped error.
func setupConfig(o *options) error {
	log.Infof("Updating config file")

	cfg, err := crio.New(crio.WithPath(o.Config))
	if err != nil {
		return fmt.Errorf("unable to load config: %v", err)
	}

	if err := o.Configure(cfg); err != nil {
		return fmt.Errorf("unable to configure cri-o: %v", err)
	}

	if err := RestartCrio(o); err != nil {
		return fmt.Errorf("unable to restart crio: %v", err)
	}
	return nil
}

cri-o重启的相关函数,可以看到signal的函数直接就是不支持。

1
2
3
4
// RestartCrio restarts the "crio" service through o.Restart. The signal
// callback always returns an error, so the signal restart mode fails for cri-o.
func RestartCrio(o *options) error {
	signalUnsupported := func(string) error {
		return fmt.Errorf("supporting crio via signal is unsupported")
	}
	return o.Restart("crio", signalUnsupported)
}

toolkit

executable.go

包是main,定义了executable的相关函数。

定义的一些结构体:

1
2
3
4
5
6
7
8
9
10
11
// executableTarget names the two files produced when an executable is
// installed.
type executableTarget struct {
	// dotfileName is the file name given to the copy of the source executable
	// (e.g. "nvidia-ctk.real").
	dotfileName string
	// wrapperName is the file name of the generated wrapper script.
	wrapperName string
}
// executable describes a toolkit component to install together with the
// contents of its wrapper script.
type executable struct {
	// source is the path of the executable to copy.
	source   string
	// target holds the names of the installed copy and of the wrapper.
	target   executableTarget
	// env lists environment variables the wrapper sets for the real executable.
	env      map[string]string
	// preLines are written to the wrapper right after the shebang.
	preLines []string
	// argLines are written as extra arguments placed before "$@".
	argLines []string
}

install函数将NVIDIA container toolkit组件的可执行文件安装到destFolder:可执行文件被复制并重命名为一个.real文件,还创建了一个wrapper(其实就是个可执行的脚本文件)来设置需要的环境变量。

installFileToFolderWithName函数很简单,就是把源可执行文件复制重命名为源可执行文件.real,并设置相同的访问执行权限。

ps:这里注释的wapper写错了x

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// install installs an executable component of the NVIDIA container toolkit. The source executable
// is copied to a `.real` file and a wapper is created to set up the environment as required.
// It returns the path reported for the installed wrapper.
func (e executable) install(destFolder string) (string, error) {
	log.Infof("Installing executable '%v' to %v", e.source, destFolder)

	realName := e.dotfileName()
	realPath, err := installFileToFolderWithName(destFolder, realName, e.source)
	if err != nil {
		return "", fmt.Errorf("error installing file '%v' as '%v': %v", e.source, realName, err)
	}
	log.Infof("Installed '%v'", realPath)

	wrapperPath, err := e.installWrapper(destFolder, realPath)
	if err != nil {
		return "", fmt.Errorf("error wrapping '%v': %v", realPath, err)
	}
	log.Infof("Installed wrapper '%v'", wrapperPath)

	return wrapperPath, nil
}

installWrapper函数,流程就是创建wrapper文件、写入wrapper文件和修改wrapper文件权限使其可执行。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// installWrapper creates the wrapper script for dotfileName inside destFolder:
// the file is created, its contents are written by writeWrapperTo, and it is
// then made executable. It returns the wrapper's path.
func (e executable) installWrapper(destFolder string, dotfileName string) (string, error) {
	target := filepath.Join(destFolder, e.wrapperName())

	file, err := os.Create(target)
	if err != nil {
		return "", fmt.Errorf("error creating executable wrapper: %v", err)
	}
	defer file.Close()

	if err := e.writeWrapperTo(file, destFolder, dotfileName); err != nil {
		return "", fmt.Errorf("error writing wrapper contents: %v", err)
	}

	if err := ensureExecutable(target); err != nil {
		return "", fmt.Errorf("error making wrapper executable: %v", err)
	}
	return target, nil
}

writeWrapperTo函数:首先创建了替换规则,即将占位符@destDir@替换为destFolder。向wrapper中写入shell程序内容:遍历e.preLines,将每行应用替换规则后写入wrapper。将destFolder加入PATH环境变量,然后按环境变量名进行排序,并对环境变量值应用替换规则,以环境变量名=环境变量值 \的形式写入wrapper(每个占一行)。然后写入可执行程序dotfileName及其调用参数(也需应用替换规则),并通过“$@”将调用wrapper时传入的参数传递给dotfileName。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// writeWrapperTo writes the shell wrapper for dotfileName to wrapper.
//
// The script consists of, in order: the shebang, e.preLines, one
// `NAME=VALUE \` line per environment variable (sorted by name, with PATH
// extended so that destFolder comes first), the call to dotfileName,
// e.argLines and finally "$@". The destDirPattern placeholder is replaced by
// destFolder in preLines, environment values and argLines.
//
// e.env is never modified: the PATH adjustment is made on a private copy so
// that repeated calls produce the same script. The script is assembled in
// memory and written once; the write error, if any, is returned.
func (e executable) writeWrapperTo(wrapper io.Writer, destFolder string, dotfileName string) error {
	r := newReplacements(destDirPattern, destFolder)

	// Copy the environment instead of aliasing e.env: the map is shared with
	// the caller even though the receiver is passed by value.
	env := make(map[string]string, len(e.env)+1)
	for name, value := range e.env {
		env[name] = value
	}

	// Update the path to include the destination folder
	path, specified := env["PATH"]
	if !specified {
		path = "$PATH"
	}
	env["PATH"] = strings.Join([]string{destFolder, path}, ":")

	// Map iteration order is undefined; sort the names for a stable script.
	names := make([]string, 0, len(env))
	for name := range env {
		names = append(names, name)
	}
	sort.Strings(names)

	var script strings.Builder

	// Add the shebang
	script.WriteString("#! /bin/sh\n")

	// Add the preceding lines if any
	for _, line := range e.preLines {
		fmt.Fprintf(&script, "%s\n", r.apply(line))
	}

	for _, name := range names {
		fmt.Fprintf(&script, "%s=%s \\\n", name, r.apply(env[name]))
	}

	// Add the call to the target executable
	fmt.Fprintf(&script, "%s \\\n", dotfileName)

	// Insert additional lines in the `arg` list
	for _, line := range e.argLines {
		fmt.Fprintf(&script, "\t%s \\\n", r.apply(line))
	}

	// Add the script arguments "$@"
	script.WriteString("\t\"$@\"\n")

	_, err := io.WriteString(wrapper, script.String())
	return err
}

ensureExecutable函数比较简单,就是把wrapper文件的权限变为user、group和others均可执行。

replacements.go

同属于main,定义了一些replacements的函数。

定义的一些常量和结构体:

1
2
3
4
const (
	// destDirPattern is the placeholder that gets replaced by the destination
	// folder when a wrapper is written.
	destDirPattern = "@destDir@"
)
// replacements maps each string to be replaced to its replacement.
type replacements map[string]string

newReplacements函数将给定的rules创建了一个map结构replacements,奇数的rule为索引(旧值),偶数的为值(新值)。

1
2
3
4
5
6
7
8
9
// newReplacements builds a replacements table from rules given as consecutive
// (old, new) pairs. A trailing rule without a partner is ignored; when the
// same old value appears more than once the last pair wins.
func newReplacements(rules ...string) replacements {
	table := replacements{}
	for len(rules) >= 2 {
		table[rules[0]] = rules[1]
		rules = rules[2:]
	}
	return table
}

apply函数将input中的旧值替换为新值。

1
2
3
4
5
6
7
// apply returns input with every occurrence of each key in r replaced by the
// corresponding value. Rules are applied one after another in map iteration
// order, which Go leaves unspecified.
func (r replacements) apply(input string) string {
	result := input
	for pattern := range r {
		result = strings.ReplaceAll(result, pattern, r[pattern])
	}
	return result
}

runtime.go

同属于main,定义了runtime相关的函数。

定义了一些常量:

1
2
3
const (
	// nvidiaContainerRuntimeSource is the path of the nvidia-container-runtime
	// binary. NOTE(review): not referenced in the code shown here; presumably
	// the install source — confirm against the rest of the file.
	nvidiaContainerRuntimeSource = "/usr/bin/nvidia-container-runtime"
)

installContainerRuntimes函数将所有runtime进行复制和执行wrapper设置环境等,关键还是install函数

ps:driverRoot变量没用到

1
2
3
4
5
6
7
8
9
10
11
12
13
// installContainerRuntimes installs every runtime returned by
// operator.GetRuntimes() into toolkitDir, copying each executable and creating
// its wrapper. It stops at the first failure. driverRoot is not used.
func installContainerRuntimes(toolkitDir string, driverRoot string) error {
	for _, rt := range operator.GetRuntimes() {
		installer := newNvidiaContainerRuntimeInstaller(rt.Path)
		if _, err := installer.install(toolkitDir); err != nil {
			return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
		}
	}
	return nil
}

newNvidiaContainerRuntimeInstaller函数构造了executableTarget结构体,包含了执行文件的重命名版本(.real)和执行文件,再调用newRuntimeInstaller生成一个新的*executable。

1
2
3
4
5
6
7
8
9
// newNvidiaContainerRuntimeInstaller returns the installer for the runtime
// binary at source. The wrapper keeps the binary's base name and the copied
// binary gets the same name with a ".real" suffix. No extra environment is
// passed on.
func newNvidiaContainerRuntimeInstaller(source string) *executable {
	base := filepath.Base(source)
	return newRuntimeInstaller(source, executableTarget{
		dotfileName: base + ".real",
		wrapperName: base,
	}, nil)
}

newRuntimeInstaller函数:preLines是一个脚本,检查是否加载了 “nvidia” 驱动模块。如果没有加载,输出一条消息,并直接调用 runc 命令。否则,不执行任何操作。然后创建了一个XDG_CONFIG_HOME的环境变量,其值为@destDir@/.config。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// newRuntimeInstaller returns an executable for installing a runtime binary.
//
// The wrapper's preLines check /proc/modules for a loaded "nvidia" module and
// exec runc directly when none is found. The environment starts with
// XDG_CONFIG_HOME set to "<destDirPattern>/.config"; entries from env are
// copied on top and may override it.
func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable {
	runtimeEnv := map[string]string{
		"XDG_CONFIG_HOME": filepath.Join(destDirPattern, ".config"),
	}
	for key, value := range env {
		runtimeEnv[key] = value
	}

	return &executable{
		source: source,
		target: target,
		env:    runtimeEnv,
		preLines: []string{
			"",
			"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
			"if [ \"${?}\" != \"0\" ]; then",
			"	echo \"nvidia driver modules are not yet loaded, invoking runc directly\"",
			"	exec runc \"$@\"",
			"fi",
			"",
		},
	}
}

toolkit.go

同属于main包,真正的main函数,构建了一个toolkit的cli程序,提供了install和delete两个子命令及若干flag。

Install函数用于安装NVIDIA container toolkit的所有组件,包括如下步骤:

  • 移除所有已有的安装:os.RemoveAll(opts.toolkitRoot)
  • 创建目录:createDirectories(opts.toolkitRoot, toolkitConfigDir),toolkitConfigDir:opts.toolkitRoot/.config/nvidia-container-runtime
  • 安装容器相关的库:installContainerLibraries(opts.toolkitRoot)
  • 安装容器运行时:installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot)
  • 安装 NVIDIA container CLI:installContainerCLI(opts.toolkitRoot)
  • 安装NVIDIA container runtime hook:installRuntimeHook(opts.toolkitRoot, toolkitConfigPath),toolkitConfigPath:opts.toolkitRoot/.config/nvidia-container-runtime/config.toml
  • 安装NVIDIA Container Toolkit CLI:installContainerToolkitCLI(opts.toolkitRoot) nvidia-ctk
  • 安装NVIDIA container toolkit config:installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts)
  • 创建CDISpec:generateCDISpec(opts, nvidiaCTKPath)

installContainerToolkitCLI函数:nvidia-ctk的安装

1
2
3
4
5
6
7
8
9
10
func installContainerToolkitCLI(toolkitDir string) (string, error) {
	e := executable{
		source: "/usr/bin/nvidia-ctk",
		target: executableTarget{
			dotfileName: "nvidia-ctk.real",
			wrapperName: "nvidia-ctk",
		},
	}
	return e.install(toolkitDir)
}

installContainerCLI函数:nvidia-container-cli的安装

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// installContainerCLI installs the NVIDIA container CLI into toolkitRoot as
// "nvidia-container-cli.real" with an "nvidia-container-cli" wrapper. The
// wrapper sets LD_LIBRARY_PATH to toolkitRoot. It returns the path reported by
// install.
func installContainerCLI(toolkitRoot string) (string, error) {
	log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource)

	cli := executable{
		source: nvidiaContainerCliSource,
		target: executableTarget{
			dotfileName: "nvidia-container-cli.real",
			wrapperName: "nvidia-container-cli",
		},
		env: map[string]string{
			"LD_LIBRARY_PATH": toolkitRoot,
		},
	}

	wrapperPath, err := cli.install(toolkitRoot)
	if err != nil {
		return "", fmt.Errorf("error installing NVIDIA container CLI: %v", err)
	}
	return wrapperPath, nil
}

installRuntimeHook函数:NVIDIA runtime hook的安装,还创建了一个toolkitRoot/nvidia-container-toolkit到nvidia-container-runtime-hook(wrapper)的软链接。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// installRuntimeHook installs the NVIDIA container runtime hook into
// toolkitRoot as "nvidia-container-runtime-hook.real" with an
// "nvidia-container-runtime-hook" wrapper that passes `-config "<configFilePath>"`.
// It also creates the "nvidia-container-toolkit" symlink to the wrapper and
// returns the path reported by install.
func installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) {
	log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource)

	hook := executable{
		source: nvidiaContainerRuntimeHookSource,
		target: executableTarget{
			dotfileName: "nvidia-container-runtime-hook.real",
			wrapperName: "nvidia-container-runtime-hook",
		},
		argLines: []string{
			fmt.Sprintf("-config \"%s\"", configFilePath),
		},
	}

	wrapperPath, err := hook.install(toolkitRoot)
	if err != nil {
		return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
	}

	if err := installSymlink(toolkitRoot, "nvidia-container-toolkit", wrapperPath); err != nil {
		return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err)
	}
	return wrapperPath, nil
}

// installSymlink creates the symlink <toolkitRoot>/<link>. Only the base name
// of target is used as the link destination, so the link is relative and
// target is assumed to live in the toolkit directory itself.
func installSymlink(toolkitRoot string, link string, target string) error {
	linkPath := filepath.Join(toolkitRoot, link)
	relativeTarget := filepath.Base(target)

	log.Infof("Creating symlink '%v' -> '%v'", linkPath, relativeTarget)
	if err := os.Symlink(relativeTarget, linkPath); err != nil {
		return fmt.Errorf("error creating symlink '%v' => '%v': %v", linkPath, relativeTarget, err)
	}
	return nil
}

createDirectories用于创建参数中的所有目录,权限是0755即rwxr-xr-x

1
2
3
4
5
6
7
8
9
10
// createDirectories creates each of the given directories, including missing
// parents, with mode 0755. It stops at the first failure.
func createDirectories(dir ...string) error {
	for i := range dir {
		path := dir[i]
		log.Infof("Creating directory '%v'", path)
		if err := os.MkdirAll(path, 0755); err != nil {
			return fmt.Errorf("error creating directory: %v", err)
		}
	}
	return nil
}

nvidia-toolkit

定义了一些常量和结构体:toolkit的pid文件为/run/nvidia/toolkit.pid,支持的运行时包括docker(default)、crio和containerd

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
const (
	// runDir is the directory that holds the pid file.
	runDir         = "/run/nvidia"
	// pidFile is the path of the toolkit pid file.
	pidFile        = runDir + "/toolkit.pid"
	// toolkitCommand is the executable invoked by installToolkit.
	toolkitCommand = "toolkit"
	// toolkitSubDir is joined onto the root to form the install directory.
	toolkitSubDir  = "toolkit"
	defaultToolkitArgs = ""
	defaultRuntime     = "docker"
	defaultRuntimeArgs = ""
)
// availableRuntimes is the set of known runtime names; presumably used to
// validate the runtime option — TODO confirm against verifyFlags.
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
// waitingForSignal and signalReceived are buffered channels of capacity one.
// NOTE(review): their use is not visible in this excerpt.
var waitingForSignal = make(chan bool, 1)
var signalReceived = make(chan bool, 1)
// options stores the command line arguments
type options struct {
	noDaemon    bool
	// runtime is the command run by setupRuntime.
	runtime     string
	// runtimeArgs is inserted verbatim into the setup command line.
	runtimeArgs string
	// root is the directory under which the toolkit sub-directory is installed.
	root        string
}
// Version defines the CLI version. This is set at build time using LD FLAGS
var Version = "development"

main函数创建了一个nvidia-toolkit的cli程序,描述如下:

1
2
3
4
// Name, usage and description of the CLI (c is presumably the cli app created
// in main — TODO confirm).
c.Name = "nvidia-toolkit"
c.Usage = "Install the nvidia-container-toolkit for use by a given runtime"
c.UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"

Run函数定义了该nvidia-toolkit的核心逻辑:包括验证命令的flag、初始化、安装toolkit、设置runtime

ParseArgs函数用于解析出命令行中的路径作为root,关于root的描述:the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to ROOT/toolkit,代码写的挺有意思。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// ParseArgs extracts the optional root directory from args. args[0] is the
// command itself and a root, if present, must directly follow it and precede
// any flag. It returns the remaining arguments (with the root removed), the
// root (empty when none was given) and an error when more than one positional
// argument precedes the flags.
func ParseArgs(args []string) ([]string, string, error) {
	log.Infof("Parsing arguments")

	// Only the command itself: there is nothing to extract.
	if len(args) < 2 {
		return args, "", nil
	}

	// Find the index of the last argument that comes before the first flag.
	last := 0
	for idx := range args {
		if strings.HasPrefix(args[idx], "-") {
			break
		}
		last = idx
	}

	switch last {
	case 0:
		// The command is followed directly by flags.
		return args, "", nil
	case 1:
		// The command is followed by a root and then, optionally, flags.
		return append([]string{args[0]}, args[2:]...), args[1], nil
	default:
		// More than one root was given.
		return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:last+1])
	}
}

verifyFlags函数比较简单,做检查参数的一些工作,包括安装文件夹root是否指定、用户输入的runtime是否支持。

initialize函数执行了一些初始化操作,如创建PID文件,获取文件锁,向pid文件写入pid,并设置了信号处理程序,以便在接收到特定信号时执行相应的操作。

installToolkit函数调用toolkit install --toolkit-root path命令将toolkit文件安装在指定目录(o.root/toolkit),又回到toolkit.go中去了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// installToolkit runs `toolkit install --toolkit-root <o.root>/toolkit`,
// forwarding the command's stdout and stderr to this process.
//
// The command is executed directly rather than through `sh -c`: the arguments
// are fixed and need no shell features, and passing them as an argv keeps a
// root path containing whitespace or shell metacharacters intact.
func installToolkit(o *options) error {
	log.Infof("Installing toolkit")

	cmdline := []string{
		toolkitCommand,
		"install",
		"--toolkit-root",
		filepath.Join(o.root, toolkitSubDir),
	}

	cmd := exec.Command(cmdline[0], cmdline[1:]...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("error running %v command: %v", cmdline, err)
	}

	return nil
}

setupRuntime函数执行o.runtime setup o.runtimeArgs toolkitDir命令,也就是docker、containerd、crio三个运行时的相关配置。相对应的cleanupRuntime函数就是执行对应runtime的cleanup命令。

1
2
3
4
5
6
7
8
9
10
11
12
13
// setupRuntime runs `<o.runtime> setup <o.runtimeArgs> <o.root>/toolkit`
// through `sh -c`, forwarding the command's stdout and stderr to this process.
func setupRuntime(o *options) error {
	installDir := filepath.Join(o.root, toolkitSubDir)

	log.Infof("Setting up runtime")

	script := fmt.Sprintf("%v setup %v %v\n", o.runtime, o.runtimeArgs, installDir)
	shell := exec.Command("sh", "-c", script)
	shell.Stdout, shell.Stderr = os.Stdout, os.Stderr
	if err := shell.Run(); err != nil {
		return fmt.Errorf("error running %v command: %v", o.runtime, err)
	}
	return nil
}

shutdown函数比较简单,移除pid文件。

conclusion

tools/container文件夹下创建了几个cli程序:

  • docker
  • containerd
  • crio
  • toolkit
  • nvidia-toolkit

前面三个cli程序子命令和flag均相同,用于配置或取消对应的运行时使用nvidia-runtime。

toolkit用于安装NVIDIA container toolkit的所有组件:

  • 容器相关的库
  • nvidia、nvidia-cdi和nvidia-legacy的运行时
  • nvidia-container-cli
  • nvidia-container-runtime-hook
  • nvidia-ctk
  • 相关的配置文件和cdi spec

nvidia-toolkit主要是调用前面两个程序完成所有组件的安装和配置:

  • toolkit install --toolkit-root path
  • o.runtime setup o.runtimeArgs toolkitDir