gpu-operator values

This commit is contained in:
nomadics9 2025-01-12 04:11:56 +03:00
parent a2b2bd17c5
commit 0912f36c1b

View file

@ -1,15 +1,435 @@
toolkit:
ccManager:
defaultMode: 'off'
enabled: false
env:
- name: CONTAINERD_CONFIG
value: "/etc/containerd/config.toml.tmpl"
- name: CONTAINERD_SOCKET
value: "/run/k3s/containerd/containerd.sock"
- name: CONTAINERD_RUNTIME_CLASS
value: "nvidia"
- name: CONTAINERD_SET_AS_DEFAULT
value: "true"
- name: CC_CAPABLE_DEVICE_IDS
value: 0x2339,0x2331,0x2330,0x2324,0x2322,0x233d
image: k8s-cc-manager
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
resources: {}
version: v0.1.1
cdi:
default: false
enabled: false
daemonsets:
annotations: {}
labels: {}
priorityClassName: system-node-critical
rollingUpdate:
maxUnavailable: '1'
tolerations:
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
updateStrategy: RollingUpdate
dcgm:
args: []
enabled: false
env: []
image: dcgm
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
resources: {}
version: 3.3.9-1-ubuntu22.04
dcgmExporter:
enabled: true
env:
- name: DCGM_EXPORTER_LISTEN
value: :9400
- name: DCGM_EXPORTER_KUBERNETES
value: 'true'
- name: DCGM_EXPORTER_COLLECTORS
value: /etc/dcgm-exporter/dcp-metrics-included.csv
image: dcgm-exporter
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/k8s
resources: {}
serviceMonitor:
additionalLabels: {}
enabled: false
honorLabels: false
interval: 15s
relabelings: []
version: 3.3.9-3.6.1-ubuntu22.04
devicePlugin:
args: []
config:
name: time-slicing-config-all
create: false
data: {}
default: any
name: time-slicing-config-all
enabled: true
env:
- name: PASS_DEVICE_SPECS
value: 'true'
- name: FAIL_ON_INIT_ERROR
value: 'true'
- name: DEVICE_LIST_STRATEGY
value: envvar
- name: DEVICE_ID_STRATEGY
value: uuid
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: all
image: k8s-device-plugin
imagePullPolicy: IfNotPresent
imagePullSecrets: []
mps:
root: /run/nvidia/mps
repository: nvcr.io/nvidia
resources: {}
version: v0.17.0
driver:
certConfig:
name: ''
enabled: true
env: []
image: driver
imagePullPolicy: IfNotPresent
imagePullSecrets: []
kernelModuleConfig:
name: ''
licensingConfig:
configMapName: ''
nlsEnabled: true
manager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: 'true'
- name: ENABLE_AUTO_DRAIN
value: 'false'
- name: DRAIN_USE_FORCE
value: 'false'
- name: DRAIN_POD_SELECTOR_LABEL
value: ''
- name: DRAIN_TIMEOUT_SECONDS
value: 0s
- name: DRAIN_DELETE_EMPTYDIR_DATA
value: 'false'
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.7.0
nvidiaDriverCRD:
deployDefaultCR: true
driverType: gpu
enabled: false
nodeSelector: {}
rdma:
enabled: false
useHostMofed: false
repoConfig:
configMapName: ''
repository: nvcr.io/nvidia
resources: {}
startupProbe:
failureThreshold: 120
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 60
upgradePolicy:
autoUpgrade: true
drain:
deleteEmptyDir: false
enable: false
force: false
podSelector: ''
timeoutSeconds: 300
gpuPodDeletion:
deleteEmptyDir: false
force: false
timeoutSeconds: 300
maxParallelUpgrades: 1
maxUnavailable: 25%
waitForCompletion:
podSelector: ''
timeoutSeconds: 0
useOpenKernelModules: false
usePrecompiled: false
version: 550.127.08
virtualTopology:
config: ''
gdrcopy:
args: []
enabled: false
env: []
image: gdrdrv
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
version: v2.4.1-2
gds:
args: []
enabled: false
env: []
image: nvidia-fs
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
version: 2.20.5
gfd:
enabled: true
env:
- name: GFD_SLEEP_INTERVAL
value: 60s
- name: GFD_FAIL_ON_INIT_ERROR
value: 'true'
image: k8s-device-plugin
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia
resources: {}
version: v0.17.0
hostPaths:
driverInstallDir: /run/nvidia/driver
rootFS: /
kataManager:
config:
artifactsDir: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
runtimeClasses:
- artifacts:
pullSecret: ''
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
name: kata-nvidia-gpu
nodeSelector: {}
- artifacts:
pullSecret: ''
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.86.10-snp
name: kata-nvidia-gpu-snp
nodeSelector:
nvidia.com/cc.capable: 'true'
enabled: false
env: []
image: k8s-kata-manager
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
resources: {}
version: v0.2.2
mig:
strategy: single
migManager:
config:
create: false
data: {}
default: all-disabled
name: ''
enabled: true
env:
- name: WITH_REBOOT
value: 'false'
gpuClientsConfig:
name: ''
image: k8s-mig-manager
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
resources: {}
version: v0.10.0-ubuntu20.04
nfd:
enabled: true
nodefeaturerules: false
node-feature-discovery:
enableNodeFeatureApi: true
gc:
enable: true
replicaCount: 1
serviceAccount:
create: false
name: node-feature-discovery
master:
config:
extraLabelNs:
- nvidia.com
serviceAccount:
create: true
name: node-feature-discovery
priorityClassName: system-node-critical
worker:
config:
sources:
pci:
deviceClassWhitelist:
- '02'
- '0200'
- '0207'
- '0300'
- '0302'
deviceLabelFields:
- vendor
serviceAccount:
create: false
name: node-feature-discovery
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Equal
value: ''
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Equal
value: ''
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
nodeStatusExporter:
enabled: false
image: gpu-operator-validator
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
resources: {}
operator:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- preference:
matchExpressions:
- key: node-role.kubernetes.io/master
operator: In
values:
- ''
weight: 1
- preference:
matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: In
values:
- ''
weight: 1
annotations:
openshift.io/scc: restricted-readonly
cleanupCRD: false
defaultRuntime: docker
image: gpu-operator
imagePullPolicy: IfNotPresent
imagePullSecrets: []
initContainer:
image: cuda
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: 12.6.3-base-ubi9
logging:
develMode: false
level: info
timeEncoding: epoch
priorityClassName: system-node-critical
repository: nvcr.io/nvidia
resources:
limits:
cpu: 500m
memory: 350Mi
requests:
cpu: 200m
memory: 100Mi
runtimeClass: nvidia
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Equal
value: ''
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Equal
value: ''
upgradeCRD: true
use_ocp_driver_toolkit: false
platform:
openshift: false
psa:
enabled: false
sandboxDevicePlugin:
args: []
enabled: true
env: []
image: kubevirt-gpu-device-plugin
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia
resources: {}
version: v1.2.10
sandboxWorkloads:
defaultWorkload: container
enabled: false
toolkit:
enabled: true
env:
- name: CONTAINERD_CONFIG
value: /etc/containerd/config.toml.tmpl
- name: CONTAINERD_SOCKET
value: /run/k3s/containerd/containerd.sock
- name: CONTAINERD_RUNTIME_CLASS
value: nvidia
- name: CONTAINERD_SET_AS_DEFAULT
value: 'true'
image: container-toolkit
imagePullPolicy: IfNotPresent
imagePullSecrets: []
installDir: /usr/local/nvidia
repository: nvcr.io/nvidia/k8s
resources: {}
version: v1.17.3-ubuntu20.04
validator:
args: []
env: []
image: gpu-operator-validator
imagePullPolicy: IfNotPresent
imagePullSecrets: []
plugin:
env:
- name: WITH_WORKLOAD
value: 'false'
repository: nvcr.io/nvidia/cloud-native
resources: {}
vfioManager:
driverManager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: 'false'
- name: ENABLE_AUTO_DRAIN
value: 'false'
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.7.0
enabled: true
env: []
image: cuda
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia
resources: {}
version: 12.6.3-base-ubi9
vgpuDeviceManager:
config:
default: default
name: ''
enabled: true
env: []
image: vgpu-device-manager
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: nvcr.io/nvidia/cloud-native
version: v0.2.8
vgpuManager:
driverManager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: 'false'
- name: ENABLE_AUTO_DRAIN
value: 'false'
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.7.0
enabled: false
env: []
image: vgpu-manager
imagePullPolicy: IfNotPresent
imagePullSecrets: []
repository: ''
resources: {}
version: ''