gpu-operator values
This commit is contained in:
parent
a2b2bd17c5
commit
0912f36c1b
1 changed files with 431 additions and 11 deletions
|
@ -1,15 +1,435 @@
|
|||
toolkit:
|
||||
ccManager:
|
||||
defaultMode: 'off'
|
||||
enabled: false
|
||||
env:
|
||||
- name: CONTAINERD_CONFIG
|
||||
value: "/etc/containerd/config.toml.tmpl"
|
||||
- name: CONTAINERD_SOCKET
|
||||
value: "/run/k3s/containerd/containerd.sock"
|
||||
- name: CONTAINERD_RUNTIME_CLASS
|
||||
value: "nvidia"
|
||||
- name: CONTAINERD_SET_AS_DEFAULT
|
||||
value: "true"
|
||||
|
||||
- name: CC_CAPABLE_DEVICE_IDS
|
||||
value: 0x2339,0x2331,0x2330,0x2324,0x2322,0x233d
|
||||
image: k8s-cc-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
version: v0.1.1
|
||||
cdi:
|
||||
default: false
|
||||
enabled: false
|
||||
daemonsets:
|
||||
annotations: {}
|
||||
labels: {}
|
||||
priorityClassName: system-node-critical
|
||||
rollingUpdate:
|
||||
maxUnavailable: '1'
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
updateStrategy: RollingUpdate
|
||||
dcgm:
|
||||
args: []
|
||||
enabled: false
|
||||
env: []
|
||||
image: dcgm
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
version: 3.3.9-1-ubuntu22.04
|
||||
dcgmExporter:
|
||||
enabled: true
|
||||
env:
|
||||
- name: DCGM_EXPORTER_LISTEN
|
||||
value: :9400
|
||||
- name: DCGM_EXPORTER_KUBERNETES
|
||||
value: 'true'
|
||||
- name: DCGM_EXPORTER_COLLECTORS
|
||||
value: /etc/dcgm-exporter/dcp-metrics-included.csv
|
||||
image: dcgm-exporter
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia/k8s
|
||||
resources: {}
|
||||
serviceMonitor:
|
||||
additionalLabels: {}
|
||||
enabled: false
|
||||
honorLabels: false
|
||||
interval: 15s
|
||||
relabelings: []
|
||||
version: 3.3.9-3.6.1-ubuntu22.04
|
||||
devicePlugin:
|
||||
args: []
|
||||
config:
|
||||
name: time-slicing-config-all
|
||||
create: false
|
||||
data: {}
|
||||
default: any
|
||||
name: time-slicing-config-all
|
||||
enabled: true
|
||||
env:
|
||||
- name: PASS_DEVICE_SPECS
|
||||
value: 'true'
|
||||
- name: FAIL_ON_INIT_ERROR
|
||||
value: 'true'
|
||||
- name: DEVICE_LIST_STRATEGY
|
||||
value: envvar
|
||||
- name: DEVICE_ID_STRATEGY
|
||||
value: uuid
|
||||
- name: NVIDIA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||
value: all
|
||||
image: k8s-device-plugin
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
mps:
|
||||
root: /run/nvidia/mps
|
||||
repository: nvcr.io/nvidia
|
||||
resources: {}
|
||||
version: v0.17.0
|
||||
driver:
|
||||
certConfig:
|
||||
name: ''
|
||||
enabled: true
|
||||
env: []
|
||||
image: driver
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
kernelModuleConfig:
|
||||
name: ''
|
||||
licensingConfig:
|
||||
configMapName: ''
|
||||
nlsEnabled: true
|
||||
manager:
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: 'true'
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: 'false'
|
||||
- name: DRAIN_USE_FORCE
|
||||
value: 'false'
|
||||
- name: DRAIN_POD_SELECTOR_LABEL
|
||||
value: ''
|
||||
- name: DRAIN_TIMEOUT_SECONDS
|
||||
value: 0s
|
||||
- name: DRAIN_DELETE_EMPTYDIR_DATA
|
||||
value: 'false'
|
||||
image: k8s-driver-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: v0.7.0
|
||||
nvidiaDriverCRD:
|
||||
deployDefaultCR: true
|
||||
driverType: gpu
|
||||
enabled: false
|
||||
nodeSelector: {}
|
||||
rdma:
|
||||
enabled: false
|
||||
useHostMofed: false
|
||||
repoConfig:
|
||||
configMapName: ''
|
||||
repository: nvcr.io/nvidia
|
||||
resources: {}
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 60
|
||||
upgradePolicy:
|
||||
autoUpgrade: true
|
||||
drain:
|
||||
deleteEmptyDir: false
|
||||
enable: false
|
||||
force: false
|
||||
podSelector: ''
|
||||
timeoutSeconds: 300
|
||||
gpuPodDeletion:
|
||||
deleteEmptyDir: false
|
||||
force: false
|
||||
timeoutSeconds: 300
|
||||
maxParallelUpgrades: 1
|
||||
maxUnavailable: 25%
|
||||
waitForCompletion:
|
||||
podSelector: ''
|
||||
timeoutSeconds: 0
|
||||
useOpenKernelModules: false
|
||||
usePrecompiled: false
|
||||
version: 550.127.08
|
||||
virtualTopology:
|
||||
config: ''
|
||||
gdrcopy:
|
||||
args: []
|
||||
enabled: false
|
||||
env: []
|
||||
image: gdrdrv
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: v2.4.1-2
|
||||
gds:
|
||||
args: []
|
||||
enabled: false
|
||||
env: []
|
||||
image: nvidia-fs
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: 2.20.5
|
||||
gfd:
|
||||
enabled: true
|
||||
env:
|
||||
- name: GFD_SLEEP_INTERVAL
|
||||
value: 60s
|
||||
- name: GFD_FAIL_ON_INIT_ERROR
|
||||
value: 'true'
|
||||
image: k8s-device-plugin
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia
|
||||
resources: {}
|
||||
version: v0.17.0
|
||||
hostPaths:
|
||||
driverInstallDir: /run/nvidia/driver
|
||||
rootFS: /
|
||||
kataManager:
|
||||
config:
|
||||
artifactsDir: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
|
||||
runtimeClasses:
|
||||
- artifacts:
|
||||
pullSecret: ''
|
||||
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
|
||||
name: kata-nvidia-gpu
|
||||
nodeSelector: {}
|
||||
- artifacts:
|
||||
pullSecret: ''
|
||||
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.86.10-snp
|
||||
name: kata-nvidia-gpu-snp
|
||||
nodeSelector:
|
||||
nvidia.com/cc.capable: 'true'
|
||||
enabled: false
|
||||
env: []
|
||||
image: k8s-kata-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
version: v0.2.2
|
||||
mig:
|
||||
strategy: single
|
||||
migManager:
|
||||
config:
|
||||
create: false
|
||||
data: {}
|
||||
default: all-disabled
|
||||
name: ''
|
||||
enabled: true
|
||||
env:
|
||||
- name: WITH_REBOOT
|
||||
value: 'false'
|
||||
gpuClientsConfig:
|
||||
name: ''
|
||||
image: k8s-mig-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
version: v0.10.0-ubuntu20.04
|
||||
nfd:
|
||||
enabled: true
|
||||
nodefeaturerules: false
|
||||
node-feature-discovery:
|
||||
enableNodeFeatureApi: true
|
||||
gc:
|
||||
enable: true
|
||||
replicaCount: 1
|
||||
serviceAccount:
|
||||
create: false
|
||||
name: node-feature-discovery
|
||||
master:
|
||||
config:
|
||||
extraLabelNs:
|
||||
- nvidia.com
|
||||
serviceAccount:
|
||||
create: true
|
||||
name: node-feature-discovery
|
||||
priorityClassName: system-node-critical
|
||||
worker:
|
||||
config:
|
||||
sources:
|
||||
pci:
|
||||
deviceClassWhitelist:
|
||||
- '02'
|
||||
- '0200'
|
||||
- '0207'
|
||||
- '0300'
|
||||
- '0302'
|
||||
deviceLabelFields:
|
||||
- vendor
|
||||
serviceAccount:
|
||||
create: false
|
||||
name: node-feature-discovery
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/master
|
||||
operator: Equal
|
||||
value: ''
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/control-plane
|
||||
operator: Equal
|
||||
value: ''
|
||||
- effect: NoSchedule
|
||||
key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
nodeStatusExporter:
|
||||
enabled: false
|
||||
image: gpu-operator-validator
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
operator:
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- preference:
|
||||
matchExpressions:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: In
|
||||
values:
|
||||
- ''
|
||||
weight: 1
|
||||
- preference:
|
||||
matchExpressions:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: In
|
||||
values:
|
||||
- ''
|
||||
weight: 1
|
||||
annotations:
|
||||
openshift.io/scc: restricted-readonly
|
||||
cleanupCRD: false
|
||||
defaultRuntime: docker
|
||||
image: gpu-operator
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
initContainer:
|
||||
image: cuda
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia
|
||||
version: 12.6.3-base-ubi9
|
||||
logging:
|
||||
develMode: false
|
||||
level: info
|
||||
timeEncoding: epoch
|
||||
priorityClassName: system-node-critical
|
||||
repository: nvcr.io/nvidia
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 350Mi
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 100Mi
|
||||
runtimeClass: nvidia
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/master
|
||||
operator: Equal
|
||||
value: ''
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/control-plane
|
||||
operator: Equal
|
||||
value: ''
|
||||
upgradeCRD: true
|
||||
use_ocp_driver_toolkit: false
|
||||
platform:
|
||||
openshift: false
|
||||
psa:
|
||||
enabled: false
|
||||
sandboxDevicePlugin:
|
||||
args: []
|
||||
enabled: true
|
||||
env: []
|
||||
image: kubevirt-gpu-device-plugin
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia
|
||||
resources: {}
|
||||
version: v1.2.10
|
||||
sandboxWorkloads:
|
||||
defaultWorkload: container
|
||||
enabled: false
|
||||
toolkit:
|
||||
enabled: true
|
||||
env:
|
||||
- name: CONTAINERD_CONFIG
|
||||
value: /etc/containerd/config.toml.tmpl
|
||||
- name: CONTAINERD_SOCKET
|
||||
value: /run/k3s/containerd/containerd.sock
|
||||
- name: CONTAINERD_RUNTIME_CLASS
|
||||
value: nvidia
|
||||
- name: CONTAINERD_SET_AS_DEFAULT
|
||||
value: 'true'
|
||||
image: container-toolkit
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
installDir: /usr/local/nvidia
|
||||
repository: nvcr.io/nvidia/k8s
|
||||
resources: {}
|
||||
version: v1.17.3-ubuntu20.04
|
||||
validator:
|
||||
args: []
|
||||
env: []
|
||||
image: gpu-operator-validator
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
plugin:
|
||||
env:
|
||||
- name: WITH_WORKLOAD
|
||||
value: 'false'
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
resources: {}
|
||||
vfioManager:
|
||||
driverManager:
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: 'false'
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: 'false'
|
||||
image: k8s-driver-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: v0.7.0
|
||||
enabled: true
|
||||
env: []
|
||||
image: cuda
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia
|
||||
resources: {}
|
||||
version: 12.6.3-base-ubi9
|
||||
vgpuDeviceManager:
|
||||
config:
|
||||
default: default
|
||||
name: ''
|
||||
enabled: true
|
||||
env: []
|
||||
image: vgpu-device-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: v0.2.8
|
||||
vgpuManager:
|
||||
driverManager:
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: 'false'
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: 'false'
|
||||
image: k8s-driver-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
version: v0.7.0
|
||||
enabled: false
|
||||
env: []
|
||||
image: vgpu-manager
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
repository: ''
|
||||
resources: {}
|
||||
version: ''
|
||||
|
|
Loading…
Reference in a new issue