# Source listing banner: 2384 lines, 113 KiB, YAML
---
|
||
apiVersion: apiextensions.k8s.io/v1
|
||
kind: CustomResourceDefinition
|
||
metadata:
|
||
annotations:
|
||
controller-gen.kubebuilder.io/version: v0.16.4
|
||
name: clusterpolicies.nvidia.com
|
||
spec:
|
||
group: nvidia.com
|
||
names:
|
||
kind: ClusterPolicy
|
||
listKind: ClusterPolicyList
|
||
plural: clusterpolicies
|
||
singular: clusterpolicy
|
||
scope: Cluster
|
||
versions:
|
||
- additionalPrinterColumns:
|
||
- jsonPath: .status.state
|
||
name: Status
|
||
type: string
|
||
- jsonPath: .metadata.creationTimestamp
|
||
name: Age
|
||
type: string
|
||
name: v1
|
||
schema:
|
||
openAPIV3Schema:
|
||
description: ClusterPolicy is the Schema for the clusterpolicies API
|
||
properties:
|
||
# Schema for the object's top-level apiVersion field (a plain string).
apiVersion:
  description: |-
    APIVersion defines the versioned schema of this representation of an object.
    Servers should convert recognized schemas to the latest internal value, and
    may reject unrecognized values.
    More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
  type: string
|
||
# Schema for the object's top-level kind field (a plain string).
kind:
  description: |-
    Kind is a string value representing the REST resource this object represents.
    Servers may infer this from the endpoint the client submits requests to.
    Cannot be updated.
    In CamelCase.
    More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
  type: string
|
||
# Schema for the object's metadata; only constrained to be an object here.
metadata:
  type: object
|
||
spec:
|
||
description: ClusterPolicySpec defines the desired state of ClusterPolicy
|
||
properties:
|
||
# ccManager: settings for the CC Manager component (image coordinates,
# default CC mode, args/env, and per-pod resource requests/limits).
ccManager:
  description: CCManager component spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    defaultMode:
      description: Default CC mode setting for compatible GPUs on the
        node
      # Quoted so "on"/"off" stay strings instead of YAML 1.1 booleans.
      enum:
      - "on"
      - "off"
      - devtools
      type: string
    enabled:
      description: Enabled indicates if deployment of CC Manager is
        enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    image:
      description: CC Manager image name
      # NOTE(review): regex has no ^/$ anchors, so it appears to accept any
      # value containing at least one allowed character - confirm intent.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    repository:
      description: CC Manager image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    version:
      description: CC Manager image tag
      type: string
  type: object
|
||
# cdi: two boolean switches for Container Device Interface usage; both
# default to false.
cdi:
  description: CDI configures how the Container Device Interface is
    used in the cluster
  properties:
    default:
      default: false
      description: Default indicates whether to use CDI as the default
        mechanism for providing GPU access to containers.
      type: boolean
    enabled:
      default: false
      description: Enabled indicates whether CDI can be used to make
        GPUs accessible to containers.
      type: boolean
  type: object
|
||
# daemonsets: configuration shared by all DaemonSets (labels, annotations,
# priority class, tolerations, and update strategy).
daemonsets:
  description: Daemonset defines common configuration for all Daemonsets
  properties:
    annotations:
      additionalProperties:
        type: string
      description: |-
        Optional: Annotations is an unstructured key value map stored with a resource that may be
        set by external tools to store and retrieve arbitrary metadata. They are not
        queryable and should be preserved when modifying objects.
      type: object
    labels:
      additionalProperties:
        type: string
      description: |-
        Optional: Map of string keys and values that can be used to organize and categorize
        (scope and select) objects. May match selectors of replication controllers
        and services.
      type: object
    priorityClassName:
      type: string
    rollingUpdate:
      description: 'Optional: Configuration for rolling update of all
        DaemonSet pods'
      properties:
        # Declared as a plain string here (not int-or-string).
        maxUnavailable:
          type: string
      type: object
    tolerations:
      description: 'Optional: Set tolerations'
      items:
        description: |-
          The pod this Toleration is attached to tolerates any taint that matches
          the triple <key,value,effect> using the matching operator <operator>.
        properties:
          effect:
            description: |-
              Effect indicates the taint effect to match. Empty means match all taint effects.
              When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
            type: string
          key:
            description: |-
              Key is the taint key that the toleration applies to. Empty means match all taint keys.
              If the key is empty, operator must be Exists; this combination means to match all values and all keys.
            type: string
          operator:
            description: |-
              Operator represents a key's relationship to the value.
              Valid operators are Exists and Equal. Defaults to Equal.
              Exists is equivalent to wildcard for value, so that a pod can
              tolerate all taints of a particular category.
            type: string
          tolerationSeconds:
            description: |-
              TolerationSeconds represents the period of time the toleration (which must be
              of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
              it is not set, which means tolerate the taint forever (do not evict). Zero and
              negative values will be treated as 0 (evict immediately) by the system.
            format: int64
            type: integer
          value:
            description: |-
              Value is the taint value the toleration matches to.
              If the operator is Exists, the value should be empty, otherwise just a regular string.
            type: string
        type: object
      type: array
    updateStrategy:
      default: RollingUpdate
      enum:
      - RollingUpdate
      - OnDelete
      type: string
  type: object
|
||
# dcgm: settings for running the NVIDIA DCGM Hostengine as a separate pod
# (image coordinates, args/env, host port, and per-pod resources).
dcgm:
  description: DCGM component spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    enabled:
      description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
        as a separate pod is enabled.
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    hostPort:
      description: 'Deprecated: HostPort represents host port that needs
        to be bound for DCGM engine (Default: 5555)'
      format: int32
      type: integer
    image:
      description: NVIDIA DCGM image name
      # NOTE(review): regex has no ^/$ anchors, so it appears to accept any
      # value containing at least one allowed character - confirm intent.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    repository:
      description: NVIDIA DCGM image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    version:
      description: NVIDIA DCGM image tag
      type: string
  type: object
|
||
# dcgmExporter: settings for the NVIDIA DCGM Exporter (image coordinates,
# args/env, custom metrics ConfigMap, resources, and an optional
# ServiceMonitor with relabeling rules).
dcgmExporter:
  description: DCGMExporter spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    config:
      description: 'Optional: Custom metrics configuration for NVIDIA
        DCGM Exporter'
      properties:
        name:
          description: ConfigMap name with file dcgm-metrics.csv for
            metrics to be collected by NVIDIA DCGM Exporter
          type: string
      type: object
    enabled:
      description: Enabled indicates if deployment of NVIDIA DCGM Exporter
        through operator is enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    image:
      description: NVIDIA DCGM Exporter image name
      # NOTE(review): regex has no ^/$ anchors, so it appears to accept any
      # value containing at least one allowed character - confirm intent.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    repository:
      description: NVIDIA DCGM Exporter image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    serviceMonitor:
      description: 'Optional: ServiceMonitor configuration for NVIDIA
        DCGM Exporter'
      properties:
        additionalLabels:
          additionalProperties:
            type: string
          description: AdditionalLabels to add to ServiceMonitor instance
            for NVIDIA DCGM Exporter
          type: object
        enabled:
          description: Enabled indicates if ServiceMonitor is deployed
            for NVIDIA DCGM Exporter
          type: boolean
        honorLabels:
          description: HonorLabels chooses the metric’s labels on collisions
            with target labels.
          type: boolean
        interval:
          description: |-
            Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
            Supported units: y, w, d, h, m, s, ms
          pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
          type: string
        relabelings:
          description: Relabelings allows to rewrite labels on metric
            sets for NVIDIA DCGM Exporter
          items:
            description: |-
              RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
              scraped samples and remote write samples.

              More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
            properties:
              action:
                default: replace
                description: |-
                  Action to perform based on the regex matching.

                  `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
                  `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.

                  Default: "Replace"
                # Each action is listed in both lower-case and CamelCase form.
                enum:
                - replace
                - Replace
                - keep
                - Keep
                - drop
                - Drop
                - hashmod
                - HashMod
                - labelmap
                - LabelMap
                - labeldrop
                - LabelDrop
                - labelkeep
                - LabelKeep
                - lowercase
                - Lowercase
                - uppercase
                - Uppercase
                - keepequal
                - KeepEqual
                - dropequal
                - DropEqual
                type: string
              modulus:
                description: |-
                  Modulus to take of the hash of the source label values.

                  Only applicable when the action is `HashMod`.
                format: int64
                type: integer
              regex:
                description: Regular expression against which the extracted
                  value is matched.
                type: string
              replacement:
                description: |-
                  Replacement value against which a Replace action is performed if the
                  regular expression matches.

                  Regex capture groups are available.
                type: string
              separator:
                description: Separator is the string between concatenated
                  SourceLabels.
                type: string
              sourceLabels:
                description: |-
                  The source labels select values from existing labels. Their content is
                  concatenated using the configured Separator and matched against the
                  configured regular expression.
                items:
                  description: |-
                    LabelName is a valid Prometheus label name which may only contain ASCII
                    letters, numbers, as well as underscores.
                  pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
                  type: string
                type: array
              targetLabel:
                description: |-
                  Label to which the resulting string is written in a replacement.

                  It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
                  `KeepEqual` and `DropEqual` actions.

                  Regex capture groups are available.
                type: string
            type: object
          type: array
      type: object
    version:
      description: NVIDIA DCGM Exporter image tag
      type: string
  type: object
|
||
# devicePlugin: settings for the NVIDIA Device Plugin (image coordinates,
# args/env, ConfigMap-based config, MPS root path, and per-pod resources).
devicePlugin:
  description: DevicePlugin component spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    config:
      description: 'Optional: Configuration for the NVIDIA Device Plugin
        via the ConfigMap'
      properties:
        default:
          description: Default config name within the ConfigMap for
            the NVIDIA Device Plugin config
          type: string
        name:
          description: ConfigMap name for NVIDIA Device Plugin config
            including shared config between plugin and GFD
          type: string
      type: object
    enabled:
      description: Enabled indicates if deployment of NVIDIA Device
        Plugin through operator is enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    image:
      description: NVIDIA Device Plugin image name
      # NOTE(review): regex has no ^/$ anchors, so it appears to accept any
      # value containing at least one allowed character - confirm intent.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    mps:
      description: 'Optional: MPS related configuration for the NVIDIA
        Device Plugin'
      properties:
        root:
          default: /run/nvidia/mps
          description: Root defines the MPS root path on the host
          type: string
      type: object
    repository:
      description: NVIDIA Device Plugin image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    version:
      description: NVIDIA Device Plugin image tag
      type: string
  type: object
|
||
# driver: settings for the NVIDIA Driver component - image coordinates,
# args/env, certificate/kernel-module/licensing/repo config references,
# liveness/readiness/startup probes, the Driver Manager initContainer,
# GPUDirect RDMA, per-pod resources, and the auto-upgrade policy.
driver:
  description: Driver component spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    certConfig:
      description: 'Optional: Custom certificates configuration for
        NVIDIA Driver container'
      properties:
        name:
          type: string
      type: object
    enabled:
      description: Enabled indicates if deployment of NVIDIA Driver
        through operator is enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    image:
      description: NVIDIA Driver image name
      # NOTE(review): regex has no ^/$ anchors, so it appears to accept any
      # value containing at least one allowed character - confirm intent.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    kernelModuleConfig:
      description: 'Optional: Kernel module configuration parameters
        for the NVIDIA Driver'
      properties:
        name:
          type: string
      type: object
    licensingConfig:
      description: 'Optional: Licensing configuration for NVIDIA vGPU
        licensing'
      properties:
        configMapName:
          type: string
        nlsEnabled:
          description: NLSEnabled indicates if NVIDIA Licensing System
            is used for licensing.
          type: boolean
      type: object
    livenessProbe:
      description: NVIDIA Driver container liveness probe settings
      properties:
        failureThreshold:
          description: |-
            Minimum consecutive failures for the probe to be considered failed after having succeeded.
            Defaults to 3. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        initialDelaySeconds:
          description: |-
            Number of seconds after the container has started before liveness probes are initiated.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          type: integer
        periodSeconds:
          description: |-
            How often (in seconds) to perform the probe.
            Default to 10 seconds. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        successThreshold:
          description: |-
            Minimum consecutive successes for the probe to be considered successful after having failed.
            Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        timeoutSeconds:
          description: |-
            Number of seconds after which the probe times out.
            Defaults to 1 second. Minimum value is 1.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          minimum: 1
          type: integer
      type: object
    manager:
      description: Manager represents configuration for NVIDIA Driver
        Manager initContainer
      properties:
        env:
          description: 'Optional: List of environment variables'
          items:
            description: EnvVar represents an environment variable present
              in a Container.
            properties:
              name:
                description: Name of the environment variable.
                type: string
              value:
                description: Value of the environment variable.
                type: string
            required:
            - name
            type: object
          type: array
        image:
          description: Image represents NVIDIA Driver Manager image
            name
          # NOTE(review): unanchored regex, same caveat as driver.image.
          pattern: '[a-zA-Z0-9\-]+'
          type: string
        imagePullPolicy:
          description: Image pull policy
          type: string
        imagePullSecrets:
          description: Image pull secrets
          items:
            type: string
          type: array
        repository:
          description: Repository represents Driver Manager repository
            path
          type: string
        version:
          description: Version represents NVIDIA Driver Manager image
            tag(version)
          type: string
      type: object
    rdma:
      description: GPUDirectRDMASpec defines the properties for nvidia-peermem
        deployment
      properties:
        enabled:
          description: Enabled indicates if GPUDirect RDMA is enabled
            through GPU operator
          type: boolean
        useHostMofed:
          description: UseHostMOFED indicates to use MOFED drivers directly
            installed on the host to enable GPUDirect RDMA
          type: boolean
      type: object
    readinessProbe:
      description: NVIDIA Driver container readiness probe settings
      properties:
        failureThreshold:
          description: |-
            Minimum consecutive failures for the probe to be considered failed after having succeeded.
            Defaults to 3. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        initialDelaySeconds:
          description: |-
            Number of seconds after the container has started before liveness probes are initiated.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          type: integer
        periodSeconds:
          description: |-
            How often (in seconds) to perform the probe.
            Default to 10 seconds. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        successThreshold:
          description: |-
            Minimum consecutive successes for the probe to be considered successful after having failed.
            Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        timeoutSeconds:
          description: |-
            Number of seconds after which the probe times out.
            Defaults to 1 second. Minimum value is 1.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          minimum: 1
          type: integer
      type: object
    repoConfig:
      description: 'Optional: Custom repo configuration for NVIDIA Driver
        container'
      properties:
        configMapName:
          type: string
      type: object
    repository:
      description: NVIDIA Driver image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    startupProbe:
      description: NVIDIA Driver container startup probe settings
      properties:
        failureThreshold:
          description: |-
            Minimum consecutive failures for the probe to be considered failed after having succeeded.
            Defaults to 3. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        initialDelaySeconds:
          description: |-
            Number of seconds after the container has started before liveness probes are initiated.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          type: integer
        periodSeconds:
          description: |-
            How often (in seconds) to perform the probe.
            Default to 10 seconds. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        successThreshold:
          description: |-
            Minimum consecutive successes for the probe to be considered successful after having failed.
            Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
          format: int32
          minimum: 1
          type: integer
        timeoutSeconds:
          description: |-
            Number of seconds after which the probe times out.
            Defaults to 1 second. Minimum value is 1.
            More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
          format: int32
          minimum: 1
          type: integer
      type: object
    upgradePolicy:
      description: Driver auto-upgrade settings
      properties:
        autoUpgrade:
          default: false
          description: |-
            AutoUpgrade is a global switch for automatic upgrade feature
            if set to false all other options are ignored
          type: boolean
        drain:
          description: DrainSpec describes configuration for node drain
            during automatic upgrade
          properties:
            deleteEmptyDir:
              default: false
              description: |-
                DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
                (local data that will be deleted when the node is drained)
              type: boolean
            enable:
              default: false
              description: Enable indicates if node draining is allowed
                during upgrade
              type: boolean
            force:
              default: false
              description: Force indicates if force draining is allowed
              type: boolean
            podSelector:
              description: |-
                PodSelector specifies a label selector to filter pods on the node that need to be drained
                For more details on label selectors, see:
                https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
              type: string
            timeoutSeconds:
              default: 300
              description: TimeoutSecond specifies the length of time
                in seconds to wait before giving up drain, zero means
                infinite
              minimum: 0
              type: integer
          type: object
        maxParallelUpgrades:
          default: 1
          description: |-
            MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
            0 means no limit, all nodes will be upgraded in parallel
          minimum: 0
          type: integer
        maxUnavailable:
          anyOf:
          - type: integer
          - type: string
          default: 25%
          description: |-
            MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
            Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
            Absolute number is calculated from percentage by rounding up.
            By default, a fixed value of 25% is used.
          x-kubernetes-int-or-string: true
        podDeletion:
          description: PodDeletionSpec describes configuration for deletion
            of pods using special resources during automatic upgrade
          properties:
            deleteEmptyDir:
              default: false
              description: |-
                DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
                (local data that will be deleted when the pod is deleted)
              type: boolean
            force:
              default: false
              description: Force indicates if force deletion is allowed
              type: boolean
            timeoutSeconds:
              default: 300
              description: |-
                TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
                infinite
              minimum: 0
              type: integer
          type: object
        waitForCompletion:
          description: WaitForCompletionSpec describes the configuration
            for waiting on job completions
          properties:
            podSelector:
              description: |-
                PodSelector specifies a label selector for the pods to wait for completion
                For more details on label selectors, see:
                https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
              type: string
            timeoutSeconds:
              default: 0
              description: |-
                TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
                infinite
              minimum: 0
              type: integer
          type: object
      type: object
    useNvidiaDriverCRD:
      description: UseNvidiaDriverCRD indicates if the deployment of
        NVIDIA Driver is managed by the NVIDIADriver CRD type
      type: boolean
    useOpenKernelModules:
      description: UseOpenKernelModules indicates if the open GPU kernel
        modules should be used
      type: boolean
    usePrecompiled:
      description: UsePrecompiled indicates if deployment of NVIDIA
        Driver using pre-compiled modules is enabled
      type: boolean
    version:
      description: NVIDIA Driver image tag
      type: string
    virtualTopology:
      description: 'Optional: Virtual Topology Daemon configuration
        for NVIDIA vGPU drivers'
      properties:
        config:
          description: 'Optional: Config name representing virtual topology
            daemon configuration file nvidia-topologyd.conf'
          type: string
      type: object
  type: object
|
||
gdrcopy:
|
||
description: GDRCopy component spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if GDRCopy is enabled through GPU
|
||
Operator
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: NVIDIA GDRCopy driver image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: NVIDIA GDRCopy driver image repository
|
||
type: string
|
||
version:
|
||
description: NVIDIA GDRCopy driver image tag
|
||
type: string
|
||
type: object
|
||
gds:
|
||
description: GPUDirectStorage defines the spec for GDS components (Experimental)
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if GPUDirect Storage is enabled
|
||
through GPU operator
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: NVIDIA GPUDirect Storage Driver image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: NVIDIA GPUDirect Storage Driver image repository
|
||
type: string
|
||
version:
|
||
description: NVIDIA GPUDirect Storage Driver image tag
|
||
type: string
|
||
type: object
|
||
gfd:
|
||
description: GPUFeatureDiscovery spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if deployment of GPU Feature Discovery
|
||
Plugin is enabled.
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: GFD image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: GFD image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: GFD image tag
|
||
type: string
|
||
type: object
|
||
hostPaths:
|
||
description: HostPaths defines various paths on the host needed by
|
||
GPU Operator components
|
||
properties:
|
||
driverInstallDir:
|
||
description: |-
|
||
DriverInstallDir represents the root at which driver files including libraries,
|
||
config files, and executables can be found.
|
||
type: string
|
||
rootFS:
|
||
description: |-
|
||
RootFS represents the path to the root filesystem of the host.
|
||
This is used by components that need to interact with the host filesystem
|
||
and as such this must be a chroot-able filesystem.
|
||
Examples include the MIG Manager and Toolkit Container which may need to
|
||
stop, start, or restart systemd services.
|
||
type: string
|
||
type: object
|
||
kataManager:
|
||
description: KataManager component spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
config:
|
||
description: Kata Manager config
|
||
properties:
|
||
artifactsDir:
|
||
default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
|
||
description: |-
|
||
ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
|
||
are placed on the local filesystem.
|
||
type: string
|
||
runtimeClasses:
|
||
description: RuntimeClasses is a list of kata runtime classes
|
||
to configure.
|
||
items:
|
||
description: RuntimeClass defines the configuration for
|
||
a kata RuntimeClass
|
||
properties:
|
||
artifacts:
|
||
description: Artifacts are the kata artifacts associated
|
||
with the runtime class.
|
||
properties:
|
||
pullSecret:
|
||
description: PullSecret is the secret used to pull
|
||
the OCI artifact.
|
||
type: string
|
||
url:
|
||
description: |-
|
||
URL is the path to the OCI artifact (payload) containing all artifacts
|
||
associated with a kata runtime class.
|
||
type: string
|
||
required:
|
||
- url
|
||
type: object
|
||
name:
|
||
description: Name is the name of the kata runtime class.
|
||
type: string
|
||
nodeSelector:
|
||
additionalProperties:
|
||
type: string
|
||
description: |-
|
||
NodeSelector specifies the nodeSelector for the RuntimeClass object.
|
||
This ensures pods running with the RuntimeClass only get scheduled
|
||
onto nodes which support it.
|
||
type: object
|
||
required:
|
||
- artifacts
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
enabled:
|
||
description: Enabled indicates if deployment of Kata Manager is
|
||
enabled
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: Kata Manager image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: Kata Manager image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: Kata Manager image tag
|
||
type: string
|
||
type: object
|
||
mig:
|
||
description: MIG spec
|
||
properties:
|
||
strategy:
|
||
description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
|
||
Device Plugin'
|
||
enum:
|
||
- none
|
||
- single
|
||
- mixed
|
||
type: string
|
||
type: object
|
||
migManager:
|
||
description: MIGManager for configuration to deploy MIG Manager
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
config:
|
||
description: 'Optional: Custom mig-parted configuration for NVIDIA
|
||
MIG Manager container'
|
||
properties:
|
||
default:
|
||
default: all-disabled
|
||
description: Default MIG config to be applied on the node,
|
||
when there is no config specified with the node label nvidia.com/mig.config
|
||
enum:
|
||
- all-disabled
|
||
- ""
|
||
type: string
|
||
name:
|
||
default: default-mig-parted-config
|
||
description: ConfigMap name
|
||
type: string
|
||
type: object
|
||
enabled:
|
||
description: Enabled indicates if deployment of NVIDIA MIG Manager
|
||
is enabled
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
gpuClientsConfig:
|
||
description: 'Optional: Custom gpu-clients configuration for NVIDIA
|
||
MIG Manager container'
|
||
properties:
|
||
name:
|
||
description: ConfigMap name
|
||
type: string
|
||
type: object
|
||
image:
|
||
description: NVIDIA MIG Manager image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: NVIDIA MIG Manager image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: NVIDIA MIG Manager image tag
|
||
type: string
|
||
type: object
|
||
nodeStatusExporter:
|
||
description: NodeStatusExporter spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if deployment of Node Status Exporter
|
||
is enabled.
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: Node Status Exporter image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: Node Status Exporter image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: Node Status Exporter image tag
|
||
type: string
|
||
type: object
|
||
operator:
|
||
description: Operator component spec
|
||
properties:
|
||
annotations:
|
||
additionalProperties:
|
||
type: string
|
||
description: |-
|
||
Optional: Annotations is an unstructured key value map stored with a resource that may be
|
||
set by external tools to store and retrieve arbitrary metadata. They are not
|
||
queryable and should be preserved when modifying objects.
|
||
type: object
|
||
defaultRuntime:
|
||
default: docker
|
||
description: Runtime defines container runtime type
|
||
enum:
|
||
- docker
|
||
- crio
|
||
- containerd
|
||
type: string
|
||
initContainer:
|
||
description: InitContainerSpec describes configuration for initContainer
|
||
image used with all components
|
||
properties:
|
||
image:
|
||
description: Image represents image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: Repository represents image repository path
|
||
type: string
|
||
version:
|
||
description: Version represents image tag(version)
|
||
type: string
|
||
type: object
|
||
labels:
|
||
additionalProperties:
|
||
type: string
|
||
description: |-
|
||
Optional: Map of string keys and values that can be used to organize and categorize
|
||
(scope and select) objects. May match selectors of replication controllers
|
||
and services.
|
||
type: object
|
||
runtimeClass:
|
||
default: nvidia
|
||
type: string
|
||
use_ocp_driver_toolkit:
|
||
description: UseOpenShiftDriverToolkit indicates if DriverToolkit
|
||
image should be used on OpenShift to build and install driver
|
||
modules
|
||
type: boolean
|
||
required:
|
||
- defaultRuntime
|
||
type: object
|
||
psa:
|
||
description: PSA defines spec for PodSecurityAdmission configuration
|
||
properties:
|
||
enabled:
|
||
description: Enabled indicates if PodSecurityAdmission configuration
|
||
needs to be enabled for all Pods
|
||
type: boolean
|
||
type: object
|
||
psp:
|
||
description: |-
|
||
Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
|
||
PSP defines spec for handling PodSecurityPolicies
|
||
properties:
|
||
enabled:
|
||
description: Enabled indicates if PodSecurityPolicies needs to
|
||
be enabled for all Pods
|
||
type: boolean
|
||
type: object
|
||
sandboxDevicePlugin:
|
||
description: SandboxDevicePlugin component spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if deployment of NVIDIA Sandbox
|
||
Device Plugin through operator is enabled
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: NVIDIA Sandbox Device Plugin image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: NVIDIA Sandbox Device Plugin image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: NVIDIA Sandbox Device Plugin image tag
|
||
type: string
|
||
type: object
|
||
sandboxWorkloads:
|
||
description: SandboxWorkloads defines the spec for handling sandbox
|
||
workloads (i.e. Virtual Machines)
|
||
properties:
|
||
defaultWorkload:
|
||
default: container
|
||
description: |-
|
||
DefaultWorkload indicates the default GPU workload type to configure
|
||
worker nodes in the cluster for
|
||
enum:
|
||
- container
|
||
- vm-passthrough
|
||
- vm-vgpu
|
||
type: string
|
||
enabled:
|
||
description: |-
|
||
Enabled indicates if the GPU Operator should manage additional operands required
|
||
for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
|
||
type: boolean
|
||
type: object
|
||
toolkit:
|
||
description: Toolkit component spec
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
enabled:
|
||
description: Enabled indicates if deployment of NVIDIA Container
|
||
Toolkit through operator is enabled
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: NVIDIA Container Toolkit image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
installDir:
|
||
default: /usr/local/nvidia
|
||
description: Toolkit install directory on the host
|
||
type: string
|
||
repository:
|
||
description: NVIDIA Container Toolkit image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: NVIDIA Container Toolkit image tag
|
||
type: string
|
||
type: object
|
||
validator:
|
||
description: Validator defines the spec for operator-validator daemonset
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
cuda:
|
||
description: CUDA validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
driver:
|
||
description: Driver validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: Validator image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
plugin:
|
||
description: Plugin validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
repository:
|
||
description: Validator image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
toolkit:
|
||
description: Toolkit validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
version:
|
||
description: Validator image tag
|
||
type: string
|
||
vfioPCI:
|
||
description: VfioPCI validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
vgpuDevices:
|
||
description: VGPUDevices validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
vgpuManager:
|
||
description: VGPUManager validator spec
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
type: object
|
||
type: object
|
||
vfioManager:
|
||
description: VFIOManager for configuration to deploy VFIO-PCI Manager
|
||
properties:
|
||
args:
|
||
description: 'Optional: List of arguments'
|
||
items:
|
||
type: string
|
||
type: array
|
||
driverManager:
|
||
description: DriverManager represents configuration for NVIDIA
|
||
Driver Manager
|
||
properties:
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: Image represents NVIDIA Driver Manager image
|
||
name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: Repository represents Driver Manager repository
|
||
path
|
||
type: string
|
||
version:
|
||
description: Version represents NVIDIA Driver Manager image
|
||
tag(version)
|
||
type: string
|
||
type: object
|
||
enabled:
|
||
description: Enabled indicates if deployment of VFIO Manager is
|
||
enabled
|
||
type: boolean
|
||
env:
|
||
description: 'Optional: List of environment variables'
|
||
items:
|
||
description: EnvVar represents an environment variable present
|
||
in a Container.
|
||
properties:
|
||
name:
|
||
description: Name of the environment variable.
|
||
type: string
|
||
value:
|
||
description: Value of the environment variable.
|
||
type: string
|
||
required:
|
||
- name
|
||
type: object
|
||
type: array
|
||
image:
|
||
description: VFIO Manager image name
|
||
pattern: '[a-zA-Z0-9\-]+'
|
||
type: string
|
||
imagePullPolicy:
|
||
description: Image pull policy
|
||
type: string
|
||
imagePullSecrets:
|
||
description: Image pull secrets
|
||
items:
|
||
type: string
|
||
type: array
|
||
repository:
|
||
description: VFIO Manager image repository
|
||
type: string
|
||
resources:
|
||
description: 'Optional: Define resources requests and limits for
|
||
each pod'
|
||
properties:
|
||
limits:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Limits describes the maximum amount of compute resources allowed.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
requests:
|
||
additionalProperties:
|
||
anyOf:
|
||
- type: integer
|
||
- type: string
|
||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||
x-kubernetes-int-or-string: true
|
||
description: |-
|
||
Requests describes the minimum amount of compute resources required.
|
||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||
type: object
|
||
type: object
|
||
version:
|
||
description: VFIO Manager image tag
|
||
type: string
|
||
type: object
|
||
# vgpuDeviceManager: schema for the NVIDIA vGPU Device Manager component of
# ClusterPolicy. Nesting below is restored from the schema's own structure
# (description / properties / type).
vgpuDeviceManager:
  description: VGPUDeviceManager spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    # Points the device manager at a ConfigMap and names the entry to use.
    config:
      description: NVIDIA vGPU devices configuration for NVIDIA vGPU
        Device Manager container
      properties:
        # The property is itself called "default"; its schema default value
        # is the literal string "default".
        default:
          default: default
          description: Default config name within the ConfigMap
          type: string
        name:
          description: ConfigMap name
          type: string
      type: object
    enabled:
      description: Enabled indicates if deployment of NVIDIA vGPU Device
        Manager is enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        # Only the variable name is mandatory; value may be omitted.
        required:
        - name
        type: object
      type: array
    image:
      description: NVIDIA vGPU Device Manager image name
      # NOTE(review): the pattern has no ^/$ anchors, so any value that
      # contains at least one matching character satisfies it — confirm
      # whether full-string validation was intended before tightening.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    repository:
      description: NVIDIA vGPU Device Manager image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        # Map of resource name -> quantity; each quantity may be an integer
        # or a string matching the quantity pattern (int-or-string).
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        # Same value schema as limits.
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    version:
      description: NVIDIA vGPU Device Manager image tag
      type: string
  type: object
|
||
# vgpuManager: schema for the NVIDIA vGPU Manager component of ClusterPolicy.
# Nesting below is restored from the schema's own structure
# (description / properties / type); the image reference is described by the
# separate repository, image and version fields.
vgpuManager:
  description: VGPUManager component spec
  properties:
    args:
      description: 'Optional: List of arguments'
      items:
        type: string
      type: array
    # Nested configuration object for the NVIDIA Driver Manager initContainer.
    driverManager:
      description: DriverManager represents configuration for NVIDIA
        Driver Manager initContainer
      properties:
        env:
          description: 'Optional: List of environment variables'
          items:
            description: EnvVar represents an environment variable present
              in a Container.
            properties:
              name:
                description: Name of the environment variable.
                type: string
              value:
                description: Value of the environment variable.
                type: string
            # Only the variable name is mandatory; value may be omitted.
            required:
            - name
            type: object
          type: array
        image:
          description: Image represents NVIDIA Driver Manager image
            name
          # NOTE(review): the pattern has no ^/$ anchors, so any value that
          # contains at least one matching character satisfies it — confirm
          # whether full-string validation was intended before tightening.
          pattern: '[a-zA-Z0-9\-]+'
          type: string
        imagePullPolicy:
          description: Image pull policy
          type: string
        imagePullSecrets:
          description: Image pull secrets
          items:
            type: string
          type: array
        # Wording fixed ("Managerrepository" -> "Manager repository").
        # NOTE(review): the controller-gen annotation in this file's metadata
        # suggests the schema is generated; mirror the fix in the source type
        # comment so regeneration does not revert it — confirm.
        repository:
          description: Repository represents Driver Manager repository
            path
          type: string
        version:
          description: Version represents NVIDIA Driver Manager image
            tag(version)
          type: string
      type: object
    enabled:
      description: Enabled indicates if deployment of NVIDIA vGPU Manager
        through operator is enabled
      type: boolean
    env:
      description: 'Optional: List of environment variables'
      items:
        description: EnvVar represents an environment variable present
          in a Container.
        properties:
          name:
            description: Name of the environment variable.
            type: string
          value:
            description: Value of the environment variable.
            type: string
        required:
        - name
        type: object
      type: array
    image:
      description: NVIDIA vGPU Manager image name
      # NOTE(review): unanchored pattern — see the note on driverManager.image.
      pattern: '[a-zA-Z0-9\-]+'
      type: string
    imagePullPolicy:
      description: Image pull policy
      type: string
    imagePullSecrets:
      description: Image pull secrets
      items:
        type: string
      type: array
    repository:
      description: NVIDIA vGPU Manager image repository
      type: string
    resources:
      description: 'Optional: Define resources requests and limits for
        each pod'
      properties:
        # Map of resource name -> quantity; each quantity may be an integer
        # or a string matching the quantity pattern (int-or-string).
        limits:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Limits describes the maximum amount of compute resources allowed.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
        # Same value schema as limits.
        requests:
          additionalProperties:
            anyOf:
            - type: integer
            - type: string
            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
            x-kubernetes-int-or-string: true
          description: |-
            Requests describes the minimum amount of compute resources required.
            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
            otherwise to an implementation-defined value. Requests cannot exceed Limits.
            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
          type: object
      type: object
    version:
      description: NVIDIA vGPU Manager image tag
      type: string
  type: object
|
||
required:
|
||
- daemonsets
|
||
- dcgm
|
||
- dcgmExporter
|
||
- devicePlugin
|
||
- driver
|
||
- gfd
|
||
- nodeStatusExporter
|
||
- operator
|
||
- toolkit
|
||
type: object
|
||
# status: schema for the observed state of a ClusterPolicy object.
# Nesting below is restored from the schema's own structure
# (description / properties / required / type). Only `state` is required.
status:
  description: ClusterPolicyStatus defines the observed state of ClusterPolicy
  properties:
    # List of condition objects; each entry must carry all five fields named
    # in the item-level `required` list.
    conditions:
      description: Conditions is a list of conditions representing the ClusterPolicy's
        current state.
      items:
        description: Condition contains details for one aspect of the current
          state of this API Resource.
        properties:
          lastTransitionTime:
            description: |-
              lastTransitionTime is the last time the condition transitioned from one status to another.
              This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
            format: date-time
            type: string
          message:
            description: |-
              message is a human readable message indicating details about the transition.
              This may be an empty string.
            maxLength: 32768
            type: string
          observedGeneration:
            description: |-
              observedGeneration represents the .metadata.generation that the condition was set based upon.
              For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
              with respect to the current state of the instance.
            format: int64
            minimum: 0
            type: integer
          reason:
            description: |-
              reason contains a programmatic identifier indicating the reason for the condition's last transition.
              Producers of specific condition types may define expected values and meanings for this field,
              and whether the values are considered a guaranteed API.
              The value should be a CamelCase string.
              This field may not be empty.
            maxLength: 1024
            minLength: 1
            pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
            type: string
          status:
            description: status of the condition, one of True, False, Unknown.
            # "True"/"False" are quoted so they stay strings, matching
            # `type: string`, instead of being parsed as YAML booleans.
            enum:
            - "True"
            - "False"
            - Unknown
            type: string
          type:
            description: type of condition in CamelCase or in foo.example.com/CamelCase.
            maxLength: 316
            pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
            type: string
        required:
        - lastTransitionTime
        - message
        - reason
        - status
        - type
        type: object
      type: array
    namespace:
      description: Namespace indicates a namespace in which the operator
        is installed
      type: string
    # Closed set of three state values.
    state:
      description: State indicates status of ClusterPolicy
      enum:
      - ignored
      - ready
      - notReady
      type: string
  required:
  - state
  type: object
|
||
type: object
|
||
served: true
|
||
storage: true
|
||
subresources:
|
||
status: {}
|