# kubs/charts/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
# 2025-01-12 04:03:33 +03:00
#
# 797 lines
# 39 KiB
# YAML

---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
name: nvidiadrivers.nvidia.com
spec:
group: nvidia.com
names:
kind: NVIDIADriver
listKind: NVIDIADriverList
plural: nvidiadrivers
shortNames:
- nvd
- nvdriver
- nvdrivers
singular: nvidiadriver
scope: Cluster
versions:
- additionalPrinterColumns:
- jsonPath: .status.state
name: Status
type: string
# Age column: sourced from the object's creation timestamp. Declaring it as
# `date` (rather than `string`) lets kubectl render it as a relative age
# instead of printing the raw timestamp.
- jsonPath: .metadata.creationTimestamp
  name: Age
  type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: NVIDIADriver is the Schema for the nvidiadrivers API
properties:
apiVersion:
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
description: NVIDIADriverSpec defines the desired state of NVIDIADriver
properties:
annotations:
additionalProperties:
type: string
description: |-
Optional: Annotations is an unstructured key value map stored with a resource that may be
set by external tools to store and retrieve arbitrary metadata. They are not
queryable and should be preserved when modifying objects.
type: object
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
certConfig:
description: 'Optional: Custom certificates configuration for NVIDIA
Driver container'
properties:
name:
type: string
type: object
driverType:
default: gpu
description: DriverType defines NVIDIA driver type
enum:
- gpu
- vgpu
- vgpu-host-manager
type: string
x-kubernetes-validations:
- message: driverType is an immutable field. Please create a new NvidiaDriver
resource instead when you want to change this setting.
rule: self == oldSelf
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present in
a Container.
properties:
name:
description: Name of the environment variable.
type: string
value:
description: Value of the environment variable.
type: string
required:
- name
type: object
type: array
gdrcopy:
description: GDRCopy defines the spec for GDRCopy driver
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if GDRCopy is enabled through GPU
operator
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable.
type: string
value:
description: Value of the environment variable.
type: string
required:
- name
type: object
type: array
image:
description: GDRCopy driver image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: GDRCopy driver image repository
type: string
version:
description: GDRCopy driver image tag
type: string
type: object
gds:
description: GPUDirectStorage defines the spec for GDS driver
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if GPUDirect Storage is enabled
through GPU operator
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable.
type: string
value:
description: Value of the environment variable.
type: string
required:
- name
type: object
type: array
image:
description: NVIDIA GPUDirect Storage Driver image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA GPUDirect Storage Driver image repository
type: string
version:
description: NVIDIA GPUDirect Storage Driver image tag
type: string
type: object
image:
default: nvcr.io/nvidia/driver
description: NVIDIA Driver container image name
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
kernelModuleConfig:
description: 'Optional: Kernel module configuration parameters for
the NVIDIA Driver'
properties:
name:
type: string
type: object
labels:
additionalProperties:
type: string
description: |-
Optional: Map of string keys and values that can be used to organize and categorize
(scope and select) objects. May match selectors of replication controllers
and services.
type: object
licensingConfig:
description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
properties:
name:
type: string
nlsEnabled:
description: NLSEnabled indicates if NVIDIA Licensing System is
used for licensing.
type: boolean
type: object
livenessProbe:
description: NVIDIA Driver container liveness probe settings
properties:
failureThreshold:
description: |-
Minimum consecutive failures for the probe to be considered failed after having succeeded.
Defaults to 3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: |-
Number of seconds after the container has started before liveness probes are initiated.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
type: integer
periodSeconds:
description: |-
How often (in seconds) to perform the probe.
Defaults to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: |-
Minimum consecutive successes for the probe to be considered successful after having failed.
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: |-
Number of seconds after which the probe times out.
Defaults to 1 second. Minimum value is 1.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
minimum: 1
type: integer
type: object
manager:
description: Manager represents configuration for NVIDIA Driver Manager
initContainer
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable.
type: string
value:
description: Value of the environment variable.
type: string
required:
- name
type: object
type: array
image:
description: Image represents NVIDIA Driver Manager image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Repository represents Driver Manager repository path
type: string
version:
description: Version represents NVIDIA Driver Manager image tag (version)
type: string
type: object
nodeAffinity:
description: Affinity specifies node affinity rules for driver pods
properties:
preferredDuringSchedulingIgnoredDuringExecution:
description: |-
The scheduler will prefer to schedule pods to nodes that satisfy
the affinity expressions specified by this field, but it may choose
a node that violates one or more of the expressions. The node that is
most preferred is the one with the greatest sum of weights, i.e.
for each node that meets all of the scheduling requirements (resource
request, requiredDuringScheduling affinity expressions, etc.),
compute a sum by iterating through the elements of this field and adding
"weight" to the sum if the node matches the corresponding matchExpressions; the
node(s) with the highest sum are the most preferred.
items:
description: |-
An empty preferred scheduling term matches all objects with implicit weight 0
(i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
properties:
preference:
description: A node selector term, associated with the corresponding
weight.
properties:
matchExpressions:
description: A list of node selector requirements by
node's labels.
items:
description: |-
A node selector requirement is a selector that contains values, a key, and an operator
that relates the key and values.
properties:
key:
description: The label key that the selector applies
to.
type: string
operator:
description: |-
Represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: |-
An array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. If the operator is Gt or Lt, the values
array must have a single element, which will be interpreted as an integer.
This array is replaced during a strategic merge patch.
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
items:
description: |-
A node selector requirement is a selector that contains values, a key, and an operator
that relates the key and values.
properties:
key:
description: The label key that the selector applies
to.
type: string
operator:
description: |-
Represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: |-
An array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. If the operator is Gt or Lt, the values
array must have a single element, which will be interpreted as an integer.
This array is replaced during a strategic merge patch.
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
weight:
description: Weight associated with matching the corresponding
nodeSelectorTerm, in the range 1-100.
format: int32
type: integer
required:
- preference
- weight
type: object
type: array
x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
description: |-
If the affinity requirements specified by this field are not met at
scheduling time, the pod will not be scheduled onto the node.
If the affinity requirements specified by this field cease to be met
at some point during pod execution (e.g. due to an update), the system
may or may not try to eventually evict the pod from its node.
properties:
nodeSelectorTerms:
description: Required. A list of node selector terms. The
terms are ORed.
items:
description: |-
A null or empty node selector term matches no objects. The requirements of
them are ANDed.
The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
properties:
matchExpressions:
description: A list of node selector requirements by
node's labels.
items:
description: |-
A node selector requirement is a selector that contains values, a key, and an operator
that relates the key and values.
properties:
key:
description: The label key that the selector applies
to.
type: string
operator:
description: |-
Represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: |-
An array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. If the operator is Gt or Lt, the values
array must have a single element, which will be interpreted as an integer.
This array is replaced during a strategic merge patch.
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
items:
description: |-
A node selector requirement is a selector that contains values, a key, and an operator
that relates the key and values.
properties:
key:
description: The label key that the selector applies
to.
type: string
operator:
description: |-
Represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: |-
An array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. If the operator is Gt or Lt, the values
array must have a single element, which will be interpreted as an integer.
This array is replaced during a strategic merge patch.
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
type: array
x-kubernetes-list-type: atomic
required:
- nodeSelectorTerms
type: object
x-kubernetes-map-type: atomic
type: object
nodeSelector:
additionalProperties:
type: string
description: NodeSelector specifies a selector for installation of
NVIDIA driver
type: object
priorityClassName:
description: 'Optional: Set priorityClassName'
type: string
rdma:
description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
driver
properties:
enabled:
description: Enabled indicates if GPUDirect RDMA is enabled through
GPU operator
type: boolean
useHostMofed:
description: UseHostMOFED indicates to use MOFED drivers directly
installed on the host to enable GPUDirect RDMA
type: boolean
type: object
readinessProbe:
description: NVIDIA Driver container readiness probe settings
properties:
failureThreshold:
description: |-
Minimum consecutive failures for the probe to be considered failed after having succeeded.
Defaults to 3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: |-
Number of seconds after the container has started before readiness probes are initiated.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
type: integer
periodSeconds:
description: |-
How often (in seconds) to perform the probe.
Defaults to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: |-
Minimum consecutive successes for the probe to be considered successful after having failed.
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: |-
Number of seconds after which the probe times out.
Defaults to 1 second. Minimum value is 1.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
minimum: 1
type: integer
type: object
repoConfig:
description: 'Optional: Custom repo configuration for NVIDIA Driver
container'
properties:
name:
type: string
type: object
repository:
description: NVIDIA Driver repository
type: string
resources:
description: 'Optional: Define resources requests and limits for each
pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Limits describes the maximum amount of compute resources allowed.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Requests describes the minimum amount of compute resources required.
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
type: object
startupProbe:
description: NVIDIA Driver container startup probe settings
properties:
failureThreshold:
description: |-
Minimum consecutive failures for the probe to be considered failed after having succeeded.
Defaults to 3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: |-
Number of seconds after the container has started before startup probes are initiated.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
type: integer
periodSeconds:
description: |-
How often (in seconds) to perform the probe.
Defaults to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: |-
Minimum consecutive successes for the probe to be considered successful after having failed.
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: |-
Number of seconds after which the probe times out.
Defaults to 1 second. Minimum value is 1.
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
format: int32
minimum: 1
type: integer
type: object
tolerations:
description: 'Optional: Set tolerations'
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA Driver
using pre-compiled modules is enabled
type: boolean
x-kubernetes-validations:
- message: usePrecompiled is an immutable field. Please create a new
NvidiaDriver resource instead when you want to change this setting.
rule: self == oldSelf
version:
description: NVIDIA Driver version (or just branch for precompiled
drivers)
type: string
virtualTopologyConfig:
description: 'Optional: Virtual Topology Daemon configuration for
NVIDIA vGPU drivers'
properties:
name:
description: 'Optional: Config name representing virtual topology
daemon configuration file nvidia-topologyd.conf'
type: string
type: object
required:
- driverType
- image
type: object
status:
description: NVIDIADriverStatus defines the observed state of NVIDIADriver
properties:
conditions:
description: Conditions is a list of conditions representing the NVIDIADriver's
current state.
items:
description: Condition contains details for one aspect of the current
state of this API Resource.
properties:
lastTransitionTime:
description: |-
lastTransitionTime is the last time the condition transitioned from one status to another.
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: |-
message is a human readable message indicating details about the transition.
This may be an empty string.
maxLength: 32768
type: string
observedGeneration:
description: |-
observedGeneration represents the .metadata.generation that the condition was set based upon.
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
with respect to the current state of the instance.
format: int64
minimum: 0
type: integer
reason:
description: |-
reason contains a programmatic identifier indicating the reason for the condition's last transition.
Producers of specific condition types may define expected values and meanings for this field,
and whether the values are considered a guaranteed API.
The value should be a CamelCase string.
This field may not be empty.
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
description: status of the condition, one of True, False, Unknown.
enum:
- "True"
- "False"
- Unknown
type: string
type:
description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
namespace:
description: Namespace indicates a namespace in which the operator
and driver are installed
type: string
state:
description: |-
State indicates status of NVIDIADriver instance
enum:
- ignored
- ready
- notReady
type: string
required:
- state
type: object
type: object
served: true
storage: true
subresources:
status: {}