added gpu-operator
This commit is contained in:
parent
cb672d1f0d
commit
a2b2bd17c5
48 changed files with 8358 additions and 0 deletions
22
charts/gpu-operator/.helmignore
Normal file
22
charts/gpu-operator/.helmignore
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
6
charts/gpu-operator/Chart.lock
Normal file
6
charts/gpu-operator/Chart.lock
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
dependencies:
|
||||||
|
- name: node-feature-discovery
|
||||||
|
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
|
||||||
|
version: 0.16.6
|
||||||
|
digest: sha256:e7b02cbdf9daff49892c0b74c50da2ed11e18eff2105a1b1abc9a8f2ebd8be47
|
||||||
|
generated: "2024-10-31T07:12:50.141904-07:00"
|
23
charts/gpu-operator/Chart.yaml
Normal file
23
charts/gpu-operator/Chart.yaml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
apiVersion: v2
|
||||||
|
appVersion: v24.9.1
|
||||||
|
dependencies:
|
||||||
|
- condition: nfd.enabled
|
||||||
|
name: node-feature-discovery
|
||||||
|
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
|
||||||
|
version: v0.16.6
|
||||||
|
description: NVIDIA GPU Operator creates/configures/manages GPUs atop Kubernetes
|
||||||
|
home: https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/overview.html
|
||||||
|
icon: https://assets.nvidiagrid.net/ngc/logos/GPUoperator.png
|
||||||
|
keywords:
|
||||||
|
- gpu
|
||||||
|
- cuda
|
||||||
|
- compute
|
||||||
|
- operator
|
||||||
|
- deep learning
|
||||||
|
- monitoring
|
||||||
|
- tesla
|
||||||
|
kubeVersion: '>= 1.16.0-0'
|
||||||
|
name: gpu-operator
|
||||||
|
sources:
|
||||||
|
- https://github.com/NVIDIA/gpu-operator
|
||||||
|
version: v24.9.1
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
14
charts/gpu-operator/charts/node-feature-discovery/Chart.yaml
Normal file
14
charts/gpu-operator/charts/node-feature-discovery/Chart.yaml
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
apiVersion: v2
|
||||||
|
appVersion: v0.16.6
|
||||||
|
description: 'Detects hardware features available on each node in a Kubernetes cluster,
|
||||||
|
and advertises those features using node labels. '
|
||||||
|
home: https://github.com/kubernetes-sigs/node-feature-discovery
|
||||||
|
keywords:
|
||||||
|
- feature-discovery
|
||||||
|
- feature-detection
|
||||||
|
- node-labels
|
||||||
|
name: node-feature-discovery
|
||||||
|
sources:
|
||||||
|
- https://github.com/kubernetes-sigs/node-feature-discovery
|
||||||
|
type: application
|
||||||
|
version: 0.16.6
|
10
charts/gpu-operator/charts/node-feature-discovery/README.md
Normal file
10
charts/gpu-operator/charts/node-feature-discovery/README.md
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Node Feature Discovery
|
||||||
|
|
||||||
|
Node Feature Discovery (NFD) is a Kubernetes add-on for detecting hardware
|
||||||
|
features and system configuration. Detected features are advertised as node
|
||||||
|
labels. NFD provides flexible configuration and extension points for a wide
|
||||||
|
range of vendor and application specific node labeling needs.
|
||||||
|
|
||||||
|
See
|
||||||
|
[NFD documentation](https://kubernetes-sigs.github.io/node-feature-discovery/v0.16/deployment/helm.html)
|
||||||
|
for deployment instructions.
|
|
@ -0,0 +1,710 @@
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
controller-gen.kubebuilder.io/version: v0.14.0
|
||||||
|
name: nodefeatures.nfd.k8s-sigs.io
|
||||||
|
spec:
|
||||||
|
group: nfd.k8s-sigs.io
|
||||||
|
names:
|
||||||
|
kind: NodeFeature
|
||||||
|
listKind: NodeFeatureList
|
||||||
|
plural: nodefeatures
|
||||||
|
singular: nodefeature
|
||||||
|
scope: Namespaced
|
||||||
|
versions:
|
||||||
|
- name: v1alpha1
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: |-
|
||||||
|
NodeFeature resource holds the features discovered for one node in the
|
||||||
|
cluster.
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
description: |-
|
||||||
|
APIVersion defines the versioned schema of this representation of an object.
|
||||||
|
Servers should convert recognized schemas to the latest internal value, and
|
||||||
|
may reject unrecognized values.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
description: |-
|
||||||
|
Kind is a string value representing the REST resource this object represents.
|
||||||
|
Servers may infer this from the endpoint the client submits requests to.
|
||||||
|
Cannot be updated.
|
||||||
|
In CamelCase.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: Specification of the NodeFeature, containing features discovered
|
||||||
|
for a node.
|
||||||
|
properties:
|
||||||
|
features:
|
||||||
|
description: Features is the full "raw" features data that has been
|
||||||
|
discovered.
|
||||||
|
properties:
|
||||||
|
attributes:
|
||||||
|
additionalProperties:
|
||||||
|
description: AttributeFeatureSet is a set of features having
|
||||||
|
string value.
|
||||||
|
properties:
|
||||||
|
elements:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Individual features of the feature set.
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- elements
|
||||||
|
type: object
|
||||||
|
description: Attributes contains all the attribute-type features
|
||||||
|
of the node.
|
||||||
|
type: object
|
||||||
|
flags:
|
||||||
|
additionalProperties:
|
||||||
|
description: FlagFeatureSet is a set of simple features only
|
||||||
|
containing names without values.
|
||||||
|
properties:
|
||||||
|
elements:
|
||||||
|
additionalProperties:
|
||||||
|
description: Nil is a dummy empty struct for protobuf
|
||||||
|
compatibility
|
||||||
|
type: object
|
||||||
|
description: Individual features of the feature set.
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- elements
|
||||||
|
type: object
|
||||||
|
description: Flags contains all the flag-type features of the
|
||||||
|
node.
|
||||||
|
type: object
|
||||||
|
instances:
|
||||||
|
additionalProperties:
|
||||||
|
description: InstanceFeatureSet is a set of features each of
|
||||||
|
which is an instance having multiple attributes.
|
||||||
|
properties:
|
||||||
|
elements:
|
||||||
|
description: Individual features of the feature set.
|
||||||
|
items:
|
||||||
|
description: InstanceFeature represents one instance of
|
||||||
|
a complex features, e.g. a device.
|
||||||
|
properties:
|
||||||
|
attributes:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Attributes of the instance feature.
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- attributes
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- elements
|
||||||
|
type: object
|
||||||
|
description: Instances contains all the instance-type features
|
||||||
|
of the node.
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Labels is the set of node labels that are requested to
|
||||||
|
be created.
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- spec
|
||||||
|
type: object
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
controller-gen.kubebuilder.io/version: v0.14.0
|
||||||
|
name: nodefeaturegroups.nfd.k8s-sigs.io
|
||||||
|
spec:
|
||||||
|
group: nfd.k8s-sigs.io
|
||||||
|
names:
|
||||||
|
kind: NodeFeatureGroup
|
||||||
|
listKind: NodeFeatureGroupList
|
||||||
|
plural: nodefeaturegroups
|
||||||
|
shortNames:
|
||||||
|
- nfg
|
||||||
|
singular: nodefeaturegroup
|
||||||
|
scope: Namespaced
|
||||||
|
versions:
|
||||||
|
- name: v1alpha1
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: NodeFeatureGroup resource holds Node pools by featureGroup
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
description: |-
|
||||||
|
APIVersion defines the versioned schema of this representation of an object.
|
||||||
|
Servers should convert recognized schemas to the latest internal value, and
|
||||||
|
may reject unrecognized values.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
description: |-
|
||||||
|
Kind is a string value representing the REST resource this object represents.
|
||||||
|
Servers may infer this from the endpoint the client submits requests to.
|
||||||
|
Cannot be updated.
|
||||||
|
In CamelCase.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: Spec defines the rules to be evaluated.
|
||||||
|
properties:
|
||||||
|
featureGroupRules:
|
||||||
|
description: List of rules to evaluate to determine nodes that belong
|
||||||
|
in this group.
|
||||||
|
items:
|
||||||
|
description: GroupRule defines a rule for nodegroup filtering.
|
||||||
|
properties:
|
||||||
|
matchAny:
|
||||||
|
description: MatchAny specifies a list of matchers one of which
|
||||||
|
must match.
|
||||||
|
items:
|
||||||
|
description: MatchAnyElem specifies one sub-matcher of MatchAny.
|
||||||
|
properties:
|
||||||
|
matchFeatures:
|
||||||
|
description: MatchFeatures specifies a set of matcher
|
||||||
|
terms all of which must match.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
FeatureMatcherTerm defines requirements against one feature set. All
|
||||||
|
requirements (specified as MatchExpressions) are evaluated against each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
feature:
|
||||||
|
description: Feature is the name of the feature
|
||||||
|
set to match against.
|
||||||
|
type: string
|
||||||
|
matchExpressions:
|
||||||
|
additionalProperties:
|
||||||
|
description: |-
|
||||||
|
MatchExpression specifies an expression to evaluate against a set of input
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- matchFeatures
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
matchFeatures:
|
||||||
|
description: MatchFeatures specifies a set of matcher terms
|
||||||
|
all of which must match.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
FeatureMatcherTerm defines requirements against one feature set. All
|
||||||
|
requirements (specified as MatchExpressions) are evaluated against each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
feature:
|
||||||
|
description: Feature is the name of the feature set to
|
||||||
|
match against.
|
||||||
|
type: string
|
||||||
|
matchExpressions:
|
||||||
|
additionalProperties:
|
||||||
|
description: |-
|
||||||
|
MatchExpression specifies an expression to evaluate against a set of input
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
name:
|
||||||
|
description: Name of the rule.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- featureGroupRules
|
||||||
|
type: object
|
||||||
|
status:
|
||||||
|
description: |-
|
||||||
|
Status of the NodeFeatureGroup after the most recent evaluation of the
|
||||||
|
specification.
|
||||||
|
properties:
|
||||||
|
nodes:
|
||||||
|
description: Nodes is a list of FeatureGroupNode in the cluster that
|
||||||
|
match the featureGroupRules
|
||||||
|
items:
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: Name of the node.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-map-keys:
|
||||||
|
- name
|
||||||
|
x-kubernetes-list-type: map
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- spec
|
||||||
|
type: object
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
subresources:
|
||||||
|
status: {}
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
controller-gen.kubebuilder.io/version: v0.14.0
|
||||||
|
name: nodefeaturerules.nfd.k8s-sigs.io
|
||||||
|
spec:
|
||||||
|
group: nfd.k8s-sigs.io
|
||||||
|
names:
|
||||||
|
kind: NodeFeatureRule
|
||||||
|
listKind: NodeFeatureRuleList
|
||||||
|
plural: nodefeaturerules
|
||||||
|
shortNames:
|
||||||
|
- nfr
|
||||||
|
singular: nodefeaturerule
|
||||||
|
scope: Cluster
|
||||||
|
versions:
|
||||||
|
- name: v1alpha1
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: |-
|
||||||
|
NodeFeatureRule resource specifies a configuration for feature-based
|
||||||
|
customization of node objects, such as node labeling.
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
description: |-
|
||||||
|
APIVersion defines the versioned schema of this representation of an object.
|
||||||
|
Servers should convert recognized schemas to the latest internal value, and
|
||||||
|
may reject unrecognized values.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
description: |-
|
||||||
|
Kind is a string value representing the REST resource this object represents.
|
||||||
|
Servers may infer this from the endpoint the client submits requests to.
|
||||||
|
Cannot be updated.
|
||||||
|
In CamelCase.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: Spec defines the rules to be evaluated.
|
||||||
|
properties:
|
||||||
|
rules:
|
||||||
|
description: Rules is a list of node customization rules.
|
||||||
|
items:
|
||||||
|
description: Rule defines a rule for node customization such as
|
||||||
|
labeling.
|
||||||
|
properties:
|
||||||
|
annotations:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Annotations to create if the rule matches.
|
||||||
|
type: object
|
||||||
|
extendedResources:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: ExtendedResources to create if the rule matches.
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Labels to create if the rule matches.
|
||||||
|
type: object
|
||||||
|
labelsTemplate:
|
||||||
|
description: |-
|
||||||
|
LabelsTemplate specifies a template to expand for dynamically generating
|
||||||
|
multiple labels. Data (after template expansion) must be keys with an
|
||||||
|
optional value (<key>[=<value>]) separated by newlines.
|
||||||
|
type: string
|
||||||
|
matchAny:
|
||||||
|
description: MatchAny specifies a list of matchers one of which
|
||||||
|
must match.
|
||||||
|
items:
|
||||||
|
description: MatchAnyElem specifies one sub-matcher of MatchAny.
|
||||||
|
properties:
|
||||||
|
matchFeatures:
|
||||||
|
description: MatchFeatures specifies a set of matcher
|
||||||
|
terms all of which must match.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
FeatureMatcherTerm defines requirements against one feature set. All
|
||||||
|
requirements (specified as MatchExpressions) are evaluated against each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
feature:
|
||||||
|
description: Feature is the name of the feature
|
||||||
|
set to match against.
|
||||||
|
type: string
|
||||||
|
matchExpressions:
|
||||||
|
additionalProperties:
|
||||||
|
description: |-
|
||||||
|
MatchExpression specifies an expression to evaluate against a set of input
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- matchFeatures
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
matchFeatures:
|
||||||
|
description: MatchFeatures specifies a set of matcher terms
|
||||||
|
all of which must match.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
FeatureMatcherTerm defines requirements against one feature set. All
|
||||||
|
requirements (specified as MatchExpressions) are evaluated against each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
feature:
|
||||||
|
description: Feature is the name of the feature set to
|
||||||
|
match against.
|
||||||
|
type: string
|
||||||
|
matchExpressions:
|
||||||
|
additionalProperties:
|
||||||
|
description: |-
|
||||||
|
MatchExpression specifies an expression to evaluate against a set of input
|
||||||
|
values. It contains an operator that is applied when matching the input and
|
||||||
|
an array of values that the operator evaluates the input against.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
description: |-
|
||||||
|
MatchExpressions is the set of per-element expressions evaluated. These
|
||||||
|
match against the value of the specified elements.
|
||||||
|
type: object
|
||||||
|
matchName:
|
||||||
|
description: |-
|
||||||
|
MatchName in an expression that is matched against the name of each
|
||||||
|
element in the feature set.
|
||||||
|
properties:
|
||||||
|
op:
|
||||||
|
description: Op is the operator to be applied.
|
||||||
|
enum:
|
||||||
|
- In
|
||||||
|
- NotIn
|
||||||
|
- InRegexp
|
||||||
|
- Exists
|
||||||
|
- DoesNotExist
|
||||||
|
- Gt
|
||||||
|
- Lt
|
||||||
|
- GtLt
|
||||||
|
- IsTrue
|
||||||
|
- IsFalse
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the list of values that the operand evaluates the input
|
||||||
|
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||||
|
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||||
|
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||||
|
In other cases Value should contain at least one element.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- op
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- feature
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
name:
|
||||||
|
description: Name of the rule.
|
||||||
|
type: string
|
||||||
|
taints:
|
||||||
|
description: Taints to create if the rule matches.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
The node this Taint is attached to has the "effect" on
|
||||||
|
any pod that does not tolerate the Taint.
|
||||||
|
properties:
|
||||||
|
effect:
|
||||||
|
description: |-
|
||||||
|
Required. The effect of the taint on pods
|
||||||
|
that do not tolerate the taint.
|
||||||
|
Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
|
||||||
|
type: string
|
||||||
|
key:
|
||||||
|
description: Required. The taint key to be applied to
|
||||||
|
a node.
|
||||||
|
type: string
|
||||||
|
timeAdded:
|
||||||
|
description: |-
|
||||||
|
TimeAdded represents the time at which the taint was added.
|
||||||
|
It is only written for NoExecute taints.
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: The taint value corresponding to the taint
|
||||||
|
key.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- effect
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
vars:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: |-
|
||||||
|
Vars is the variables to store if the rule matches. Variables do not
|
||||||
|
directly inflict any changes in the node object. However, they can be
|
||||||
|
referenced from other rules enabling more complex rule hierarchies,
|
||||||
|
without exposing intermediary output values as labels.
|
||||||
|
type: object
|
||||||
|
varsTemplate:
|
||||||
|
description: |-
|
||||||
|
VarsTemplate specifies a template to expand for dynamically generating
|
||||||
|
multiple variables. Data (after template expansion) must be keys with an
|
||||||
|
optional value (<key>[=<value>]) separated by newlines.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- rules
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- spec
|
||||||
|
type: object
|
||||||
|
served: true
|
||||||
|
storage: true
|
|
@ -0,0 +1,107 @@
|
||||||
|
{{/* vim: set filetype=mustache: */}}
|
||||||
|
{{/*
Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "node-feature-discovery.name" -}}
{{- $base := default .Chart.Name .Values.nameOverride -}}
{{- $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Fully qualified app name, limited to 63 characters (some Kubernetes name
fields are restricted to that length by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "node-feature-discovery.fullname" -}}
{{- $full := .Values.fullnameOverride -}}
{{- if not $full -}}
  {{- $name := default .Chart.Name .Values.nameOverride -}}
  {{- $full = .Release.Name -}}
  {{- if not (contains $name .Release.Name) -}}
    {{- $full = printf "%s-%s" .Release.Name $name -}}
  {{- end -}}
{{- end -}}
{{- $full | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Target namespace: .Values.namespaceOverride when set, otherwise the release
namespace. Lets combined charts deploy this chart into a different namespace.
*/}}
{{- define "node-feature-discovery.namespace" -}}
{{- default .Release.Namespace .Values.namespaceOverride -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
"<chart name>-<chart version>" value for the helm.sh/chart label.
"+" is replaced by "_" and the result is cut to 63 characters with a
trailing "-" removed.
*/}}
{{- define "node-feature-discovery.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Common labels: chart label, the selector labels, the app version (only when
the chart declares one) and the managing service.
*/}}
{{- define "node-feature-discovery.labels" -}}
helm.sh/chart: {{ include "node-feature-discovery.chart" . }}
{{ include "node-feature-discovery.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ . | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Selector labels: the chart name and the release name.
*/}}
{{- define "node-feature-discovery.selectorLabels" -}}
{{- $appName := include "node-feature-discovery.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Service account name for nfd-master.
An explicit .Values.master.serviceAccount.name always wins. Without one, the
name is the chart fullname when the account is created by the chart, and
"default" otherwise.
*/}}
{{- define "node-feature-discovery.master.serviceAccountName" -}}
{{- $sa := .Values.master.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
  {{- $fallback = include "node-feature-discovery.fullname" . -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Service account name for nfd-worker.
An explicit .Values.worker.serviceAccount.name always wins. Without one, the
name is "<fullname>-worker" when the account is created by the chart, and
"default" otherwise.
*/}}
{{- define "node-feature-discovery.worker.serviceAccountName" -}}
{{- $sa := .Values.worker.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
  {{- $fallback = printf "%s-worker" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Service account name for the topology updater.
An explicit .Values.topologyUpdater.serviceAccount.name always wins. Without
one, the name is "<fullname>-topology-updater" when the account is created by
the chart, and "default" otherwise.
*/}}
{{- define "node-feature-discovery.topologyUpdater.serviceAccountName" -}}
{{- $sa := .Values.topologyUpdater.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
  {{- $fallback = printf "%s-topology-updater" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Service account name for nfd-gc.
An explicit .Values.gc.serviceAccount.name always wins. Without one, the name
is "<fullname>-gc" when the account is created by the chart, and "default"
otherwise.
*/}}
{{- define "node-feature-discovery.gc.serviceAccountName" -}}
{{- $sa := .Values.gc.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
  {{- $fallback = printf "%s-gc" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
|
@ -0,0 +1,80 @@
|
||||||
|
{{- if .Values.tls.certManager }}
{{- if .Values.master.enable }}
---
# Certificate for nfd-master, stored in the nfd-master-cert secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-master-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-master-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-master
  dnsNames:
    # must match the service name
    - {{ include "node-feature-discovery.fullname" . }}-master
    # first one is configured for use by the worker; below are for completeness
    - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc
    - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    # A custom issuer kind is honoured only together with a custom issuer name.
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- if .Values.worker.enable }}
---
# Certificate for nfd-worker, stored in the nfd-worker-cert secret.
# The document separator lives inside the conditional (as for the other
# certificates) so that no empty document is rendered when the worker is off.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-worker-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-worker-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-worker
  dnsNames:
    - {{ include "node-feature-discovery.fullname" . }}-worker.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- if .Values.topologyUpdater.enable }}
---
# Certificate for the topology updater, stored in the
# nfd-topology-updater-cert secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-topology-updater-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-topology-updater-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-topology-updater
  dnsNames:
    - {{ include "node-feature-discovery.fullname" . }}-topology-updater.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- end }}
|
|
@ -0,0 +1,42 @@
|
||||||
|
{{- if and .Values.tls.certManager (not .Values.tls.certManagerCertificate.issuerName ) }}
# Rendered only when cert-manager is enabled and no external issuer name is
# configured. Bootstraps a CA issuer as described in
# https://cert-manager.io/docs/configuration/selfsigned/#bootstrapping-ca-issuers
#   1. nfd-ca-bootstrap: a self-signed issuer
#   2. nfd-ca-cert:      a CA certificate issued by nfd-ca-bootstrap
#   3. nfd-ca-issuer:    a CA issuer backed by the nfd-ca-cert secret
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: nfd-ca-bootstrap
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  selfSigned: {}

---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-ca-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  isCA: true
  secretName: nfd-ca-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-ca-cert
  issuerRef:
    name: nfd-ca-bootstrap
    kind: Issuer
    group: cert-manager.io

---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: nfd-ca-issuer
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  ca:
    secretName: nfd-ca-cert
{{- end }}
|
|
@ -0,0 +1,133 @@
|
||||||
|
{{- if and .Values.master.enable .Values.master.rbac.create }}
# ClusterRole for nfd-master; rendered when the master is enabled and RBAC
# creation is requested.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  # Nodes and their status: read and modify.
  - apiGroups: [""]
    resources: ["nodes", "nodes/status"]
    verbs: ["get", "patch", "update", "list"]
  # NFD custom resources: read-only.
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures", "nodefeaturerules", "nodefeaturegroups"]
    verbs: ["get", "list", "watch"]
  # Status of NodeFeatureGroup objects: modify.
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeaturegroups/status"]
    verbs: ["patch", "update"]
  # Leases: any lease may be created ...
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["create"]
  # ... but only the named lease may be read and updated.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    resourceNames: ["nfd-master.nfd.kubernetes.io"]
    verbs: ["get", "update"]
{{- end }}
|
||||||
|
|
||||||
|
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }}
---
# ClusterRole for the topology updater; rendered when the topology updater is
# enabled and RBAC creation is requested.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list"]
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get"]
  # NodeResourceTopology objects: create, read and update.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["create", "get", "update"]
{{- end }}
|
||||||
|
|
||||||
|
{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
# ClusterRole for nfd-gc; rendered when gc and its RBAC are enabled and either
# the NodeFeature API or the topology updater is in use.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "watch"]
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  # Objects that gc may list and delete.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["delete", "list"]
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures"]
    verbs: ["delete", "list"]
{{- end }}
|
|
@ -0,0 +1,52 @@
|
||||||
|
{{- if and .Values.master.enable .Values.master.rbac.create }}
# Binds the nfd-master ClusterRole to the nfd-master service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.master.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
roleRef:
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}
  apiGroup: rbac.authorization.k8s.io
{{- end }}
|
||||||
|
|
||||||
|
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }}
---
# Binds the topology-updater ClusterRole to the topology-updater service
# account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
roleRef:
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  apiGroup: rbac.authorization.k8s.io
{{- end }}
|
||||||
|
|
||||||
|
{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
# Binds the nfd-gc ClusterRole to the nfd-gc service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
roleRef:
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  apiGroup: rbac.authorization.k8s.io
{{- end }}
|
|
@ -0,0 +1,152 @@
|
||||||
|
{{- if .Values.master.enable }}
# Deployment running nfd-master. Rendered only when .Values.master.enable is
# set.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: master
  {{- with .Values.master.deploymentAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  replicas: {{ .Values.master.replicaCount }}
  revisionHistoryLimit: {{ .Values.master.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: master
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: master
      {{- with .Values.master.annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      {{- with .Values.priorityClassName }}
      priorityClassName: {{ . }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "node-feature-discovery.master.serviceAccountName" . }}
      enableServiceLinks: false
      securityContext:
        {{- toYaml .Values.master.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.master.hostNetwork }}
      containers:
        - name: master
          securityContext:
            {{- toYaml .Values.master.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          livenessProbe:
            {{- toYaml .Values.master.livenessProbe | nindent 12 }}
          readinessProbe:
            {{- toYaml .Values.master.readinessProbe | nindent 12 }}
          ports:
            - containerPort: {{ .Values.master.port | default "8080" }}
              name: grpc
            - containerPort: {{ .Values.master.metricsPort | default "8081" }}
              name: metrics
            - containerPort: {{ .Values.master.healthPort | default "8082" }}
              name: health
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Extra variables must be indented to the level of the env items
            # above; a shallower indent would place them outside the env list.
            {{- with .Values.master.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          command:
            - "nfd-master"
          resources:
            {{- toYaml .Values.master.resources | nindent 12 }}
          args:
            {{- if .Values.master.instance | empty | not }}
            - "-instance={{ .Values.master.instance }}"
            {{- end }}
            # The port flag is passed only when the NodeFeature API is not in
            # use; otherwise leader election is enabled for multiple replicas.
            {{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
            - "-port={{ .Values.master.port | default "8080" }}"
            {{- else if gt (int .Values.master.replicaCount) 1 }}
            - "-enable-leader-election"
            {{- end }}
            {{- if .Values.master.extraLabelNs | empty | not }}
            - "-extra-label-ns={{- join "," .Values.master.extraLabelNs }}"
            {{- end }}
            {{- if .Values.master.denyLabelNs | empty | not }}
            - "-deny-label-ns={{- join "," .Values.master.denyLabelNs }}"
            {{- end }}
            {{- if .Values.master.resourceLabels | empty | not }}
            - "-resource-labels={{- join "," .Values.master.resourceLabels }}"
            {{- end }}
            {{- if .Values.master.enableTaints }}
            - "-enable-taints"
            {{- end }}
            {{- if .Values.master.crdController | kindIs "invalid" | not }}
            - "-crd-controller={{ .Values.master.crdController }}"
            {{- else }}
            ## By default, disable crd controller for other than the default instances
            - "-crd-controller={{ .Values.master.instance | empty }}"
            {{- end }}
            {{- if .Values.master.featureRulesController | kindIs "invalid" | not }}
            - "-featurerules-controller={{ .Values.master.featureRulesController }}"
            {{- end }}
            {{- if .Values.master.resyncPeriod }}
            - "-resync-period={{ .Values.master.resyncPeriod }}"
            {{- end }}
            {{- if .Values.master.nfdApiParallelism | empty | not }}
            - "-nfd-api-parallelism={{ .Values.master.nfdApiParallelism }}"
            {{- end }}
            {{- if .Values.tls.enable }}
            - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
            - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
            - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
            {{- end }}
            # Go over featureGates and add the feature-gate flag
            {{- range $key, $value := .Values.featureGates }}
            - "-feature-gates={{ $key }}={{ $value }}"
            {{- end }}
            - "-metrics={{ .Values.master.metricsPort | default "8081" }}"
            - "-grpc-health={{ .Values.master.healthPort | default "8082" }}"
          volumeMounts:
            {{- if .Values.tls.enable }}
            - name: nfd-master-cert
              mountPath: "/etc/kubernetes/node-feature-discovery/certs"
              readOnly: true
            {{- end }}
            - name: nfd-master-conf
              mountPath: "/etc/kubernetes/node-feature-discovery"
              readOnly: true
      volumes:
        {{- if .Values.tls.enable }}
        - name: nfd-master-cert
          secret:
            secretName: nfd-master-cert
        {{- end }}
        - name: nfd-master-conf
          configMap:
            name: {{ include "node-feature-discovery.fullname" . }}-master-conf
            items:
              - key: nfd-master.conf
                path: nfd-master.conf
      {{- with .Values.master.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -0,0 +1,85 @@
|
||||||
|
{{- if and .Values.gc.enable (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) -}}
# Deployment running nfd-gc. Rendered when gc is enabled and either the
# NodeFeature API or the topology updater is in use.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: gc
  {{- with .Values.gc.deploymentAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  replicas: {{ .Values.gc.replicaCount | default 1 }}
  revisionHistoryLimit: {{ .Values.gc.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: gc
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: gc
      {{- with .Values.gc.annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      serviceAccountName: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
      dnsPolicy: ClusterFirstWithHostNet
      {{- with .Values.priorityClassName }}
      priorityClassName: {{ . }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.gc.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.gc.hostNetwork }}
      containers:
        - name: gc
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: "{{ .Values.image.pullPolicy }}"
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Extra variables must be indented to the level of the env items
            # above; a shallower indent would place them outside the env list.
            {{- with .Values.gc.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          command:
            - "nfd-gc"
          args:
            {{- if .Values.gc.interval | empty | not }}
            - "-gc-interval={{ .Values.gc.interval }}"
            {{- end }}
          resources:
            {{- toYaml .Values.gc.resources | nindent 12 }}
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: [ "ALL" ]
            readOnlyRootFilesystem: true
            runAsNonRoot: true
          ports:
            - name: metrics
              containerPort: {{ .Values.gc.metricsPort | default "8081"}}

      {{- with .Values.gc.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.gc.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.gc.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
||||||
|
{{- if .Values.master.enable }}
# ConfigMap carrying .Values.master.config, rendered as YAML, under the
# nfd-master.conf key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-master.conf: |-
    {{- toYaml .Values.master.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
||||||
|
{{- if .Values.topologyUpdater.enable -}}
# ConfigMap carrying .Values.topologyUpdater.config, rendered as YAML, under
# the nfd-topology-updater.conf key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-topology-updater.conf: |-
    {{- toYaml .Values.topologyUpdater.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
||||||
|
{{- if .Values.worker.enable }}
# ConfigMap carrying .Values.worker.config, rendered as YAML, under the
# nfd-worker.conf key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-worker.conf: |-
    {{- toYaml .Values.worker.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,94 @@
|
||||||
|
# Service account used by the post-delete prune job. Created as a post-delete
# hook and removed before re-creation and after the hook succeeds.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
|
||||||
|
---
|
||||||
|
# ClusterRole for the post-delete prune job: read and modify nodes and their
# status.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
rules:
  - apiGroups: [""]
    resources: ["nodes", "nodes/status"]
    verbs: ["get", "patch", "update", "list"]
|
||||||
|
---
|
||||||
|
# Binds the prune ClusterRole to the prune service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.fullname" . }}-prune
    namespace: {{ include "node-feature-discovery.namespace" . }}
roleRef:
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  apiGroup: rbac.authorization.k8s.io
|
||||||
|
---
|
||||||
|
# Post-delete hook job that runs "nfd-master -prune" once after the release is
# deleted. Scheduling settings (nodeSelector, affinity, tolerations) and the
# container security context are taken from the master values.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.labels" . | nindent 8 }}
        role: prune
    spec:
      serviceAccountName: {{ include "node-feature-discovery.fullname" . }}-prune
      # The job pulls the same image as the other workloads of this chart, so
      # it needs the same pull secrets when the image is in a private registry.
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: nfd-master
          securityContext:
            {{- toYaml .Values.master.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command:
            - "nfd-master"
          args:
            - "-prune"
            {{- if .Values.master.instance | empty | not }}
            - "-instance={{ .Values.master.instance }}"
            {{- end }}
      restartPolicy: Never
      {{- with .Values.master.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
|
|
@ -0,0 +1,26 @@
|
||||||
|
{{- if .Values.prometheus.enable }}
# PodMonitor scraping /metrics over http on the "metrics" port of the pods
# that carry this release's selector labels.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.selectorLabels" . | nindent 4 }}
    {{- with .Values.prometheus.labels }}
    {{ toYaml . | nindent 4 }}
    {{- end }}
spec:
  podMetricsEndpoints:
    - honorLabels: true
      interval: {{ .Values.prometheus.scrapeInterval }}
      path: /metrics
      port: metrics
      scheme: http
  namespaceSelector:
    matchNames:
      - {{ include "node-feature-discovery.namespace" . }}
  selector:
    matchExpressions:
      - key: app.kubernetes.io/instance
        operator: In
        values:
          - "{{ .Release.Name }}"
      - key: app.kubernetes.io/name
        operator: In
        values:
          - "{{ include "node-feature-discovery.name" . }}"
{{- end }}
|
|
@ -0,0 +1,24 @@
|
||||||
|
{{- /*
Namespaced Role for nfd-worker. Rendered only when the worker and its RBAC
objects are both enabled.
*/ -}}
{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  # NodeFeature objects: create, read and update.
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures"]
    verbs: ["create", "get", "update"]
  # Core-group pods: read only.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get"]
{{- end }}
|
|
@ -0,0 +1,18 @@
|
||||||
|
{{- /*
Binds the "<fullname>-worker" Role to the worker ServiceAccount in the chart's
namespace. Rendered under the same condition as the Role itself.
*/ -}}
{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "node-feature-discovery.fullname" . }}-worker
{{- end }}
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
{{- /*
gRPC Service in front of nfd-master. Skipped when both
.Values.featureGates.NodeFeatureAPI and .Values.enableNodeFeatureApi are set,
or when the master is disabled.
*/ -}}
{{- if and (not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi)) .Values.master.enable }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: master
spec:
  type: {{ .Values.master.service.type }}
  selector:
    {{- include "node-feature-discovery.selectorLabels" . | nindent 4 }}
    role: master
  ports:
    # Falls back to 8080 when master.service.port is unset.
    - name: grpc
      protocol: TCP
      port: {{ .Values.master.service.port | default "8080" }}
      targetPort: grpc
{{- end }}
|
|
@ -0,0 +1,58 @@
|
||||||
|
{{- /* ServiceAccount for nfd-master; created only when the master is enabled and serviceAccount.create is set. */ -}}
{{- if and .Values.master.enable .Values.master.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.master.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- if .Values.master.serviceAccount.annotations }}
  annotations:
    {{- toYaml .Values.master.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- end }}
|
||||||
|
|
||||||
|
{{- /* ServiceAccount for nfd-topology-updater; emitted as its own YAML document. */ -}}
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.serviceAccount.create }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- if .Values.topologyUpdater.serviceAccount.annotations }}
  annotations:
    {{- toYaml .Values.topologyUpdater.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- end }}
|
||||||
|
|
||||||
|
{{- /*
ServiceAccount for the gc component. Needs gc.enable and
gc.serviceAccount.create, plus either the NodeFeature API (feature gate and
enableNodeFeatureApi both set) or the topology updater being enabled.
*/ -}}
{{- if and .Values.gc.enable .Values.gc.serviceAccount.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- if .Values.gc.serviceAccount.annotations }}
  annotations:
    {{- toYaml .Values.gc.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- end }}
|
||||||
|
|
||||||
|
{{- /* ServiceAccount for nfd-worker; emitted as its own YAML document. */ -}}
{{- if and .Values.worker.enable .Values.worker.serviceAccount.create }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- if .Values.worker.serviceAccount.annotations }}
  annotations:
    {{- toYaml .Values.worker.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- end }}
|
|
@ -0,0 +1,278 @@
|
||||||
|
{{- /*
CustomResourceDefinition for NodeResourceTopology (cluster-scoped). Serves
v1alpha1 and v1alpha2; v1alpha2 is the storage version. Rendered only when the
topology updater is enabled and topologyUpdater.createCRDs is set.
*/ -}}
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.createCRDs -}}
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1870
    controller-gen.kubebuilder.io/version: v0.11.2
  creationTimestamp: null
  name: noderesourcetopologies.topology.node.k8s.io
spec:
  group: topology.node.k8s.io
  names:
    kind: NodeResourceTopology
    listKind: NodeResourceTopologyList
    plural: noderesourcetopologies
    shortNames:
      - node-res-topo
    singular: noderesourcetopology
  scope: Cluster
  versions:
    - name: v1alpha1
      schema:
        openAPIV3Schema:
          description: NodeResourceTopology describes node resources and their topology.
          properties:
            apiVersion:
              description: >-
                APIVersion defines the versioned schema of this representation
                of an object. Servers should convert recognized schemas to the latest
                internal value, and may reject unrecognized values.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
              type: string
            kind:
              description: >-
                Kind is a string value representing the REST resource this
                object represents. Servers may infer this from the endpoint the client
                submits requests to. Cannot be updated. In CamelCase.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
              type: string
            metadata:
              type: object
            topologyPolicies:
              items:
                type: string
              type: array
            zones:
              description: ZoneList contains an array of Zone objects.
              items:
                description: >-
                  Zone represents a resource topology zone, e.g. socket,
                  node, die or core.
                properties:
                  attributes:
                    description: AttributeList contains an array of AttributeInfo objects.
                    items:
                      description: AttributeInfo contains one attribute of a Zone.
                      properties:
                        name:
                          type: string
                        value:
                          type: string
                      required:
                        - name
                        - value
                      type: object
                    type: array
                  costs:
                    description: CostList contains an array of CostInfo objects.
                    items:
                      description: >-
                        CostInfo describes the cost (or distance) between
                        two Zones.
                      properties:
                        name:
                          type: string
                        value:
                          format: int64
                          type: integer
                      required:
                        - name
                        - value
                      type: object
                    type: array
                  name:
                    type: string
                  parent:
                    type: string
                  resources:
                    description: >-
                      ResourceInfoList contains an array of ResourceInfo
                      objects.
                    items:
                      description: >-
                        ResourceInfo contains information about one resource
                        type.
                      properties:
                        allocatable:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Allocatable quantity of the resource, corresponding
                            to allocatable in node status, i.e. total amount of this
                            resource available to be used by pods.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        available:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Available is the amount of this resource currently
                            available for new (to be scheduled) pods, i.e. Allocatable
                            minus the resources reserved by currently running pods.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        capacity:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Capacity of the resource, corresponding to capacity
                            in node status, i.e. total amount of this resource that
                            the node has.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        name:
                          description: Name of the resource.
                          type: string
                      required:
                        - allocatable
                        - available
                        - capacity
                        - name
                      type: object
                    type: array
                  type:
                    type: string
                required:
                  - name
                  - type
                type: object
              type: array
          required:
            - topologyPolicies
            - zones
          type: object
      served: true
      storage: false
    - name: v1alpha2
      schema:
        openAPIV3Schema:
          description: NodeResourceTopology describes node resources and their topology.
          properties:
            apiVersion:
              description: >-
                APIVersion defines the versioned schema of this representation
                of an object. Servers should convert recognized schemas to the latest
                internal value, and may reject unrecognized values.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
              type: string
            attributes:
              description: AttributeList contains an array of AttributeInfo objects.
              items:
                description: AttributeInfo contains one attribute of a Zone.
                properties:
                  name:
                    type: string
                  value:
                    type: string
                required:
                  - name
                  - value
                type: object
              type: array
            kind:
              description: >-
                Kind is a string value representing the REST resource this
                object represents. Servers may infer this from the endpoint the client
                submits requests to. Cannot be updated. In CamelCase.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
              type: string
            metadata:
              type: object
            topologyPolicies:
              description: >-
                DEPRECATED (to be removed in v1beta1): use top level attributes
                if needed
              items:
                type: string
              type: array
            zones:
              description: ZoneList contains an array of Zone objects.
              items:
                description: >-
                  Zone represents a resource topology zone, e.g. socket,
                  node, die or core.
                properties:
                  attributes:
                    description: AttributeList contains an array of AttributeInfo objects.
                    items:
                      description: AttributeInfo contains one attribute of a Zone.
                      properties:
                        name:
                          type: string
                        value:
                          type: string
                      required:
                        - name
                        - value
                      type: object
                    type: array
                  costs:
                    description: CostList contains an array of CostInfo objects.
                    items:
                      description: >-
                        CostInfo describes the cost (or distance) between
                        two Zones.
                      properties:
                        name:
                          type: string
                        value:
                          format: int64
                          type: integer
                      required:
                        - name
                        - value
                      type: object
                    type: array
                  name:
                    type: string
                  parent:
                    type: string
                  resources:
                    description: >-
                      ResourceInfoList contains an array of ResourceInfo
                      objects.
                    items:
                      description: >-
                        ResourceInfo contains information about one resource
                        type.
                      properties:
                        allocatable:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Allocatable quantity of the resource, corresponding
                            to allocatable in node status, i.e. total amount of this
                            resource available to be used by pods.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        available:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Available is the amount of this resource currently
                            available for new (to be scheduled) pods, i.e. Allocatable
                            minus the resources reserved by currently running pods.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        capacity:
                          anyOf:
                            - type: integer
                            - type: string
                          description: >-
                            Capacity of the resource, corresponding to capacity
                            in node status, i.e. total amount of this resource that
                            the node has.
                          pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                          x-kubernetes-int-or-string: true
                        name:
                          description: Name of the resource.
                          type: string
                      required:
                        - allocatable
                        - available
                        - capacity
                        - name
                      type: object
                    type: array
                  type:
                    type: string
                required:
                  - name
                  - type
                type: object
              type: array
          required:
            - zones
          type: object
      served: true
      storage: true
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
{{- end }}
|
|
@ -0,0 +1,171 @@
|
||||||
|
{{- /*
DaemonSet running nfd-topology-updater. Rendered only when
.Values.topologyUpdater.enable is set.
*/ -}}
{{- if .Values.topologyUpdater.enable -}}
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: topology-updater
  {{- if .Values.topologyUpdater.daemonsetAnnotations }}
  annotations:
    {{- toYaml .Values.topologyUpdater.daemonsetAnnotations | nindent 4 }}
  {{- end }}
spec:
  revisionHistoryLimit: {{ .Values.topologyUpdater.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: topology-updater
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: topology-updater
      {{- if .Values.topologyUpdater.annotations }}
      annotations:
        {{- toYaml .Values.topologyUpdater.annotations | nindent 8 }}
      {{- end }}
    spec:
      serviceAccountName: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
      dnsPolicy: ClusterFirstWithHostNet
      {{- if .Values.priorityClassName }}
      priorityClassName: {{ .Values.priorityClassName }}
      {{- end }}
      {{- if .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml .Values.imagePullSecrets | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.topologyUpdater.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.topologyUpdater.hostNetwork }}
      containers:
      - name: topology-updater
        # image.tag falls back to the chart's appVersion when unset.
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: "{{ .Values.image.pullPolicy }}"
        livenessProbe:
          {{- toYaml .Values.topologyUpdater.livenessProbe | nindent 10 }}
        readinessProbe:
          {{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }}
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: NODE_ADDRESS
          valueFrom:
            fieldRef:
              fieldPath: status.hostIP
        {{- if .Values.topologyUpdater.extraEnvs }}
        {{- toYaml .Values.topologyUpdater.extraEnvs | nindent 8 }}
        {{- end }}
        command:
        - "nfd-topology-updater"
        args:
        - "-podresources-socket=/host-var/lib/kubelet-podresources/kubelet.sock"
        # Unset values fall back to a 3s interval and to watching every namespace.
        - "-sleep-interval={{ .Values.topologyUpdater.updateInterval | default "3s" }}"
        - "-watch-namespace={{ .Values.topologyUpdater.watchNamespace | default "*" }}"
        {{- if .Values.tls.enable }}
        - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
        - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
        - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
        {{- end }}
        {{- if not .Values.topologyUpdater.podSetFingerprint }}
        - "-pods-fingerprint=false"
        {{- end }}
        {{- if .Values.topologyUpdater.kubeletConfigPath }}
        - "-kubelet-config-uri=file:///host-var/kubelet-config"
        {{- end }}
        {{- if not .Values.topologyUpdater.kubeletStateDir }}
        # Disable kubelet state tracking by giving an empty path
        - "-kubelet-state-dir="
        {{- end }}
        - "-metrics={{ .Values.topologyUpdater.metricsPort | default "8081" }}"
        - "-grpc-health={{ .Values.topologyUpdater.healthPort | default "8082" }}"
        ports:
        - name: metrics
          containerPort: {{ .Values.topologyUpdater.metricsPort | default "8081" }}
        - name: health
          containerPort: {{ .Values.topologyUpdater.healthPort | default "8082" }}
        volumeMounts:
        {{- if .Values.topologyUpdater.kubeletConfigPath }}
        - name: kubelet-config
          mountPath: /host-var/kubelet-config
        {{- end }}
        - name: kubelet-podresources-sock
          mountPath: /host-var/lib/kubelet-podresources/kubelet.sock
        - name: host-sys
          mountPath: /host-sys
        {{- if .Values.topologyUpdater.kubeletStateDir }}
        - name: kubelet-state-files
          mountPath: /host-var/lib/kubelet
          readOnly: true
        {{- end }}
        {{- if .Values.tls.enable }}
        - name: nfd-topology-updater-cert
          mountPath: "/etc/kubernetes/node-feature-discovery/certs"
          readOnly: true
        {{- end }}
        - name: nfd-topology-updater-conf
          mountPath: "/etc/kubernetes/node-feature-discovery"
          readOnly: true
        resources:
          {{- toYaml .Values.topologyUpdater.resources | nindent 12 }}
        securityContext:
          {{- toYaml .Values.topologyUpdater.securityContext | nindent 12 }}
      volumes:
      - name: host-sys
        hostPath:
          path: "/sys"
      {{- with .Values.topologyUpdater.kubeletConfigPath }}
      - name: kubelet-config
        hostPath:
          path: {{ . }}
      {{- end }}
      - name: kubelet-podresources-sock
        hostPath:
          # Host socket path; falls back to the path below when no override is set.
          path: {{ .Values.topologyUpdater.kubeletPodResourcesSockPath | default "/var/lib/kubelet/pod-resources/kubelet.sock" }}
      {{- with .Values.topologyUpdater.kubeletStateDir }}
      - name: kubelet-state-files
        hostPath:
          path: {{ . }}
      {{- end }}
      - name: nfd-topology-updater-conf
        configMap:
          name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
          items:
          - key: nfd-topology-updater.conf
            path: nfd-topology-updater.conf
      {{- if .Values.tls.enable }}
      - name: nfd-topology-updater-cert
        secret:
          secretName: nfd-topology-updater-cert
      {{- end }}
      {{- if .Values.topologyUpdater.nodeSelector }}
      nodeSelector:
        {{- toYaml .Values.topologyUpdater.nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .Values.topologyUpdater.affinity }}
      affinity:
        {{- toYaml .Values.topologyUpdater.affinity | nindent 8 }}
      {{- end }}
      {{- if .Values.topologyUpdater.tolerations }}
      tolerations:
        {{- toYaml .Values.topologyUpdater.tolerations | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -0,0 +1,186 @@
|
||||||
|
{{- /*
DaemonSet running nfd-worker. Rendered only when .Values.worker.enable is set.

Notes:
- priorityClassName is emitted exactly once: worker.priorityClassName wins,
  otherwise the chart-wide priorityClassName is used. Emitting both produced a
  duplicate key in the pod spec when both values were set.
- The "-server" port falls back to 8080, the same default the nfd-master
  Service uses for master.service.port.
*/ -}}
{{- if .Values.worker.enable }}
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: worker
  {{- with .Values.worker.daemonsetAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  revisionHistoryLimit: {{ .Values.worker.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: worker
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: worker
      {{- with .Values.worker.annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      {{- /* Single source of truth: the worker-specific class overrides the chart-wide one. */}}
      {{- with (.Values.worker.priorityClassName | default .Values.priorityClassName) }}
      priorityClassName: {{ . | quote }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.worker.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.worker.hostNetwork }}
      containers:
      - name: worker
        securityContext:
          {{- toYaml .Values.worker.securityContext | nindent 12 }}
        # image.tag falls back to the chart's appVersion when unset.
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        livenessProbe:
          {{- toYaml .Values.worker.livenessProbe | nindent 12 }}
        readinessProbe:
          {{- toYaml .Values.worker.readinessProbe | nindent 12 }}
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_UID
          valueFrom:
            fieldRef:
              fieldPath: metadata.uid
        {{- with .Values.worker.extraEnvs }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        resources:
          {{- toYaml .Values.worker.resources | nindent 12 }}
        command:
        - "nfd-worker"
        args:
        {{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
        # gRPC mode: point the worker at the nfd-master Service. The port default
        # matches the one used by the Service template.
        - "-server={{ include "node-feature-discovery.fullname" . }}-master:{{ .Values.master.service.port | default "8080" }}"
        {{- end }}
        {{- if .Values.tls.enable }}
        - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
        - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
        - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
        {{- end }}
        # Go over featureGate and add the feature-gate flag
        {{- range $key, $value := .Values.featureGates }}
        - "-feature-gates={{ $key }}={{ $value }}"
        {{- end }}
        - "-metrics={{ .Values.worker.metricsPort | default "8081" }}"
        - "-grpc-health={{ .Values.worker.healthPort | default "8082" }}"
        ports:
        - containerPort: {{ .Values.worker.metricsPort | default "8081" }}
          name: metrics
        - containerPort: {{ .Values.worker.healthPort | default "8082" }}
          name: health
        volumeMounts:
        - name: host-boot
          mountPath: "/host-boot"
          readOnly: true
        - name: host-os-release
          mountPath: "/host-etc/os-release"
          readOnly: true
        - name: host-sys
          mountPath: "/host-sys"
          readOnly: true
        - name: host-usr-lib
          mountPath: "/host-usr/lib"
          readOnly: true
        - name: host-lib
          mountPath: "/host-lib"
          readOnly: true
        - name: host-proc-swaps
          mountPath: "/host-proc/swaps"
          readOnly: true
        {{- if .Values.worker.mountUsrSrc }}
        - name: host-usr-src
          mountPath: "/host-usr/src"
          readOnly: true
        {{- end }}
        - name: source-d
          mountPath: "/etc/kubernetes/node-feature-discovery/source.d/"
          readOnly: true
        - name: features-d
          mountPath: "/etc/kubernetes/node-feature-discovery/features.d/"
          readOnly: true
        - name: nfd-worker-conf
          mountPath: "/etc/kubernetes/node-feature-discovery"
          readOnly: true
        {{- if .Values.tls.enable }}
        - name: nfd-worker-cert
          mountPath: "/etc/kubernetes/node-feature-discovery/certs"
          readOnly: true
        {{- end }}
      volumes:
      - name: host-boot
        hostPath:
          path: "/boot"
      - name: host-os-release
        hostPath:
          path: "/etc/os-release"
      - name: host-sys
        hostPath:
          path: "/sys"
      - name: host-usr-lib
        hostPath:
          path: "/usr/lib"
      - name: host-lib
        hostPath:
          path: "/lib"
      - name: host-proc-swaps
        hostPath:
          path: "/proc/swaps"
      {{- if .Values.worker.mountUsrSrc }}
      - name: host-usr-src
        hostPath:
          path: "/usr/src"
      {{- end }}
      - name: source-d
        hostPath:
          path: "/etc/kubernetes/node-feature-discovery/source.d/"
      - name: features-d
        hostPath:
          path: "/etc/kubernetes/node-feature-discovery/features.d/"
      - name: nfd-worker-conf
        configMap:
          name: {{ include "node-feature-discovery.fullname" . }}-worker-conf
          items:
          - key: nfd-worker.conf
            path: nfd-worker.conf
      {{- if .Values.tls.enable }}
      - name: nfd-worker-cert
        secret:
          secretName: nfd-worker-cert
      {{- end }}
      {{- with .Values.worker.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.worker.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.worker.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
593
charts/gpu-operator/charts/node-feature-discovery/values.yaml
Normal file
593
charts/gpu-operator/charts/node-feature-discovery/values.yaml
Normal file
|
@ -0,0 +1,593 @@
|
||||||
|
image:
|
||||||
|
repository: registry.k8s.io/nfd/node-feature-discovery
|
||||||
|
# This should be set to 'IfNotPresent' for released version
|
||||||
|
pullPolicy: IfNotPresent
|
||||||
|
# tag, if defined will use the given image tag, else Chart.AppVersion will be used
|
||||||
|
# tag
|
||||||
|
imagePullSecrets: []
|
||||||
|
|
||||||
|
nameOverride: ""
|
||||||
|
fullnameOverride: ""
|
||||||
|
namespaceOverride: ""
|
||||||
|
|
||||||
|
enableNodeFeatureApi: true
|
||||||
|
|
||||||
|
featureGates:
|
||||||
|
NodeFeatureAPI: true
|
||||||
|
NodeFeatureGroupAPI: false
|
||||||
|
|
||||||
|
priorityClassName: ""
|
||||||
|
|
||||||
|
master:
|
||||||
|
enable: true
|
||||||
|
extraEnvs: []
|
||||||
|
hostNetwork: false
|
||||||
|
config: ### <NFD-MASTER-CONF-START-DO-NOT-REMOVE>
|
||||||
|
# noPublish: false
|
||||||
|
# autoDefaultNs: true
|
||||||
|
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
|
||||||
|
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
|
||||||
|
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
|
||||||
|
# enableTaints: false
|
||||||
|
# labelWhiteList: "foo"
|
||||||
|
# resyncPeriod: "2h"
|
||||||
|
# klog:
|
||||||
|
# addDirHeader: false
|
||||||
|
# alsologtostderr: false
|
||||||
|
# logBacktraceAt:
|
||||||
|
# logtostderr: true
|
||||||
|
# skipHeaders: false
|
||||||
|
# stderrthreshold: 2
|
||||||
|
# v: 0
|
||||||
|
# vmodule:
|
||||||
|
## NOTE: the following options are not dynamically run-time configurable
|
||||||
|
## and require a nfd-master restart to take effect after being changed
|
||||||
|
# logDir:
|
||||||
|
# logFile:
|
||||||
|
# logFileMaxSize: 1800
|
||||||
|
# skipLogHeaders: false
|
||||||
|
# leaderElection:
|
||||||
|
# leaseDuration: 15s
|
||||||
|
# # this value has to be lower than leaseDuration and greater than retryPeriod*1.2
|
||||||
|
# renewDeadline: 10s
|
||||||
|
# # this value has to be greater than 0
|
||||||
|
# retryPeriod: 2s
|
||||||
|
# nfdApiParallelism: 10
|
||||||
|
### <NFD-MASTER-CONF-END-DO-NOT-REMOVE>
|
||||||
|
# The TCP port that nfd-master listens for incoming requests. Default: 8080
|
||||||
|
# Deprecated: this parameter is related to the deprecated gRPC API and will
|
||||||
|
# be removed with it in a future release
|
||||||
|
port: 8080
|
||||||
|
metricsPort: 8081
|
||||||
|
healthPort: 8082
|
||||||
|
instance:
|
||||||
|
featureApi:
|
||||||
|
resyncPeriod:
|
||||||
|
denyLabelNs: []
|
||||||
|
extraLabelNs: []
|
||||||
|
resourceLabels: []
|
||||||
|
enableTaints: false
|
||||||
|
crdController: null
|
||||||
|
featureRulesController: null
|
||||||
|
nfdApiParallelism: null
|
||||||
|
deploymentAnnotations: {}
|
||||||
|
replicaCount: 1
|
||||||
|
|
||||||
|
podSecurityContext: {}
|
||||||
|
# fsGroup: 2000
|
||||||
|
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop: [ "ALL" ]
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
# runAsUser: 1000
|
||||||
|
|
||||||
|
serviceAccount:
|
||||||
|
# Specifies whether a service account should be created
|
||||||
|
create: true
|
||||||
|
# Annotations to add to the service account
|
||||||
|
annotations: {}
|
||||||
|
# The name of the service account to use.
|
||||||
|
# If not set and create is true, a name is generated using the fullname template
|
||||||
|
name:
|
||||||
|
|
||||||
|
# specify how many old ReplicaSets for the Deployment to retain.
|
||||||
|
revisionHistoryLimit:
|
||||||
|
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
|
||||||
|
service:
|
||||||
|
type: ClusterIP
|
||||||
|
port: 8080
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 4Gi
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
# You may want to use the same value for `requests.memory` and `limits.memory`. The “requests” value affects scheduling to accommodate pods on nodes.
|
||||||
|
# If there is a large difference between “requests” and “limits” and nodes experience memory pressure, the kernel may invoke
|
||||||
|
# the OOM Killer, even if the memory does not exceed the “limits” threshold. This can cause unexpected pod evictions. Memory
|
||||||
|
# cannot be compressed and once allocated to a pod, it can only be reclaimed by killing the pod.
|
||||||
|
# Natan Yellin 22/09/2022 https://home.robusta.dev/blog/kubernetes-memory-limit
|
||||||
|
memory: 128Mi
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations:
|
||||||
|
- key: "node-role.kubernetes.io/master"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
- key: "node-role.kubernetes.io/control-plane"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
|
||||||
|
annotations: {}
|
||||||
|
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
- weight: 1
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: "node-role.kubernetes.io/master"
|
||||||
|
operator: In
|
||||||
|
values: [""]
|
||||||
|
- weight: 1
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: "node-role.kubernetes.io/control-plane"
|
||||||
|
operator: In
|
||||||
|
values: [""]
|
||||||
|
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
# failureThreshold: 3
|
||||||
|
# periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
failureThreshold: 10
|
||||||
|
# periodSeconds: 10
|
||||||
|
|
||||||
|
worker:
|
||||||
|
enable: true
|
||||||
|
extraEnvs: []
|
||||||
|
hostNetwork: false
|
||||||
|
config: ### <NFD-WORKER-CONF-START-DO-NOT-REMOVE>
|
||||||
|
#core:
|
||||||
|
# labelWhiteList:
|
||||||
|
# noPublish: false
|
||||||
|
# sleepInterval: 60s
|
||||||
|
# featureSources: [all]
|
||||||
|
# labelSources: [all]
|
||||||
|
# klog:
|
||||||
|
# addDirHeader: false
|
||||||
|
# alsologtostderr: false
|
||||||
|
# logBacktraceAt:
|
||||||
|
# logtostderr: true
|
||||||
|
# skipHeaders: false
|
||||||
|
# stderrthreshold: 2
|
||||||
|
# v: 0
|
||||||
|
# vmodule:
|
||||||
|
## NOTE: the following options are not dynamically run-time configurable
|
||||||
|
## and require a nfd-worker restart to take effect after being changed
|
||||||
|
# logDir:
|
||||||
|
# logFile:
|
||||||
|
# logFileMaxSize: 1800
|
||||||
|
# skipLogHeaders: false
|
||||||
|
#sources:
|
||||||
|
# cpu:
|
||||||
|
# cpuid:
|
||||||
|
## NOTE: whitelist has priority over blacklist
|
||||||
|
# attributeBlacklist:
|
||||||
|
# - "AVX10"
|
||||||
|
# - "BMI1"
|
||||||
|
# - "BMI2"
|
||||||
|
# - "CLMUL"
|
||||||
|
# - "CMOV"
|
||||||
|
# - "CX16"
|
||||||
|
# - "ERMS"
|
||||||
|
# - "F16C"
|
||||||
|
# - "HTT"
|
||||||
|
# - "LZCNT"
|
||||||
|
# - "MMX"
|
||||||
|
# - "MMXEXT"
|
||||||
|
# - "NX"
|
||||||
|
# - "POPCNT"
|
||||||
|
# - "RDRAND"
|
||||||
|
# - "RDSEED"
|
||||||
|
# - "RDTSCP"
|
||||||
|
# - "SGX"
|
||||||
|
# - "SSE"
|
||||||
|
# - "SSE2"
|
||||||
|
# - "SSE3"
|
||||||
|
# - "SSE4"
|
||||||
|
# - "SSE42"
|
||||||
|
# - "SSSE3"
|
||||||
|
# - "TDX_GUEST"
|
||||||
|
# attributeWhitelist:
|
||||||
|
# kernel:
|
||||||
|
# kconfigFile: "/path/to/kconfig"
|
||||||
|
# configOpts:
|
||||||
|
# - "NO_HZ"
|
||||||
|
# - "X86"
|
||||||
|
# - "DMI"
|
||||||
|
# pci:
|
||||||
|
# deviceClassWhitelist:
|
||||||
|
# - "0200"
|
||||||
|
# - "03"
|
||||||
|
# - "12"
|
||||||
|
# deviceLabelFields:
|
||||||
|
# - "class"
|
||||||
|
# - "vendor"
|
||||||
|
# - "device"
|
||||||
|
# - "subsystem_vendor"
|
||||||
|
# - "subsystem_device"
|
||||||
|
# usb:
|
||||||
|
# deviceClassWhitelist:
|
||||||
|
# - "0e"
|
||||||
|
# - "ef"
|
||||||
|
# - "fe"
|
||||||
|
# - "ff"
|
||||||
|
# deviceLabelFields:
|
||||||
|
# - "class"
|
||||||
|
# - "vendor"
|
||||||
|
# - "device"
|
||||||
|
# local:
|
||||||
|
# hooksEnabled: false
|
||||||
|
# custom:
|
||||||
|
# # The following feature demonstrates the capabilities of the matchFeatures
|
||||||
|
# - name: "my custom rule"
|
||||||
|
# labels:
|
||||||
|
# "vendor.io/my-ng-feature": "true"
|
||||||
|
# # matchFeatures implements a logical AND over all matcher terms in the
|
||||||
|
# # list (i.e. all of the terms, or per-feature matchers, must match)
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: cpu.cpuid
|
||||||
|
# matchExpressions:
|
||||||
|
# AVX512F: {op: Exists}
|
||||||
|
# - feature: cpu.cstate
|
||||||
|
# matchExpressions:
|
||||||
|
# enabled: {op: IsTrue}
|
||||||
|
# - feature: cpu.pstate
|
||||||
|
# matchExpressions:
|
||||||
|
# no_turbo: {op: IsFalse}
|
||||||
|
# scaling_governor: {op: In, value: ["performance"]}
|
||||||
|
# - feature: cpu.rdt
|
||||||
|
# matchExpressions:
|
||||||
|
# RDTL3CA: {op: Exists}
|
||||||
|
# - feature: cpu.sst
|
||||||
|
# matchExpressions:
|
||||||
|
# bf.enabled: {op: IsTrue}
|
||||||
|
# - feature: cpu.topology
|
||||||
|
# matchExpressions:
|
||||||
|
# hardware_multithreading: {op: IsFalse}
|
||||||
|
#
|
||||||
|
# - feature: kernel.config
|
||||||
|
# matchExpressions:
|
||||||
|
# X86: {op: Exists}
|
||||||
|
# LSM: {op: InRegexp, value: ["apparmor"]}
|
||||||
|
# - feature: kernel.loadedmodule
|
||||||
|
# matchExpressions:
|
||||||
|
# e1000e: {op: Exists}
|
||||||
|
# - feature: kernel.selinux
|
||||||
|
# matchExpressions:
|
||||||
|
# enabled: {op: IsFalse}
|
||||||
|
# - feature: kernel.version
|
||||||
|
# matchExpressions:
|
||||||
|
# major: {op: In, value: ["5"]}
|
||||||
|
# minor: {op: Gt, value: ["10"]}
|
||||||
|
#
|
||||||
|
# - feature: storage.block
|
||||||
|
# matchExpressions:
|
||||||
|
# rotational: {op: In, value: ["0"]}
|
||||||
|
# dax: {op: In, value: ["0"]}
|
||||||
|
#
|
||||||
|
# - feature: network.device
|
||||||
|
# matchExpressions:
|
||||||
|
# operstate: {op: In, value: ["up"]}
|
||||||
|
# speed: {op: Gt, value: ["100"]}
|
||||||
|
#
|
||||||
|
# - feature: memory.numa
|
||||||
|
# matchExpressions:
|
||||||
|
# node_count: {op: Gt, value: ["2"]}
|
||||||
|
# - feature: memory.nv
|
||||||
|
# matchExpressions:
|
||||||
|
# devtype: {op: In, value: ["nd_dax"]}
|
||||||
|
# mode: {op: In, value: ["memory"]}
|
||||||
|
#
|
||||||
|
# - feature: system.osrelease
|
||||||
|
# matchExpressions:
|
||||||
|
# ID: {op: In, value: ["fedora", "centos"]}
|
||||||
|
# - feature: system.name
|
||||||
|
# matchExpressions:
|
||||||
|
# nodename: {op: InRegexp, value: ["^worker-X"]}
|
||||||
|
#
|
||||||
|
# - feature: local.label
|
||||||
|
# matchExpressions:
|
||||||
|
# custom-feature-knob: {op: Gt, value: ["100"]}
|
||||||
|
#
|
||||||
|
# # The following feature demonstrates the capabilities of the matchAny
|
||||||
|
# - name: "my matchAny rule"
|
||||||
|
# labels:
|
||||||
|
# "vendor.io/my-ng-feature-2": "my-value"
|
||||||
|
# # matchAny implements a logical OR over all elements (sub-matchers) in
|
||||||
|
# # the list (i.e. at least one feature matcher must match)
|
||||||
|
# matchAny:
|
||||||
|
# - matchFeatures:
|
||||||
|
# - feature: kernel.loadedmodule
|
||||||
|
# matchExpressions:
|
||||||
|
# driver-module-X: {op: Exists}
|
||||||
|
# - feature: pci.device
|
||||||
|
# matchExpressions:
|
||||||
|
# vendor: {op: In, value: ["8086"]}
|
||||||
|
# class: {op: In, value: ["0200"]}
|
||||||
|
# - matchFeatures:
|
||||||
|
# - feature: kernel.loadedmodule
|
||||||
|
# matchExpressions:
|
||||||
|
# driver-module-Y: {op: Exists}
|
||||||
|
# - feature: usb.device
|
||||||
|
# matchExpressions:
|
||||||
|
# vendor: {op: In, value: ["8086"]}
|
||||||
|
# class: {op: In, value: ["02"]}
|
||||||
|
#
|
||||||
|
# - name: "avx wildcard rule"
|
||||||
|
# labels:
|
||||||
|
# "my-avx-feature": "true"
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: cpu.cpuid
|
||||||
|
# matchName: {op: InRegexp, value: ["^AVX512"]}
|
||||||
|
#
|
||||||
|
# # The following features demonstrate label templating capabilities
|
||||||
|
# - name: "my template rule"
|
||||||
|
# labelsTemplate: |
|
||||||
|
# {{ range .system.osrelease }}vendor.io/my-system-feature.{{ .Name }}={{ .Value }}
|
||||||
|
# {{ end }}
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: system.osrelease
|
||||||
|
# matchExpressions:
|
||||||
|
# ID: {op: InRegexp, value: ["^open.*"]}
|
||||||
|
# VERSION_ID.major: {op: In, value: ["13", "15"]}
|
||||||
|
#
|
||||||
|
# - name: "my template rule 2"
|
||||||
|
# labelsTemplate: |
|
||||||
|
# {{ range .pci.device }}vendor.io/my-pci-device.{{ .class }}-{{ .device }}=with-cpuid
|
||||||
|
# {{ end }}
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: pci.device
|
||||||
|
# matchExpressions:
|
||||||
|
# class: {op: InRegexp, value: ["^06"]}
|
||||||
|
# vendor: ["8086"]
|
||||||
|
# - feature: cpu.cpuid
|
||||||
|
# matchExpressions:
|
||||||
|
# AVX: {op: Exists}
|
||||||
|
#
|
||||||
|
# # The following examples demonstrate vars field and back-referencing
|
||||||
|
# # previous labels and vars
|
||||||
|
# - name: "my dummy kernel rule"
|
||||||
|
# labels:
|
||||||
|
# "vendor.io/my.kernel.feature": "true"
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: kernel.version
|
||||||
|
# matchExpressions:
|
||||||
|
# major: {op: Gt, value: ["2"]}
|
||||||
|
#
|
||||||
|
# - name: "my dummy rule with no labels"
|
||||||
|
# vars:
|
||||||
|
# "my.dummy.var": "1"
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: cpu.cpuid
|
||||||
|
# matchExpressions: {}
|
||||||
|
#
|
||||||
|
# - name: "my rule using backrefs"
|
||||||
|
# labels:
|
||||||
|
# "vendor.io/my.backref.feature": "true"
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: rule.matched
|
||||||
|
# matchExpressions:
|
||||||
|
# vendor.io/my.kernel.feature: {op: IsTrue}
|
||||||
|
# my.dummy.var: {op: Gt, value: ["0"]}
|
||||||
|
#
|
||||||
|
# - name: "kconfig template rule"
|
||||||
|
# labelsTemplate: |
|
||||||
|
# {{ range .kernel.config }}kconfig-{{ .Name }}={{ .Value }}
|
||||||
|
# {{ end }}
|
||||||
|
# matchFeatures:
|
||||||
|
# - feature: kernel.config
|
||||||
|
# matchName: {op: In, value: ["SWAP", "X86", "ARM"]}
|
||||||
|
### <NFD-WORKER-CONF-END-DO-NOT-REMOVE>
|
||||||
|
|
||||||
|
metricsPort: 8081
|
||||||
|
healthPort: 8082
|
||||||
|
daemonsetAnnotations: {}
|
||||||
|
podSecurityContext: {}
|
||||||
|
# fsGroup: 2000
|
||||||
|
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop: [ "ALL" ]
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
# runAsUser: 1000
|
||||||
|
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
# failureThreshold: 3
|
||||||
|
# periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
failureThreshold: 10
|
||||||
|
# periodSeconds: 10
|
||||||
|
|
||||||
|
serviceAccount:
|
||||||
|
# Specifies whether a service account should be created.
|
||||||
|
# We create this by default to make it easier for downstream users to apply PodSecurityPolicies.
|
||||||
|
create: true
|
||||||
|
# Annotations to add to the service account
|
||||||
|
annotations: {}
|
||||||
|
# The name of the service account to use.
|
||||||
|
# If not set and create is true, a name is generated using the fullname template
|
||||||
|
name:
|
||||||
|
|
||||||
|
# specify how many old ControllerRevisions for the DaemonSet to retain.
|
||||||
|
revisionHistoryLimit:
|
||||||
|
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
|
||||||
|
# Allow users to mount the hostPath /usr/src, useful for RHCOS on s390x
|
||||||
|
# Does not work on systems without /usr/src AND a read-only /usr, such as Talos
|
||||||
|
mountUsrSrc: false
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 512Mi
|
||||||
|
requests:
|
||||||
|
cpu: 5m
|
||||||
|
memory: 64Mi
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
annotations: {}
|
||||||
|
|
||||||
|
affinity: {}
|
||||||
|
|
||||||
|
priorityClassName: ""
|
||||||
|
|
||||||
|
topologyUpdater:
|
||||||
|
config: ### <NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE>
|
||||||
|
## key = node name, value = list of resources to be excluded.
|
||||||
|
## use * to exclude from all nodes.
|
||||||
|
## an example of what the exclude list should look like
|
||||||
|
#excludeList:
|
||||||
|
# node1: [cpu]
|
||||||
|
# node2: [memory, example/deviceA]
|
||||||
|
# *: [hugepages-2Mi]
|
||||||
|
### <NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE>
|
||||||
|
|
||||||
|
enable: false
|
||||||
|
createCRDs: false
|
||||||
|
extraEnvs: []
|
||||||
|
hostNetwork: false
|
||||||
|
|
||||||
|
serviceAccount:
|
||||||
|
create: true
|
||||||
|
annotations: {}
|
||||||
|
name:
|
||||||
|
|
||||||
|
# specify how many old ControllerRevisions for the DaemonSet to retain.
|
||||||
|
revisionHistoryLimit:
|
||||||
|
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
|
||||||
|
metricsPort: 8081
|
||||||
|
healthPort: 8082
|
||||||
|
kubeletConfigPath:
|
||||||
|
kubeletPodResourcesSockPath:
|
||||||
|
updateInterval: 60s
|
||||||
|
watchNamespace: "*"
|
||||||
|
kubeletStateDir: /var/lib/kubelet
|
||||||
|
|
||||||
|
podSecurityContext: {}
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop: [ "ALL" ]
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsUser: 0
|
||||||
|
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
# failureThreshold: 3
|
||||||
|
# periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
failureThreshold: 10
|
||||||
|
# periodSeconds: 10
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 60Mi
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 40Mi
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
tolerations: []
|
||||||
|
annotations: {}
|
||||||
|
daemonsetAnnotations: {}
|
||||||
|
affinity: {}
|
||||||
|
podSetFingerprint: true
|
||||||
|
|
||||||
|
gc:
|
||||||
|
enable: true
|
||||||
|
extraEnvs: []
|
||||||
|
hostNetwork: false
|
||||||
|
replicaCount: 1
|
||||||
|
|
||||||
|
serviceAccount:
|
||||||
|
create: true
|
||||||
|
annotations: {}
|
||||||
|
name:
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
|
||||||
|
interval: 1h
|
||||||
|
|
||||||
|
podSecurityContext: {}
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1Gi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 128Mi
|
||||||
|
|
||||||
|
metricsPort: 8081
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
tolerations: []
|
||||||
|
annotations: {}
|
||||||
|
deploymentAnnotations: {}
|
||||||
|
affinity: {}
|
||||||
|
|
||||||
|
# specify how many old ReplicaSets for the Deployment to retain.
|
||||||
|
revisionHistoryLimit:
|
||||||
|
|
||||||
|
# Optionally use encryption for worker <--> master comms
|
||||||
|
# TODO: verify hostname is not yet supported
|
||||||
|
#
|
||||||
|
# If you do not enable certManager (and have it installed) you will
|
||||||
|
# need to manually, or otherwise, provision the TLS certs as secrets
|
||||||
|
tls:
|
||||||
|
enable: false
|
||||||
|
certManager: false
|
||||||
|
certManagerCertificate:
|
||||||
|
issuerKind:
|
||||||
|
issuerName:
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
enable: false
|
||||||
|
scrapeInterval: 10s
|
||||||
|
labels: {}
|
2384
charts/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Normal file
2384
charts/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Normal file
File diff suppressed because it is too large
Load diff
797
charts/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
Normal file
797
charts/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
Normal file
|
@ -0,0 +1,797 @@
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
controller-gen.kubebuilder.io/version: v0.16.4
|
||||||
|
name: nvidiadrivers.nvidia.com
|
||||||
|
spec:
|
||||||
|
group: nvidia.com
|
||||||
|
names:
|
||||||
|
kind: NVIDIADriver
|
||||||
|
listKind: NVIDIADriverList
|
||||||
|
plural: nvidiadrivers
|
||||||
|
shortNames:
|
||||||
|
- nvd
|
||||||
|
- nvdriver
|
||||||
|
- nvdrivers
|
||||||
|
singular: nvidiadriver
|
||||||
|
scope: Cluster
|
||||||
|
versions:
|
||||||
|
- additionalPrinterColumns:
|
||||||
|
- jsonPath: .status.state
|
||||||
|
name: Status
|
||||||
|
type: string
|
||||||
|
- jsonPath: .metadata.creationTimestamp
|
||||||
|
name: Age
|
||||||
|
type: string
|
||||||
|
name: v1alpha1
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: NVIDIADriver is the Schema for the nvidiadrivers API
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
description: |-
|
||||||
|
APIVersion defines the versioned schema of this representation of an object.
|
||||||
|
Servers should convert recognized schemas to the latest internal value, and
|
||||||
|
may reject unrecognized values.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
description: |-
|
||||||
|
Kind is a string value representing the REST resource this object represents.
|
||||||
|
Servers may infer this from the endpoint the client submits requests to.
|
||||||
|
Cannot be updated.
|
||||||
|
In CamelCase.
|
||||||
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: NVIDIADriverSpec defines the desired state of NVIDIADriver
|
||||||
|
properties:
|
||||||
|
annotations:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: |-
|
||||||
|
Optional: Annotations is an unstructured key value map stored with a resource that may be
|
||||||
|
set by external tools to store and retrieve arbitrary metadata. They are not
|
||||||
|
queryable and should be preserved when modifying objects.
|
||||||
|
type: object
|
||||||
|
args:
|
||||||
|
description: 'Optional: List of arguments'
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
certConfig:
|
||||||
|
description: 'Optional: Custom certificates configuration for NVIDIA
|
||||||
|
Driver container'
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
driverType:
|
||||||
|
default: gpu
|
||||||
|
description: DriverType defines NVIDIA driver type
|
||||||
|
enum:
|
||||||
|
- gpu
|
||||||
|
- vgpu
|
||||||
|
- vgpu-host-manager
|
||||||
|
type: string
|
||||||
|
x-kubernetes-validations:
|
||||||
|
- message: driverType is an immutable field. Please create a new NvidiaDriver
|
||||||
|
resource instead when you want to change this setting.
|
||||||
|
rule: self == oldSelf
|
||||||
|
env:
|
||||||
|
description: 'Optional: List of environment variables'
|
||||||
|
items:
|
||||||
|
description: EnvVar represents an environment variable present in
|
||||||
|
a Container.
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: Name of the environment variable.
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: Value of the environment variable.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
gdrcopy:
|
||||||
|
description: GDRCopy defines the spec for GDRCopy driver
|
||||||
|
properties:
|
||||||
|
args:
|
||||||
|
description: 'Optional: List of arguments'
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
enabled:
|
||||||
|
description: Enabled indicates if GDRCopy is enabled through GPU
|
||||||
|
operator
|
||||||
|
type: boolean
|
||||||
|
env:
|
||||||
|
description: 'Optional: List of environment variables'
|
||||||
|
items:
|
||||||
|
description: EnvVar represents an environment variable present
|
||||||
|
in a Container.
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: Name of the environment variable.
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: Value of the environment variable.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
image:
|
||||||
|
description: GDRCopy driver image name
|
||||||
|
pattern: '[a-zA-Z0-9\-]+'
|
||||||
|
type: string
|
||||||
|
imagePullPolicy:
|
||||||
|
description: Image pull policy
|
||||||
|
type: string
|
||||||
|
imagePullSecrets:
|
||||||
|
description: Image pull secrets
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
repository:
|
||||||
|
description: GDRCopy driver image repository
|
||||||
|
type: string
|
||||||
|
version:
|
||||||
|
description: GDRCopy driver image tag
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
gds:
|
||||||
|
description: GPUDirectStorage defines the spec for GDS driver
|
||||||
|
properties:
|
||||||
|
args:
|
||||||
|
description: 'Optional: List of arguments'
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
enabled:
|
||||||
|
description: Enabled indicates if GPUDirect Storage is enabled
|
||||||
|
through GPU operator
|
||||||
|
type: boolean
|
||||||
|
env:
|
||||||
|
description: 'Optional: List of environment variables'
|
||||||
|
items:
|
||||||
|
description: EnvVar represents an environment variable present
|
||||||
|
in a Container.
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: Name of the environment variable.
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: Value of the environment variable.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
image:
|
||||||
|
description: NVIDIA GPUDirect Storage Driver image name
|
||||||
|
pattern: '[a-zA-Z0-9\-]+'
|
||||||
|
type: string
|
||||||
|
imagePullPolicy:
|
||||||
|
description: Image pull policy
|
||||||
|
type: string
|
||||||
|
imagePullSecrets:
|
||||||
|
description: Image pull secrets
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
repository:
|
||||||
|
description: NVIDIA GPUDirect Storage Driver image repository
|
||||||
|
type: string
|
||||||
|
version:
|
||||||
|
description: NVIDIA GPUDirect Storage Driver image tag
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
image:
|
||||||
|
default: nvcr.io/nvidia/driver
|
||||||
|
description: NVIDIA Driver container image name
|
||||||
|
type: string
|
||||||
|
imagePullPolicy:
|
||||||
|
description: Image pull policy
|
||||||
|
type: string
|
||||||
|
imagePullSecrets:
|
||||||
|
description: Image pull secrets
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
kernelModuleConfig:
|
||||||
|
description: 'Optional: Kernel module configuration parameters for
|
||||||
|
the NVIDIA Driver'
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: |-
|
||||||
|
Optional: Map of string keys and values that can be used to organize and categorize
|
||||||
|
(scope and select) objects. May match selectors of replication controllers
|
||||||
|
and services.
|
||||||
|
type: object
|
||||||
|
licensingConfig:
|
||||||
|
description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
nlsEnabled:
|
||||||
|
description: NLSEnabled indicates if NVIDIA Licensing System is
|
||||||
|
used for licensing.
|
||||||
|
type: boolean
|
||||||
|
type: object
|
||||||
|
livenessProbe:
|
||||||
|
description: NVIDIA Driver container liveness probe settings
|
||||||
|
properties:
|
||||||
|
failureThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||||
|
Defaults to 3. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
initialDelaySeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after the container has started before liveness probes are initiated.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
type: integer
|
||||||
|
periodSeconds:
|
||||||
|
description: |-
|
||||||
|
How often (in seconds) to perform the probe.
|
||||||
|
Default to 10 seconds. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
successThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||||
|
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
timeoutSeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after which the probe times out.
|
||||||
|
Defaults to 1 second. Minimum value is 1.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
|
manager:
|
||||||
|
description: Manager represents configuration for NVIDIA Driver Manager
|
||||||
|
initContainer
|
||||||
|
properties:
|
||||||
|
env:
|
||||||
|
description: 'Optional: List of environment variables'
|
||||||
|
items:
|
||||||
|
description: EnvVar represents an environment variable present
|
||||||
|
in a Container.
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: Name of the environment variable.
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
description: Value of the environment variable.
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
image:
|
||||||
|
description: Image represents NVIDIA Driver Manager image name
|
||||||
|
pattern: '[a-zA-Z0-9\-]+'
|
||||||
|
type: string
|
||||||
|
imagePullPolicy:
|
||||||
|
description: Image pull policy
|
||||||
|
type: string
|
||||||
|
imagePullSecrets:
|
||||||
|
description: Image pull secrets
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
repository:
|
||||||
|
description: Repository represents Driver Manager repository path
|
||||||
|
type: string
|
||||||
|
version:
|
||||||
|
description: Version represents NVIDIA Driver Manager image tag(version)
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
nodeAffinity:
|
||||||
|
description: Affinity specifies node affinity rules for driver pods
|
||||||
|
properties:
|
||||||
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
description: |-
|
||||||
|
The scheduler will prefer to schedule pods to nodes that satisfy
|
||||||
|
the affinity expressions specified by this field, but it may choose
|
||||||
|
a node that violates one or more of the expressions. The node that is
|
||||||
|
most preferred is the one with the greatest sum of weights, i.e.
|
||||||
|
for each node that meets all of the scheduling requirements (resource
|
||||||
|
request, requiredDuringScheduling affinity expressions, etc.),
|
||||||
|
compute a sum by iterating through the elements of this field and adding
|
||||||
|
"weight" to the sum if the node matches the corresponding matchExpressions; the
|
||||||
|
node(s) with the highest sum are the most preferred.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
An empty preferred scheduling term matches all objects with implicit weight 0
|
||||||
|
(i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
|
||||||
|
properties:
|
||||||
|
preference:
|
||||||
|
description: A node selector term, associated with the corresponding
|
||||||
|
weight.
|
||||||
|
properties:
|
||||||
|
matchExpressions:
|
||||||
|
description: A list of node selector requirements by
|
||||||
|
node's labels.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
A node selector requirement is a selector that contains values, a key, and an operator
|
||||||
|
that relates the key and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The label key that the selector applies
|
||||||
|
to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
description: |-
|
||||||
|
Represents a key's relationship to a set of values.
|
||||||
|
Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
description: |-
|
||||||
|
An array of string values. If the operator is In or NotIn,
|
||||||
|
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||||
|
the values array must be empty. If the operator is Gt or Lt, the values
|
||||||
|
array must have a single element, which will be interpreted as an integer.
|
||||||
|
This array is replaced during a strategic merge patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
- operator
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
matchFields:
|
||||||
|
description: A list of node selector requirements by
|
||||||
|
node's fields.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
A node selector requirement is a selector that contains values, a key, and an operator
|
||||||
|
that relates the key and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The label key that the selector applies
|
||||||
|
to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
description: |-
|
||||||
|
Represents a key's relationship to a set of values.
|
||||||
|
Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
description: |-
|
||||||
|
An array of string values. If the operator is In or NotIn,
|
||||||
|
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||||
|
the values array must be empty. If the operator is Gt or Lt, the values
|
||||||
|
array must have a single element, which will be interpreted as an integer.
|
||||||
|
This array is replaced during a strategic merge patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
- operator
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
type: object
|
||||||
|
x-kubernetes-map-type: atomic
|
||||||
|
weight:
|
||||||
|
description: Weight associated with matching the corresponding
|
||||||
|
nodeSelectorTerm, in the range 1-100.
|
||||||
|
format: int32
|
||||||
|
type: integer
|
||||||
|
required:
|
||||||
|
- preference
|
||||||
|
- weight
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
description: |-
|
||||||
|
If the affinity requirements specified by this field are not met at
|
||||||
|
scheduling time, the pod will not be scheduled onto the node.
|
||||||
|
If the affinity requirements specified by this field cease to be met
|
||||||
|
at some point during pod execution (e.g. due to an update), the system
|
||||||
|
may or may not try to eventually evict the pod from its node.
|
||||||
|
properties:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
description: Required. A list of node selector terms. The
|
||||||
|
terms are ORed.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
A null or empty node selector term matches no objects. The requirements of
|
||||||
|
them are ANDed.
|
||||||
|
The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
|
||||||
|
properties:
|
||||||
|
matchExpressions:
|
||||||
|
description: A list of node selector requirements by
|
||||||
|
node's labels.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
A node selector requirement is a selector that contains values, a key, and an operator
|
||||||
|
that relates the key and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The label key that the selector applies
|
||||||
|
to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
description: |-
|
||||||
|
Represents a key's relationship to a set of values.
|
||||||
|
Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
description: |-
|
||||||
|
An array of string values. If the operator is In or NotIn,
|
||||||
|
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||||
|
the values array must be empty. If the operator is Gt or Lt, the values
|
||||||
|
array must have a single element, which will be interpreted as an integer.
|
||||||
|
This array is replaced during a strategic merge patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
- operator
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
matchFields:
|
||||||
|
description: A list of node selector requirements by
|
||||||
|
node's fields.
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
A node selector requirement is a selector that contains values, a key, and an operator
|
||||||
|
that relates the key and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The label key that the selector applies
|
||||||
|
to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
description: |-
|
||||||
|
Represents a key's relationship to a set of values.
|
||||||
|
Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
description: |-
|
||||||
|
An array of string values. If the operator is In or NotIn,
|
||||||
|
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||||
|
the values array must be empty. If the operator is Gt or Lt, the values
|
||||||
|
array must have a single element, which will be interpreted as an integer.
|
||||||
|
This array is replaced during a strategic merge patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
- operator
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
type: object
|
||||||
|
x-kubernetes-map-type: atomic
|
||||||
|
type: array
|
||||||
|
x-kubernetes-list-type: atomic
|
||||||
|
required:
|
||||||
|
- nodeSelectorTerms
|
||||||
|
type: object
|
||||||
|
x-kubernetes-map-type: atomic
|
||||||
|
type: object
|
||||||
|
nodeSelector:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: NodeSelector specifies a selector for installation of
|
||||||
|
NVIDIA driver
|
||||||
|
type: object
|
||||||
|
priorityClassName:
|
||||||
|
description: 'Optional: Set priorityClassName'
|
||||||
|
type: string
|
||||||
|
rdma:
|
||||||
|
description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
|
||||||
|
driver
|
||||||
|
properties:
|
||||||
|
enabled:
|
||||||
|
description: Enabled indicates if GPUDirect RDMA is enabled through
|
||||||
|
GPU operator
|
||||||
|
type: boolean
|
||||||
|
useHostMofed:
|
||||||
|
description: UseHostMOFED indicates to use MOFED drivers directly
|
||||||
|
installed on the host to enable GPUDirect RDMA
|
||||||
|
type: boolean
|
||||||
|
type: object
|
||||||
|
readinessProbe:
|
||||||
|
description: NVIDIA Driver container readiness probe settings
|
||||||
|
properties:
|
||||||
|
failureThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||||
|
Defaults to 3. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
initialDelaySeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after the container has started before liveness probes are initiated.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
type: integer
|
||||||
|
periodSeconds:
|
||||||
|
description: |-
|
||||||
|
How often (in seconds) to perform the probe.
|
||||||
|
Default to 10 seconds. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
successThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||||
|
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
timeoutSeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after which the probe times out.
|
||||||
|
Defaults to 1 second. Minimum value is 1.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
|
repoConfig:
|
||||||
|
description: 'Optional: Custom repo configuration for NVIDIA Driver
|
||||||
|
container'
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
repository:
|
||||||
|
description: NVIDIA Driver repository
|
||||||
|
type: string
|
||||||
|
resources:
|
||||||
|
description: 'Optional: Define resources requests and limits for each
|
||||||
|
pod'
|
||||||
|
properties:
|
||||||
|
limits:
|
||||||
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: integer
|
||||||
|
- type: string
|
||||||
|
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||||
|
x-kubernetes-int-or-string: true
|
||||||
|
description: |-
|
||||||
|
Limits describes the maximum amount of compute resources allowed.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||||||
|
type: object
|
||||||
|
requests:
|
||||||
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: integer
|
||||||
|
- type: string
|
||||||
|
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||||
|
x-kubernetes-int-or-string: true
|
||||||
|
description: |-
|
||||||
|
Requests describes the minimum amount of compute resources required.
|
||||||
|
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||||||
|
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
startupProbe:
|
||||||
|
description: NVIDIA Driver container startup probe settings
|
||||||
|
properties:
|
||||||
|
failureThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||||
|
Defaults to 3. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
initialDelaySeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after the container has started before liveness probes are initiated.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
type: integer
|
||||||
|
periodSeconds:
|
||||||
|
description: |-
|
||||||
|
How often (in seconds) to perform the probe.
|
||||||
|
Default to 10 seconds. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
successThreshold:
|
||||||
|
description: |-
|
||||||
|
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||||
|
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
timeoutSeconds:
|
||||||
|
description: |-
|
||||||
|
Number of seconds after which the probe times out.
|
||||||
|
Defaults to 1 second. Minimum value is 1.
|
||||||
|
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||||
|
format: int32
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
|
tolerations:
|
||||||
|
description: 'Optional: Set tolerations'
|
||||||
|
items:
|
||||||
|
description: |-
|
||||||
|
The pod this Toleration is attached to tolerates any taint that matches
|
||||||
|
the triple <key,value,effect> using the matching operator <operator>.
|
||||||
|
properties:
|
||||||
|
effect:
|
||||||
|
description: |-
|
||||||
|
Effect indicates the taint effect to match. Empty means match all taint effects.
|
||||||
|
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
||||||
|
type: string
|
||||||
|
key:
|
||||||
|
description: |-
|
||||||
|
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
||||||
|
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
description: |-
|
||||||
|
Operator represents a key's relationship to the value.
|
||||||
|
Valid operators are Exists and Equal. Defaults to Equal.
|
||||||
|
Exists is equivalent to wildcard for value, so that a pod can
|
||||||
|
tolerate all taints of a particular category.
|
||||||
|
type: string
|
||||||
|
tolerationSeconds:
|
||||||
|
description: |-
|
||||||
|
TolerationSeconds represents the period of time the toleration (which must be
|
||||||
|
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
||||||
|
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
||||||
|
negative values will be treated as 0 (evict immediately) by the system.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
value:
|
||||||
|
description: |-
|
||||||
|
Value is the taint value the toleration matches to.
|
||||||
|
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
useOpenKernelModules:
|
||||||
|
description: UseOpenKernelModules indicates if the open GPU kernel
|
||||||
|
modules should be used
|
||||||
|
type: boolean
|
||||||
|
usePrecompiled:
|
||||||
|
description: UsePrecompiled indicates if deployment of NVIDIA Driver
|
||||||
|
using pre-compiled modules is enabled
|
||||||
|
type: boolean
|
||||||
|
x-kubernetes-validations:
|
||||||
|
- message: usePrecompiled is an immutable field. Please create a new
|
||||||
|
NvidiaDriver resource instead when you want to change this setting.
|
||||||
|
rule: self == oldSelf
|
||||||
|
version:
|
||||||
|
description: NVIDIA Driver version (or just branch for precompiled
|
||||||
|
drivers)
|
||||||
|
type: string
|
||||||
|
virtualTopologyConfig:
|
||||||
|
description: 'Optional: Virtual Topology Daemon configuration for
|
||||||
|
NVIDIA vGPU drivers'
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
description: 'Optional: Config name representing virtual topology
|
||||||
|
daemon configuration file nvidia-topologyd.conf'
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- driverType
|
||||||
|
- image
|
||||||
|
type: object
|
||||||
|
status:
|
||||||
|
description: NVIDIADriverStatus defines the observed state of NVIDIADriver
|
||||||
|
properties:
|
||||||
|
conditions:
|
||||||
|
description: Conditions is a list of conditions representing the NVIDIADriver's
|
||||||
|
current state.
|
||||||
|
items:
|
||||||
|
description: Condition contains details for one aspect of the current
|
||||||
|
state of this API Resource.
|
||||||
|
properties:
|
||||||
|
lastTransitionTime:
|
||||||
|
description: |-
|
||||||
|
lastTransitionTime is the last time the condition transitioned from one status to another.
|
||||||
|
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
message:
|
||||||
|
description: |-
|
||||||
|
message is a human readable message indicating details about the transition.
|
||||||
|
This may be an empty string.
|
||||||
|
maxLength: 32768
|
||||||
|
type: string
|
||||||
|
observedGeneration:
|
||||||
|
description: |-
|
||||||
|
observedGeneration represents the .metadata.generation that the condition was set based upon.
|
||||||
|
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
|
||||||
|
with respect to the current state of the instance.
|
||||||
|
format: int64
|
||||||
|
minimum: 0
|
||||||
|
type: integer
|
||||||
|
reason:
|
||||||
|
description: |-
|
||||||
|
reason contains a programmatic identifier indicating the reason for the condition's last transition.
|
||||||
|
Producers of specific condition types may define expected values and meanings for this field,
|
||||||
|
and whether the values are considered a guaranteed API.
|
||||||
|
The value should be a CamelCase string.
|
||||||
|
This field may not be empty.
|
||||||
|
maxLength: 1024
|
||||||
|
minLength: 1
|
||||||
|
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
|
||||||
|
type: string
|
||||||
|
status:
|
||||||
|
description: status of the condition, one of True, False, Unknown.
|
||||||
|
enum:
|
||||||
|
- "True"
|
||||||
|
- "False"
|
||||||
|
- Unknown
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
description: type of condition in CamelCase or in foo.example.com/CamelCase.
|
||||||
|
maxLength: 316
|
||||||
|
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- lastTransitionTime
|
||||||
|
- message
|
||||||
|
- reason
|
||||||
|
- status
|
||||||
|
- type
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
namespace:
|
||||||
|
description: Namespace indicates a namespace in which the operator
|
||||||
|
and driver are installed
|
||||||
|
type: string
|
||||||
|
state:
|
||||||
|
description: |-
|
||||||
|
INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
|
||||||
|
Important: Run "make" to regenerate code after modifying this file
|
||||||
|
State indicates status of NVIDIADriver instance
|
||||||
|
enum:
|
||||||
|
- ignored
|
||||||
|
- ready
|
||||||
|
- notReady
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- state
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
subresources:
|
||||||
|
status: {}
|
80
charts/gpu-operator/templates/_helpers.tpl
Normal file
80
charts/gpu-operator/templates/_helpers.tpl
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
{{/* vim: set filetype=mustache: */}}
|
||||||
|
{{/*
Short chart name: .Values.nameOverride when it is set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "gpu-operator.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Fully qualified app name.

Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or .Values.nameOverride);
  3. "<release name>-<chart name or nameOverride>" otherwise.

Whatever is chosen is truncated to 63 characters, because some Kubernetes name
fields are limited to this (by the DNS naming spec), and a trailing "-" is
stripped.
*/}}
{{- define "gpu-operator.fullname" -}}
{{- $resolved := "" -}}
{{- if .Values.fullnameOverride -}}
{{- $resolved = .Values.fullnameOverride -}}
{{- else -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- if contains $base .Release.Name -}}
{{- $resolved = .Release.Name -}}
{{- else -}}
{{- $resolved = printf "%s-%s" .Release.Name $base -}}
{{- end -}}
{{- end -}}
{{- $resolved | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
"<chart name>-<chart version>" as used by the helm.sh/chart label.
Every "+" is replaced with "_", the value is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "gpu-operator.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Common labels.

Emits the name, chart, instance and managed-by labels, plus the version label
when .Chart.AppVersion is set, followed by any entries from
.Values.operator.labels.

Every templated value is quoted: label values must be strings, and an unquoted
release name or name override such as 123 or true would otherwise be parsed by
YAML as a number or boolean.
*/}}
{{- define "gpu-operator.labels" -}}
app.kubernetes.io/name: {{ include "gpu-operator.name" . | quote }}
helm.sh/chart: {{ include "gpu-operator.chart" . | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
{{- if .Values.operator.labels }}
{{ toYaml .Values.operator.labels }}
{{- end }}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Operand labels.

Emits the helm.sh/chart label and an app.kubernetes.io/managed-by label set to
the chart name (not the release service), followed by any entries from
.Values.daemonsets.labels.

Templated values are quoted so that a name override YAML would read as a
number or boolean still renders as a string label value.
*/}}
{{- define "gpu-operator.operand-labels" -}}
helm.sh/chart: {{ include "gpu-operator.chart" . | quote }}
app.kubernetes.io/managed-by: {{ include "gpu-operator.name" . | quote }}
{{- if .Values.daemonsets.labels }}
{{ toYaml .Values.daemonsets.labels }}
{{- end }}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Match labels.

Emits the name, instance and managed-by labels, plus the version label when
.Chart.AppVersion is set. Templated values are quoted so that a release name
or name override YAML would read as a number or boolean still renders as a
string label value.

NOTE(review): the set includes app.kubernetes.io/version, which changes with
the chart's appVersion. Confirm this helper is not used for a selector that
must stay stable across upgrades.
*/}}
{{- define "gpu-operator.matchLabels" -}}
app.kubernetes.io/name: {{ include "gpu-operator.name" . | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
{{- end -}}
|
||||||
|
|
||||||
|
{{/*
Operator image reference in the form "<repository>/<image>:<tag>".
The tag is .Values.operator.version, falling back to .Chart.AppVersion when
no version is set.
*/}}
{{- define "gpu-operator.fullimage" -}}
{{- $repository := .Values.operator.repository -}}
{{- $image := .Values.operator.image -}}
{{- $tag := .Values.operator.version | default .Chart.AppVersion -}}
{{- $repository -}}/{{- $image -}}:{{- $tag -}}
{{- end }}
|
||||||
|
|
||||||
|
{{/*
Driver manager image reference in the form "<repository>/<image>:<tag>",
built from .Values.driver.manager. Unlike the operator image, the tag has no
fallback: .Values.driver.manager.version is used as-is.
*/}}
{{- define "driver-manager.fullimage" -}}
{{- $repository := .Values.driver.manager.repository -}}
{{- $image := .Values.driver.manager.image -}}
{{- $tag := .Values.driver.manager.version -}}
{{- $repository -}}/{{- $image -}}:{{- $tag -}}
{{- end }}
|
45
charts/gpu-operator/templates/cleanup_crd.yaml
Normal file
45
charts/gpu-operator/templates/cleanup_crd.yaml
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
{{/*
Pre-delete hook Job, rendered only when .Values.operator.cleanupCRD is set.
It runs kubectl from the operator image to delete the "cluster-policy"
ClusterPolicy and then the clusterpolicies.nvidia.com CRD.
*/}}
{{- if .Values.operator.cleanupCRD }}
apiVersion: batch/v1
kind: Job
metadata:
  name: gpu-operator-cleanup-crd
  namespace: {{ .Release.Namespace | quote }}
  annotations:
    "helm.sh/hook": pre-delete
    "helm.sh/hook-weight": "1"
    "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
spec:
  template:
    metadata:
      name: gpu-operator-cleanup-crd
      labels:
        {{- include "gpu-operator.labels" . | nindent 8 }}
        app.kubernetes.io/component: "gpu-operator"
    spec:
      serviceAccountName: gpu-operator
      {{- if .Values.operator.imagePullSecrets }}
      imagePullSecrets:
      {{- range .Values.operator.imagePullSecrets }}
        - name: {{ . | quote }}
      {{- end }}
      {{- end }}
      {{- with .Values.operator.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: cleanup-crd
          image: {{ include "gpu-operator.fullimage" . | quote }}
          imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
          # The shell exits with the status of the last command. Without
          # --ignore-not-found, a CR or CRD that is already gone (for example
          # on a retried uninstall) makes kubectl fail, and with
          # restartPolicy OnFailure the hook is retried and can fail the
          # uninstall.
          command:
            - /bin/sh
            - -c
            - >
              kubectl delete clusterpolicy cluster-policy --ignore-not-found;
              kubectl delete crd clusterpolicies.nvidia.com --ignore-not-found;

      restartPolicy: OnFailure
{{- end }}
|
683
charts/gpu-operator/templates/clusterpolicy.yaml
Normal file
683
charts/gpu-operator/templates/clusterpolicy.yaml
Normal file
|
@ -0,0 +1,683 @@
|
||||||
|
{{- /*
Renders the ClusterPolicy custom resource named cluster-policy from chart
values. Most optional fields are wrapped in an if-guard and are emitted only
when the corresponding value is set; list and map values are rendered with
toYaml and an absolute nindent.
*/ -}}
apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
  name: cluster-policy
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
  {{- if .Values.operator.cleanupCRD }}
  # CR cleanup is handled during pre-delete hook
  # Add below annotation so that helm doesn't attempt to cleanup CR twice
  annotations:
    "helm.sh/resource-policy": keep
  {{- end }}
spec:
  hostPaths:
    rootFS: {{ .Values.hostPaths.rootFS }}
    driverInstallDir: {{ .Values.hostPaths.driverInstallDir }}
  operator:
    {{- if .Values.operator.defaultRuntime }}
    defaultRuntime: {{ .Values.operator.defaultRuntime }}
    {{- end }}
    {{- if .Values.operator.runtimeClass }}
    runtimeClass: {{ .Values.operator.runtimeClass }}
    {{- end }}
    {{- if .Values.operator.defaultGPUMode }}
    defaultGPUMode: {{ .Values.operator.defaultGPUMode }}
    {{- end }}
    {{- if .Values.operator.initContainer }}
    initContainer:
      {{- if .Values.operator.initContainer.repository }}
      repository: {{ .Values.operator.initContainer.repository }}
      {{- end }}
      {{- if .Values.operator.initContainer.image }}
      image: {{ .Values.operator.initContainer.image }}
      {{- end }}
      {{- if .Values.operator.initContainer.version }}
      version: {{ .Values.operator.initContainer.version | quote }}
      {{- end }}
      {{- if .Values.operator.initContainer.imagePullPolicy }}
      imagePullPolicy: {{ .Values.operator.initContainer.imagePullPolicy }}
      {{- end }}
      {{- if .Values.operator.initContainer.imagePullSecrets }}
      imagePullSecrets: {{ toYaml .Values.operator.initContainer.imagePullSecrets | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.operator.use_ocp_driver_toolkit }}
    use_ocp_driver_toolkit: {{ .Values.operator.use_ocp_driver_toolkit }}
    {{- end }}
  daemonsets:
    labels:
      {{- include "gpu-operator.operand-labels" . | nindent 6 }}
    {{- if .Values.daemonsets.annotations }}
    annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }}
    {{- end }}
    {{- if .Values.daemonsets.tolerations }}
    tolerations: {{ toYaml .Values.daemonsets.tolerations | nindent 6 }}
    {{- end }}
    {{- if .Values.daemonsets.priorityClassName }}
    priorityClassName: {{ .Values.daemonsets.priorityClassName }}
    {{- end }}
    {{- if .Values.daemonsets.updateStrategy }}
    updateStrategy: {{ .Values.daemonsets.updateStrategy }}
    {{- end }}
    {{- if .Values.daemonsets.rollingUpdate }}
    rollingUpdate:
      maxUnavailable: {{ .Values.daemonsets.rollingUpdate.maxUnavailable | quote }}
    {{- end }}
  validator:
    {{- if .Values.validator.repository }}
    repository: {{ .Values.validator.repository }}
    {{- end }}
    {{- if .Values.validator.image }}
    image: {{ .Values.validator.image }}
    {{- end }}
    # Falls back to the chart appVersion when no validator version is set.
    version: {{ .Values.validator.version | default .Chart.AppVersion | quote }}
    {{- if .Values.validator.imagePullPolicy }}
    imagePullPolicy: {{ .Values.validator.imagePullPolicy }}
    {{- end }}
    {{- if .Values.validator.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.validator.imagePullSecrets | nindent 8 }}
    {{- end }}
    {{- if .Values.validator.resources }}
    resources: {{ toYaml .Values.validator.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.validator.env }}
    env: {{ toYaml .Values.validator.env | nindent 6 }}
    {{- end }}
    {{- if .Values.validator.args }}
    args: {{ toYaml .Values.validator.args | nindent 6 }}
    {{- end }}
    {{- if .Values.validator.plugin }}
    plugin:
      {{- if .Values.validator.plugin.env }}
      env: {{ toYaml .Values.validator.plugin.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.cuda }}
    cuda:
      {{- if .Values.validator.cuda.env }}
      env: {{ toYaml .Values.validator.cuda.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.driver }}
    driver:
      {{- if .Values.validator.driver.env }}
      env: {{ toYaml .Values.validator.driver.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.toolkit }}
    toolkit:
      {{- if .Values.validator.toolkit.env }}
      env: {{ toYaml .Values.validator.toolkit.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.vfioPCI }}
    vfioPCI:
      {{- if .Values.validator.vfioPCI.env }}
      env: {{ toYaml .Values.validator.vfioPCI.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.vgpuManager }}
    vgpuManager:
      {{- if .Values.validator.vgpuManager.env }}
      env: {{ toYaml .Values.validator.vgpuManager.env | nindent 8 }}
      {{- end }}
    {{- end }}
    {{- if .Values.validator.vgpuDevices }}
    vgpuDevices:
      {{- if .Values.validator.vgpuDevices.env }}
      env: {{ toYaml .Values.validator.vgpuDevices.env | nindent 8 }}
      {{- end }}
    {{- end }}

  mig:
    {{- if .Values.mig.strategy }}
    strategy: {{ .Values.mig.strategy }}
    {{- end }}
  psa:
    enabled: {{ .Values.psa.enabled }}
  cdi:
    enabled: {{ .Values.cdi.enabled }}
    default: {{ .Values.cdi.default }}
  driver:
    enabled: {{ .Values.driver.enabled }}
    useNvidiaDriverCRD: {{ .Values.driver.nvidiaDriverCRD.enabled }}
    useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
    usePrecompiled: {{ .Values.driver.usePrecompiled }}
    {{- if .Values.driver.repository }}
    repository: {{ .Values.driver.repository }}
    {{- end }}
    {{- if .Values.driver.image }}
    image: {{ .Values.driver.image }}
    {{- end }}
    {{- if .Values.driver.version }}
    version: {{ .Values.driver.version | quote }}
    {{- end }}
    {{- if .Values.driver.imagePullPolicy }}
    imagePullPolicy: {{ .Values.driver.imagePullPolicy }}
    {{- end }}
    {{- if .Values.driver.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.driver.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.startupProbe }}
    startupProbe: {{ toYaml .Values.driver.startupProbe | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.livenessProbe }}
    livenessProbe: {{ toYaml .Values.driver.livenessProbe | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.readinessProbe }}
    readinessProbe: {{ toYaml .Values.driver.readinessProbe | nindent 6 }}
    {{- end }}
    rdma:
      enabled: {{ .Values.driver.rdma.enabled }}
      useHostMofed: {{ .Values.driver.rdma.useHostMofed }}
    manager:
      {{- if .Values.driver.manager.repository }}
      repository: {{ .Values.driver.manager.repository }}
      {{- end }}
      {{- if .Values.driver.manager.image }}
      image: {{ .Values.driver.manager.image }}
      {{- end }}
      {{- if .Values.driver.manager.version }}
      version: {{ .Values.driver.manager.version | quote }}
      {{- end }}
      {{- if .Values.driver.manager.imagePullPolicy }}
      imagePullPolicy: {{ .Values.driver.manager.imagePullPolicy }}
      {{- end }}
      {{- if .Values.driver.manager.env }}
      env: {{ toYaml .Values.driver.manager.env | nindent 8 }}
      {{- end }}
    {{- if .Values.driver.repoConfig }}
    repoConfig: {{ toYaml .Values.driver.repoConfig | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.certConfig }}
    certConfig: {{ toYaml .Values.driver.certConfig | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.licensingConfig }}
    licensingConfig: {{ toYaml .Values.driver.licensingConfig | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.virtualTopology }}
    virtualTopology: {{ toYaml .Values.driver.virtualTopology | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.kernelModuleConfig }}
    kernelModuleConfig: {{ toYaml .Values.driver.kernelModuleConfig | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.resources }}
    resources: {{ toYaml .Values.driver.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.env }}
    env: {{ toYaml .Values.driver.env | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.args }}
    args: {{ toYaml .Values.driver.args | nindent 6 }}
    {{- end }}
    {{- if .Values.driver.upgradePolicy }}
    # The nested waitForCompletion, gpuPodDeletion and drain values are read
    # without their own guards once upgradePolicy is set.
    upgradePolicy:
      autoUpgrade: {{ .Values.driver.upgradePolicy.autoUpgrade | default false }}
      maxParallelUpgrades: {{ .Values.driver.upgradePolicy.maxParallelUpgrades | default 0 }}
      maxUnavailable: {{ .Values.driver.upgradePolicy.maxUnavailable | default "25%" }}
      waitForCompletion:
        timeoutSeconds: {{ .Values.driver.upgradePolicy.waitForCompletion.timeoutSeconds }}
        {{- if .Values.driver.upgradePolicy.waitForCompletion.podSelector }}
        podSelector: {{ .Values.driver.upgradePolicy.waitForCompletion.podSelector }}
        {{- end }}
      # The podDeletion field is filled from the gpuPodDeletion values key.
      podDeletion:
        force: {{ .Values.driver.upgradePolicy.gpuPodDeletion.force | default false }}
        timeoutSeconds: {{ .Values.driver.upgradePolicy.gpuPodDeletion.timeoutSeconds }}
        deleteEmptyDir: {{ .Values.driver.upgradePolicy.gpuPodDeletion.deleteEmptyDir | default false }}
      drain:
        enable: {{ .Values.driver.upgradePolicy.drain.enable | default false }}
        force: {{ .Values.driver.upgradePolicy.drain.force | default false }}
        {{- if .Values.driver.upgradePolicy.drain.podSelector }}
        podSelector: {{ .Values.driver.upgradePolicy.drain.podSelector }}
        {{- end }}
        timeoutSeconds: {{ .Values.driver.upgradePolicy.drain.timeoutSeconds }}
        deleteEmptyDir: {{ .Values.driver.upgradePolicy.drain.deleteEmptyDir | default false }}
    {{- end }}
  vgpuManager:
    enabled: {{ .Values.vgpuManager.enabled }}
    {{- if .Values.vgpuManager.repository }}
    repository: {{ .Values.vgpuManager.repository }}
    {{- end }}
    {{- if .Values.vgpuManager.image }}
    image: {{ .Values.vgpuManager.image }}
    {{- end }}
    {{- if .Values.vgpuManager.version }}
    version: {{ .Values.vgpuManager.version | quote }}
    {{- end }}
    {{- if .Values.vgpuManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.vgpuManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.vgpuManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.vgpuManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuManager.resources }}
    resources: {{ toYaml .Values.vgpuManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuManager.env }}
    env: {{ toYaml .Values.vgpuManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuManager.args }}
    args: {{ toYaml .Values.vgpuManager.args | nindent 6 }}
    {{- end }}
    driverManager:
      {{- if .Values.vgpuManager.driverManager.repository }}
      repository: {{ .Values.vgpuManager.driverManager.repository }}
      {{- end }}
      {{- if .Values.vgpuManager.driverManager.image }}
      image: {{ .Values.vgpuManager.driverManager.image }}
      {{- end }}
      {{- if .Values.vgpuManager.driverManager.version }}
      version: {{ .Values.vgpuManager.driverManager.version | quote }}
      {{- end }}
      {{- if .Values.vgpuManager.driverManager.imagePullPolicy }}
      imagePullPolicy: {{ .Values.vgpuManager.driverManager.imagePullPolicy }}
      {{- end }}
      {{- if .Values.vgpuManager.driverManager.env }}
      env: {{ toYaml .Values.vgpuManager.driverManager.env | nindent 8 }}
      {{- end }}
  kataManager:
    enabled: {{ .Values.kataManager.enabled }}
    config: {{ toYaml .Values.kataManager.config | nindent 6 }}
    {{- if .Values.kataManager.repository }}
    repository: {{ .Values.kataManager.repository }}
    {{- end }}
    {{- if .Values.kataManager.image }}
    image: {{ .Values.kataManager.image }}
    {{- end }}
    {{- if .Values.kataManager.version }}
    version: {{ .Values.kataManager.version | quote }}
    {{- end }}
    {{- if .Values.kataManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.kataManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.kataManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.kataManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.kataManager.resources }}
    resources: {{ toYaml .Values.kataManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.kataManager.env }}
    env: {{ toYaml .Values.kataManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.kataManager.args }}
    args: {{ toYaml .Values.kataManager.args | nindent 6 }}
    {{- end }}
  vfioManager:
    enabled: {{ .Values.vfioManager.enabled }}
    {{- if .Values.vfioManager.repository }}
    repository: {{ .Values.vfioManager.repository }}
    {{- end }}
    {{- if .Values.vfioManager.image }}
    image: {{ .Values.vfioManager.image }}
    {{- end }}
    {{- if .Values.vfioManager.version }}
    version: {{ .Values.vfioManager.version | quote }}
    {{- end }}
    {{- if .Values.vfioManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.vfioManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.vfioManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.vfioManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.vfioManager.resources }}
    resources: {{ toYaml .Values.vfioManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.vfioManager.env }}
    env: {{ toYaml .Values.vfioManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.vfioManager.args }}
    args: {{ toYaml .Values.vfioManager.args | nindent 6 }}
    {{- end }}
    driverManager:
      {{- if .Values.vfioManager.driverManager.repository }}
      repository: {{ .Values.vfioManager.driverManager.repository }}
      {{- end }}
      {{- if .Values.vfioManager.driverManager.image }}
      image: {{ .Values.vfioManager.driverManager.image }}
      {{- end }}
      {{- if .Values.vfioManager.driverManager.version }}
      version: {{ .Values.vfioManager.driverManager.version | quote }}
      {{- end }}
      {{- if .Values.vfioManager.driverManager.imagePullPolicy }}
      imagePullPolicy: {{ .Values.vfioManager.driverManager.imagePullPolicy }}
      {{- end }}
      {{- if .Values.vfioManager.driverManager.env }}
      env: {{ toYaml .Values.vfioManager.driverManager.env | nindent 8 }}
      {{- end }}
  vgpuDeviceManager:
    enabled: {{ .Values.vgpuDeviceManager.enabled }}
    {{- if .Values.vgpuDeviceManager.repository }}
    repository: {{ .Values.vgpuDeviceManager.repository }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.image }}
    image: {{ .Values.vgpuDeviceManager.image }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.version }}
    version: {{ .Values.vgpuDeviceManager.version | quote }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.vgpuDeviceManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.vgpuDeviceManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.resources }}
    resources: {{ toYaml .Values.vgpuDeviceManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.env }}
    env: {{ toYaml .Values.vgpuDeviceManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.args }}
    args: {{ toYaml .Values.vgpuDeviceManager.args | nindent 6 }}
    {{- end }}
    {{- if .Values.vgpuDeviceManager.config }}
    config: {{ toYaml .Values.vgpuDeviceManager.config | nindent 6 }}
    {{- end }}
  ccManager:
    enabled: {{ .Values.ccManager.enabled }}
    defaultMode: {{ .Values.ccManager.defaultMode | quote }}
    {{- if .Values.ccManager.repository }}
    repository: {{ .Values.ccManager.repository }}
    {{- end }}
    {{- if .Values.ccManager.image }}
    image: {{ .Values.ccManager.image }}
    {{- end }}
    {{- if .Values.ccManager.version }}
    version: {{ .Values.ccManager.version | quote }}
    {{- end }}
    {{- if .Values.ccManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.ccManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.ccManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.ccManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.ccManager.resources }}
    resources: {{ toYaml .Values.ccManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.ccManager.env }}
    # Render the same ccManager.env list that the guard above checks.
    env: {{ toYaml .Values.ccManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.ccManager.args }}
    args: {{ toYaml .Values.ccManager.args | nindent 6 }}
    {{- end }}
  toolkit:
    enabled: {{ .Values.toolkit.enabled }}
    {{- if .Values.toolkit.repository }}
    repository: {{ .Values.toolkit.repository }}
    {{- end }}
    {{- if .Values.toolkit.image }}
    image: {{ .Values.toolkit.image }}
    {{- end }}
    {{- if .Values.toolkit.version }}
    version: {{ .Values.toolkit.version | quote }}
    {{- end }}
    {{- if .Values.toolkit.imagePullPolicy }}
    imagePullPolicy: {{ .Values.toolkit.imagePullPolicy }}
    {{- end }}
    {{- if .Values.toolkit.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.toolkit.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.toolkit.resources }}
    resources: {{ toYaml .Values.toolkit.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.toolkit.env }}
    env: {{ toYaml .Values.toolkit.env | nindent 6 }}
    {{- end }}
    {{- if .Values.toolkit.installDir }}
    installDir: {{ .Values.toolkit.installDir }}
    {{- end }}
  devicePlugin:
    enabled: {{ .Values.devicePlugin.enabled }}
    {{- if .Values.devicePlugin.repository }}
    repository: {{ .Values.devicePlugin.repository }}
    {{- end }}
    {{- if .Values.devicePlugin.image }}
    image: {{ .Values.devicePlugin.image }}
    {{- end }}
    {{- if .Values.devicePlugin.version }}
    version: {{ .Values.devicePlugin.version | quote }}
    {{- end }}
    {{- if .Values.devicePlugin.imagePullPolicy }}
    imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy }}
    {{- end }}
    {{- if .Values.devicePlugin.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.devicePlugin.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.devicePlugin.resources }}
    resources: {{ toYaml .Values.devicePlugin.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.devicePlugin.env }}
    env: {{ toYaml .Values.devicePlugin.env | nindent 6 }}
    {{- end }}
    {{- if .Values.devicePlugin.args }}
    args: {{ toYaml .Values.devicePlugin.args | nindent 6 }}
    {{- end }}
    {{- if .Values.devicePlugin.config.name }}
    config:
      name: {{ .Values.devicePlugin.config.name }}
      default: {{ .Values.devicePlugin.config.default }}
    {{- end }}
  dcgm:
    enabled: {{ .Values.dcgm.enabled }}
    {{- if .Values.dcgm.repository }}
    repository: {{ .Values.dcgm.repository }}
    {{- end }}
    {{- if .Values.dcgm.image }}
    image: {{ .Values.dcgm.image }}
    {{- end }}
    {{- if .Values.dcgm.version }}
    version: {{ .Values.dcgm.version | quote }}
    {{- end }}
    {{- if .Values.dcgm.imagePullPolicy }}
    imagePullPolicy: {{ .Values.dcgm.imagePullPolicy }}
    {{- end }}
    {{- if .Values.dcgm.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.dcgm.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgm.resources }}
    resources: {{ toYaml .Values.dcgm.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgm.env }}
    env: {{ toYaml .Values.dcgm.env | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgm.args }}
    args: {{ toYaml .Values.dcgm.args | nindent 6 }}
    {{- end }}
  dcgmExporter:
    enabled: {{ .Values.dcgmExporter.enabled }}
    {{- if .Values.dcgmExporter.repository }}
    repository: {{ .Values.dcgmExporter.repository }}
    {{- end }}
    {{- if .Values.dcgmExporter.image }}
    image: {{ .Values.dcgmExporter.image }}
    {{- end }}
    {{- if .Values.dcgmExporter.version }}
    version: {{ .Values.dcgmExporter.version | quote }}
    {{- end }}
    {{- if .Values.dcgmExporter.imagePullPolicy }}
    imagePullPolicy: {{ .Values.dcgmExporter.imagePullPolicy }}
    {{- end }}
    {{- if .Values.dcgmExporter.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.dcgmExporter.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgmExporter.resources }}
    resources: {{ toYaml .Values.dcgmExporter.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgmExporter.env }}
    env: {{ toYaml .Values.dcgmExporter.env | nindent 6 }}
    {{- end }}
    {{- if .Values.dcgmExporter.args }}
    args: {{ toYaml .Values.dcgmExporter.args | nindent 6 }}
    {{- end }}
    {{- if and (.Values.dcgmExporter.config) (.Values.dcgmExporter.config.name) }}
    config:
      name: {{ .Values.dcgmExporter.config.name }}
    {{- end }}
    {{- if .Values.dcgmExporter.serviceMonitor }}
    serviceMonitor: {{ toYaml .Values.dcgmExporter.serviceMonitor | nindent 6 }}
    {{- end }}
  gfd:
    enabled: {{ .Values.gfd.enabled }}
    {{- if .Values.gfd.repository }}
    repository: {{ .Values.gfd.repository }}
    {{- end }}
    {{- if .Values.gfd.image }}
    image: {{ .Values.gfd.image }}
    {{- end }}
    {{- if .Values.gfd.version }}
    version: {{ .Values.gfd.version | quote }}
    {{- end }}
    {{- if .Values.gfd.imagePullPolicy }}
    imagePullPolicy: {{ .Values.gfd.imagePullPolicy }}
    {{- end }}
    {{- if .Values.gfd.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.gfd.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.gfd.resources }}
    resources: {{ toYaml .Values.gfd.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.gfd.env }}
    env: {{ toYaml .Values.gfd.env | nindent 6 }}
    {{- end }}
    {{- if .Values.gfd.args }}
    args: {{ toYaml .Values.gfd.args | nindent 6 }}
    {{- end }}
  migManager:
    enabled: {{ .Values.migManager.enabled }}
    {{- if .Values.migManager.repository }}
    repository: {{ .Values.migManager.repository }}
    {{- end }}
    {{- if .Values.migManager.image }}
    image: {{ .Values.migManager.image }}
    {{- end }}
    {{- if .Values.migManager.version }}
    version: {{ .Values.migManager.version | quote }}
    {{- end }}
    {{- if .Values.migManager.imagePullPolicy }}
    imagePullPolicy: {{ .Values.migManager.imagePullPolicy }}
    {{- end }}
    {{- if .Values.migManager.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.migManager.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.migManager.resources }}
    resources: {{ toYaml .Values.migManager.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.migManager.env }}
    env: {{ toYaml .Values.migManager.env | nindent 6 }}
    {{- end }}
    {{- if .Values.migManager.args }}
    args: {{ toYaml .Values.migManager.args | nindent 6 }}
    {{- end }}
    {{- if .Values.migManager.config }}
    config:
      name: {{ .Values.migManager.config.name }}
      default: {{ .Values.migManager.config.default }}
    {{- end }}
    {{- if .Values.migManager.gpuClientsConfig }}
    gpuClientsConfig: {{ toYaml .Values.migManager.gpuClientsConfig | nindent 6 }}
    {{- end }}
  nodeStatusExporter:
    enabled: {{ .Values.nodeStatusExporter.enabled }}
    {{- if .Values.nodeStatusExporter.repository }}
    repository: {{ .Values.nodeStatusExporter.repository }}
    {{- end }}
    {{- if .Values.nodeStatusExporter.image }}
    image: {{ .Values.nodeStatusExporter.image }}
    {{- end }}
    # Falls back to the chart appVersion when no exporter version is set.
    version: {{ .Values.nodeStatusExporter.version | default .Chart.AppVersion | quote }}
    {{- if .Values.nodeStatusExporter.imagePullPolicy }}
    imagePullPolicy: {{ .Values.nodeStatusExporter.imagePullPolicy }}
    {{- end }}
    {{- if .Values.nodeStatusExporter.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.nodeStatusExporter.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.nodeStatusExporter.resources }}
    resources: {{ toYaml .Values.nodeStatusExporter.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.nodeStatusExporter.env }}
    env: {{ toYaml .Values.nodeStatusExporter.env | nindent 6 }}
    {{- end }}
    {{- if .Values.nodeStatusExporter.args }}
    args: {{ toYaml .Values.nodeStatusExporter.args | nindent 6 }}
    {{- end }}
  # The whole gds section is omitted unless gds.enabled is set.
  {{- if .Values.gds.enabled }}
  gds:
    enabled: {{ .Values.gds.enabled }}
    {{- if .Values.gds.repository }}
    repository: {{ .Values.gds.repository }}
    {{- end }}
    {{- if .Values.gds.image }}
    image: {{ .Values.gds.image }}
    {{- end }}
    version: {{ .Values.gds.version | quote }}
    {{- if .Values.gds.imagePullPolicy }}
    imagePullPolicy: {{ .Values.gds.imagePullPolicy }}
    {{- end }}
    {{- if .Values.gds.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.gds.imagePullSecrets | nindent 8 }}
    {{- end }}
    {{- if .Values.gds.env }}
    env: {{ toYaml .Values.gds.env | nindent 6 }}
    {{- end }}
    {{- if .Values.gds.args }}
    args: {{ toYaml .Values.gds.args | nindent 6 }}
    {{- end }}
  {{- end }}
  # The gdrcopy section is emitted whenever a gdrcopy values block exists.
  {{- if .Values.gdrcopy }}
  gdrcopy:
    enabled: {{ .Values.gdrcopy.enabled | default false }}
    {{- if .Values.gdrcopy.repository }}
    repository: {{ .Values.gdrcopy.repository }}
    {{- end }}
    {{- if .Values.gdrcopy.image }}
    image: {{ .Values.gdrcopy.image }}
    {{- end }}
    version: {{ .Values.gdrcopy.version | quote }}
    {{- if .Values.gdrcopy.imagePullPolicy }}
    imagePullPolicy: {{ .Values.gdrcopy.imagePullPolicy }}
    {{- end }}
    {{- if .Values.gdrcopy.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.gdrcopy.imagePullSecrets | nindent 8 }}
    {{- end }}
    {{- if .Values.gdrcopy.env }}
    env: {{ toYaml .Values.gdrcopy.env | nindent 6 }}
    {{- end }}
    {{- if .Values.gdrcopy.args }}
    args: {{ toYaml .Values.gdrcopy.args | nindent 6 }}
    {{- end }}
  {{- end }}
  sandboxWorkloads:
    enabled: {{ .Values.sandboxWorkloads.enabled }}
    {{- if .Values.sandboxWorkloads.defaultWorkload }}
    defaultWorkload: {{ .Values.sandboxWorkloads.defaultWorkload }}
    {{- end }}
  sandboxDevicePlugin:
    # enabled is only written when it is truthy.
    {{- if .Values.sandboxDevicePlugin.enabled }}
    enabled: {{ .Values.sandboxDevicePlugin.enabled }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.repository }}
    repository: {{ .Values.sandboxDevicePlugin.repository }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.image }}
    image: {{ .Values.sandboxDevicePlugin.image }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.version }}
    version: {{ .Values.sandboxDevicePlugin.version | quote }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.imagePullPolicy }}
    imagePullPolicy: {{ .Values.sandboxDevicePlugin.imagePullPolicy }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.sandboxDevicePlugin.imagePullSecrets | nindent 6 }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.resources }}
    resources: {{ toYaml .Values.sandboxDevicePlugin.resources | nindent 6 }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.env }}
    env: {{ toYaml .Values.sandboxDevicePlugin.env | nindent 6 }}
    {{- end }}
    {{- if .Values.sandboxDevicePlugin.args }}
    args: {{ toYaml .Values.sandboxDevicePlugin.args | nindent 6 }}
    {{- end }}
|
146
charts/gpu-operator/templates/clusterrole.yaml
Normal file
146
charts/gpu-operator/templates/clusterrole.yaml
Normal file
|
@ -0,0 +1,146 @@
|
||||||
|
# Cluster-scoped permissions bound to the gpu-operator service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
rules:
  # OpenShift cluster configuration objects: read-only.
  - apiGroups:
      - config.openshift.io
    resources:
      - clusterversions
      - proxies
    verbs:
      - get
      - list
      - watch
  # OpenShift image streams: read-only.
  - apiGroups:
      - image.openshift.io
    resources:
      - imagestreams
    verbs:
      - get
      - list
      - watch
  # OpenShift security context constraints: full management plus "use".
  - apiGroups:
      - security.openshift.io
    resources:
      - securitycontextconstraints
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
      - use
  # Cluster-level RBAC objects: full management.
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - clusterroles
      - clusterrolebindings
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
  # Nodes: read and modify, but neither create nor delete.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
      - update
      - patch
  # Namespaces: read, create and modify, but not delete.
  - apiGroups:
      - ""
    resources:
      - namespaces
    verbs:
      - get
      - list
      - create
      - watch
      - update
      - patch
  # Events, pods and pod evictions in any namespace.
  - apiGroups:
      - ""
    resources:
      - events
      - pods
      - pods/eviction
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
  # DaemonSets cluster-wide: read-only.
  - apiGroups:
      - apps
    resources:
      - daemonsets
    verbs:
      - get
      - list
      - watch
  # nvidia.com custom resources, including their finalizers and status subresources.
  - apiGroups:
      - nvidia.com
    resources:
      - clusterpolicies
      - clusterpolicies/finalizers
      - clusterpolicies/status
      - nvidiadrivers
      - nvidiadrivers/finalizers
      - nvidiadrivers/status
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
      - deletecollection
  # Priority classes: read and create.
  - apiGroups:
      - scheduling.k8s.io
    resources:
      - priorityclasses
    verbs:
      - get
      - list
      - watch
      - create
  # Runtime classes: everything except patch.
  - apiGroups:
      - node.k8s.io
    resources:
      - runtimeclasses
    verbs:
      - get
      - list
      - create
      - update
      - watch
      - delete
  # CustomResourceDefinitions: "delete" is granted only when operator.cleanupCRD is set.
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    verbs:
      - get
      - list
      - watch
      - update
      - patch
      - create
      {{- if .Values.operator.cleanupCRD }}
      - delete
      {{- end }}
|
18
charts/gpu-operator/templates/clusterrolebinding.yaml
Normal file
18
charts/gpu-operator/templates/clusterrolebinding.yaml
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Grants the gpu-operator ClusterRole to the gpu-operator and
# node-feature-discovery service accounts in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gpu-operator
subjects:
  - kind: ServiceAccount
    name: gpu-operator
    namespace: {{ $.Release.Namespace }}
  - kind: ServiceAccount
    name: node-feature-discovery
    namespace: {{ $.Release.Namespace }}
|
14
charts/gpu-operator/templates/dcgm_exporter_config.yaml
Normal file
14
charts/gpu-operator/templates/dcgm_exporter_config.yaml
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
{{- if .Values.dcgmExporter.config }}
{{- if and .Values.dcgmExporter.config.create (not (empty .Values.dcgmExporter.config.data)) }}
# ConfigMap carrying the DCGM exporter metrics list. It is rendered only when
# dcgmExporter.config exists, its "create" flag is set and "data" is non-empty.
# The outer guard is kept separate so the inner field lookups never run
# against a missing dcgmExporter.config.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.dcgmExporter.config.name }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
data:
  dcgm-metrics.csv: |
    {{- .Values.dcgmExporter.config.data | nindent 4 }}
{{- end }}
{{- end }}
|
10
charts/gpu-operator/templates/mig_config.yaml
Normal file
10
charts/gpu-operator/templates/mig_config.yaml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
{{- if and .Values.migManager.config.create (not (empty .Values.migManager.config.data)) }}
# ConfigMap for the MIG manager, rendered only when migManager.config.create
# is set and migManager.config.data is non-empty. The data map is copied
# verbatim from values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.migManager.config.name }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
data: {{ toYaml .Values.migManager.config.data | nindent 2 }}
{{- end }}
|
107
charts/gpu-operator/templates/nodefeaturerules.yaml
Normal file
107
charts/gpu-operator/templates/nodefeaturerules.yaml
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
{{- if .Values.nfd.nodefeaturerules }}
# NodeFeatureRule set rendered when nfd.nodefeaturerules is enabled.
# It labels nodes by CPU security features (TDX, SEV-SNP, SEV-ES), by specific
# NVIDIA PCI device IDs, and finally marks nodes matching both as CC capable.
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
  name: nvidia-nfd-nodefeaturerules
spec:
  rules:
    # --- CPU security features ---
    - name: "TDX rule"
      labels:
        tdx.enabled: "true"
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            tdx.enabled: {op: IsTrue}
    - name: "TDX total keys rule"
      extendedResources:
        tdx.total_keys: "@cpu.security.tdx.total_keys"
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            tdx.enabled: {op: IsTrue}
    - name: "SEV-SNP rule"
      labels:
        sev.snp.enabled: "true"
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.snp.enabled: {op: IsTrue}
    - name: "SEV-ES rule"
      labels:
        sev.es.enabled: "true"
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.es.enabled: {op: IsTrue}
    - name: "SEV system capacities"
      extendedResources:
        sev_asids: "@cpu.security.sev.asids"
        sev_es: "@cpu.security.sev.encrypted_state_ids"
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.enabled: {op: Exists}
    # --- GPU models, matched on PCI vendor 10de plus one device ID each ---
    - name: "NVIDIA H100"
      labels:
        nvidia.com/gpu.H100: "true"
        nvidia.com/gpu.family: "hopper"
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor: {op: In, value: ["10de"]}
            device: {op: In, value: ["2339"]}
    - name: "NVIDIA H100 PCIe"
      labels:
        nvidia.com/gpu.H100.pcie: "true"
        nvidia.com/gpu.family: "hopper"
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor: {op: In, value: ["10de"]}
            device: {op: In, value: ["2331"]}
    - name: "NVIDIA H100 80GB HBM3"
      labels:
        nvidia.com/gpu.H100.HBM3: "true"
        nvidia.com/gpu.family: "hopper"
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor: {op: In, value: ["10de"]}
            device: {op: In, value: ["2330"]}
    - name: "NVIDIA H800"
      labels:
        nvidia.com/gpu.H800: "true"
        nvidia.com/gpu.family: "hopper"
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor: {op: In, value: ["10de"]}
            device: {op: In, value: ["2324"]}
    - name: "NVIDIA H800 PCIE"
      labels:
        nvidia.com/gpu.H800.pcie: "true"
        nvidia.com/gpu.family: "hopper"
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor: {op: In, value: ["10de"]}
            device: {op: In, value: ["2322"]}
    # --- Confidential computing: built on the labels of the rules above ---
    - name: "NVIDIA CC Enabled"
      labels:
        nvidia.com/cc.capable: "true"
      matchAny:  # TDX/SEV + Hopper GPU
        - matchFeatures:
            - feature: rule.matched
              matchExpressions:
                nvidia.com/gpu.family: {op: In, value: ["hopper"]}
                sev.snp.enabled: {op: IsTrue}
        - matchFeatures:
            - feature: rule.matched
              matchExpressions:
                nvidia.com/gpu.family: {op: In, value: ["hopper"]}
                tdx.enabled: {op: IsTrue}
{{- end }}
|
||||||
|
|
119
charts/gpu-operator/templates/nvidiadriver.yaml
Normal file
119
charts/gpu-operator/templates/nvidiadriver.yaml
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
{{- if and .Values.driver.nvidiaDriverCRD.enabled .Values.driver.nvidiaDriverCRD.deployDefaultCR }}
# Default NVIDIADriver custom resource. Rendered only when both
# driver.nvidiaDriverCRD.enabled and driver.nvidiaDriverCRD.deployDefaultCR are set.
apiVersion: nvidia.com/v1alpha1
kind: NVIDIADriver
metadata:
  name: default
spec:
  repository: {{ .Values.driver.repository }}
  image: {{ .Values.driver.image }}
  # Quoted so a digits-only version (for example a bare driver branch) is
  # emitted as a YAML string and not as a number. This matches how the gds
  # and gdrcopy versions are rendered further down.
  version: {{ .Values.driver.version | quote }}
  useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
  usePrecompiled: {{ .Values.driver.usePrecompiled }}
  driverType: {{ .Values.driver.nvidiaDriverCRD.driverType | default "gpu" }}
  # Metadata and scheduling settings shared with the other daemonsets.
  {{- if .Values.daemonsets.annotations }}
  annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }}
  {{- end }}
  {{- if .Values.daemonsets.labels }}
  labels: {{ toYaml .Values.daemonsets.labels | nindent 6 }}
  {{- end }}
  {{- if .Values.driver.nvidiaDriverCRD.nodeSelector }}
  nodeSelector: {{ toYaml .Values.driver.nvidiaDriverCRD.nodeSelector | nindent 6 }}
  {{- end }}
  {{- if .Values.driver.imagePullSecrets }}
  imagePullSecrets: {{ toYaml .Values.driver.imagePullSecrets | nindent 4 }}
  {{- end }}
  {{- if .Values.driver.manager }}
  manager: {{ toYaml .Values.driver.manager | nindent 4 }}
  {{- end }}
  # Optional probe overrides, copied verbatim from values.
  {{- if .Values.driver.startupProbe }}
  startupProbe: {{ toYaml .Values.driver.startupProbe | nindent 4 }}
  {{- end }}
  {{- if .Values.driver.livenessProbe }}
  livenessProbe: {{ toYaml .Values.driver.livenessProbe | nindent 4 }}
  {{- end }}
  {{- if .Values.driver.readinessProbe }}
  readinessProbe: {{ toYaml .Values.driver.readinessProbe | nindent 4 }}
  {{- end }}
  rdma:
    enabled: {{ .Values.driver.rdma.enabled }}
    useHostMofed: {{ .Values.driver.rdma.useHostMofed }}
  {{- if .Values.daemonsets.tolerations }}
  tolerations: {{ toYaml .Values.daemonsets.tolerations | nindent 6 }}
  {{- end }}
  # References to optional, separately managed configuration objects.
  {{- if .Values.driver.repoConfig.configMapName }}
  repoConfig:
    name: {{ .Values.driver.repoConfig.configMapName }}
  {{- end }}
  {{- if .Values.driver.certConfig.name }}
  certConfig:
    name: {{ .Values.driver.certConfig.name }}
  {{- end }}
  {{- if .Values.driver.licensingConfig.configMapName }}
  licensingConfig:
    name: {{ .Values.driver.licensingConfig.configMapName }}
    # "default" treats false as empty, so piping through "default true" could
    # never render false. Use the configured value whenever it is a boolean
    # and fall back to true only when it is unset.
    nlsEnabled: {{ ternary .Values.driver.licensingConfig.nlsEnabled true (kindIs "bool" .Values.driver.licensingConfig.nlsEnabled) }}
  {{- end }}
  {{- if .Values.driver.virtualTopology.config }}
  virtualTopologyConfig:
    name: {{ .Values.driver.virtualTopology.config }}
  {{- end }}
  {{- if .Values.driver.kernelModuleConfig.name }}
  kernelModuleConfig:
    name: {{ .Values.driver.kernelModuleConfig.name }}
  {{- end }}
  {{- if .Values.driver.resources }}
  resources: {{ toYaml .Values.driver.resources | nindent 6 }}
  {{- end }}
  {{- if .Values.driver.env }}
  env: {{ toYaml .Values.driver.env | nindent 6 }}
  {{- end }}
  {{- if .Values.driver.args }}
  args: {{ toYaml .Values.driver.args | nindent 6 }}
  {{- end }}
  # The gds section is emitted only when gds.enabled is set.
  {{- if .Values.gds.enabled }}
  gds:
    enabled: {{ .Values.gds.enabled }}
    {{- if .Values.gds.repository }}
    repository: {{ .Values.gds.repository }}
    {{- end }}
    {{- if .Values.gds.image }}
    image: {{ .Values.gds.image }}
    {{- end }}
    version: {{ .Values.gds.version | quote }}
    {{- if .Values.gds.imagePullPolicy }}
    imagePullPolicy: {{ .Values.gds.imagePullPolicy }}
    {{- end }}
    {{- if .Values.gds.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.gds.imagePullSecrets | nindent 8 }}
    {{- end }}
    {{- if .Values.gds.env }}
    env: {{ toYaml .Values.gds.env | nindent 6 }}
    {{- end }}
    {{- if .Values.gds.args }}
    args: {{ toYaml .Values.gds.args | nindent 6 }}
    {{- end }}
  {{- end }}
  # The gdrcopy section is emitted whenever a gdrcopy values block exists;
  # "enabled" falls back to false when it is unset.
  {{- if .Values.gdrcopy }}
  gdrcopy:
    enabled: {{ .Values.gdrcopy.enabled | default false }}
    {{- if .Values.gdrcopy.repository }}
    repository: {{ .Values.gdrcopy.repository }}
    {{- end }}
    {{- if .Values.gdrcopy.image }}
    image: {{ .Values.gdrcopy.image }}
    {{- end }}
    version: {{ .Values.gdrcopy.version | quote }}
    {{- if .Values.gdrcopy.imagePullPolicy }}
    imagePullPolicy: {{ .Values.gdrcopy.imagePullPolicy }}
    {{- end }}
    {{- if .Values.gdrcopy.imagePullSecrets }}
    imagePullSecrets: {{ toYaml .Values.gdrcopy.imagePullSecrets | nindent 8 }}
    {{- end }}
    {{- if .Values.gdrcopy.env }}
    env: {{ toYaml .Values.gdrcopy.env | nindent 6 }}
    {{- end }}
    {{- if .Values.gdrcopy.args }}
    args: {{ toYaml .Values.gdrcopy.args | nindent 6 }}
    {{- end }}
  {{- end }}
{{- end }}
|
99
charts/gpu-operator/templates/operator.yaml
Normal file
99
charts/gpu-operator/templates/operator.yaml
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
# Deployment running the gpu-operator controller as a single replica.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
    nvidia.com/gpu-driver-upgrade-drain.skip: "true"
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: "gpu-operator"
      app: "gpu-operator"
  template:
    metadata:
      labels:
        {{- include "gpu-operator.labels" . | nindent 8 }}
        app.kubernetes.io/component: "gpu-operator"
        app: "gpu-operator"
        nvidia.com/gpu-driver-upgrade-drain.skip: "true"
      annotations:
        {{- toYaml .Values.operator.annotations | nindent 8 }}
    spec:
      serviceAccountName: gpu-operator
      # Each entry of operator.imagePullSecrets is a plain secret name.
      {{- if .Values.operator.imagePullSecrets }}
      imagePullSecrets:
        {{- range .Values.operator.imagePullSecrets }}
        - name: {{ . }}
        {{- end }}
      {{- end }}
      {{- if .Values.operator.priorityClassName }}
      priorityClassName: {{ .Values.operator.priorityClassName }}
      {{- end }}
      containers:
        - name: gpu-operator
          image: {{ include "gpu-operator.fullimage" . }}
          imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
          command: ["gpu-operator"]
          # Logging flags: develMode adds --zap-devel; otherwise the time
          # encoding and log level flags are added when configured.
          args:
            - --leader-elect
            {{- if .Values.operator.logging.develMode }}
            - --zap-devel
            {{- else }}
            {{- if .Values.operator.logging.timeEncoding }}
            - --zap-time-encoding={{- .Values.operator.logging.timeEncoding }}
            {{- end }}
            {{- if .Values.operator.logging.level }}
            - --zap-log-level={{- .Values.operator.logging.level }}
            {{- end }}
            {{- end }}
          env:
            - name: WATCH_NAMESPACE
              value: ""
            # Filled in from the pod's own namespace.
            - name: OPERATOR_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: "DRIVER_MANAGER_IMAGE"
              value: "{{ include "driver-manager.fullimage" . }}"
          # Read-only view of the host's /etc/os-release (see volumes below).
          volumeMounts:
            - name: host-os-release
              mountPath: "/host-etc/os-release"
              readOnly: true
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8081
            initialDelaySeconds: 15
            periodSeconds: 20
          readinessProbe:
            httpGet:
              path: /readyz
              port: 8081
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.operator.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: metrics
              containerPort: 8080
      volumes:
        - name: host-os-release
          hostPath:
            path: "/etc/os-release"
      # Optional scheduling constraints, copied verbatim from values.
      {{- with .Values.operator.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.operator.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.operator.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
|
11
charts/gpu-operator/templates/plugin_config.yaml
Normal file
11
charts/gpu-operator/templates/plugin_config.yaml
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
{{- if and .Values.devicePlugin.config.create (not (empty .Values.devicePlugin.config.data)) }}
# ConfigMap for the device plugin, rendered only when
# devicePlugin.config.create is set and devicePlugin.config.data is non-empty.
# The data map is copied verbatim from values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.devicePlugin.config.name }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
data: {{ toYaml .Values.devicePlugin.config.data | nindent 2 }}
{{- end }}
|
||||||
|
|
49
charts/gpu-operator/templates/readonlyfs_scc.openshift.yaml
Normal file
49
charts/gpu-operator/templates/readonlyfs_scc.openshift.yaml
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
{{- if .Values.platform.openshift }}
# SecurityContextConstraints "restricted-readonly", rendered only when
# platform.openshift is set. It is assigned to the gpu-operator service
# account of the release namespace and to the system:authenticated group.
apiVersion: security.openshift.io/v1
kind: SecurityContextConstraints
metadata:
  name: restricted-readonly
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
  annotations:
    kubernetes.io/description: >-
      restricted denies access to all host features and requires
      pods to be run with a UID, read-only root filesystem and SELinux context that are
      allocated to the namespace. This SCC is more restrictive than the default
      restrictive SCC and it is used by default for authenticated users and operators and operands.
# Host access: everything is denied.
allowHostDirVolumePlugin: false
allowHostIPC: false
allowHostNetwork: false
allowHostPID: false
allowHostPorts: false
# Privileges and capabilities.
allowPrivilegeEscalation: true
allowPrivilegedContainer: false
allowedCapabilities: []
defaultAddCapabilities: []
requiredDropCapabilities:
  - KILL
  - MKNOD
  - SETUID
  - SETGID
readOnlyRootFilesystem: true
# Identity strategies.
fsGroup:
  type: MustRunAs
runAsUser:
  type: MustRunAsRange
seLinuxContext:
  type: MustRunAs
supplementalGroups:
  type: RunAsAny
# Who may use this SCC.
priority: 0
groups:
  - system:authenticated
users:
  - system:serviceaccount:{{ $.Release.Namespace }}:gpu-operator
# Volume types pods may mount under this SCC.
volumes:
  - configMap
  - downwardAPI
  - emptyDir
  - persistentVolumeClaim
  - projected
  - secret
{{- end }}
|
84
charts/gpu-operator/templates/role.yaml
Normal file
84
charts/gpu-operator/templates/role.yaml
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# Namespaced permissions bound to the gpu-operator service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
rules:
  # Namespaced RBAC objects: full management.
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - roles
      - rolebindings
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
  # Controller revisions: read-only.
  - apiGroups:
      - apps
    resources:
      - controllerrevisions
    verbs:
      - get
      - list
      - watch
  # DaemonSets: full management.
  - apiGroups:
      - apps
    resources:
      - daemonsets
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
  # Core workload and configuration objects: full management.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - endpoints
      - pods
      - pods/eviction
      - secrets
      - services
      - services/finalizers
      - serviceaccounts
    verbs:
      - create
      - get
      - list
      - watch
      - update
      - patch
      - delete
  # Leases: full management (the operator is started with --leader-elect).
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Monitoring objects: everything except patch.
  - apiGroups:
      - monitoring.coreos.com
    resources:
      - servicemonitors
      - prometheusrules
    verbs:
      - get
      - list
      - create
      - watch
      - update
      - delete
|
15
charts/gpu-operator/templates/rolebinding.yaml
Normal file
15
charts/gpu-operator/templates/rolebinding.yaml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Grants the gpu-operator Role to the gpu-operator service account
# in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: gpu-operator
subjects:
  - kind: ServiceAccount
    name: gpu-operator
    namespace: {{ $.Release.Namespace }}
|
7
charts/gpu-operator/templates/serviceaccount.yaml
Normal file
7
charts/gpu-operator/templates/serviceaccount.yaml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Service account named gpu-operator, carrying the chart's common labels.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: gpu-operator
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
|
95
charts/gpu-operator/templates/upgrade_crd.yaml
Normal file
95
charts/gpu-operator/templates/upgrade_crd.yaml
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
{{- if .Values.operator.upgradeCRD }}
---
# Pre-upgrade hook resources, rendered only when operator.upgradeCRD is set.
# The service account, cluster role and binding use hook weight "0" and the
# Job uses weight "1", so the RBAC objects exist before the Job runs.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: gpu-operator-upgrade-crd-hook-sa
  annotations:
    helm.sh/hook: pre-upgrade
    helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
    helm.sh/hook-weight: "0"
---
# CRD permissions for the hook: create, read and modify, but not delete.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: gpu-operator-upgrade-crd-hook-role
  annotations:
    helm.sh/hook: pre-upgrade
    helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
    helm.sh/hook-weight: "0"
rules:
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    verbs:
      - create
      - get
      - list
      - watch
      - patch
      - update
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: gpu-operator-upgrade-crd-hook-binding
  annotations:
    helm.sh/hook: pre-upgrade
    helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
    helm.sh/hook-weight: "0"
subjects:
  - kind: ServiceAccount
    name: gpu-operator-upgrade-crd-hook-sa
    namespace: {{ .Release.Namespace }}
roleRef:
  kind: ClusterRole
  name: gpu-operator-upgrade-crd-hook-role
  apiGroup: rbac.authorization.k8s.io
---
# Job that re-applies the CRD manifests shipped in the operator image.
apiVersion: batch/v1
kind: Job
metadata:
  name: gpu-operator-upgrade-crd
  namespace: {{ .Release.Namespace }}
  annotations:
    "helm.sh/hook": pre-upgrade
    "helm.sh/hook-weight": "1"
    "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: "gpu-operator"
spec:
  template:
    metadata:
      name: gpu-operator-upgrade-crd
      labels:
        {{- include "gpu-operator.labels" . | nindent 8 }}
        app.kubernetes.io/component: "gpu-operator"
    spec:
      serviceAccountName: gpu-operator-upgrade-crd-hook-sa
      {{- if .Values.operator.imagePullSecrets }}
      imagePullSecrets:
        {{- range .Values.operator.imagePullSecrets }}
        - name: {{ . }}
        {{- end }}
      {{- end }}
      {{- with .Values.operator.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: upgrade-crd
          image: {{ include "gpu-operator.fullimage" . }}
          imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
          # The commands are joined with ";", so without "set -e" the shell
          # would exit with the status of the last command only and a failed
          # earlier "kubectl apply" would be reported as success. "set -e"
          # aborts on the first failure so the Job fails and is retried.
          command:
            - /bin/sh
            - -c
            - >
              set -e;
              kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies.yaml;
              kubectl apply -f /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml;
              {{- if .Values.nfd.enabled }}
              kubectl apply -f /opt/gpu-operator/nfd-api-crds.yaml;
              {{- end }}
      restartPolicy: OnFailure
{{- end }}
|
15
charts/gpu-operator/values.yaml
Normal file
15
charts/gpu-operator/values.yaml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Value overrides for the gpu-operator chart.
#
# NOTE(review): only "toolkit" and "devicePlugin" are defined here, while the
# templates also read keys such as operator.*, driver.*, nfd.* and platform.*.
# The full default set appears to live in values.yaml.bk - confirm that those
# defaults are supplied at install time, otherwise rendering may fail.

# Container toolkit environment. The socket path sits under /run/k3s, so this
# presumably targets the containerd instance managed by k3s - TODO confirm.
toolkit:
  env:
    - name: CONTAINERD_CONFIG
      value: "/etc/containerd/config.toml.tmpl"
    - name: CONTAINERD_SOCKET
      value: "/run/k3s/containerd/containerd.sock"
    - name: CONTAINERD_RUNTIME_CLASS
      value: "nvidia"
    - name: CONTAINERD_SET_AS_DEFAULT
      value: "true"

# Device plugin configuration: the name of the ConfigMap to use and the
# default entry within it. No "create" or "data" key is set here, so the
# chart's plugin ConfigMap template renders nothing for these values.
devicePlugin:
  config:
    name: time-slicing-config-all
    default: any
|
602
charts/gpu-operator/values.yaml.bk
Normal file
602
charts/gpu-operator/values.yaml.bk
Normal file
|
@ -0,0 +1,602 @@
|
||||||
|
# Default values for gpu-operator.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
platform:
|
||||||
|
openshift: false
|
||||||
|
|
||||||
|
nfd:
|
||||||
|
enabled: true
|
||||||
|
nodefeaturerules: false
|
||||||
|
|
||||||
|
psa:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
cdi:
|
||||||
|
enabled: false
|
||||||
|
default: false
|
||||||
|
|
||||||
|
sandboxWorkloads:
|
||||||
|
enabled: false
|
||||||
|
defaultWorkload: "container"
|
||||||
|
|
||||||
|
hostPaths:
|
||||||
|
# rootFS represents the path to the root filesystem of the host.
|
||||||
|
# This is used by components that need to interact with the host filesystem
|
||||||
|
# and as such this must be a chroot-able filesystem.
|
||||||
|
# Examples include the MIG Manager and Toolkit Container which may need to
|
||||||
|
# stop, start, or restart systemd services
|
||||||
|
rootFS: "/"
|
||||||
|
|
||||||
|
# driverInstallDir represents the root at which driver files including libraries,
|
||||||
|
# config files, and executables can be found.
|
||||||
|
driverInstallDir: "/run/nvidia/driver"
|
||||||
|
|
||||||
|
daemonsets:
|
||||||
|
labels: {}
|
||||||
|
annotations: {}
|
||||||
|
priorityClassName: system-node-critical
|
||||||
|
tolerations:
|
||||||
|
- key: nvidia.com/gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
# configuration for controlling update strategy("OnDelete" or "RollingUpdate") of GPU Operands
|
||||||
|
# note that driver Daemonset is always set with OnDelete to avoid unintended disruptions
|
||||||
|
updateStrategy: "RollingUpdate"
|
||||||
|
# configuration for controlling rolling update of GPU Operands
|
||||||
|
rollingUpdate:
|
||||||
|
# maximum number of nodes to simultaneously apply pod updates on.
|
||||||
|
# can be specified either as number or percentage of nodes. Default 1.
|
||||||
|
maxUnavailable: "1"
|
||||||
|
|
||||||
|
validator:
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: gpu-operator-validator
|
||||||
|
# If version is not specified, then default is to use chart.AppVersion
|
||||||
|
#version: ""
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
args: []
|
||||||
|
resources: {}
|
||||||
|
plugin:
|
||||||
|
env:
|
||||||
|
- name: WITH_WORKLOAD
|
||||||
|
value: "false"
|
||||||
|
|
||||||
|
operator:
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: gpu-operator
|
||||||
|
# If version is not specified, then default is to use chart.AppVersion
|
||||||
|
#version: ""
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
priorityClassName: system-node-critical
|
||||||
|
defaultRuntime: docker
|
||||||
|
runtimeClass: nvidia
|
||||||
|
use_ocp_driver_toolkit: false
|
||||||
|
# cleanup CRD on chart un-install
|
||||||
|
cleanupCRD: false
|
||||||
|
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
|
||||||
|
# to be passed during helm upgrade.
|
||||||
|
upgradeCRD: true
|
||||||
|
initContainer:
|
||||||
|
image: cuda
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
version: 12.6.3-base-ubi9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
tolerations:
|
||||||
|
- key: "node-role.kubernetes.io/master"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
- key: "node-role.kubernetes.io/control-plane"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
annotations:
|
||||||
|
openshift.io/scc: restricted-readonly
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
- weight: 1
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: "node-role.kubernetes.io/master"
|
||||||
|
operator: In
|
||||||
|
values: [""]
|
||||||
|
- weight: 1
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: "node-role.kubernetes.io/control-plane"
|
||||||
|
operator: In
|
||||||
|
values: [""]
|
||||||
|
logging:
|
||||||
|
# Zap time encoding (one of 'epoch', 'millis', 'nano', 'iso8601', 'rfc3339' or 'rfc3339nano')
|
||||||
|
timeEncoding: epoch
|
||||||
|
# Zap Level to configure the verbosity of logging. Can be one of 'debug', 'info', 'error', or any integer value > 0 which corresponds to custom debug levels of increasing verbosity
|
||||||
|
level: info
|
||||||
|
# Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn)
|
||||||
|
# Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error)
|
||||||
|
develMode: false
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 350Mi
|
||||||
|
requests:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 100Mi
|
||||||
|
|
||||||
|
mig:
|
||||||
|
strategy: single
|
||||||
|
|
||||||
|
driver:
|
||||||
|
enabled: true
|
||||||
|
nvidiaDriverCRD:
|
||||||
|
enabled: false
|
||||||
|
deployDefaultCR: true
|
||||||
|
driverType: gpu
|
||||||
|
nodeSelector: {}
|
||||||
|
useOpenKernelModules: false
|
||||||
|
# use pre-compiled packages for NVIDIA driver installation.
|
||||||
|
# only supported as a tech-preview feature on ubuntu22.04 kernels.
|
||||||
|
usePrecompiled: false
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: driver
|
||||||
|
version: "550.127.08"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
startupProbe:
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
periodSeconds: 10
|
||||||
|
# nvidia-smi can take longer than 30s in some cases
|
||||||
|
# ensure enough timeout is set
|
||||||
|
timeoutSeconds: 60
|
||||||
|
failureThreshold: 120
|
||||||
|
rdma:
|
||||||
|
enabled: false
|
||||||
|
useHostMofed: false
|
||||||
|
upgradePolicy:
|
||||||
|
# global switch for automatic upgrade feature
|
||||||
|
# if set to false all other options are ignored
|
||||||
|
autoUpgrade: true
|
||||||
|
# how many nodes can be upgraded in parallel
|
||||||
|
# 0 means no limit, all nodes will be upgraded in parallel
|
||||||
|
maxParallelUpgrades: 1
|
||||||
|
# maximum number of nodes with the driver installed, that can be unavailable during
|
||||||
|
# the upgrade. Value can be an absolute number (ex: 5) or
|
||||||
|
# a percentage of total nodes at the start of upgrade (ex:
|
||||||
|
# 10%). Absolute number is calculated from percentage by rounding
|
||||||
|
# up. By default, a fixed value of 25% is used.
|
||||||
|
maxUnavailable: 25%
|
||||||
|
# options for waiting on pod(job) completions
|
||||||
|
waitForCompletion:
|
||||||
|
timeoutSeconds: 0
|
||||||
|
podSelector: ""
|
||||||
|
# options for gpu pod deletion
|
||||||
|
gpuPodDeletion:
|
||||||
|
force: false
|
||||||
|
timeoutSeconds: 300
|
||||||
|
deleteEmptyDir: false
|
||||||
|
# options for node drain (`kubectl drain`) before the driver reload
|
||||||
|
# this is required only if default GPU pod deletions done by the operator
|
||||||
|
# are not sufficient to re-install the driver
|
||||||
|
drain:
|
||||||
|
enable: false
|
||||||
|
force: false
|
||||||
|
podSelector: ""
|
||||||
|
# It's recommended to set a timeout to avoid infinite drain in case a non-fatal error keeps happening on retries
|
||||||
|
timeoutSeconds: 300
|
||||||
|
deleteEmptyDir: false
|
||||||
|
manager:
|
||||||
|
image: k8s-driver-manager
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||||
|
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||||
|
version: v0.7.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: ENABLE_GPU_POD_EVICTION
|
||||||
|
value: "true"
|
||||||
|
- name: ENABLE_AUTO_DRAIN
|
||||||
|
value: "false"
|
||||||
|
- name: DRAIN_USE_FORCE
|
||||||
|
value: "false"
|
||||||
|
- name: DRAIN_POD_SELECTOR_LABEL
|
||||||
|
value: ""
|
||||||
|
- name: DRAIN_TIMEOUT_SECONDS
|
||||||
|
value: "0s"
|
||||||
|
- name: DRAIN_DELETE_EMPTYDIR_DATA
|
||||||
|
value: "false"
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
# Private mirror repository configuration
|
||||||
|
repoConfig:
|
||||||
|
configMapName: ""
|
||||||
|
# custom ssl key/certificate configuration
|
||||||
|
certConfig:
|
||||||
|
name: ""
|
||||||
|
# vGPU licensing configuration
|
||||||
|
licensingConfig:
|
||||||
|
configMapName: ""
|
||||||
|
nlsEnabled: true
|
||||||
|
# vGPU topology daemon configuration
|
||||||
|
virtualTopology:
|
||||||
|
config: ""
|
||||||
|
# kernel module configuration for NVIDIA driver
|
||||||
|
kernelModuleConfig:
|
||||||
|
name: ""
|
||||||
|
|
||||||
|
toolkit:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia/k8s
|
||||||
|
image: container-toolkit
|
||||||
|
version: v1.17.3-ubuntu20.04
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
installDir: "/usr/local/nvidia"
|
||||||
|
|
||||||
|
devicePlugin:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: k8s-device-plugin
|
||||||
|
version: v0.17.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
args: []
|
||||||
|
env:
|
||||||
|
- name: PASS_DEVICE_SPECS
|
||||||
|
value: "true"
|
||||||
|
- name: FAIL_ON_INIT_ERROR
|
||||||
|
value: "true"
|
||||||
|
- name: DEVICE_LIST_STRATEGY
|
||||||
|
value: envvar
|
||||||
|
- name: DEVICE_ID_STRATEGY
|
||||||
|
value: uuid
|
||||||
|
- name: NVIDIA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||||
|
value: all
|
||||||
|
resources: {}
|
||||||
|
# Plugin configuration
|
||||||
|
# Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations (i.e. with create=true).
|
||||||
|
# Use "data" to build an integrated ConfigMap from a set of configurations as
|
||||||
|
# part of this helm chart. An example of setting "data" might be:
|
||||||
|
# config:
|
||||||
|
# name: device-plugin-config
|
||||||
|
# create: true
|
||||||
|
# data:
|
||||||
|
# default: |-
|
||||||
|
# version: v1
|
||||||
|
# flags:
|
||||||
|
# migStrategy: none
|
||||||
|
# mig-single: |-
|
||||||
|
# version: v1
|
||||||
|
# flags:
|
||||||
|
# migStrategy: single
|
||||||
|
# mig-mixed: |-
|
||||||
|
# version: v1
|
||||||
|
# flags:
|
||||||
|
# migStrategy: mixed
|
||||||
|
config:
|
||||||
|
# Create a ConfigMap (default: false)
|
||||||
|
create: false
|
||||||
|
# ConfigMap name (either existing or to create a new one with create=true above)
|
||||||
|
name: ""
|
||||||
|
# Default config name within the ConfigMap
|
||||||
|
default: ""
|
||||||
|
# Data section for the ConfigMap to create (i.e. only applies when create=true)
|
||||||
|
data: {}
|
||||||
|
# MPS related configuration for the plugin
|
||||||
|
mps:
|
||||||
|
# MPS root path on the host
|
||||||
|
root: "/run/nvidia/mps"
|
||||||
|
|
||||||
|
# standalone dcgm hostengine
|
||||||
|
dcgm:
|
||||||
|
# disabled by default to use embedded nv-hostengine by exporter
|
||||||
|
enabled: false
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: dcgm
|
||||||
|
version: 3.3.9-1-ubuntu22.04
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
dcgmExporter:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia/k8s
|
||||||
|
image: dcgm-exporter
|
||||||
|
version: 3.3.9-3.6.1-ubuntu22.04
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: DCGM_EXPORTER_LISTEN
|
||||||
|
value: ":9400"
|
||||||
|
- name: DCGM_EXPORTER_KUBERNETES
|
||||||
|
value: "true"
|
||||||
|
- name: DCGM_EXPORTER_COLLECTORS
|
||||||
|
value: "/etc/dcgm-exporter/dcp-metrics-included.csv"
|
||||||
|
resources: {}
|
||||||
|
serviceMonitor:
|
||||||
|
enabled: false
|
||||||
|
interval: 15s
|
||||||
|
honorLabels: false
|
||||||
|
additionalLabels: {}
|
||||||
|
relabelings: []
|
||||||
|
# - source_labels:
|
||||||
|
# - __meta_kubernetes_pod_node_name
|
||||||
|
# regex: (.*)
|
||||||
|
# target_label: instance
|
||||||
|
# replacement: $1
|
||||||
|
# action: replace
|
||||||
|
# DCGM Exporter configuration
|
||||||
|
# This block is used to configure DCGM Exporter to emit a customized list of metrics.
|
||||||
|
# Use "name" to either point to an existing ConfigMap or to create a new one with a
|
||||||
|
# list of configurations (i.e. with create=true).
|
||||||
|
# When pointing to an existing ConfigMap, the ConfigMap must exist in the same namespace as the release.
|
||||||
|
# The metrics are expected to be listed under a key called `dcgm-metrics.csv`.
|
||||||
|
# Use "data" to build an integrated ConfigMap from a set of custom metrics as
|
||||||
|
# part of the chart. An example of some custom metrics is shown below. Note that
|
||||||
|
# the contents of "data" must be in CSV format and be valid DCGM Exporter metric configurations.
|
||||||
|
# config:
|
||||||
|
# name: custom-dcgm-exporter-metrics
|
||||||
|
# create: true
|
||||||
|
# data: |-
|
||||||
|
# Format
|
||||||
|
# If line starts with a '#' it is considered a comment
|
||||||
|
# DCGM FIELD, Prometheus metric type, help message
|
||||||
|
|
||||||
|
# Clocks
|
||||||
|
# DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
|
||||||
|
# DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
|
||||||
|
gfd:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: k8s-device-plugin
|
||||||
|
version: v0.17.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env:
|
||||||
|
- name: GFD_SLEEP_INTERVAL
|
||||||
|
value: 60s
|
||||||
|
- name: GFD_FAIL_ON_INIT_ERROR
|
||||||
|
value: "true"
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
migManager:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: k8s-mig-manager
|
||||||
|
version: v0.10.0-ubuntu20.04
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env:
|
||||||
|
- name: WITH_REBOOT
|
||||||
|
value: "false"
|
||||||
|
resources: {}
|
||||||
|
# MIG configuration
|
||||||
|
# Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations (i.e. with create=true).
|
||||||
|
# Use "data" to build an integrated ConfigMap from a set of configurations as
|
||||||
|
# part of this helm chart. An example of setting "data" might be:
|
||||||
|
# config:
|
||||||
|
# name: custom-mig-parted-configs
|
||||||
|
# create: true
|
||||||
|
# data: |-
|
||||||
|
# config.yaml: |-
|
||||||
|
# version: v1
|
||||||
|
# mig-configs:
|
||||||
|
# all-disabled:
|
||||||
|
# - devices: all
|
||||||
|
# mig-enabled: false
|
||||||
|
# custom-mig:
|
||||||
|
# - devices: [0]
|
||||||
|
# mig-enabled: false
|
||||||
|
# - devices: [1]
|
||||||
|
# mig-enabled: true
|
||||||
|
# mig-devices:
|
||||||
|
# "1g.10gb": 7
|
||||||
|
# - devices: [2]
|
||||||
|
# mig-enabled: true
|
||||||
|
# mig-devices:
|
||||||
|
# "2g.20gb": 2
|
||||||
|
# "3g.40gb": 1
|
||||||
|
# - devices: [3]
|
||||||
|
# mig-enabled: true
|
||||||
|
# mig-devices:
|
||||||
|
# "3g.40gb": 1
|
||||||
|
# "4g.40gb": 1
|
||||||
|
config:
|
||||||
|
default: "all-disabled"
|
||||||
|
# Create a ConfigMap (default: false)
|
||||||
|
create: false
|
||||||
|
# ConfigMap name (either existing or to create a new one with create=true above)
|
||||||
|
name: ""
|
||||||
|
# Data section for the ConfigMap to create (i.e. only applies when create=true)
|
||||||
|
data: {}
|
||||||
|
gpuClientsConfig:
|
||||||
|
name: ""
|
||||||
|
|
||||||
|
nodeStatusExporter:
|
||||||
|
enabled: false
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: gpu-operator-validator
|
||||||
|
# If version is not specified, then default is to use chart.AppVersion
|
||||||
|
#version: ""
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
gds:
|
||||||
|
enabled: false
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: nvidia-fs
|
||||||
|
version: "2.20.5"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
args: []
|
||||||
|
|
||||||
|
gdrcopy:
|
||||||
|
enabled: false
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: gdrdrv
|
||||||
|
version: "v2.4.1-2"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
args: []
|
||||||
|
|
||||||
|
vgpuManager:
|
||||||
|
enabled: false
|
||||||
|
repository: ""
|
||||||
|
image: vgpu-manager
|
||||||
|
version: ""
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
driverManager:
|
||||||
|
image: k8s-driver-manager
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||||
|
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||||
|
version: v0.7.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: ENABLE_GPU_POD_EVICTION
|
||||||
|
value: "false"
|
||||||
|
- name: ENABLE_AUTO_DRAIN
|
||||||
|
value: "false"
|
||||||
|
|
||||||
|
vgpuDeviceManager:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: vgpu-device-manager
|
||||||
|
version: v0.2.8
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
config:
|
||||||
|
name: ""
|
||||||
|
default: "default"
|
||||||
|
|
||||||
|
vfioManager:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: cuda
|
||||||
|
version: 12.6.3-base-ubi9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
driverManager:
|
||||||
|
image: k8s-driver-manager
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||||
|
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||||
|
version: v0.7.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: ENABLE_GPU_POD_EVICTION
|
||||||
|
value: "false"
|
||||||
|
- name: ENABLE_AUTO_DRAIN
|
||||||
|
value: "false"
|
||||||
|
|
||||||
|
kataManager:
|
||||||
|
enabled: false
|
||||||
|
config:
|
||||||
|
artifactsDir: "/opt/nvidia-gpu-operator/artifacts/runtimeclasses"
|
||||||
|
runtimeClasses:
|
||||||
|
- name: kata-nvidia-gpu
|
||||||
|
nodeSelector: {}
|
||||||
|
artifacts:
|
||||||
|
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
|
||||||
|
pullSecret: ""
|
||||||
|
- name: kata-nvidia-gpu-snp
|
||||||
|
nodeSelector:
|
||||||
|
"nvidia.com/cc.capable": "true"
|
||||||
|
artifacts:
|
||||||
|
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.86.10-snp
|
||||||
|
pullSecret: ""
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: k8s-kata-manager
|
||||||
|
version: v0.2.2
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
sandboxDevicePlugin:
|
||||||
|
enabled: true
|
||||||
|
repository: nvcr.io/nvidia
|
||||||
|
image: kubevirt-gpu-device-plugin
|
||||||
|
version: v1.2.10
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
args: []
|
||||||
|
env: []
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
ccManager:
|
||||||
|
enabled: false
|
||||||
|
defaultMode: "off"
|
||||||
|
repository: nvcr.io/nvidia/cloud-native
|
||||||
|
image: k8s-cc-manager
|
||||||
|
version: v0.1.1
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
imagePullSecrets: []
|
||||||
|
env:
|
||||||
|
- name: CC_CAPABLE_DEVICE_IDS
|
||||||
|
value: "0x2339,0x2331,0x2330,0x2324,0x2322,0x233d"
|
||||||
|
resources: {}
|
||||||
|
|
||||||
|
node-feature-discovery:
|
||||||
|
enableNodeFeatureApi: true
|
||||||
|
priorityClassName: system-node-critical
|
||||||
|
gc:
|
||||||
|
enable: true
|
||||||
|
replicaCount: 1
|
||||||
|
serviceAccount:
|
||||||
|
name: node-feature-discovery
|
||||||
|
create: false
|
||||||
|
worker:
|
||||||
|
serviceAccount:
|
||||||
|
name: node-feature-discovery
|
||||||
|
# disable creation to avoid duplicate serviceaccount creation by master spec below
|
||||||
|
create: false
|
||||||
|
tolerations:
|
||||||
|
- key: "node-role.kubernetes.io/master"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
- key: "node-role.kubernetes.io/control-plane"
|
||||||
|
operator: "Equal"
|
||||||
|
value: ""
|
||||||
|
effect: "NoSchedule"
|
||||||
|
- key: nvidia.com/gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
config:
|
||||||
|
sources:
|
||||||
|
pci:
|
||||||
|
deviceClassWhitelist:
|
||||||
|
- "02"
|
||||||
|
- "0200"
|
||||||
|
- "0207"
|
||||||
|
- "0300"
|
||||||
|
- "0302"
|
||||||
|
deviceLabelFields:
|
||||||
|
- vendor
|
||||||
|
master:
|
||||||
|
serviceAccount:
|
||||||
|
name: node-feature-discovery
|
||||||
|
create: true
|
||||||
|
config:
|
||||||
|
extraLabelNs: ["nvidia.com"]
|
||||||
|
# noPublish: false
|
||||||
|
# resourceLabels: ["nvidia.com/feature-1","nvidia.com/feature-2"]
|
||||||
|
# enableTaints: false
|
||||||
|
# labelWhiteList: "nvidia.com/gpu"
|
15
resources/gpu-slice/configmap.yaml
Normal file
15
resources/gpu-slice/configmap.yaml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: time-slicing-config-all
|
||||||
|
namespace: gpu-operator
|
||||||
|
data:
|
||||||
|
any: |-
|
||||||
|
version: v1
|
||||||
|
flags:
|
||||||
|
migStrategy: none
|
||||||
|
sharing:
|
||||||
|
timeSlicing:
|
||||||
|
resources:
|
||||||
|
- name: nvidia.com/gpu
|
||||||
|
replicas: 4
|
Loading…
Reference in a new issue