added gpu-operator
This commit is contained in:
parent
cb672d1f0d
commit
a2b2bd17c5
48 changed files with 8358 additions and 0 deletions
22
charts/gpu-operator/.helmignore
Normal file
22
charts/gpu-operator/.helmignore
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
6
charts/gpu-operator/Chart.lock
Normal file
6
charts/gpu-operator/Chart.lock
Normal file
|
@ -0,0 +1,6 @@
|
|||
dependencies:
|
||||
- name: node-feature-discovery
|
||||
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
|
||||
version: 0.16.6
|
||||
digest: sha256:e7b02cbdf9daff49892c0b74c50da2ed11e18eff2105a1b1abc9a8f2ebd8be47
|
||||
generated: "2024-10-31T07:12:50.141904-07:00"
|
23
charts/gpu-operator/Chart.yaml
Normal file
23
charts/gpu-operator/Chart.yaml
Normal file
|
@ -0,0 +1,23 @@
|
|||
apiVersion: v2
|
||||
appVersion: v24.9.1
|
||||
dependencies:
|
||||
- condition: nfd.enabled
|
||||
name: node-feature-discovery
|
||||
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
|
||||
version: v0.16.6
|
||||
description: NVIDIA GPU Operator creates/configures/manages GPUs atop Kubernetes
|
||||
home: https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/overview.html
|
||||
icon: https://assets.nvidiagrid.net/ngc/logos/GPUoperator.png
|
||||
keywords:
|
||||
- gpu
|
||||
- cuda
|
||||
- compute
|
||||
- operator
|
||||
- deep learning
|
||||
- monitoring
|
||||
- tesla
|
||||
kubeVersion: '>= 1.16.0-0'
|
||||
name: gpu-operator
|
||||
sources:
|
||||
- https://github.com/NVIDIA/gpu-operator
|
||||
version: v24.9.1
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
14
charts/gpu-operator/charts/node-feature-discovery/Chart.yaml
Normal file
14
charts/gpu-operator/charts/node-feature-discovery/Chart.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
apiVersion: v2
|
||||
appVersion: v0.16.6
|
||||
description: 'Detects hardware features available on each node in a Kubernetes cluster,
|
||||
and advertises those features using node labels. '
|
||||
home: https://github.com/kubernetes-sigs/node-feature-discovery
|
||||
keywords:
|
||||
- feature-discovery
|
||||
- feature-detection
|
||||
- node-labels
|
||||
name: node-feature-discovery
|
||||
sources:
|
||||
- https://github.com/kubernetes-sigs/node-feature-discovery
|
||||
type: application
|
||||
version: 0.16.6
|
10
charts/gpu-operator/charts/node-feature-discovery/README.md
Normal file
10
charts/gpu-operator/charts/node-feature-discovery/README.md
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Node Feature Discovery
|
||||
|
||||
Node Feature Discovery (NFD) is a Kubernetes add-on for detecting hardware
|
||||
features and system configuration. Detected features are advertised as node
|
||||
labels. NFD provides flexible configuration and extension points for a wide
|
||||
range of vendor and application specific node labeling needs.
|
||||
|
||||
See
|
||||
[NFD documentation](https://kubernetes-sigs.github.io/node-feature-discovery/v0.16/deployment/helm.html)
|
||||
for deployment instructions.
|
|
@ -0,0 +1,710 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.14.0
|
||||
name: nodefeatures.nfd.k8s-sigs.io
|
||||
spec:
|
||||
group: nfd.k8s-sigs.io
|
||||
names:
|
||||
kind: NodeFeature
|
||||
listKind: NodeFeatureList
|
||||
plural: nodefeatures
|
||||
singular: nodefeature
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: |-
|
||||
NodeFeature resource holds the features discovered for one node in the
|
||||
cluster.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: |-
|
||||
APIVersion defines the versioned schema of this representation of an object.
|
||||
Servers should convert recognized schemas to the latest internal value, and
|
||||
may reject unrecognized values.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||
type: string
|
||||
kind:
|
||||
description: |-
|
||||
Kind is a string value representing the REST resource this object represents.
|
||||
Servers may infer this from the endpoint the client submits requests to.
|
||||
Cannot be updated.
|
||||
In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of the NodeFeature, containing features discovered
|
||||
for a node.
|
||||
properties:
|
||||
features:
|
||||
description: Features is the full "raw" features data that has been
|
||||
discovered.
|
||||
properties:
|
||||
attributes:
|
||||
additionalProperties:
|
||||
description: AttributeFeatureSet is a set of features having
|
||||
string value.
|
||||
properties:
|
||||
elements:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Individual features of the feature set.
|
||||
type: object
|
||||
required:
|
||||
- elements
|
||||
type: object
|
||||
description: Attributes contains all the attribute-type features
|
||||
of the node.
|
||||
type: object
|
||||
flags:
|
||||
additionalProperties:
|
||||
description: FlagFeatureSet is a set of simple features only
|
||||
containing names without values.
|
||||
properties:
|
||||
elements:
|
||||
additionalProperties:
|
||||
description: Nil is a dummy empty struct for protobuf
|
||||
compatibility
|
||||
type: object
|
||||
description: Individual features of the feature set.
|
||||
type: object
|
||||
required:
|
||||
- elements
|
||||
type: object
|
||||
description: Flags contains all the flag-type features of the
|
||||
node.
|
||||
type: object
|
||||
instances:
|
||||
additionalProperties:
|
||||
description: InstanceFeatureSet is a set of features each of
|
||||
which is an instance having multiple attributes.
|
||||
properties:
|
||||
elements:
|
||||
description: Individual features of the feature set.
|
||||
items:
|
||||
description: InstanceFeature represents one instance of
|
||||
a complex features, e.g. a device.
|
||||
properties:
|
||||
attributes:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Attributes of the instance feature.
|
||||
type: object
|
||||
required:
|
||||
- attributes
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- elements
|
||||
type: object
|
||||
description: Instances contains all the instance-type features
|
||||
of the node.
|
||||
type: object
|
||||
type: object
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Labels is the set of node labels that are requested to
|
||||
be created.
|
||||
type: object
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.14.0
|
||||
name: nodefeaturegroups.nfd.k8s-sigs.io
|
||||
spec:
|
||||
group: nfd.k8s-sigs.io
|
||||
names:
|
||||
kind: NodeFeatureGroup
|
||||
listKind: NodeFeatureGroupList
|
||||
plural: nodefeaturegroups
|
||||
shortNames:
|
||||
- nfg
|
||||
singular: nodefeaturegroup
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: NodeFeatureGroup resource holds Node pools by featureGroup
|
||||
properties:
|
||||
apiVersion:
|
||||
description: |-
|
||||
APIVersion defines the versioned schema of this representation of an object.
|
||||
Servers should convert recognized schemas to the latest internal value, and
|
||||
may reject unrecognized values.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||
type: string
|
||||
kind:
|
||||
description: |-
|
||||
Kind is a string value representing the REST resource this object represents.
|
||||
Servers may infer this from the endpoint the client submits requests to.
|
||||
Cannot be updated.
|
||||
In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Spec defines the rules to be evaluated.
|
||||
properties:
|
||||
featureGroupRules:
|
||||
description: List of rules to evaluate to determine nodes that belong
|
||||
in this group.
|
||||
items:
|
||||
description: GroupRule defines a rule for nodegroup filtering.
|
||||
properties:
|
||||
matchAny:
|
||||
description: MatchAny specifies a list of matchers one of which
|
||||
must match.
|
||||
items:
|
||||
description: MatchAnyElem specifies one sub-matcher of MatchAny.
|
||||
properties:
|
||||
matchFeatures:
|
||||
description: MatchFeatures specifies a set of matcher
|
||||
terms all of which must match.
|
||||
items:
|
||||
description: |-
|
||||
FeatureMatcherTerm defines requirements against one feature set. All
|
||||
requirements (specified as MatchExpressions) are evaluated against each
|
||||
element in the feature set.
|
||||
properties:
|
||||
feature:
|
||||
description: Feature is the name of the feature
|
||||
set to match against.
|
||||
type: string
|
||||
matchExpressions:
|
||||
additionalProperties:
|
||||
description: |-
|
||||
MatchExpression specifies an expression to evaluate against a set of input
|
||||
values. It contains an operator that is applied when matching the input and
|
||||
an array of values that the operator evaluates the input against.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
description: |-
|
||||
MatchExpressions is the set of per-element expressions evaluated. These
|
||||
match against the value of the specified elements.
|
||||
type: object
|
||||
matchName:
|
||||
description: |-
|
||||
MatchName in an expression that is matched against the name of each
|
||||
element in the feature set.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
required:
|
||||
- feature
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- matchFeatures
|
||||
type: object
|
||||
type: array
|
||||
matchFeatures:
|
||||
description: MatchFeatures specifies a set of matcher terms
|
||||
all of which must match.
|
||||
items:
|
||||
description: |-
|
||||
FeatureMatcherTerm defines requirements against one feature set. All
|
||||
requirements (specified as MatchExpressions) are evaluated against each
|
||||
element in the feature set.
|
||||
properties:
|
||||
feature:
|
||||
description: Feature is the name of the feature set to
|
||||
match against.
|
||||
type: string
|
||||
matchExpressions:
|
||||
additionalProperties:
|
||||
description: |-
|
||||
MatchExpression specifies an expression to evaluate against a set of input
|
||||
values. It contains an operator that is applied when matching the input and
|
||||
an array of values that the operator evaluates the input against.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
description: |-
|
||||
MatchExpressions is the set of per-element expressions evaluated. These
|
||||
match against the value of the specified elements.
|
||||
type: object
|
||||
matchName:
|
||||
description: |-
|
||||
MatchName in an expression that is matched against the name of each
|
||||
element in the feature set.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
required:
|
||||
- feature
|
||||
type: object
|
||||
type: array
|
||||
name:
|
||||
description: Name of the rule.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- featureGroupRules
|
||||
type: object
|
||||
status:
|
||||
description: |-
|
||||
Status of the NodeFeatureGroup after the most recent evaluation of the
|
||||
specification.
|
||||
properties:
|
||||
nodes:
|
||||
description: Nodes is a list of FeatureGroupNode in the cluster that
|
||||
match the featureGroupRules
|
||||
items:
|
||||
properties:
|
||||
name:
|
||||
description: Name of the node.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- name
|
||||
x-kubernetes-list-type: map
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.14.0
|
||||
name: nodefeaturerules.nfd.k8s-sigs.io
|
||||
spec:
|
||||
group: nfd.k8s-sigs.io
|
||||
names:
|
||||
kind: NodeFeatureRule
|
||||
listKind: NodeFeatureRuleList
|
||||
plural: nodefeaturerules
|
||||
shortNames:
|
||||
- nfr
|
||||
singular: nodefeaturerule
|
||||
scope: Cluster
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: |-
|
||||
NodeFeatureRule resource specifies a configuration for feature-based
|
||||
customization of node objects, such as node labeling.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: |-
|
||||
APIVersion defines the versioned schema of this representation of an object.
|
||||
Servers should convert recognized schemas to the latest internal value, and
|
||||
may reject unrecognized values.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||
type: string
|
||||
kind:
|
||||
description: |-
|
||||
Kind is a string value representing the REST resource this object represents.
|
||||
Servers may infer this from the endpoint the client submits requests to.
|
||||
Cannot be updated.
|
||||
In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Spec defines the rules to be evaluated.
|
||||
properties:
|
||||
rules:
|
||||
description: Rules is a list of node customization rules.
|
||||
items:
|
||||
description: Rule defines a rule for node customization such as
|
||||
labeling.
|
||||
properties:
|
||||
annotations:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Annotations to create if the rule matches.
|
||||
type: object
|
||||
extendedResources:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: ExtendedResources to create if the rule matches.
|
||||
type: object
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Labels to create if the rule matches.
|
||||
type: object
|
||||
labelsTemplate:
|
||||
description: |-
|
||||
LabelsTemplate specifies a template to expand for dynamically generating
|
||||
multiple labels. Data (after template expansion) must be keys with an
|
||||
optional value (<key>[=<value>]) separated by newlines.
|
||||
type: string
|
||||
matchAny:
|
||||
description: MatchAny specifies a list of matchers one of which
|
||||
must match.
|
||||
items:
|
||||
description: MatchAnyElem specifies one sub-matcher of MatchAny.
|
||||
properties:
|
||||
matchFeatures:
|
||||
description: MatchFeatures specifies a set of matcher
|
||||
terms all of which must match.
|
||||
items:
|
||||
description: |-
|
||||
FeatureMatcherTerm defines requirements against one feature set. All
|
||||
requirements (specified as MatchExpressions) are evaluated against each
|
||||
element in the feature set.
|
||||
properties:
|
||||
feature:
|
||||
description: Feature is the name of the feature
|
||||
set to match against.
|
||||
type: string
|
||||
matchExpressions:
|
||||
additionalProperties:
|
||||
description: |-
|
||||
MatchExpression specifies an expression to evaluate against a set of input
|
||||
values. It contains an operator that is applied when matching the input and
|
||||
an array of values that the operator evaluates the input against.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
description: |-
|
||||
MatchExpressions is the set of per-element expressions evaluated. These
|
||||
match against the value of the specified elements.
|
||||
type: object
|
||||
matchName:
|
||||
description: |-
|
||||
MatchName in an expression that is matched against the name of each
|
||||
element in the feature set.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
required:
|
||||
- feature
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- matchFeatures
|
||||
type: object
|
||||
type: array
|
||||
matchFeatures:
|
||||
description: MatchFeatures specifies a set of matcher terms
|
||||
all of which must match.
|
||||
items:
|
||||
description: |-
|
||||
FeatureMatcherTerm defines requirements against one feature set. All
|
||||
requirements (specified as MatchExpressions) are evaluated against each
|
||||
element in the feature set.
|
||||
properties:
|
||||
feature:
|
||||
description: Feature is the name of the feature set to
|
||||
match against.
|
||||
type: string
|
||||
matchExpressions:
|
||||
additionalProperties:
|
||||
description: |-
|
||||
MatchExpression specifies an expression to evaluate against a set of input
|
||||
values. It contains an operator that is applied when matching the input and
|
||||
an array of values that the operator evaluates the input against.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
description: |-
|
||||
MatchExpressions is the set of per-element expressions evaluated. These
|
||||
match against the value of the specified elements.
|
||||
type: object
|
||||
matchName:
|
||||
description: |-
|
||||
MatchName in an expression that is matched against the name of each
|
||||
element in the feature set.
|
||||
properties:
|
||||
op:
|
||||
description: Op is the operator to be applied.
|
||||
enum:
|
||||
- In
|
||||
- NotIn
|
||||
- InRegexp
|
||||
- Exists
|
||||
- DoesNotExist
|
||||
- Gt
|
||||
- Lt
|
||||
- GtLt
|
||||
- IsTrue
|
||||
- IsFalse
|
||||
type: string
|
||||
value:
|
||||
description: |-
|
||||
Value is the list of values that the operand evaluates the input
|
||||
against. Value should be empty if the operator is Exists, DoesNotExist,
|
||||
IsTrue or IsFalse. Value should contain exactly one element if the
|
||||
operator is Gt or Lt and exactly two elements if the operator is GtLt.
|
||||
In other cases Value should contain at least one element.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- op
|
||||
type: object
|
||||
required:
|
||||
- feature
|
||||
type: object
|
||||
type: array
|
||||
name:
|
||||
description: Name of the rule.
|
||||
type: string
|
||||
taints:
|
||||
description: Taints to create if the rule matches.
|
||||
items:
|
||||
description: |-
|
||||
The node this Taint is attached to has the "effect" on
|
||||
any pod that does not tolerate the Taint.
|
||||
properties:
|
||||
effect:
|
||||
description: |-
|
||||
Required. The effect of the taint on pods
|
||||
that do not tolerate the taint.
|
||||
Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
|
||||
type: string
|
||||
key:
|
||||
description: Required. The taint key to be applied to
|
||||
a node.
|
||||
type: string
|
||||
timeAdded:
|
||||
description: |-
|
||||
TimeAdded represents the time at which the taint was added.
|
||||
It is only written for NoExecute taints.
|
||||
format: date-time
|
||||
type: string
|
||||
value:
|
||||
description: The taint value corresponding to the taint
|
||||
key.
|
||||
type: string
|
||||
required:
|
||||
- effect
|
||||
- key
|
||||
type: object
|
||||
type: array
|
||||
vars:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: |-
|
||||
Vars is the variables to store if the rule matches. Variables do not
|
||||
directly inflict any changes in the node object. However, they can be
|
||||
referenced from other rules enabling more complex rule hierarchies,
|
||||
without exposing intermediary output values as labels.
|
||||
type: object
|
||||
varsTemplate:
|
||||
description: |-
|
||||
VarsTemplate specifies a template to expand for dynamically generating
|
||||
multiple variables. Data (after template expansion) must be keys with an
|
||||
optional value (<key>[=<value>]) separated by newlines.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- rules
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
|
@ -0,0 +1,107 @@
|
|||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
Short name of the chart: .Values.nameOverride when it is set, otherwise
.Chart.Name. The result is cut to 63 characters and one trailing "-" is removed.
*/}}
{{- define "node-feature-discovery.name" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||
|
||||
{{/*
Default fully qualified app name, truncated at 63 chars because some Kubernetes
name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name on its own, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name or nameOverride>".
One trailing "-" left after truncation is removed in every case.
*/}}
{{- define "node-feature-discovery.fullname" -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
|
||||
|
||||
{{/*
Target namespace: .Values.namespaceOverride when set, otherwise the release
namespace. Lets combined charts deploy into more than one namespace.
*/}}
{{- define "node-feature-discovery.namespace" -}}
{{- default .Release.Namespace .Values.namespaceOverride -}}
{{- end -}}
|
||||
|
||||
{{/*
"<chart name>-<chart version>" as used by the chart label. Every "+" is
replaced with "_", then the value is cut to 63 characters and one trailing
"-" is removed.
*/}}
{{- define "node-feature-discovery.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||||
|
||||
{{/*
Common labels: the chart label, the selector labels, the app version (only
when .Chart.AppVersion is set, emitted quoted) and the managing service.
*/}}
{{- define "node-feature-discovery.labels" -}}
{{- $chartLabel := include "node-feature-discovery.chart" . -}}
helm.sh/chart: {{ $chartLabel }}
{{ include "node-feature-discovery.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}
|
||||
|
||||
{{/*
Selector labels: the chart's short name and the release name.
*/}}
{{- define "node-feature-discovery.selectorLabels" -}}
{{- $appName := include "node-feature-discovery.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
|
||||
|
||||
{{/*
Name of the service account the nfd master will use.
.Values.master.serviceAccount.name wins whenever it is set. Otherwise the
name is the chart fullname when the account is created by this chart
(.Values.master.serviceAccount.create), or "default" when it is not.
*/}}
{{- define "node-feature-discovery.master.serviceAccountName" -}}
{{- $sa := .Values.master.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
{{- $fallback = include "node-feature-discovery.fullname" . -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||
|
||||
{{/*
Name of the service account the nfd worker will use.
.Values.worker.serviceAccount.name wins whenever it is set. Otherwise the
name is "<fullname>-worker" when the account is created by this chart
(.Values.worker.serviceAccount.create), or "default" when it is not.
*/}}
{{- define "node-feature-discovery.worker.serviceAccountName" -}}
{{- $sa := .Values.worker.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
{{- $fallback = printf "%s-worker" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||
|
||||
{{/*
Name of the service account topologyUpdater will use.
.Values.topologyUpdater.serviceAccount.name wins whenever it is set.
Otherwise the name is "<fullname>-topology-updater" when the account is
created by this chart (.Values.topologyUpdater.serviceAccount.create), or
"default" when it is not.
*/}}
{{- define "node-feature-discovery.topologyUpdater.serviceAccountName" -}}
{{- $sa := .Values.topologyUpdater.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
{{- $fallback = printf "%s-topology-updater" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
||||
|
||||
{{/*
Name of the service account nfd-gc will use.
.Values.gc.serviceAccount.name wins whenever it is set. Otherwise the name
is "<fullname>-gc" when the account is created by this chart
(.Values.gc.serviceAccount.create), or "default" when it is not.
*/}}
{{- define "node-feature-discovery.gc.serviceAccountName" -}}
{{- $sa := .Values.gc.serviceAccount -}}
{{- $fallback := "default" -}}
{{- if $sa.create -}}
{{- $fallback = printf "%s-gc" (include "node-feature-discovery.fullname" .) -}}
{{- end -}}
{{- default $fallback $sa.name -}}
{{- end -}}
|
|
@ -0,0 +1,80 @@
|
|||
{{- /*
cert-manager Certificate resources for the NFD components.

Rendered only when .Values.tls.certManager is set. One Certificate is emitted
per enabled component (master, worker, topology-updater). Each one references
the issuer named by .Values.tls.certManagerCertificate.issuerName, falling
back to "nfd-ca-issuer". The issuer kind is taken from
.Values.tls.certManagerCertificate.issuerKind only when an issuer name is
also configured; otherwise it is "Issuer".

Every document separator lives inside its component guard so that a disabled
component contributes nothing to the rendered output.
*/}}
{{- if .Values.tls.certManager }}
{{- if .Values.master.enable }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-master-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-master-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-master
  dnsNames:
    # must match the service name
    - {{ include "node-feature-discovery.fullname" . }}-master
    # first one is configured for use by the worker; below are for completeness
    - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc
    - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- if .Values.worker.enable }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-worker-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-worker-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-worker
  dnsNames:
    - {{ include "node-feature-discovery.fullname" . }}-worker.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- if .Values.topologyUpdater.enable }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-topology-updater-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  secretName: nfd-topology-updater-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-topology-updater
  dnsNames:
    - {{ include "node-feature-discovery.fullname" . }}-topology-updater.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
  issuerRef:
    name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
    {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
    kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
    {{- else }}
    kind: Issuer
    {{- end }}
    group: cert-manager.io
{{- end }}

{{- end }}
|
|
@ -0,0 +1,42 @@
|
|||
{{- /*
Self-signed CA bootstrap for cert-manager.

Rendered only when .Values.tls.certManager is set and no external issuer name
is configured in .Values.tls.certManagerCertificate.issuerName. Three
resources are created in the chart namespace:
  1. nfd-ca-bootstrap: a self-signed Issuer
  2. nfd-ca-cert:      a CA Certificate issued by nfd-ca-bootstrap
  3. nfd-ca-issuer:    a CA Issuer backed by the nfd-ca-cert secret
*/}}
{{- if and .Values.tls.certManager (not .Values.tls.certManagerCertificate.issuerName) }}
# Bootstrapping procedure, see
# https://cert-manager.io/docs/configuration/selfsigned/#bootstrapping-ca-issuers
#   - create a self-signed issuer
#   - use it to issue a CA certificate
#   - use that CA certificate to back a CA issuer
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: nfd-ca-bootstrap
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  selfSigned: {}

---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: nfd-ca-cert
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  isCA: true
  secretName: nfd-ca-cert
  subject:
    organizations:
      - node-feature-discovery
  commonName: nfd-ca-cert
  issuerRef:
    name: nfd-ca-bootstrap
    kind: Issuer
    group: cert-manager.io

---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: nfd-ca-issuer
  namespace: {{ include "node-feature-discovery.namespace" . }}
spec:
  ca:
    secretName: nfd-ca-cert
{{- end }}
|
|
@ -0,0 +1,133 @@
|
|||
{{- /*
Cluster-scoped RBAC rules for the NFD components.

Up to three ClusterRoles are rendered:
  <fullname>                   nfd-master (master.enable and master.rbac.create)
  <fullname>-topology-updater  nfd-topology-updater (topologyUpdater.enable and
                               topologyUpdater.rbac.create)
  <fullname>-gc                nfd-gc (gc.enable and gc.rbac.create, and either
                               the NodeFeature API or the topology updater
                               is enabled)
*/}}
{{- if and .Values.master.enable .Values.master.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  # Core Node objects and their status.
  - apiGroups: [""]
    resources: ["nodes", "nodes/status"]
    verbs: ["get", "patch", "update", "list"]
  # Read-only access to the NFD custom resources.
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures", "nodefeaturerules", "nodefeaturegroups"]
    verbs: ["get", "list", "watch"]
  # Status subresource of NodeFeatureGroup.
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeaturegroups/status"]
    verbs: ["patch", "update"]
  # Lease creation cannot be limited by resourceNames, so it is a separate rule.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["create"]
  # Reads and updates are limited to the nfd-master lease.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    resourceNames: ["nfd-master.nfd.kubernetes.io"]
    verbs: ["get", "update"]
{{- end }}

{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list"]
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get"]
  # NodeResourceTopology objects.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["create", "get", "update"]
{{- end }}

{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "watch"]
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  # Objects the garbage collector may list and delete.
  - apiGroups: ["topology.node.k8s.io"]
    resources: ["noderesourcetopologies"]
    verbs: ["delete", "list"]
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures"]
    verbs: ["delete", "list"]
{{- end }}
|
|
@ -0,0 +1,52 @@
|
|||
{{- /*
Bind each component ClusterRole to that component's ServiceAccount in the
chart namespace. The guards mirror those on the ClusterRole definitions, so a
binding is rendered exactly when its role is.
*/}}
{{- /* nfd-master */}}
{{- if and .Values.master.enable .Values.master.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.master.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}

{{- /* nfd-topology-updater */}}
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}

{{- /* nfd-gc */}}
{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-gc
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}
|
|
@ -0,0 +1,152 @@
|
|||
{{- /*
Deployment for nfd-master; rendered only when .Values.master.enable is set.

Layout note: container fields are indented 8 columns and list items sit at
the column of their parent key, so that `nindent 8` on master.extraEnvs
places the extra entries in line with the built-in NODE_NAME entry.
*/}}
{{- if .Values.master.enable }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: master
  {{- with .Values.master.deploymentAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  replicas: {{ .Values.master.replicaCount }}
  revisionHistoryLimit: {{ .Values.master.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: master
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: master
      {{- with .Values.master.annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      {{- with .Values.priorityClassName }}
      priorityClassName: {{ . }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "node-feature-discovery.master.serviceAccountName" . }}
      enableServiceLinks: false
      securityContext:
        {{- toYaml .Values.master.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.master.hostNetwork }}
      containers:
      - name: master
        securityContext:
            {{- toYaml .Values.master.securityContext | nindent 12 }}
        image: "{{ .Values.image.repository }}:{{ default .Chart.AppVersion .Values.image.tag }}"
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        livenessProbe:
            {{- toYaml .Values.master.livenessProbe | nindent 12 }}
        readinessProbe:
            {{- toYaml .Values.master.readinessProbe | nindent 12 }}
        ports:
        - containerPort: {{ default "8080" .Values.master.port }}
          name: grpc
        - containerPort: {{ default "8081" .Values.master.metricsPort }}
          name: metrics
        - containerPort: {{ default "8082" .Values.master.healthPort }}
          name: health
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        {{- with .Values.master.extraEnvs }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        command:
        - "nfd-master"
        resources:
            {{- toYaml .Values.master.resources | nindent 12 }}
        args:
        {{- if not (empty .Values.master.instance) }}
        - "-instance={{ .Values.master.instance }}"
        {{- end }}
        {{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
        - "-port={{ default "8080" .Values.master.port }}"
        {{- else if gt (int .Values.master.replicaCount) 1 }}
        - "-enable-leader-election"
        {{- end }}
        {{- if not (empty .Values.master.extraLabelNs) }}
        - "-extra-label-ns={{ join "," .Values.master.extraLabelNs }}"
        {{- end }}
        {{- if not (empty .Values.master.denyLabelNs) }}
        - "-deny-label-ns={{ join "," .Values.master.denyLabelNs }}"
        {{- end }}
        {{- if not (empty .Values.master.resourceLabels) }}
        - "-resource-labels={{ join "," .Values.master.resourceLabels }}"
        {{- end }}
        {{- if .Values.master.enableTaints }}
        - "-enable-taints"
        {{- end }}
        {{- if not (kindIs "invalid" .Values.master.crdController) }}
        - "-crd-controller={{ .Values.master.crdController }}"
        {{- else }}
        ## With no explicit setting, the CRD controller is enabled only for the
        ## default (unnamed) instance.
        - "-crd-controller={{ empty .Values.master.instance }}"
        {{- end }}
        {{- if not (kindIs "invalid" .Values.master.featureRulesController) }}
        - "-featurerules-controller={{ .Values.master.featureRulesController }}"
        {{- end }}
        {{- if .Values.master.resyncPeriod }}
        - "-resync-period={{ .Values.master.resyncPeriod }}"
        {{- end }}
        {{- if not (empty .Values.master.nfdApiParallelism) }}
        - "-nfd-api-parallelism={{ .Values.master.nfdApiParallelism }}"
        {{- end }}
        {{- if .Values.tls.enable }}
        - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
        - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
        - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
        {{- end }}
        # One -feature-gates flag per entry in .Values.featureGates
        {{- range $gate, $enabled := .Values.featureGates }}
        - "-feature-gates={{ $gate }}={{ $enabled }}"
        {{- end }}
        - "-metrics={{ default "8081" .Values.master.metricsPort }}"
        - "-grpc-health={{ default "8082" .Values.master.healthPort }}"
        volumeMounts:
        {{- if .Values.tls.enable }}
        - name: nfd-master-cert
          mountPath: "/etc/kubernetes/node-feature-discovery/certs"
          readOnly: true
        {{- end }}
        - name: nfd-master-conf
          mountPath: "/etc/kubernetes/node-feature-discovery"
          readOnly: true
      volumes:
      {{- if .Values.tls.enable }}
      - name: nfd-master-cert
        secret:
          secretName: nfd-master-cert
      {{- end }}
      - name: nfd-master-conf
        configMap:
          name: {{ include "node-feature-discovery.fullname" . }}-master-conf
          items:
          - key: nfd-master.conf
            path: nfd-master.conf
      {{- with .Values.master.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -0,0 +1,85 @@
|
|||
{{- /*
Deployment for nfd-gc (garbage collector).

Rendered when .Values.gc.enable is set and either the NodeFeature API
(featureGates.NodeFeatureAPI together with enableNodeFeatureApi) or the
topology updater is enabled.

.Values.gc.metricsPort is passed to the binary via -metrics as well as being
declared as the container port, mirroring how nfd-master is configured, so
the declared port and the listening port cannot drift apart.
NOTE(review): assumes the nfd-gc binary in the pinned image accepts -metrics;
confirm against the nfd-gc command line reference for this version.

Layout note: container fields are indented 8 columns and list items sit at
the column of their parent key, so that `nindent 8` on gc.extraEnvs places
the extra entries in line with the built-in NODE_NAME entry.
*/ -}}
{{- if and .Values.gc.enable (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-gc
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: gc
  {{- with .Values.gc.deploymentAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  replicas: {{ .Values.gc.replicaCount | default 1 }}
  revisionHistoryLimit: {{ .Values.gc.revisionHistoryLimit }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: gc
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: gc
      {{- with .Values.gc.annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      serviceAccountName: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
      dnsPolicy: ClusterFirstWithHostNet
      {{- with .Values.priorityClassName }}
      priorityClassName: {{ . }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.gc.podSecurityContext | nindent 8 }}
      hostNetwork: {{ .Values.gc.hostNetwork }}
      containers:
      - name: gc
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: "{{ .Values.image.pullPolicy }}"
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        {{- with .Values.gc.extraEnvs }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        command:
        - "nfd-gc"
        args:
        {{- if .Values.gc.interval | empty | not }}
        - "-gc-interval={{ .Values.gc.interval }}"
        {{- end }}
        # Keep the listening port in sync with the declared "metrics" port.
        - "-metrics={{ .Values.gc.metricsPort | default "8081" }}"
        resources:
            {{- toYaml .Values.gc.resources | nindent 12 }}
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop: [ "ALL" ]
          readOnlyRootFilesystem: true
          runAsNonRoot: true
        ports:
        - name: metrics
          containerPort: {{ .Values.gc.metricsPort | default "8081" }}

      {{- with .Values.gc.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.gc.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.gc.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
|||
{{- /*
ConfigMap holding the nfd-master configuration file.
.Values.master.config is serialized to YAML and stored under the key
"nfd-master.conf". Rendered only when .Values.master.enable is set.
*/}}
{{- if .Values.master.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-master.conf: |-
    {{- toYaml .Values.master.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
|||
{{- /*
ConfigMap holding the nfd-topology-updater configuration file.
.Values.topologyUpdater.config is serialized to YAML and stored under the key
"nfd-topology-updater.conf". Rendered only when
.Values.topologyUpdater.enable is set.
*/ -}}
{{- if .Values.topologyUpdater.enable -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-topology-updater.conf: |-
    {{- toYaml .Values.topologyUpdater.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,12 @@
|
|||
{{- /*
ConfigMap holding the nfd-worker configuration file.
.Values.worker.config is serialized to YAML and stored under the key
"nfd-worker.conf". Rendered only when .Values.worker.enable is set.
*/}}
{{- if .Values.worker.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-worker.conf: |-
    {{- toYaml .Values.worker.config | nindent 4 }}
{{- end }}
|
|
@ -0,0 +1,94 @@
|
|||
{{- /*
Post-delete hook that runs "nfd-master -prune".

Four hook resources are created after the release is deleted: a
ServiceAccount, a ClusterRole and ClusterRoleBinding granting
get/patch/update/list on nodes and nodes/status, and the Job itself. All of
them are removed before the hook is re-created and after it succeeds.

The Job uses the same image as nfd-master, so it also honours
.Values.imagePullSecrets; without them the hook could not pull from a
private registry.
*/ -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/status
    verbs:
      - get
      - patch
      - update
      - list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "node-feature-discovery.fullname" . }}-prune
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.fullname" . }}-prune
    namespace: {{ include "node-feature-discovery.namespace" . }}
---
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-prune
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.labels" . | nindent 8 }}
        role: prune
    spec:
      serviceAccountName: {{ include "node-feature-discovery.fullname" . }}-prune
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: nfd-master
          securityContext:
            {{- toYaml .Values.master.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command:
            - "nfd-master"
          args:
            - "-prune"
            {{- if .Values.master.instance | empty | not }}
            - "-instance={{ .Values.master.instance }}"
            {{- end }}
      restartPolicy: Never
      {{- with .Values.master.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.master.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
|
|
@ -0,0 +1,26 @@
|
|||
{{- /*
PodMonitor scraping the "metrics" port of the NFD pods.

Rendered only when .Values.prometheus.enable is set. Pods are selected in the
chart namespace by the app.kubernetes.io/instance and app.kubernetes.io/name
labels. Extra labels from .Values.prometheus.labels are merged into the
PodMonitor's own labels; that action is left-trimmed like every other toYaml
call in the chart so no whitespace-only line is left in the output.
*/}}
{{- if .Values.prometheus.enable }}
# Prometheus PodMonitor for the NFD metrics endpoints
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}
  labels:
    {{- include "node-feature-discovery.selectorLabels" . | nindent 4 }}
    {{- with .Values.prometheus.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  podMetricsEndpoints:
    - honorLabels: true
      interval: {{ .Values.prometheus.scrapeInterval }}
      path: /metrics
      port: metrics
      scheme: http
  namespaceSelector:
    matchNames:
      - {{ include "node-feature-discovery.namespace" . }}
  selector:
    matchExpressions:
      - {key: app.kubernetes.io/instance, operator: In, values: ["{{ .Release.Name }}"]}
      - {key: app.kubernetes.io/name, operator: In, values: ["{{ include "node-feature-discovery.name" . }}"]}
{{- end }}
|
|
@ -0,0 +1,24 @@
|
|||
{{- /*
Namespaced Role for nfd-worker; rendered when both .Values.worker.enable and
.Values.worker.rbac.create are set. It grants create/get/update on
NodeFeature objects and get on pods within the chart namespace.
*/}}
{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
  - apiGroups: ["nfd.k8s-sigs.io"]
    resources: ["nodefeatures"]
    verbs: ["create", "get", "update"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get"]
{{- end }}
|
|
@ -0,0 +1,18 @@
|
|||
{{- /*
Bind the "<fullname>-worker" Role to the nfd-worker ServiceAccount in the
chart namespace. Uses the same guard as the Role itself.
*/}}
{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-worker
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "node-feature-discovery.fullname" . }}-worker
subjects:
  - kind: ServiceAccount
    name: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
    namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
{{- /*
Service exposing the nfd-master "grpc" port.

Rendered only when nfd-master is enabled and the NodeFeature API is NOT fully
enabled (featureGates.NodeFeatureAPI together with enableNodeFeatureApi).
The service port comes from .Values.master.service.port, defaulting to 8080,
and targets the container port named "grpc".
*/}}
{{- if and (not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi)) .Values.master.enable }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-master
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: master
spec:
  type: {{ .Values.master.service.type }}
  ports:
    - name: grpc
      protocol: TCP
      port: {{ default "8080" .Values.master.service.port }}
      targetPort: grpc
  selector:
    {{- include "node-feature-discovery.selectorLabels" . | nindent 4 }}
    role: master
{{- end }}
|
|
@ -0,0 +1,58 @@
|
|||
{{- /*
ServiceAccounts for the NFD components. One account is rendered per component
when that component is enabled and its serviceAccount.create value is set.
The nfd-gc account additionally requires the NodeFeature API or the topology
updater to be enabled. Names come from the per-component
"*.serviceAccountName" helper templates.
*/}}
{{- /* nfd-master */}}
{{- if and .Values.master.enable .Values.master.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.master.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- with .Values.master.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

{{- /* nfd-topology-updater */}}
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.serviceAccount.create }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- with .Values.topologyUpdater.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

{{- /* nfd-gc */}}
{{- if and .Values.gc.enable .Values.gc.serviceAccount.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- with .Values.gc.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

{{- /* nfd-worker */}}
{{- if and .Values.worker.enable .Values.worker.serviceAccount.create }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
  {{- with .Values.worker.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
|
|
@ -0,0 +1,278 @@
|
|||
{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.createCRDs -}}
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1870
|
||||
controller-gen.kubebuilder.io/version: v0.11.2
|
||||
creationTimestamp: null
|
||||
name: noderesourcetopologies.topology.node.k8s.io
|
||||
spec:
|
||||
group: topology.node.k8s.io
|
||||
names:
|
||||
kind: NodeResourceTopology
|
||||
listKind: NodeResourceTopologyList
|
||||
plural: noderesourcetopologies
|
||||
shortNames:
|
||||
- node-res-topo
|
||||
singular: noderesourcetopology
|
||||
scope: Cluster
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: NodeResourceTopology describes node resources and their topology.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
topologyPolicies:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
zones:
|
||||
description: ZoneList contains an array of Zone objects.
|
||||
items:
|
||||
description: Zone represents a resource topology zone, e.g. socket,
|
||||
node, die or core.
|
||||
properties:
|
||||
attributes:
|
||||
description: AttributeList contains an array of AttributeInfo objects.
|
||||
items:
|
||||
description: AttributeInfo contains one attribute of a Zone.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
type: object
|
||||
type: array
|
||||
costs:
|
||||
description: CostList contains an array of CostInfo objects.
|
||||
items:
|
||||
description: CostInfo describes the cost (or distance) between
|
||||
two Zones.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
type: object
|
||||
type: array
|
||||
name:
|
||||
type: string
|
||||
parent:
|
||||
type: string
|
||||
resources:
|
||||
description: ResourceInfoList contains an array of ResourceInfo
|
||||
objects.
|
||||
items:
|
||||
description: ResourceInfo contains information about one resource
|
||||
type.
|
||||
properties:
|
||||
allocatable:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Allocatable quantity of the resource, corresponding
|
||||
to allocatable in node status, i.e. total amount of this
|
||||
resource available to be used by pods.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
available:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Available is the amount of this resource currently
|
||||
available for new (to be scheduled) pods, i.e. Allocatable
|
||||
minus the resources reserved by currently running pods.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
capacity:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Capacity of the resource, corresponding to capacity
|
||||
in node status, i.e. total amount of this resource that
|
||||
the node has.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
name:
|
||||
description: Name of the resource.
|
||||
type: string
|
||||
required:
|
||||
- allocatable
|
||||
- available
|
||||
- capacity
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
type:
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- topologyPolicies
|
||||
- zones
|
||||
type: object
|
||||
served: true
|
||||
storage: false
|
||||
- name: v1alpha2
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: NodeResourceTopology describes node resources and their topology.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
attributes:
|
||||
description: AttributeList contains an array of AttributeInfo objects.
|
||||
items:
|
||||
description: AttributeInfo contains one attribute of a Zone.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
type: object
|
||||
type: array
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
topologyPolicies:
|
||||
description: 'DEPRECATED (to be removed in v1beta1): use top level attributes
|
||||
if needed'
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
zones:
|
||||
description: ZoneList contains an array of Zone objects.
|
||||
items:
|
||||
description: Zone represents a resource topology zone, e.g. socket,
|
||||
node, die or core.
|
||||
properties:
|
||||
attributes:
|
||||
description: AttributeList contains an array of AttributeInfo objects.
|
||||
items:
|
||||
description: AttributeInfo contains one attribute of a Zone.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
type: object
|
||||
type: array
|
||||
costs:
|
||||
description: CostList contains an array of CostInfo objects.
|
||||
items:
|
||||
description: CostInfo describes the cost (or distance) between
|
||||
two Zones.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
type: object
|
||||
type: array
|
||||
name:
|
||||
type: string
|
||||
parent:
|
||||
type: string
|
||||
resources:
|
||||
description: ResourceInfoList contains an array of ResourceInfo
|
||||
objects.
|
||||
items:
|
||||
description: ResourceInfo contains information about one resource
|
||||
type.
|
||||
properties:
|
||||
allocatable:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Allocatable quantity of the resource, corresponding
|
||||
to allocatable in node status, i.e. total amount of this
|
||||
resource available to be used by pods.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
available:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Available is the amount of this resource currently
|
||||
available for new (to be scheduled) pods, i.e. Allocatable
|
||||
minus the resources reserved by currently running pods.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
capacity:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Capacity of the resource, corresponding to capacity
|
||||
in node status, i.e. total amount of this resource that
|
||||
the node has.
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
name:
|
||||
description: Name of the resource.
|
||||
type: string
|
||||
required:
|
||||
- allocatable
|
||||
- available
|
||||
- capacity
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
type:
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- zones
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
||||
{{- end }}
|
|
@ -0,0 +1,171 @@
|
|||
{{- if .Values.topologyUpdater.enable -}}
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-topology-updater
|
||||
namespace: {{ include "node-feature-discovery.namespace" . }}
|
||||
labels:
|
||||
{{- include "node-feature-discovery.labels" . | nindent 4 }}
|
||||
role: topology-updater
|
||||
{{- with .Values.topologyUpdater.daemonsetAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
revisionHistoryLimit: {{ .Values.topologyUpdater.revisionHistoryLimit }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
|
||||
role: topology-updater
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
|
||||
role: topology-updater
|
||||
{{- with .Values.topologyUpdater.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
{{- /* Optional chart-wide priority class. The value is quoted so that a class
       name that looks like a number or boolean still renders as a string. */}}
{{- with .Values.priorityClassName }}
priorityClassName: {{ . | quote }}
{{- end }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.topologyUpdater.podSecurityContext | nindent 8 }}
|
||||
hostNetwork: {{ .Values.topologyUpdater.hostNetwork }}
|
||||
containers:
|
||||
- name: topology-updater
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.topologyUpdater.livenessProbe | nindent 10 }}
|
||||
readinessProbe:
|
||||
{{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }}
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: NODE_ADDRESS
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.hostIP
|
||||
{{- with .Values.topologyUpdater.extraEnvs }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end}}
|
||||
command:
|
||||
- "nfd-topology-updater"
|
||||
args:
|
||||
- "-podresources-socket=/host-var/lib/kubelet-podresources/kubelet.sock"
|
||||
{{- if .Values.topologyUpdater.updateInterval | empty | not }}
|
||||
- "-sleep-interval={{ .Values.topologyUpdater.updateInterval }}"
|
||||
{{- else }}
|
||||
- "-sleep-interval=3s"
|
||||
{{- end }}
|
||||
{{- if .Values.topologyUpdater.watchNamespace | empty | not }}
|
||||
- "-watch-namespace={{ .Values.topologyUpdater.watchNamespace }}"
|
||||
{{- else }}
|
||||
- "-watch-namespace=*"
|
||||
{{- end }}
|
||||
{{- if .Values.tls.enable }}
|
||||
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
|
||||
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
|
||||
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
|
||||
{{- end }}
|
||||
{{- if not .Values.topologyUpdater.podSetFingerprint }}
|
||||
- "-pods-fingerprint=false"
|
||||
{{- end }}
|
||||
{{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }}
|
||||
- "-kubelet-config-uri=file:///host-var/kubelet-config"
|
||||
{{- end }}
|
||||
{{- if .Values.topologyUpdater.kubeletStateDir | empty }}
|
||||
# Disable kubelet state tracking by giving an empty path
|
||||
- "-kubelet-state-dir="
|
||||
{{- end }}
|
||||
- "-metrics={{ .Values.topologyUpdater.metricsPort | default "8081" }}"
|
||||
- "-grpc-health={{ .Values.topologyUpdater.healthPort | default "8082" }}"
|
||||
ports:
|
||||
- containerPort: {{ .Values.topologyUpdater.metricsPort | default "8081"}}
|
||||
name: metrics
|
||||
- containerPort: {{ .Values.topologyUpdater.healthPort | default "8082" }}
|
||||
name: health
|
||||
volumeMounts:
|
||||
{{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }}
|
||||
- name: kubelet-config
|
||||
mountPath: /host-var/kubelet-config
|
||||
{{- end }}
|
||||
- name: kubelet-podresources-sock
|
||||
mountPath: /host-var/lib/kubelet-podresources/kubelet.sock
|
||||
- name: host-sys
|
||||
mountPath: /host-sys
|
||||
{{- if .Values.topologyUpdater.kubeletStateDir | empty | not }}
|
||||
- name: kubelet-state-files
|
||||
mountPath: /host-var/lib/kubelet
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- if .Values.tls.enable }}
|
||||
- name: nfd-topology-updater-cert
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery/certs"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
- name: nfd-topology-updater-conf
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery"
|
||||
readOnly: true
|
||||
|
||||
resources:
|
||||
{{- toYaml .Values.topologyUpdater.resources | nindent 12 }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.topologyUpdater.securityContext | nindent 12 }}
|
||||
volumes:
|
||||
- name: host-sys
|
||||
hostPath:
|
||||
path: "/sys"
|
||||
{{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }}
|
||||
- name: kubelet-config
|
||||
hostPath:
|
||||
path: {{ .Values.topologyUpdater.kubeletConfigPath }}
|
||||
{{- end }}
|
||||
- name: kubelet-podresources-sock
|
||||
hostPath:
|
||||
{{- if .Values.topologyUpdater.kubeletPodResourcesSockPath | empty | not }}
|
||||
path: {{ .Values.topologyUpdater.kubeletPodResourcesSockPath }}
|
||||
{{- else }}
|
||||
path: /var/lib/kubelet/pod-resources/kubelet.sock
|
||||
{{- end }}
|
||||
{{- if .Values.topologyUpdater.kubeletStateDir | empty | not }}
|
||||
- name: kubelet-state-files
|
||||
hostPath:
|
||||
path: {{ .Values.topologyUpdater.kubeletStateDir }}
|
||||
{{- end }}
|
||||
- name: nfd-topology-updater-conf
|
||||
configMap:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
|
||||
items:
|
||||
- key: nfd-topology-updater.conf
|
||||
path: nfd-topology-updater.conf
|
||||
{{- if .Values.tls.enable }}
|
||||
- name: nfd-topology-updater-cert
|
||||
secret:
|
||||
secretName: nfd-topology-updater-cert
|
||||
{{- end }}
|
||||
|
||||
|
||||
{{- with .Values.topologyUpdater.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.topologyUpdater.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.topologyUpdater.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
|
@ -0,0 +1,186 @@
|
|||
{{- if .Values.worker.enable }}
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-worker
|
||||
namespace: {{ include "node-feature-discovery.namespace" . }}
|
||||
labels:
|
||||
{{- include "node-feature-discovery.labels" . | nindent 4 }}
|
||||
role: worker
|
||||
{{- with .Values.worker.daemonsetAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
revisionHistoryLimit: {{ .Values.worker.revisionHistoryLimit }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
|
||||
role: worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
|
||||
role: worker
|
||||
{{- with .Values.worker.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
{{- /* Chart-wide priority class. It is emitted only when no worker-specific
       override is set: .Values.worker.priorityClassName is rendered further
       down in this same pod spec, and emitting both would put a duplicate
       priorityClassName key into the manifest. The value is quoted to match
       the worker-specific stanza. */}}
{{- if not .Values.worker.priorityClassName }}
{{- with .Values.priorityClassName }}
priorityClassName: {{ . | quote }}
{{- end }}
{{- end }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.worker.podSecurityContext | nindent 8 }}
|
||||
hostNetwork: {{ .Values.worker.hostNetwork }}
|
||||
containers:
|
||||
- name: worker
|
||||
securityContext:
|
||||
{{- toYaml .Values.worker.securityContext | nindent 12 }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.worker.livenessProbe | nindent 12 }}
|
||||
readinessProbe:
|
||||
{{- toYaml .Values.worker.readinessProbe | nindent 12 }}
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: POD_UID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.uid
|
||||
{{- with .Values.worker.extraEnvs }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end}}
|
||||
resources:
|
||||
{{- toYaml .Values.worker.resources | nindent 12 }}
|
||||
command:
|
||||
- "nfd-worker"
|
||||
args:
|
||||
{{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
|
||||
- "-server={{ include "node-feature-discovery.fullname" . }}-master:{{ .Values.master.service.port }}"
|
||||
{{- end }}
|
||||
{{- if .Values.tls.enable }}
|
||||
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
|
||||
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
|
||||
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
|
||||
{{- end }}
|
||||
# Go over featureGate and add the feature-gate flag
|
||||
{{- range $key, $value := .Values.featureGates }}
|
||||
- "-feature-gates={{ $key }}={{ $value }}"
|
||||
{{- end }}
|
||||
- "-metrics={{ .Values.worker.metricsPort | default "8081"}}"
|
||||
- "-grpc-health={{ .Values.worker.healthPort | default "8082" }}"
|
||||
ports:
|
||||
- containerPort: {{ .Values.worker.metricsPort | default "8081"}}
|
||||
name: metrics
|
||||
- containerPort: {{ .Values.worker.healthPort | default "8082" }}
|
||||
name: health
|
||||
volumeMounts:
|
||||
- name: host-boot
|
||||
mountPath: "/host-boot"
|
||||
readOnly: true
|
||||
- name: host-os-release
|
||||
mountPath: "/host-etc/os-release"
|
||||
readOnly: true
|
||||
- name: host-sys
|
||||
mountPath: "/host-sys"
|
||||
readOnly: true
|
||||
- name: host-usr-lib
|
||||
mountPath: "/host-usr/lib"
|
||||
readOnly: true
|
||||
- name: host-lib
|
||||
mountPath: "/host-lib"
|
||||
readOnly: true
|
||||
- name: host-proc-swaps
|
||||
mountPath: "/host-proc/swaps"
|
||||
readOnly: true
|
||||
{{- if .Values.worker.mountUsrSrc }}
|
||||
- name: host-usr-src
|
||||
mountPath: "/host-usr/src"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
- name: source-d
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery/source.d/"
|
||||
readOnly: true
|
||||
- name: features-d
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery/features.d/"
|
||||
readOnly: true
|
||||
- name: nfd-worker-conf
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery"
|
||||
readOnly: true
|
||||
{{- if .Values.tls.enable }}
|
||||
- name: nfd-worker-cert
|
||||
mountPath: "/etc/kubernetes/node-feature-discovery/certs"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: host-boot
|
||||
hostPath:
|
||||
path: "/boot"
|
||||
- name: host-os-release
|
||||
hostPath:
|
||||
path: "/etc/os-release"
|
||||
- name: host-sys
|
||||
hostPath:
|
||||
path: "/sys"
|
||||
- name: host-usr-lib
|
||||
hostPath:
|
||||
path: "/usr/lib"
|
||||
- name: host-lib
|
||||
hostPath:
|
||||
path: "/lib"
|
||||
- name: host-proc-swaps
|
||||
hostPath:
|
||||
path: "/proc/swaps"
|
||||
{{- if .Values.worker.mountUsrSrc }}
|
||||
- name: host-usr-src
|
||||
hostPath:
|
||||
path: "/usr/src"
|
||||
{{- end }}
|
||||
- name: source-d
|
||||
hostPath:
|
||||
path: "/etc/kubernetes/node-feature-discovery/source.d/"
|
||||
- name: features-d
|
||||
hostPath:
|
||||
path: "/etc/kubernetes/node-feature-discovery/features.d/"
|
||||
- name: nfd-worker-conf
|
||||
configMap:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-worker-conf
|
||||
items:
|
||||
- key: nfd-worker.conf
|
||||
path: nfd-worker.conf
|
||||
{{- if .Values.tls.enable }}
|
||||
- name: nfd-worker-cert
|
||||
secret:
|
||||
secretName: nfd-worker-cert
|
||||
{{- end }}
|
||||
{{- with .Values.worker.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.worker.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.worker.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.worker.priorityClassName }}
|
||||
priorityClassName: {{ . | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
593
charts/gpu-operator/charts/node-feature-discovery/values.yaml
Normal file
593
charts/gpu-operator/charts/node-feature-discovery/values.yaml
Normal file
|
@ -0,0 +1,593 @@
|
|||
image:
|
||||
repository: registry.k8s.io/nfd/node-feature-discovery
|
||||
# This should be set to 'IfNotPresent' for released version
|
||||
pullPolicy: IfNotPresent
|
||||
# tag, if defined will use the given image tag, else Chart.AppVersion will be used
|
||||
# tag
|
||||
imagePullSecrets: []
|
||||
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
namespaceOverride: ""
|
||||
|
||||
enableNodeFeatureApi: true
|
||||
|
||||
featureGates:
|
||||
NodeFeatureAPI: true
|
||||
NodeFeatureGroupAPI: false
|
||||
|
||||
priorityClassName: ""
|
||||
|
||||
master:
|
||||
enable: true
|
||||
extraEnvs: []
|
||||
hostNetwork: false
|
||||
config: ### <NFD-MASTER-CONF-START-DO-NOT-REMOVE>
|
||||
# noPublish: false
|
||||
# autoDefaultNs: true
|
||||
# extraLabelNs: ["added.ns.io","added.kubernetes.io"]
|
||||
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
|
||||
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
|
||||
# enableTaints: false
|
||||
# labelWhiteList: "foo"
|
||||
# resyncPeriod: "2h"
|
||||
# klog:
|
||||
# addDirHeader: false
|
||||
# alsologtostderr: false
|
||||
# logBacktraceAt:
|
||||
# logtostderr: true
|
||||
# skipHeaders: false
|
||||
# stderrthreshold: 2
|
||||
# v: 0
|
||||
# vmodule:
|
||||
## NOTE: the following options are not dynamically run-time configurable
|
||||
## and require a nfd-master restart to take effect after being changed
|
||||
# logDir:
|
||||
# logFile:
|
||||
# logFileMaxSize: 1800
|
||||
# skipLogHeaders: false
|
||||
# leaderElection:
|
||||
# leaseDuration: 15s
|
||||
# # this value has to be lower than leaseDuration and greater than retryPeriod*1.2
|
||||
# renewDeadline: 10s
|
||||
# # this value has to be greater than 0
|
||||
# retryPeriod: 2s
|
||||
# nfdApiParallelism: 10
|
||||
### <NFD-MASTER-CONF-END-DO-NOT-REMOVE>
|
||||
# The TCP port that nfd-master listens for incoming requests. Default: 8080
|
||||
# Deprecated: this parameter is related to the deprecated gRPC API and will
|
||||
# be removed with it in a future release
|
||||
port: 8080
|
||||
metricsPort: 8081
|
||||
healthPort: 8082
|
||||
instance:
|
||||
featureApi:
|
||||
resyncPeriod:
|
||||
denyLabelNs: []
|
||||
extraLabelNs: []
|
||||
resourceLabels: []
|
||||
enableTaints: false
|
||||
crdController: null
|
||||
featureRulesController: null
|
||||
nfdApiParallelism: null
|
||||
deploymentAnnotations: {}
|
||||
replicaCount: 1
|
||||
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: [ "ALL" ]
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
# runAsUser: 1000
|
||||
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name:
|
||||
|
||||
# specify how many old ReplicaSets for the Deployment to retain.
|
||||
revisionHistoryLimit:
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8080
|
||||
|
||||
resources:
|
||||
limits:
|
||||
memory: 4Gi
|
||||
requests:
|
||||
cpu: 100m
|
||||
# You may want to use the same value for `requests.memory` and `limits.memory`. The “requests” value affects scheduling to accommodate pods on nodes.
|
||||
# If there is a large difference between “requests” and “limits” and nodes experience memory pressure, the kernel may invoke
|
||||
# the OOM Killer, even if the memory does not exceed the “limits” threshold. This can cause unexpected pod evictions. Memory
|
||||
# cannot be compressed and once allocated to a pod, it can only be reclaimed by killing the pod.
|
||||
# Natan Yellin 22/09/2022 https://home.robusta.dev/blog/kubernetes-memory-limit
|
||||
memory: 128Mi
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
|
||||
annotations: {}
|
||||
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 1
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: In
|
||||
values: [""]
|
||||
- weight: 1
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: In
|
||||
values: [""]
|
||||
|
||||
livenessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 10
|
||||
# failureThreshold: 3
|
||||
# periodSeconds: 10
|
||||
readinessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 5
|
||||
failureThreshold: 10
|
||||
# periodSeconds: 10
|
||||
|
||||
worker:
|
||||
enable: true
|
||||
extraEnvs: []
|
||||
hostNetwork: false
|
||||
config: ### <NFD-WORKER-CONF-START-DO-NOT-REMOVE>
|
||||
#core:
|
||||
# labelWhiteList:
|
||||
# noPublish: false
|
||||
# sleepInterval: 60s
|
||||
# featureSources: [all]
|
||||
# labelSources: [all]
|
||||
# klog:
|
||||
# addDirHeader: false
|
||||
# alsologtostderr: false
|
||||
# logBacktraceAt:
|
||||
# logtostderr: true
|
||||
# skipHeaders: false
|
||||
# stderrthreshold: 2
|
||||
# v: 0
|
||||
# vmodule:
|
||||
## NOTE: the following options are not dynamically run-time configurable
|
||||
## and require a nfd-worker restart to take effect after being changed
|
||||
# logDir:
|
||||
# logFile:
|
||||
# logFileMaxSize: 1800
|
||||
# skipLogHeaders: false
|
||||
#sources:
|
||||
# cpu:
|
||||
# cpuid:
|
||||
## NOTE: whitelist has priority over blacklist
|
||||
# attributeBlacklist:
|
||||
# - "AVX10"
|
||||
# - "BMI1"
|
||||
# - "BMI2"
|
||||
# - "CLMUL"
|
||||
# - "CMOV"
|
||||
# - "CX16"
|
||||
# - "ERMS"
|
||||
# - "F16C"
|
||||
# - "HTT"
|
||||
# - "LZCNT"
|
||||
# - "MMX"
|
||||
# - "MMXEXT"
|
||||
# - "NX"
|
||||
# - "POPCNT"
|
||||
# - "RDRAND"
|
||||
# - "RDSEED"
|
||||
# - "RDTSCP"
|
||||
# - "SGX"
|
||||
# - "SSE"
|
||||
# - "SSE2"
|
||||
# - "SSE3"
|
||||
# - "SSE4"
|
||||
# - "SSE42"
|
||||
# - "SSSE3"
|
||||
# - "TDX_GUEST"
|
||||
# attributeWhitelist:
|
||||
# kernel:
|
||||
# kconfigFile: "/path/to/kconfig"
|
||||
# configOpts:
|
||||
# - "NO_HZ"
|
||||
# - "X86"
|
||||
# - "DMI"
|
||||
# pci:
|
||||
# deviceClassWhitelist:
|
||||
# - "0200"
|
||||
# - "03"
|
||||
# - "12"
|
||||
# deviceLabelFields:
|
||||
# - "class"
|
||||
# - "vendor"
|
||||
# - "device"
|
||||
# - "subsystem_vendor"
|
||||
# - "subsystem_device"
|
||||
# usb:
|
||||
# deviceClassWhitelist:
|
||||
# - "0e"
|
||||
# - "ef"
|
||||
# - "fe"
|
||||
# - "ff"
|
||||
# deviceLabelFields:
|
||||
# - "class"
|
||||
# - "vendor"
|
||||
# - "device"
|
||||
# local:
|
||||
# hooksEnabled: false
|
||||
# custom:
|
||||
# # The following feature demonstrates the capabilities of the matchFeatures
|
||||
# - name: "my custom rule"
|
||||
# labels:
|
||||
# "vendor.io/my-ng-feature": "true"
|
||||
# # matchFeatures implements a logical AND over all matcher terms in the
|
||||
# # list (i.e. all of the terms, or per-feature matchers, must match)
|
||||
# matchFeatures:
|
||||
# - feature: cpu.cpuid
|
||||
# matchExpressions:
|
||||
# AVX512F: {op: Exists}
|
||||
# - feature: cpu.cstate
|
||||
# matchExpressions:
|
||||
# enabled: {op: IsTrue}
|
||||
# - feature: cpu.pstate
|
||||
# matchExpressions:
|
||||
# no_turbo: {op: IsFalse}
|
||||
# scaling_governor: {op: In, value: ["performance"]}
|
||||
# - feature: cpu.rdt
|
||||
# matchExpressions:
|
||||
# RDTL3CA: {op: Exists}
|
||||
# - feature: cpu.sst
|
||||
# matchExpressions:
|
||||
# bf.enabled: {op: IsTrue}
|
||||
# - feature: cpu.topology
|
||||
# matchExpressions:
|
||||
# hardware_multithreading: {op: IsFalse}
|
||||
#
|
||||
# - feature: kernel.config
|
||||
# matchExpressions:
|
||||
# X86: {op: Exists}
|
||||
# LSM: {op: InRegexp, value: ["apparmor"]}
|
||||
# - feature: kernel.loadedmodule
|
||||
# matchExpressions:
|
||||
# e1000e: {op: Exists}
|
||||
# - feature: kernel.selinux
|
||||
# matchExpressions:
|
||||
# enabled: {op: IsFalse}
|
||||
# - feature: kernel.version
|
||||
# matchExpressions:
|
||||
# major: {op: In, value: ["5"]}
|
||||
# minor: {op: Gt, value: ["10"]}
|
||||
#
|
||||
# - feature: storage.block
|
||||
# matchExpressions:
|
||||
# rotational: {op: In, value: ["0"]}
|
||||
# dax: {op: In, value: ["0"]}
|
||||
#
|
||||
# - feature: network.device
|
||||
# matchExpressions:
|
||||
# operstate: {op: In, value: ["up"]}
|
||||
# speed: {op: Gt, value: ["100"]}
|
||||
#
|
||||
# - feature: memory.numa
|
||||
# matchExpressions:
|
||||
# node_count: {op: Gt, value: ["2"]}
|
||||
# - feature: memory.nv
|
||||
# matchExpressions:
|
||||
# devtype: {op: In, value: ["nd_dax"]}
|
||||
# mode: {op: In, value: ["memory"]}
|
||||
#
|
||||
# - feature: system.osrelease
|
||||
# matchExpressions:
|
||||
# ID: {op: In, value: ["fedora", "centos"]}
|
||||
# - feature: system.name
|
||||
# matchExpressions:
|
||||
# nodename: {op: InRegexp, value: ["^worker-X"]}
|
||||
#
|
||||
# - feature: local.label
|
||||
# matchExpressions:
|
||||
# custom-feature-knob: {op: Gt, value: ["100"]}
|
||||
#
|
||||
# # The following feature demonstrates the capabilities of the matchAny
|
||||
# - name: "my matchAny rule"
|
||||
# labels:
|
||||
# "vendor.io/my-ng-feature-2": "my-value"
|
||||
# # matchAny implements a logical OR over all elements (sub-matchers) in
|
||||
# # the list (i.e. at least one feature matcher must match)
|
||||
# matchAny:
|
||||
# - matchFeatures:
|
||||
# - feature: kernel.loadedmodule
|
||||
# matchExpressions:
|
||||
# driver-module-X: {op: Exists}
|
||||
# - feature: pci.device
|
||||
# matchExpressions:
|
||||
# vendor: {op: In, value: ["8086"]}
|
||||
# class: {op: In, value: ["0200"]}
|
||||
# - matchFeatures:
|
||||
# - feature: kernel.loadedmodule
|
||||
# matchExpressions:
|
||||
# driver-module-Y: {op: Exists}
|
||||
# - feature: usb.device
|
||||
# matchExpressions:
|
||||
# vendor: {op: In, value: ["8086"]}
|
||||
# class: {op: In, value: ["02"]}
|
||||
#
|
||||
# - name: "avx wildcard rule"
|
||||
# labels:
|
||||
# "my-avx-feature": "true"
|
||||
# matchFeatures:
|
||||
# - feature: cpu.cpuid
|
||||
# matchName: {op: InRegexp, value: ["^AVX512"]}
|
||||
#
|
||||
# # The following features demonstrate label templating capabilities
|
||||
# - name: "my template rule"
|
||||
# labelsTemplate: |
|
||||
# {{ range .system.osrelease }}vendor.io/my-system-feature.{{ .Name }}={{ .Value }}
|
||||
# {{ end }}
|
||||
# matchFeatures:
|
||||
# - feature: system.osrelease
|
||||
# matchExpressions:
|
||||
# ID: {op: InRegexp, value: ["^open.*"]}
|
||||
# VERSION_ID.major: {op: In, value: ["13", "15"]}
|
||||
#
|
||||
# - name: "my template rule 2"
|
||||
# labelsTemplate: |
|
||||
# {{ range .pci.device }}vendor.io/my-pci-device.{{ .class }}-{{ .device }}=with-cpuid
|
||||
# {{ end }}
|
||||
# matchFeatures:
|
||||
# - feature: pci.device
|
||||
# matchExpressions:
|
||||
# class: {op: InRegexp, value: ["^06"]}
|
||||
# vendor: ["8086"]
|
||||
# - feature: cpu.cpuid
|
||||
# matchExpressions:
|
||||
# AVX: {op: Exists}
|
||||
#
|
||||
# # The following examples demonstrate vars field and back-referencing
|
||||
# # previous labels and vars
|
||||
# - name: "my dummy kernel rule"
|
||||
# labels:
|
||||
# "vendor.io/my.kernel.feature": "true"
|
||||
# matchFeatures:
|
||||
# - feature: kernel.version
|
||||
# matchExpressions:
|
||||
# major: {op: Gt, value: ["2"]}
|
||||
#
|
||||
# - name: "my dummy rule with no labels"
|
||||
# vars:
|
||||
# "my.dummy.var": "1"
|
||||
# matchFeatures:
|
||||
# - feature: cpu.cpuid
|
||||
# matchExpressions: {}
|
||||
#
|
||||
# - name: "my rule using backrefs"
|
||||
# labels:
|
||||
# "vendor.io/my.backref.feature": "true"
|
||||
# matchFeatures:
|
||||
# - feature: rule.matched
|
||||
# matchExpressions:
|
||||
# vendor.io/my.kernel.feature: {op: IsTrue}
|
||||
# my.dummy.var: {op: Gt, value: ["0"]}
|
||||
#
|
||||
# - name: "kconfig template rule"
|
||||
# labelsTemplate: |
|
||||
# {{ range .kernel.config }}kconfig-{{ .Name }}={{ .Value }}
|
||||
# {{ end }}
|
||||
# matchFeatures:
|
||||
# - feature: kernel.config
|
||||
# matchName: {op: In, value: ["SWAP", "X86", "ARM"]}
|
||||
### <NFD-WORKER-CONF-END-DO-NOT-REMOVE>
|
||||
|
||||
metricsPort: 8081
|
||||
healthPort: 8082
|
||||
daemonsetAnnotations: {}
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: [ "ALL" ]
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
# runAsUser: 1000
|
||||
|
||||
livenessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 10
|
||||
# failureThreshold: 3
|
||||
# periodSeconds: 10
|
||||
readinessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 5
|
||||
failureThreshold: 10
|
||||
# periodSeconds: 10
|
||||
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created.
|
||||
# We create this by default to make it easier for downstream users to apply PodSecurityPolicies.
|
||||
create: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name:
|
||||
|
||||
# specify how many old ControllerRevisions for the DaemonSet to retain.
|
||||
revisionHistoryLimit:
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
# Allow users to mount the hostPath /usr/src, useful for RHCOS on s390x
|
||||
# Does not work on systems without /usr/src AND a read-only /usr, such as Talos
|
||||
mountUsrSrc: false
|
||||
|
||||
resources:
|
||||
limits:
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 5m
|
||||
memory: 64Mi
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
annotations: {}
|
||||
|
||||
affinity: {}
|
||||
|
||||
priorityClassName: ""
|
||||
|
||||
topologyUpdater:
|
||||
config: ### <NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE>
|
||||
## key = node name, value = list of resources to be excluded.
|
||||
## use * to exclude from all nodes.
|
||||
## an example of what the exclude list should look like
|
||||
#excludeList:
|
||||
# node1: [cpu]
|
||||
# node2: [memory, example/deviceA]
|
||||
# *: [hugepages-2Mi]
|
||||
### <NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE>
|
||||
|
||||
enable: false
|
||||
createCRDs: false
|
||||
extraEnvs: []
|
||||
hostNetwork: false
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
name:
|
||||
|
||||
# specify how many old ControllerRevisions for the DaemonSet to retain.
|
||||
revisionHistoryLimit:
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
metricsPort: 8081
|
||||
healthPort: 8082
|
||||
kubeletConfigPath:
|
||||
kubeletPodResourcesSockPath:
|
||||
updateInterval: 60s
|
||||
watchNamespace: "*"
|
||||
kubeletStateDir: /var/lib/kubelet
|
||||
|
||||
podSecurityContext: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: [ "ALL" ]
|
||||
readOnlyRootFilesystem: true
|
||||
runAsUser: 0
|
||||
|
||||
livenessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 10
|
||||
# failureThreshold: 3
|
||||
# periodSeconds: 10
|
||||
readinessProbe:
|
||||
grpc:
|
||||
port: 8082
|
||||
initialDelaySeconds: 5
|
||||
failureThreshold: 10
|
||||
# periodSeconds: 10
|
||||
|
||||
resources:
|
||||
limits:
|
||||
memory: 60Mi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 40Mi
|
||||
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
annotations: {}
|
||||
daemonsetAnnotations: {}
|
||||
affinity: {}
|
||||
podSetFingerprint: true
|
||||
|
||||
gc:
|
||||
enable: true
|
||||
extraEnvs: []
|
||||
hostNetwork: false
|
||||
replicaCount: 1
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
name:
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
interval: 1h
|
||||
|
||||
podSecurityContext: {}
|
||||
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 128Mi
|
||||
|
||||
metricsPort: 8081
|
||||
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
annotations: {}
|
||||
deploymentAnnotations: {}
|
||||
affinity: {}
|
||||
|
||||
# specify how many old ReplicaSets for the Deployment to retain.
|
||||
revisionHistoryLimit:
|
||||
|
||||
# Optionally use encryption for worker <--> master comms
|
||||
# TODO: verify hostname is not yet supported
|
||||
#
|
||||
# If you do not enable certManager (and have it installed) you will
|
||||
# need to manually, or otherwise, provision the TLS certs as secrets
|
||||
tls:
|
||||
enable: false
|
||||
certManager: false
|
||||
certManagerCertificate:
|
||||
issuerKind:
|
||||
issuerName:
|
||||
|
||||
prometheus:
|
||||
enable: false
|
||||
scrapeInterval: 10s
|
||||
labels: {}
|
2384
charts/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Normal file
2384
charts/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Normal file
File diff suppressed because it is too large
Load diff
797
charts/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
Normal file
797
charts/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
Normal file
|
@ -0,0 +1,797 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
name: nvidiadrivers.nvidia.com
|
||||
spec:
|
||||
group: nvidia.com
|
||||
names:
|
||||
kind: NVIDIADriver
|
||||
listKind: NVIDIADriverList
|
||||
plural: nvidiadrivers
|
||||
shortNames:
|
||||
- nvd
|
||||
- nvdriver
|
||||
- nvdrivers
|
||||
singular: nvidiadriver
|
||||
scope: Cluster
|
||||
versions:
|
||||
- additionalPrinterColumns:
|
||||
- jsonPath: .status.state
|
||||
name: Status
|
||||
type: string
|
||||
- jsonPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: string
|
||||
name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: NVIDIADriver is the Schema for the nvidiadrivers API
|
||||
properties:
|
||||
apiVersion:
|
||||
description: |-
|
||||
APIVersion defines the versioned schema of this representation of an object.
|
||||
Servers should convert recognized schemas to the latest internal value, and
|
||||
may reject unrecognized values.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||
type: string
|
||||
kind:
|
||||
description: |-
|
||||
Kind is a string value representing the REST resource this object represents.
|
||||
Servers may infer this from the endpoint the client submits requests to.
|
||||
Cannot be updated.
|
||||
In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: NVIDIADriverSpec defines the desired state of NVIDIADriver
|
||||
properties:
|
||||
annotations:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: |-
|
||||
Optional: Annotations is an unstructured key value map stored with a resource that may be
|
||||
set by external tools to store and retrieve arbitrary metadata. They are not
|
||||
queryable and should be preserved when modifying objects.
|
||||
type: object
|
||||
args:
|
||||
description: 'Optional: List of arguments'
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
certConfig:
|
||||
description: 'Optional: Custom certificates configuration for NVIDIA
|
||||
Driver container'
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
driverType:
|
||||
default: gpu
|
||||
description: DriverType defines NVIDIA driver type
|
||||
enum:
|
||||
- gpu
|
||||
- vgpu
|
||||
- vgpu-host-manager
|
||||
type: string
|
||||
x-kubernetes-validations:
|
||||
- message: driverType is an immutable field. Please create a new NvidiaDriver
|
||||
resource instead when you want to change this setting.
|
||||
rule: self == oldSelf
|
||||
env:
|
||||
description: 'Optional: List of environment variables'
|
||||
items:
|
||||
description: EnvVar represents an environment variable present in
|
||||
a Container.
|
||||
properties:
|
||||
name:
|
||||
description: Name of the environment variable.
|
||||
type: string
|
||||
value:
|
||||
description: Value of the environment variable.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
gdrcopy:
|
||||
description: GDRCopy defines the spec for GDRCopy driver
|
||||
properties:
|
||||
args:
|
||||
description: 'Optional: List of arguments'
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
enabled:
|
||||
description: Enabled indicates if GDRCopy is enabled through GPU
|
||||
operator
|
||||
type: boolean
|
||||
env:
|
||||
description: 'Optional: List of environment variables'
|
||||
items:
|
||||
description: EnvVar represents an environment variable present
|
||||
in a Container.
|
||||
properties:
|
||||
name:
|
||||
description: Name of the environment variable.
|
||||
type: string
|
||||
value:
|
||||
description: Value of the environment variable.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
image:
|
||||
description: GDRCopy driver image name
|
||||
pattern: '[a-zA-Z0-9\-]+'
|
||||
type: string
|
||||
imagePullPolicy:
|
||||
description: Image pull policy
|
||||
type: string
|
||||
imagePullSecrets:
|
||||
description: Image pull secrets
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
repository:
|
||||
description: GDRCopy driver image repository
|
||||
type: string
|
||||
version:
|
||||
description: GDRCopy driver image tag
|
||||
type: string
|
||||
type: object
|
||||
gds:
|
||||
description: GPUDirectStorage defines the spec for GDS driver
|
||||
properties:
|
||||
args:
|
||||
description: 'Optional: List of arguments'
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
enabled:
|
||||
description: Enabled indicates if GPUDirect Storage is enabled
|
||||
through GPU operator
|
||||
type: boolean
|
||||
env:
|
||||
description: 'Optional: List of environment variables'
|
||||
items:
|
||||
description: EnvVar represents an environment variable present
|
||||
in a Container.
|
||||
properties:
|
||||
name:
|
||||
description: Name of the environment variable.
|
||||
type: string
|
||||
value:
|
||||
description: Value of the environment variable.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
image:
|
||||
description: NVIDIA GPUDirect Storage Driver image name
|
||||
pattern: '[a-zA-Z0-9\-]+'
|
||||
type: string
|
||||
imagePullPolicy:
|
||||
description: Image pull policy
|
||||
type: string
|
||||
imagePullSecrets:
|
||||
description: Image pull secrets
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
repository:
|
||||
description: NVIDIA GPUDirect Storage Driver image repository
|
||||
type: string
|
||||
version:
|
||||
description: NVIDIA GPUDirect Storage Driver image tag
|
||||
type: string
|
||||
type: object
|
||||
image:
|
||||
default: nvcr.io/nvidia/driver
|
||||
description: NVIDIA Driver container image name
|
||||
type: string
|
||||
imagePullPolicy:
|
||||
description: Image pull policy
|
||||
type: string
|
||||
imagePullSecrets:
|
||||
description: Image pull secrets
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
kernelModuleConfig:
|
||||
description: 'Optional: Kernel module configuration parameters for
|
||||
the NVIDIA Driver'
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: |-
|
||||
Optional: Map of string keys and values that can be used to organize and categorize
|
||||
(scope and select) objects. May match selectors of replication controllers
|
||||
and services.
|
||||
type: object
|
||||
licensingConfig:
|
||||
description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
nlsEnabled:
|
||||
description: NLSEnabled indicates if NVIDIA Licensing System is
|
||||
used for licensing.
|
||||
type: boolean
|
||||
type: object
|
||||
livenessProbe:
|
||||
description: NVIDIA Driver container liveness probe settings
|
||||
properties:
|
||||
failureThreshold:
|
||||
description: |-
|
||||
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||
Defaults to 3. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
initialDelaySeconds:
|
||||
description: |-
|
||||
Number of seconds after the container has started before liveness probes are initiated.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
type: integer
|
||||
periodSeconds:
|
||||
description: |-
|
||||
How often (in seconds) to perform the probe.
|
||||
Default to 10 seconds. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
successThreshold:
|
||||
description: |-
|
||||
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
timeoutSeconds:
|
||||
description: |-
|
||||
Number of seconds after which the probe times out.
|
||||
Defaults to 1 second. Minimum value is 1.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
type: object
|
||||
manager:
|
||||
description: Manager represents configuration for NVIDIA Driver Manager
|
||||
initContainer
|
||||
properties:
|
||||
env:
|
||||
description: 'Optional: List of environment variables'
|
||||
items:
|
||||
description: EnvVar represents an environment variable present
|
||||
in a Container.
|
||||
properties:
|
||||
name:
|
||||
description: Name of the environment variable.
|
||||
type: string
|
||||
value:
|
||||
description: Value of the environment variable.
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
image:
|
||||
description: Image represents NVIDIA Driver Manager image name
|
||||
pattern: '[a-zA-Z0-9\-]+'
|
||||
type: string
|
||||
imagePullPolicy:
|
||||
description: Image pull policy
|
||||
type: string
|
||||
imagePullSecrets:
|
||||
description: Image pull secrets
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
repository:
|
||||
description: Repository represents Driver Manager repository path
|
||||
type: string
|
||||
version:
|
||||
description: Version represents NVIDIA Driver Manager image tag(version)
|
||||
type: string
|
||||
type: object
|
||||
nodeAffinity:
|
||||
description: Affinity specifies node affinity rules for driver pods
|
||||
properties:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
description: |-
|
||||
The scheduler will prefer to schedule pods to nodes that satisfy
|
||||
the affinity expressions specified by this field, but it may choose
|
||||
a node that violates one or more of the expressions. The node that is
|
||||
most preferred is the one with the greatest sum of weights, i.e.
|
||||
for each node that meets all of the scheduling requirements (resource
|
||||
request, requiredDuringScheduling affinity expressions, etc.),
|
||||
compute a sum by iterating through the elements of this field and adding
|
||||
"weight" to the sum if the node matches the corresponding matchExpressions; the
|
||||
node(s) with the highest sum are the most preferred.
|
||||
items:
|
||||
description: |-
|
||||
An empty preferred scheduling term matches all objects with implicit weight 0
|
||||
(i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
|
||||
properties:
|
||||
preference:
|
||||
description: A node selector term, associated with the corresponding
|
||||
weight.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: A list of node selector requirements by
|
||||
node's labels.
|
||||
items:
|
||||
description: |-
|
||||
A node selector requirement is a selector that contains values, a key, and an operator
|
||||
that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: The label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: |-
|
||||
Represents a key's relationship to a set of values.
|
||||
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
|
||||
type: string
|
||||
values:
|
||||
description: |-
|
||||
An array of string values. If the operator is In or NotIn,
|
||||
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||
the values array must be empty. If the operator is Gt or Lt, the values
|
||||
array must have a single element, which will be interpreted as an integer.
|
||||
This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
matchFields:
|
||||
description: A list of node selector requirements by
|
||||
node's fields.
|
||||
items:
|
||||
description: |-
|
||||
A node selector requirement is a selector that contains values, a key, and an operator
|
||||
that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: The label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: |-
|
||||
Represents a key's relationship to a set of values.
|
||||
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
|
||||
type: string
|
||||
values:
|
||||
description: |-
|
||||
An array of string values. If the operator is In or NotIn,
|
||||
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||
the values array must be empty. If the operator is Gt or Lt, the values
|
||||
array must have a single element, which will be interpreted as an integer.
|
||||
This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
weight:
|
||||
description: Weight associated with matching the corresponding
|
||||
nodeSelectorTerm, in the range 1-100.
|
||||
format: int32
|
||||
type: integer
|
||||
required:
|
||||
- preference
|
||||
- weight
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
description: |-
|
||||
If the affinity requirements specified by this field are not met at
|
||||
scheduling time, the pod will not be scheduled onto the node.
|
||||
If the affinity requirements specified by this field cease to be met
|
||||
at some point during pod execution (e.g. due to an update), the system
|
||||
may or may not try to eventually evict the pod from its node.
|
||||
properties:
|
||||
nodeSelectorTerms:
|
||||
description: Required. A list of node selector terms. The
|
||||
terms are ORed.
|
||||
items:
|
||||
description: |-
|
||||
A null or empty node selector term matches no objects. The requirements of
|
||||
them are ANDed.
|
||||
The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: A list of node selector requirements by
|
||||
node's labels.
|
||||
items:
|
||||
description: |-
|
||||
A node selector requirement is a selector that contains values, a key, and an operator
|
||||
that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: The label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: |-
|
||||
Represents a key's relationship to a set of values.
|
||||
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
|
||||
type: string
|
||||
values:
|
||||
description: |-
|
||||
An array of string values. If the operator is In or NotIn,
|
||||
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||
the values array must be empty. If the operator is Gt or Lt, the values
|
||||
array must have a single element, which will be interpreted as an integer.
|
||||
This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
matchFields:
|
||||
description: A list of node selector requirements by
|
||||
node's fields.
|
||||
items:
|
||||
description: |-
|
||||
A node selector requirement is a selector that contains values, a key, and an operator
|
||||
that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: The label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: |-
|
||||
Represents a key's relationship to a set of values.
|
||||
Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
|
||||
type: string
|
||||
values:
|
||||
description: |-
|
||||
An array of string values. If the operator is In or NotIn,
|
||||
the values array must be non-empty. If the operator is Exists or DoesNotExist,
|
||||
the values array must be empty. If the operator is Gt or Lt, the values
|
||||
array must have a single element, which will be interpreted as an integer.
|
||||
This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: array
|
||||
x-kubernetes-list-type: atomic
|
||||
required:
|
||||
- nodeSelectorTerms
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
nodeSelector:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: NodeSelector specifies a selector for installation of
|
||||
NVIDIA driver
|
||||
type: object
|
||||
priorityClassName:
|
||||
description: 'Optional: Set priorityClassName'
|
||||
type: string
|
||||
rdma:
|
||||
description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
|
||||
driver
|
||||
properties:
|
||||
enabled:
|
||||
description: Enabled indicates if GPUDirect RDMA is enabled through
|
||||
GPU operator
|
||||
type: boolean
|
||||
useHostMofed:
|
||||
description: UseHostMOFED indicates to use MOFED drivers directly
|
||||
installed on the host to enable GPUDirect RDMA
|
||||
type: boolean
|
||||
type: object
|
||||
readinessProbe:
|
||||
description: NVIDIA Driver container readiness probe settings
|
||||
properties:
|
||||
failureThreshold:
|
||||
description: |-
|
||||
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||
Defaults to 3. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
initialDelaySeconds:
|
||||
description: |-
|
||||
Number of seconds after the container has started before readiness probes are initiated.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
type: integer
|
||||
periodSeconds:
|
||||
description: |-
|
||||
How often (in seconds) to perform the probe.
|
||||
Default to 10 seconds. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
successThreshold:
|
||||
description: |-
|
||||
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
timeoutSeconds:
|
||||
description: |-
|
||||
Number of seconds after which the probe times out.
|
||||
Defaults to 1 second. Minimum value is 1.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
type: object
|
||||
repoConfig:
|
||||
description: 'Optional: Custom repo configuration for NVIDIA Driver
|
||||
container'
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
repository:
|
||||
description: NVIDIA Driver repository
|
||||
type: string
|
||||
resources:
|
||||
description: 'Optional: Define resources requests and limits for each
|
||||
pod'
|
||||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: |-
|
||||
Limits describes the maximum amount of compute resources allowed.
|
||||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: |-
|
||||
Requests describes the minimum amount of compute resources required.
|
||||
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
|
||||
otherwise to an implementation-defined value. Requests cannot exceed Limits.
|
||||
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||||
type: object
|
||||
type: object
|
||||
startupProbe:
|
||||
description: NVIDIA Driver container startup probe settings
|
||||
properties:
|
||||
failureThreshold:
|
||||
description: |-
|
||||
Minimum consecutive failures for the probe to be considered failed after having succeeded.
|
||||
Defaults to 3. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
initialDelaySeconds:
|
||||
description: |-
|
||||
Number of seconds after the container has started before liveness probes are initiated.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
type: integer
|
||||
periodSeconds:
|
||||
description: |-
|
||||
How often (in seconds) to perform the probe.
|
||||
Default to 10 seconds. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
successThreshold:
|
||||
description: |-
|
||||
Minimum consecutive successes for the probe to be considered successful after having failed.
|
||||
Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
timeoutSeconds:
|
||||
description: |-
|
||||
Number of seconds after which the probe times out.
|
||||
Defaults to 1 second. Minimum value is 1.
|
||||
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
type: object
|
||||
tolerations:
|
||||
description: 'Optional: Set tolerations'
|
||||
items:
|
||||
description: |-
|
||||
The pod this Toleration is attached to tolerates any taint that matches
|
||||
the triple <key,value,effect> using the matching operator <operator>.
|
||||
properties:
|
||||
effect:
|
||||
description: |-
|
||||
Effect indicates the taint effect to match. Empty means match all taint effects.
|
||||
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
|
||||
type: string
|
||||
key:
|
||||
description: |-
|
||||
Key is the taint key that the toleration applies to. Empty means match all taint keys.
|
||||
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
|
||||
type: string
|
||||
operator:
|
||||
description: |-
|
||||
Operator represents a key's relationship to the value.
|
||||
Valid operators are Exists and Equal. Defaults to Equal.
|
||||
Exists is equivalent to wildcard for value, so that a pod can
|
||||
tolerate all taints of a particular category.
|
||||
type: string
|
||||
tolerationSeconds:
|
||||
description: |-
|
||||
TolerationSeconds represents the period of time the toleration (which must be
|
||||
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
|
||||
it is not set, which means tolerate the taint forever (do not evict). Zero and
|
||||
negative values will be treated as 0 (evict immediately) by the system.
|
||||
format: int64
|
||||
type: integer
|
||||
value:
|
||||
description: |-
|
||||
Value is the taint value the toleration matches to.
|
||||
If the operator is Exists, the value should be empty, otherwise just a regular string.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
useOpenKernelModules:
|
||||
description: UseOpenKernelModules indicates if the open GPU kernel
|
||||
modules should be used
|
||||
type: boolean
|
||||
usePrecompiled:
|
||||
description: UsePrecompiled indicates if deployment of NVIDIA Driver
|
||||
using pre-compiled modules is enabled
|
||||
type: boolean
|
||||
x-kubernetes-validations:
|
||||
- message: usePrecompiled is an immutable field. Please create a new
|
||||
NvidiaDriver resource instead when you want to change this setting.
|
||||
rule: self == oldSelf
|
||||
version:
|
||||
description: NVIDIA Driver version (or just branch for precompiled
|
||||
drivers)
|
||||
type: string
|
||||
virtualTopologyConfig:
|
||||
description: 'Optional: Virtual Topology Daemon configuration for
|
||||
NVIDIA vGPU drivers'
|
||||
properties:
|
||||
name:
|
||||
description: 'Optional: Config name representing virtual topology
|
||||
daemon configuration file nvidia-topologyd.conf'
|
||||
type: string
|
||||
type: object
|
||||
required:
|
||||
- driverType
|
||||
- image
|
||||
type: object
|
||||
status:
|
||||
description: NVIDIADriverStatus defines the observed state of NVIDIADriver
|
||||
properties:
|
||||
conditions:
|
||||
description: Conditions is a list of conditions representing the NVIDIADriver's
|
||||
current state.
|
||||
items:
|
||||
description: Condition contains details for one aspect of the current
|
||||
state of this API Resource.
|
||||
properties:
|
||||
lastTransitionTime:
|
||||
description: |-
|
||||
lastTransitionTime is the last time the condition transitioned from one status to another.
|
||||
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
|
||||
format: date-time
|
||||
type: string
|
||||
message:
|
||||
description: |-
|
||||
message is a human readable message indicating details about the transition.
|
||||
This may be an empty string.
|
||||
maxLength: 32768
|
||||
type: string
|
||||
observedGeneration:
|
||||
description: |-
|
||||
observedGeneration represents the .metadata.generation that the condition was set based upon.
|
||||
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
|
||||
with respect to the current state of the instance.
|
||||
format: int64
|
||||
minimum: 0
|
||||
type: integer
|
||||
reason:
|
||||
description: |-
|
||||
reason contains a programmatic identifier indicating the reason for the condition's last transition.
|
||||
Producers of specific condition types may define expected values and meanings for this field,
|
||||
and whether the values are considered a guaranteed API.
|
||||
The value should be a CamelCase string.
|
||||
This field may not be empty.
|
||||
maxLength: 1024
|
||||
minLength: 1
|
||||
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
|
||||
type: string
|
||||
status:
|
||||
description: status of the condition, one of True, False, Unknown.
|
||||
enum:
|
||||
- "True"
|
||||
- "False"
|
||||
- Unknown
|
||||
type: string
|
||||
type:
|
||||
description: type of condition in CamelCase or in foo.example.com/CamelCase.
|
||||
maxLength: 316
|
||||
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
|
||||
type: string
|
||||
required:
|
||||
- lastTransitionTime
|
||||
- message
|
||||
- reason
|
||||
- status
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
namespace:
|
||||
description: Namespace indicates a namespace in which the operator
|
||||
and driver are installed
|
||||
type: string
|
||||
state:
|
||||
description: |-
|
||||
INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
|
||||
Important: Run "make" to regenerate code after modifying this file
|
||||
State indicates status of NVIDIADriver instance
|
||||
enum:
|
||||
- ignored
|
||||
- ready
|
||||
- notReady
|
||||
type: string
|
||||
required:
|
||||
- state
|
||||
type: object
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
80
charts/gpu-operator/templates/_helpers.tpl
Normal file
80
charts/gpu-operator/templates/_helpers.tpl
Normal file
|
@ -0,0 +1,80 @@
|
|||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
|
||||
Expand the name of the chart: .Values.nameOverride when set, otherwise .Chart.Name, truncated to 63 characters with any trailing "-" removed.
|
||||
*/}}
|
||||
{{- define "gpu-operator.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If .Values.fullnameOverride is set it is used (truncated) and nothing else is consulted. Otherwise, if the release name already contains the chart name (or .Values.nameOverride), the release name alone is used; else the result is "<release name>-<name>".
|
||||
*/}}
|
||||
{{- define "gpu-operator.fullname" -}}
|
||||
{{- if .Values.fullnameOverride -}}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride -}}
|
||||
{{- if contains $name .Release.Name -}}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label: "<chart name>-<chart version>" with every "+" replaced by "_", truncated to 63 characters with any trailing "-" removed.
|
||||
*/}}
|
||||
{{- define "gpu-operator.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Common labels: app name, chart, release instance, app version (only when .Chart.AppVersion is set) and managed-by, followed by any extra labels from .Values.operator.labels.
|
||||
*/}}
|
||||
|
||||
{{- define "gpu-operator.labels" -}}
|
||||
app.kubernetes.io/name: {{ include "gpu-operator.name" . }}
|
||||
helm.sh/chart: {{ include "gpu-operator.chart" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Values.operator.labels }}
|
||||
{{ toYaml .Values.operator.labels }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
{{/* Operand labels: the chart label, managed-by set to the chart name (not the Helm release service), followed by any extra labels from .Values.daemonsets.labels. */}}
{{- define "gpu-operator.operand-labels" -}}
|
||||
helm.sh/chart: {{ include "gpu-operator.chart" . }}
|
||||
app.kubernetes.io/managed-by: {{ include "gpu-operator.name" . }}
|
||||
{{- if .Values.daemonsets.labels }}
|
||||
{{ toYaml .Values.daemonsets.labels }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
{{/* Match labels: app name, release instance, app version (only when .Chart.AppVersion is set) and managed-by. NOTE(review): if this is used as a workload selector, the version label changes whenever appVersion changes - confirm against the callers. */}}
{{- define "gpu-operator.matchLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "gpu-operator.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Full operator image name with tag: "<repository>/<image>:<version>" from .Values.operator, where the tag falls back to .Chart.AppVersion when .Values.operator.version is not set.
|
||||
*/}}
|
||||
{{- define "gpu-operator.fullimage" -}}
|
||||
{{- .Values.operator.repository -}}/{{- .Values.operator.image -}}:{{- .Values.operator.version | default .Chart.AppVersion -}}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Full driver manager image name with tag: "<repository>/<image>:<version>" from .Values.driver.manager. Unlike the operator image there is no fallback for the tag, so .Values.driver.manager.version must be set.
|
||||
*/}}
|
||||
{{- define "driver-manager.fullimage" -}}
|
||||
{{- .Values.driver.manager.repository -}}/{{- .Values.driver.manager.image -}}:{{- .Values.driver.manager.version -}}
|
||||
{{- end }}
|
45
charts/gpu-operator/templates/cleanup_crd.yaml
Normal file
45
charts/gpu-operator/templates/cleanup_crd.yaml
Normal file
|
@ -0,0 +1,45 @@
|
|||
{{- /*
Pre-delete hook Job, rendered only when .Values.operator.cleanupCRD is set.
It runs a shell in the operator image (gpu-operator.fullimage) under the
"gpu-operator" service account and deletes the "cluster-policy" ClusterPolicy
followed by the clusterpolicies.nvidia.com CRD. The Job is removed once it
succeeds and before the hook is created again.
NOTE(review): only the ClusterPolicy CRD is deleted here; confirm whether any
other CRDs installed with this chart (e.g. the NVIDIADriver CRD) are meant to
be left behind on uninstall.
*/}}
{{- if .Values.operator.cleanupCRD }}
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: gpu-operator-cleanup-crd
|
||||
namespace: {{ .Release.Namespace }}
|
||||
annotations:
|
||||
"helm.sh/hook": pre-delete
|
||||
"helm.sh/hook-weight": "1"
|
||||
"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
name: gpu-operator-cleanup-crd
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 8 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
spec:
|
||||
serviceAccountName: gpu-operator
|
||||
{{- if .Values.operator.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- range .Values.operator.imagePullSecrets }}
|
||||
- name: {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.operator.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: cleanup-crd
|
||||
image: {{ include "gpu-operator.fullimage" . }}
|
||||
imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
kubectl delete clusterpolicy cluster-policy;
|
||||
kubectl delete crd clusterpolicies.nvidia.com;
|
||||
|
||||
restartPolicy: OnFailure
|
||||
{{- end }}
|
683
charts/gpu-operator/templates/clusterpolicy.yaml
Normal file
683
charts/gpu-operator/templates/clusterpolicy.yaml
Normal file
|
@ -0,0 +1,683 @@
|
|||
apiVersion: nvidia.com/v1
|
||||
kind: ClusterPolicy
|
||||
metadata:
|
||||
name: cluster-policy
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
{{- if .Values.operator.cleanupCRD }}
|
||||
# CR cleanup is handled during pre-delete hook
|
||||
# Add below annotation so that helm doesn't attempt to cleanup CR twice
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep
|
||||
{{- end }}
|
||||
spec:
|
||||
hostPaths:
|
||||
rootFS: {{ .Values.hostPaths.rootFS }}
|
||||
driverInstallDir: {{ .Values.hostPaths.driverInstallDir }}
|
||||
operator:
|
||||
{{- if .Values.operator.defaultRuntime }}
|
||||
defaultRuntime: {{ .Values.operator.defaultRuntime }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.runtimeClass }}
|
||||
runtimeClass: {{ .Values.operator.runtimeClass }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.defaultGPUMode }}
|
||||
defaultGPUMode: {{ .Values.operator.defaultGPUMode }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.initContainer }}
|
||||
initContainer:
|
||||
{{- if .Values.operator.initContainer.repository }}
|
||||
repository: {{ .Values.operator.initContainer.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.initContainer.image }}
|
||||
image: {{ .Values.operator.initContainer.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.initContainer.version }}
|
||||
version: {{ .Values.operator.initContainer.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.initContainer.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.operator.initContainer.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.initContainer.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.operator.initContainer.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.use_ocp_driver_toolkit }}
|
||||
use_ocp_driver_toolkit: {{ .Values.operator.use_ocp_driver_toolkit }}
|
||||
{{- end }}
|
||||
daemonsets:
|
||||
labels:
|
||||
{{- include "gpu-operator.operand-labels" . | nindent 6 }}
|
||||
{{- if .Values.daemonsets.annotations }}
|
||||
annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.daemonsets.tolerations }}
|
||||
tolerations: {{ toYaml .Values.daemonsets.tolerations | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.daemonsets.priorityClassName }}
|
||||
priorityClassName: {{ .Values.daemonsets.priorityClassName }}
|
||||
{{- end }}
|
||||
{{- if .Values.daemonsets.updateStrategy }}
|
||||
updateStrategy: {{ .Values.daemonsets.updateStrategy }}
|
||||
{{- end }}
|
||||
{{- if .Values.daemonsets.rollingUpdate }}
|
||||
rollingUpdate:
|
||||
maxUnavailable: {{ .Values.daemonsets.rollingUpdate.maxUnavailable | quote }}
|
||||
{{- end }}
|
||||
validator:
|
||||
{{- if .Values.validator.repository }}
|
||||
repository: {{ .Values.validator.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.image }}
|
||||
image: {{ .Values.validator.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.validator.version | default .Chart.AppVersion | quote }}
|
||||
{{- if .Values.validator.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.validator.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.validator.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.resources }}
|
||||
resources: {{ toYaml .Values.validator.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.env }}
|
||||
env: {{ toYaml .Values.validator.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.args }}
|
||||
args: {{ toYaml .Values.validator.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.plugin }}
|
||||
plugin:
|
||||
{{- if .Values.validator.plugin.env }}
|
||||
env: {{ toYaml .Values.validator.plugin.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.cuda }}
|
||||
cuda:
|
||||
{{- if .Values.validator.cuda.env }}
|
||||
env: {{ toYaml .Values.validator.cuda.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.driver }}
|
||||
driver:
|
||||
{{- if .Values.validator.driver.env }}
|
||||
env: {{ toYaml .Values.validator.driver.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.toolkit }}
|
||||
toolkit:
|
||||
{{- if .Values.validator.toolkit.env }}
|
||||
env: {{ toYaml .Values.validator.toolkit.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.vfioPCI }}
|
||||
vfioPCI:
|
||||
{{- if .Values.validator.vfioPCI.env }}
|
||||
env: {{ toYaml .Values.validator.vfioPCI.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.vgpuManager }}
|
||||
vgpuManager:
|
||||
{{- if .Values.validator.vgpuManager.env }}
|
||||
env: {{ toYaml .Values.validator.vgpuManager.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.validator.vgpuDevices }}
|
||||
vgpuDevices:
|
||||
{{- if .Values.validator.vgpuDevices.env }}
|
||||
env: {{ toYaml .Values.validator.vgpuDevices.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
mig:
|
||||
{{- if .Values.mig.strategy }}
|
||||
strategy: {{ .Values.mig.strategy }}
|
||||
{{- end }}
|
||||
psa:
|
||||
enabled: {{ .Values.psa.enabled }}
|
||||
cdi:
|
||||
enabled: {{ .Values.cdi.enabled }}
|
||||
default: {{ .Values.cdi.default }}
|
||||
driver:
|
||||
enabled: {{ .Values.driver.enabled }}
|
||||
useNvidiaDriverCRD: {{ .Values.driver.nvidiaDriverCRD.enabled }}
|
||||
useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
|
||||
usePrecompiled: {{ .Values.driver.usePrecompiled }}
|
||||
{{- if .Values.driver.repository }}
|
||||
repository: {{ .Values.driver.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.image }}
|
||||
image: {{ .Values.driver.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.version }}
|
||||
version: {{ .Values.driver.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.driver.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.driver.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.startupProbe }}
|
||||
startupProbe: {{ toYaml .Values.driver.startupProbe | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.livenessProbe }}
|
||||
livenessProbe: {{ toYaml .Values.driver.livenessProbe | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.readinessProbe }}
|
||||
readinessProbe: {{ toYaml .Values.driver.readinessProbe | nindent 6 }}
|
||||
{{- end }}
|
||||
rdma:
|
||||
enabled: {{ .Values.driver.rdma.enabled }}
|
||||
useHostMofed: {{ .Values.driver.rdma.useHostMofed }}
|
||||
manager:
|
||||
{{- if .Values.driver.manager.repository }}
|
||||
repository: {{ .Values.driver.manager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.manager.image }}
|
||||
image: {{ .Values.driver.manager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.manager.version }}
|
||||
version: {{ .Values.driver.manager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.manager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.driver.manager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.manager.env }}
|
||||
env: {{ toYaml .Values.driver.manager.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.repoConfig }}
|
||||
repoConfig: {{ toYaml .Values.driver.repoConfig | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.certConfig }}
|
||||
certConfig: {{ toYaml .Values.driver.certConfig | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.licensingConfig }}
|
||||
licensingConfig: {{ toYaml .Values.driver.licensingConfig | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.virtualTopology }}
|
||||
virtualTopology: {{ toYaml .Values.driver.virtualTopology | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.kernelModuleConfig }}
|
||||
kernelModuleConfig: {{ toYaml .Values.driver.kernelModuleConfig | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.resources }}
|
||||
resources: {{ toYaml .Values.driver.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.env }}
|
||||
env: {{ toYaml .Values.driver.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.args }}
|
||||
args: {{ toYaml .Values.driver.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.upgradePolicy }}
|
||||
upgradePolicy:
|
||||
autoUpgrade: {{ .Values.driver.upgradePolicy.autoUpgrade | default false }}
|
||||
maxParallelUpgrades: {{ .Values.driver.upgradePolicy.maxParallelUpgrades | default 0 }}
|
||||
{{- /* Falls back to "25%" when .Values.driver.upgradePolicy.maxUnavailable is unset or empty. Left unquoted so a numeric value still renders as a YAML integer. */}}
maxUnavailable: {{ .Values.driver.upgradePolicy.maxUnavailable | default "25%" }}
|
||||
waitForCompletion:
|
||||
timeoutSeconds: {{ .Values.driver.upgradePolicy.waitForCompletion.timeoutSeconds }}
|
||||
{{- if .Values.driver.upgradePolicy.waitForCompletion.podSelector }}
|
||||
podSelector: {{ .Values.driver.upgradePolicy.waitForCompletion.podSelector }}
|
||||
{{- end }}
|
||||
podDeletion:
|
||||
force: {{ .Values.driver.upgradePolicy.gpuPodDeletion.force | default false }}
|
||||
timeoutSeconds: {{ .Values.driver.upgradePolicy.gpuPodDeletion.timeoutSeconds }}
|
||||
deleteEmptyDir: {{ .Values.driver.upgradePolicy.gpuPodDeletion.deleteEmptyDir | default false }}
|
||||
drain:
|
||||
enable: {{ .Values.driver.upgradePolicy.drain.enable | default false }}
|
||||
force: {{ .Values.driver.upgradePolicy.drain.force | default false }}
|
||||
{{- if .Values.driver.upgradePolicy.drain.podSelector }}
|
||||
podSelector: {{ .Values.driver.upgradePolicy.drain.podSelector }}
|
||||
{{- end }}
|
||||
timeoutSeconds: {{ .Values.driver.upgradePolicy.drain.timeoutSeconds }}
|
||||
deleteEmptyDir: {{ .Values.driver.upgradePolicy.drain.deleteEmptyDir | default false}}
|
||||
{{- end }}
|
||||
vgpuManager:
|
||||
enabled: {{ .Values.vgpuManager.enabled }}
|
||||
{{- if .Values.vgpuManager.repository }}
|
||||
repository: {{ .Values.vgpuManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.image }}
|
||||
image: {{ .Values.vgpuManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.version }}
|
||||
version: {{ .Values.vgpuManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.vgpuManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.vgpuManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.resources }}
|
||||
resources: {{ toYaml .Values.vgpuManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.env }}
|
||||
env: {{ toYaml .Values.vgpuManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.args }}
|
||||
args: {{ toYaml .Values.vgpuManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
driverManager:
|
||||
{{- if .Values.vgpuManager.driverManager.repository }}
|
||||
repository: {{ .Values.vgpuManager.driverManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.driverManager.image }}
|
||||
image: {{ .Values.vgpuManager.driverManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.driverManager.version }}
|
||||
version: {{ .Values.vgpuManager.driverManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.driverManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.vgpuManager.driverManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuManager.driverManager.env }}
|
||||
env: {{ toYaml .Values.vgpuManager.driverManager.env | nindent 8 }}
|
||||
{{- end }}
|
||||
kataManager:
|
||||
enabled: {{ .Values.kataManager.enabled }}
|
||||
config: {{ toYaml .Values.kataManager.config | nindent 6 }}
|
||||
{{- if .Values.kataManager.repository }}
|
||||
repository: {{ .Values.kataManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.image }}
|
||||
image: {{ .Values.kataManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.version }}
|
||||
version: {{ .Values.kataManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.kataManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.kataManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.resources }}
|
||||
resources: {{ toYaml .Values.kataManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.env }}
|
||||
env: {{ toYaml .Values.kataManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.kataManager.args }}
|
||||
args: {{ toYaml .Values.kataManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
vfioManager:
|
||||
enabled: {{ .Values.vfioManager.enabled }}
|
||||
{{- if .Values.vfioManager.repository }}
|
||||
repository: {{ .Values.vfioManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.image }}
|
||||
image: {{ .Values.vfioManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.version }}
|
||||
version: {{ .Values.vfioManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.vfioManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.vfioManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.resources }}
|
||||
resources: {{ toYaml .Values.vfioManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.env }}
|
||||
env: {{ toYaml .Values.vfioManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.args }}
|
||||
args: {{ toYaml .Values.vfioManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
driverManager:
|
||||
{{- if .Values.vfioManager.driverManager.repository }}
|
||||
repository: {{ .Values.vfioManager.driverManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.driverManager.image }}
|
||||
image: {{ .Values.vfioManager.driverManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.driverManager.version }}
|
||||
version: {{ .Values.vfioManager.driverManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.driverManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.vfioManager.driverManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.vfioManager.driverManager.env }}
|
||||
env: {{ toYaml .Values.vfioManager.driverManager.env | nindent 8 }}
|
||||
{{- end }}
|
||||
vgpuDeviceManager:
|
||||
enabled: {{ .Values.vgpuDeviceManager.enabled }}
|
||||
{{- if .Values.vgpuDeviceManager.repository }}
|
||||
repository: {{ .Values.vgpuDeviceManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.image }}
|
||||
image: {{ .Values.vgpuDeviceManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.version }}
|
||||
version: {{ .Values.vgpuDeviceManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.vgpuDeviceManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.vgpuDeviceManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.resources }}
|
||||
resources: {{ toYaml .Values.vgpuDeviceManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.env }}
|
||||
env: {{ toYaml .Values.vgpuDeviceManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.args }}
|
||||
args: {{ toYaml .Values.vgpuDeviceManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.vgpuDeviceManager.config }}
|
||||
config: {{ toYaml .Values.vgpuDeviceManager.config | nindent 6 }}
|
||||
{{- end }}
|
||||
ccManager:
|
||||
enabled: {{ .Values.ccManager.enabled }}
|
||||
defaultMode: {{ .Values.ccManager.defaultMode | quote }}
|
||||
{{- if .Values.ccManager.repository }}
|
||||
repository: {{ .Values.ccManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.image }}
|
||||
image: {{ .Values.ccManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.version }}
|
||||
version: {{ .Values.ccManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.ccManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.ccManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.resources }}
|
||||
resources: {{ toYaml .Values.ccManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- /* Render ccManager.env from .Values.ccManager.env, the same key the guard tests, so the value emitted is always the one that was checked. */}}
{{- if .Values.ccManager.env }}
|
||||
env: {{ toYaml .Values.ccManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.ccManager.args }}
|
||||
args: {{ toYaml .Values.ccManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
toolkit:
|
||||
enabled: {{ .Values.toolkit.enabled }}
|
||||
{{- if .Values.toolkit.repository }}
|
||||
repository: {{ .Values.toolkit.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.image }}
|
||||
image: {{ .Values.toolkit.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.version }}
|
||||
version: {{ .Values.toolkit.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.toolkit.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.toolkit.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.resources }}
|
||||
resources: {{ toYaml .Values.toolkit.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.env }}
|
||||
env: {{ toYaml .Values.toolkit.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.toolkit.installDir }}
|
||||
installDir: {{ .Values.toolkit.installDir }}
|
||||
{{- end }}
|
||||
devicePlugin:
|
||||
enabled: {{ .Values.devicePlugin.enabled }}
|
||||
{{- if .Values.devicePlugin.repository }}
|
||||
repository: {{ .Values.devicePlugin.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.image }}
|
||||
image: {{ .Values.devicePlugin.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.version }}
|
||||
version: {{ .Values.devicePlugin.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.devicePlugin.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.resources }}
|
||||
resources: {{ toYaml .Values.devicePlugin.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.env }}
|
||||
env: {{ toYaml .Values.devicePlugin.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.args }}
|
||||
args: {{ toYaml .Values.devicePlugin.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.devicePlugin.config.name }}
|
||||
config:
|
||||
name: {{ .Values.devicePlugin.config.name }}
|
||||
{{- /* Quote the value so an unset or empty default renders as "" instead of a bare key (YAML null). */}}
default: {{ .Values.devicePlugin.config.default | default "" | quote }}
|
||||
{{- end }}
|
||||
dcgm:
|
||||
enabled: {{ .Values.dcgm.enabled }}
|
||||
{{- if .Values.dcgm.repository }}
|
||||
repository: {{ .Values.dcgm.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.image }}
|
||||
image: {{ .Values.dcgm.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.version }}
|
||||
version: {{ .Values.dcgm.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.dcgm.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.dcgm.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.resources }}
|
||||
resources: {{ toYaml .Values.dcgm.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.env }}
|
||||
env: {{ toYaml .Values.dcgm.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgm.args }}
|
||||
args: {{ toYaml .Values.dcgm.args | nindent 6 }}
|
||||
{{- end }}
|
||||
dcgmExporter:
|
||||
enabled: {{ .Values.dcgmExporter.enabled }}
|
||||
{{- if .Values.dcgmExporter.repository }}
|
||||
repository: {{ .Values.dcgmExporter.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.image }}
|
||||
image: {{ .Values.dcgmExporter.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.version }}
|
||||
version: {{ .Values.dcgmExporter.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.dcgmExporter.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.dcgmExporter.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.resources }}
|
||||
resources: {{ toYaml .Values.dcgmExporter.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.env }}
|
||||
env: {{ toYaml .Values.dcgmExporter.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.args }}
|
||||
args: {{ toYaml .Values.dcgmExporter.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if and (.Values.dcgmExporter.config) (.Values.dcgmExporter.config.name) }}
|
||||
config:
|
||||
name: {{ .Values.dcgmExporter.config.name }}
|
||||
{{- end }}
|
||||
{{- if .Values.dcgmExporter.serviceMonitor }}
|
||||
serviceMonitor: {{ toYaml .Values.dcgmExporter.serviceMonitor | nindent 6 }}
|
||||
{{- end }}
|
||||
gfd:
|
||||
enabled: {{ .Values.gfd.enabled }}
|
||||
{{- if .Values.gfd.repository }}
|
||||
repository: {{ .Values.gfd.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.image }}
|
||||
image: {{ .Values.gfd.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.version }}
|
||||
version: {{ .Values.gfd.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.gfd.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.gfd.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.resources }}
|
||||
resources: {{ toYaml .Values.gfd.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.env }}
|
||||
env: {{ toYaml .Values.gfd.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gfd.args }}
|
||||
args: {{ toYaml .Values.gfd.args | nindent 6 }}
|
||||
{{- end }}
|
||||
migManager:
|
||||
enabled: {{ .Values.migManager.enabled }}
|
||||
{{- if .Values.migManager.repository }}
|
||||
repository: {{ .Values.migManager.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.image }}
|
||||
image: {{ .Values.migManager.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.version }}
|
||||
version: {{ .Values.migManager.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.migManager.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.migManager.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.resources }}
|
||||
resources: {{ toYaml .Values.migManager.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.env }}
|
||||
env: {{ toYaml .Values.migManager.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.args }}
|
||||
args: {{ toYaml .Values.migManager.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.config }}
|
||||
config:
|
||||
name: {{ .Values.migManager.config.name }}
|
||||
default: {{ .Values.migManager.config.default }}
|
||||
{{- end }}
|
||||
{{- if .Values.migManager.gpuClientsConfig }}
|
||||
gpuClientsConfig: {{ toYaml .Values.migManager.gpuClientsConfig | nindent 6 }}
|
||||
{{- end }}
|
||||
nodeStatusExporter:
|
||||
enabled: {{ .Values.nodeStatusExporter.enabled }}
|
||||
{{- if .Values.nodeStatusExporter.repository }}
|
||||
repository: {{ .Values.nodeStatusExporter.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeStatusExporter.image }}
|
||||
image: {{ .Values.nodeStatusExporter.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.nodeStatusExporter.version | default .Chart.AppVersion | quote }}
|
||||
{{- if .Values.nodeStatusExporter.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.nodeStatusExporter.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeStatusExporter.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.nodeStatusExporter.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeStatusExporter.resources }}
|
||||
resources: {{ toYaml .Values.nodeStatusExporter.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeStatusExporter.env }}
|
||||
env: {{ toYaml .Values.nodeStatusExporter.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeStatusExporter.args }}
|
||||
args: {{ toYaml .Values.nodeStatusExporter.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.enabled }}
|
||||
gds:
|
||||
enabled: {{ .Values.gds.enabled }}
|
||||
{{- if .Values.gds.repository }}
|
||||
repository: {{ .Values.gds.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.image }}
|
||||
image: {{ .Values.gds.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.gds.version | quote }}
|
||||
{{- if .Values.gds.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.gds.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.gds.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.env }}
|
||||
env: {{ toYaml .Values.gds.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.args }}
|
||||
args: {{ toYaml .Values.gds.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy }}
|
||||
gdrcopy:
|
||||
enabled: {{ .Values.gdrcopy.enabled | default false }}
|
||||
{{- if .Values.gdrcopy.repository }}
|
||||
repository: {{ .Values.gdrcopy.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.image }}
|
||||
image: {{ .Values.gdrcopy.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.gdrcopy.version | quote }}
|
||||
{{- if .Values.gdrcopy.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.gdrcopy.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.gdrcopy.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.env }}
|
||||
env: {{ toYaml .Values.gdrcopy.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.args }}
|
||||
args: {{ toYaml .Values.gdrcopy.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
sandboxWorkloads:
|
||||
enabled: {{ .Values.sandboxWorkloads.enabled }}
|
||||
{{- if .Values.sandboxWorkloads.defaultWorkload }}
|
||||
defaultWorkload: {{ .Values.sandboxWorkloads.defaultWorkload }}
|
||||
{{- end }}
|
||||
sandboxDevicePlugin:
|
||||
{{- /* Test for key presence rather than truthiness so an explicit `enabled: false` is still written out, as it is for the other components in this template. */}}
{{- if hasKey .Values.sandboxDevicePlugin "enabled" }}
|
||||
enabled: {{ .Values.sandboxDevicePlugin.enabled }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.repository }}
|
||||
repository: {{ .Values.sandboxDevicePlugin.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.image }}
|
||||
image: {{ .Values.sandboxDevicePlugin.image }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.version }}
|
||||
version: {{ .Values.sandboxDevicePlugin.version | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.sandboxDevicePlugin.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.sandboxDevicePlugin.imagePullSecrets | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.resources }}
|
||||
resources: {{ toYaml .Values.sandboxDevicePlugin.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.env }}
|
||||
env: {{ toYaml .Values.sandboxDevicePlugin.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.sandboxDevicePlugin.args }}
|
||||
args: {{ toYaml .Values.sandboxDevicePlugin.args | nindent 6 }}
|
||||
{{- end }}
|
146
charts/gpu-operator/templates/clusterrole.yaml
Normal file
146
charts/gpu-operator/templates/clusterrole.yaml
Normal file
|
@ -0,0 +1,146 @@
|
|||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
rules:
|
||||
- apiGroups:
|
||||
- config.openshift.io
|
||||
resources:
|
||||
- clusterversions
|
||||
- proxies
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- image.openshift.io
|
||||
resources:
|
||||
- imagestreams
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- security.openshift.io
|
||||
resources:
|
||||
- securitycontextconstraints
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- use
|
||||
- apiGroups:
|
||||
- rbac.authorization.k8s.io
|
||||
resources:
|
||||
- clusterroles
|
||||
- clusterrolebindings
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- namespaces
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- create
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
- pods
|
||||
- pods/eviction
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- daemonsets
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- nvidia.com
|
||||
resources:
|
||||
- clusterpolicies
|
||||
- clusterpolicies/finalizers
|
||||
- clusterpolicies/status
|
||||
- nvidiadrivers
|
||||
- nvidiadrivers/finalizers
|
||||
- nvidiadrivers/status
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- deletecollection
|
||||
- apiGroups:
|
||||
- scheduling.k8s.io
|
||||
resources:
|
||||
- priorityclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- apiGroups:
|
||||
- node.k8s.io
|
||||
resources:
|
||||
- runtimeclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- create
|
||||
- update
|
||||
- watch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- apiextensions.k8s.io
|
||||
resources:
|
||||
- customresourcedefinitions
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- create
|
||||
{{- if .Values.operator.cleanupCRD }}
|
||||
- delete
|
||||
{{- end }}
|
18
charts/gpu-operator/templates/clusterrolebinding.yaml
Normal file
18
charts/gpu-operator/templates/clusterrolebinding.yaml
Normal file
|
@ -0,0 +1,18 @@
|
|||
# Binds the ClusterRole named gpu-operator to the service accounts listed
# under subjects, both looked up in the release namespace.
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: gpu-operator
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
# NOTE(review): this subject gives the node-feature-discovery service account
# every rule of the gpu-operator ClusterRole - confirm that breadth is intended.
- kind: ServiceAccount
|
||||
name: node-feature-discovery
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: gpu-operator
|
||||
apiGroup: rbac.authorization.k8s.io
|
14
charts/gpu-operator/templates/dcgm_exporter_config.yaml
Normal file
14
charts/gpu-operator/templates/dcgm_exporter_config.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
{{- if .Values.dcgmExporter.config }}
|
||||
{{- if and (.Values.dcgmExporter.config.create) (not (empty .Values.dcgmExporter.config.data)) }}
|
||||
# ConfigMap holding the dcgmExporter.config.data value under the key
# dcgm-metrics.csv. Rendered only when dcgmExporter.config is set, its
# create flag is truthy and its data is non-empty.
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Values.dcgmExporter.config.name }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
data:
|
||||
dcgm-metrics.csv: |
|
||||
{{- .Values.dcgmExporter.config.data | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
10
charts/gpu-operator/templates/mig_config.yaml
Normal file
10
charts/gpu-operator/templates/mig_config.yaml
Normal file
|
@ -0,0 +1,10 @@
|
|||
{{- if and (.Values.migManager.config.create) (not (empty .Values.migManager.config.data)) }}
|
||||
# ConfigMap named by migManager.config.name whose data section is
# migManager.config.data serialised with toYaml. Rendered only when
# migManager.config.create is truthy and the data is non-empty.
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Values.migManager.config.name }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
data: {{ toYaml .Values.migManager.config.data | nindent 2 }}
|
||||
{{- end }}
|
107
charts/gpu-operator/templates/nodefeaturerules.yaml
Normal file
107
charts/gpu-operator/templates/nodefeaturerules.yaml
Normal file
|
@ -0,0 +1,107 @@
|
|||
{{- if .Values.nfd.nodefeaturerules }}
|
||||
apiVersion: nfd.k8s-sigs.io/v1alpha1
|
||||
kind: NodeFeatureRule
|
||||
metadata:
|
||||
name: nvidia-nfd-nodefeaturerules
|
||||
spec:
|
||||
rules:
|
||||
- name: "TDX rule"
|
||||
labels:
|
||||
tdx.enabled: "true"
|
||||
matchFeatures:
|
||||
- feature: cpu.security
|
||||
matchExpressions:
|
||||
tdx.enabled: {op: IsTrue}
|
||||
- name: "TDX total keys rule"
|
||||
extendedResources:
|
||||
tdx.total_keys: "@cpu.security.tdx.total_keys"
|
||||
matchFeatures:
|
||||
- feature: cpu.security
|
||||
matchExpressions:
|
||||
tdx.enabled: {op: IsTrue}
|
||||
- name: "SEV-SNP rule"
|
||||
labels:
|
||||
sev.snp.enabled: "true"
|
||||
matchFeatures:
|
||||
- feature: cpu.security
|
||||
matchExpressions:
|
||||
sev.snp.enabled:
|
||||
op: IsTrue
|
||||
- name: "SEV-ES rule"
|
||||
labels:
|
||||
sev.es.enabled: "true"
|
||||
matchFeatures:
|
||||
- feature: cpu.security
|
||||
matchExpressions:
|
||||
sev.es.enabled:
|
||||
op: IsTrue
|
||||
- name: SEV system capacities
|
||||
extendedResources:
|
||||
sev_asids: '@cpu.security.sev.asids'
|
||||
sev_es: '@cpu.security.sev.encrypted_state_ids'
|
||||
matchFeatures:
|
||||
- feature: cpu.security
|
||||
matchExpressions:
|
||||
sev.enabled:
|
||||
op: Exists
|
||||
- name: "NVIDIA H100"
|
||||
labels:
|
||||
"nvidia.com/gpu.H100": "true"
|
||||
"nvidia.com/gpu.family": "hopper"
|
||||
matchFeatures:
|
||||
- feature: pci.device
|
||||
matchExpressions:
|
||||
vendor: {op: In, value: ["10de"]}
|
||||
device: {op: In, value: ["2339"]}
|
||||
- name: "NVIDIA H100 PCIe"
|
||||
labels:
|
||||
"nvidia.com/gpu.H100.pcie": "true"
|
||||
"nvidia.com/gpu.family": "hopper"
|
||||
matchFeatures:
|
||||
- feature: pci.device
|
||||
matchExpressions:
|
||||
vendor: {op: In, value: ["10de"]}
|
||||
device: {op: In, value: ["2331"]}
|
||||
- name: "NVIDIA H100 80GB HBM3"
|
||||
labels:
|
||||
"nvidia.com/gpu.H100.HBM3": "true"
|
||||
"nvidia.com/gpu.family": "hopper"
|
||||
matchFeatures:
|
||||
- feature: pci.device
|
||||
matchExpressions:
|
||||
vendor: {op: In, value: ["10de"]}
|
||||
device: {op: In, value: ["2330"]}
|
||||
- name: "NVIDIA H800"
|
||||
labels:
|
||||
"nvidia.com/gpu.H800": "true"
|
||||
"nvidia.com/gpu.family": "hopper"
|
||||
matchFeatures:
|
||||
- feature: pci.device
|
||||
matchExpressions:
|
||||
vendor: {op: In, value: ["10de"]}
|
||||
device: {op: In, value: ["2324"]}
|
||||
- name: "NVIDIA H800 PCIE"
|
||||
labels:
|
||||
"nvidia.com/gpu.H800.pcie": "true"
|
||||
"nvidia.com/gpu.family": "hopper"
|
||||
matchFeatures:
|
||||
- feature: pci.device
|
||||
matchExpressions:
|
||||
vendor: {op: In, value: ["10de"]}
|
||||
device: {op: In, value: ["2322"]}
|
||||
- name: "NVIDIA CC Enabled"
|
||||
labels:
|
||||
"nvidia.com/cc.capable": "true"
|
||||
matchAny: # TDX/SEV + Hopper GPU
|
||||
- matchFeatures:
|
||||
- feature: rule.matched
|
||||
matchExpressions:
|
||||
nvidia.com/gpu.family: {op: In, value: ["hopper"]}
|
||||
sev.snp.enabled: {op: IsTrue}
|
||||
- matchFeatures:
|
||||
- feature: rule.matched
|
||||
matchExpressions:
|
||||
nvidia.com/gpu.family: {op: In, value: ["hopper"]}
|
||||
tdx.enabled: {op: IsTrue}
|
||||
{{- end }}
|
||||
|
119
charts/gpu-operator/templates/nvidiadriver.yaml
Normal file
119
charts/gpu-operator/templates/nvidiadriver.yaml
Normal file
|
@ -0,0 +1,119 @@
|
|||
{{- if and .Values.driver.nvidiaDriverCRD.enabled .Values.driver.nvidiaDriverCRD.deployDefaultCR }}
|
||||
apiVersion: nvidia.com/v1alpha1
|
||||
kind: NVIDIADriver
|
||||
metadata:
|
||||
name: default
|
||||
spec:
|
||||
repository: {{ .Values.driver.repository }}
|
||||
image: {{ .Values.driver.image }}
|
||||
{{- /* Quoted so a numeric-looking version (for example 535) stays a string, matching the other version fields in this chart. */}}
version: {{ .Values.driver.version | quote }}
|
||||
useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
|
||||
usePrecompiled: {{ .Values.driver.usePrecompiled }}
|
||||
driverType: {{ .Values.driver.nvidiaDriverCRD.driverType | default "gpu" }}
|
||||
{{- if .Values.daemonsets.annotations }}
|
||||
annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.daemonsets.labels }}
|
||||
labels: {{ toYaml .Values.daemonsets.labels | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.nvidiaDriverCRD.nodeSelector }}
|
||||
nodeSelector: {{ toYaml .Values.driver.nvidiaDriverCRD.nodeSelector | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.driver.imagePullSecrets | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.manager }}
|
||||
manager: {{ toYaml .Values.driver.manager | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.startupProbe }}
|
||||
startupProbe: {{ toYaml .Values.driver.startupProbe | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.livenessProbe }}
|
||||
livenessProbe: {{ toYaml .Values.driver.livenessProbe | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.readinessProbe }}
|
||||
readinessProbe: {{ toYaml .Values.driver.readinessProbe | nindent 4 }}
|
||||
{{- end }}
|
||||
rdma:
|
||||
enabled: {{ .Values.driver.rdma.enabled }}
|
||||
useHostMofed: {{ .Values.driver.rdma.useHostMofed }}
|
||||
{{- if .Values.daemonsets.tolerations }}
|
||||
tolerations: {{ toYaml .Values.daemonsets.tolerations | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.repoConfig.configMapName }}
|
||||
repoConfig:
|
||||
name: {{ .Values.driver.repoConfig.configMapName }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.certConfig.name }}
|
||||
certConfig:
|
||||
name: {{ .Values.driver.certConfig.name }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.licensingConfig.configMapName }}
|
||||
licensingConfig:
|
||||
name: {{ .Values.driver.licensingConfig.configMapName }}
|
||||
{{- /* `default` treats false as empty, so `false | default true` always yields true. Compare the string form instead: unset or true renders true, an explicit false renders false. */}}
nlsEnabled: {{ ne (toString .Values.driver.licensingConfig.nlsEnabled) "false" }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.virtualTopology.config }}
|
||||
virtualTopologyConfig:
|
||||
name: {{ .Values.driver.virtualTopology.config }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.kernelModuleConfig.name }}
|
||||
kernelModuleConfig:
|
||||
name: {{ .Values.driver.kernelModuleConfig.name }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.resources }}
|
||||
resources: {{ toYaml .Values.driver.resources | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.env }}
|
||||
env: {{ toYaml .Values.driver.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.driver.args }}
|
||||
args: {{ toYaml .Values.driver.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.enabled }}
|
||||
gds:
|
||||
enabled: {{ .Values.gds.enabled }}
|
||||
{{- if .Values.gds.repository }}
|
||||
repository: {{ .Values.gds.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.image }}
|
||||
image: {{ .Values.gds.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.gds.version | quote }}
|
||||
{{- if .Values.gds.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.gds.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.gds.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.env }}
|
||||
env: {{ toYaml .Values.gds.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gds.args }}
|
||||
args: {{ toYaml .Values.gds.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy }}
|
||||
gdrcopy:
|
||||
enabled: {{ .Values.gdrcopy.enabled | default false }}
|
||||
{{- if .Values.gdrcopy.repository }}
|
||||
repository: {{ .Values.gdrcopy.repository }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.image }}
|
||||
image: {{ .Values.gdrcopy.image }}
|
||||
{{- end }}
|
||||
version: {{ .Values.gdrcopy.version | quote }}
|
||||
{{- if .Values.gdrcopy.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.gdrcopy.imagePullPolicy }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.imagePullSecrets }}
|
||||
imagePullSecrets: {{ toYaml .Values.gdrcopy.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.env }}
|
||||
env: {{ toYaml .Values.gdrcopy.env | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- if .Values.gdrcopy.args }}
|
||||
args: {{ toYaml .Values.gdrcopy.args | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
99
charts/gpu-operator/templates/operator.yaml
Normal file
99
charts/gpu-operator/templates/operator.yaml
Normal file
|
@ -0,0 +1,99 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
nvidia.com/gpu-driver-upgrade-drain.skip: "true"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
app: "gpu-operator"
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 8 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
app: "gpu-operator"
|
||||
nvidia.com/gpu-driver-upgrade-drain.skip: "true"
|
||||
annotations:
|
||||
{{- toYaml .Values.operator.annotations | nindent 8 }}
|
||||
spec:
|
||||
serviceAccountName: gpu-operator
|
||||
{{- if .Values.operator.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- range .Values.operator.imagePullSecrets }}
|
||||
- name: {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.priorityClassName }}
|
||||
priorityClassName: {{ .Values.operator.priorityClassName }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: gpu-operator
|
||||
image: {{ include "gpu-operator.fullimage" . }}
|
||||
imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
|
||||
command: ["gpu-operator"]
|
||||
args:
|
||||
- --leader-elect
|
||||
{{- if .Values.operator.logging.develMode }}
|
||||
- --zap-devel
|
||||
{{- else }}
|
||||
{{- if .Values.operator.logging.timeEncoding }}
|
||||
- --zap-time-encoding={{- .Values.operator.logging.timeEncoding }}
|
||||
{{- end }}
|
||||
{{- if .Values.operator.logging.level }}
|
||||
- --zap-log-level={{- .Values.operator.logging.level }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
env:
|
||||
- name: WATCH_NAMESPACE
|
||||
value: ""
|
||||
- name: OPERATOR_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: "DRIVER_MANAGER_IMAGE"
|
||||
value: "{{ include "driver-manager.fullimage" . }}"
|
||||
volumeMounts:
|
||||
- name: host-os-release
|
||||
mountPath: "/host-etc/os-release"
|
||||
readOnly: true
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8081
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: 8081
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
{{- with .Values.operator.resources }}
|
||||
resources:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 8080
|
||||
volumes:
|
||||
- name: host-os-release
|
||||
hostPath:
|
||||
path: "/etc/os-release"
|
||||
{{- with .Values.operator.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.operator.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.operator.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
11
charts/gpu-operator/templates/plugin_config.yaml
Normal file
11
charts/gpu-operator/templates/plugin_config.yaml
Normal file
|
@ -0,0 +1,11 @@
|
|||
{{- if and (.Values.devicePlugin.config.create) (not (empty .Values.devicePlugin.config.data)) }}
|
||||
# ConfigMap named by devicePlugin.config.name whose data section is
# devicePlugin.config.data serialised with toYaml. Rendered only when
# devicePlugin.config.create is truthy and the data is non-empty.
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Values.devicePlugin.config.name }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
data: {{ toYaml .Values.devicePlugin.config.data | nindent 2 }}
|
||||
{{- end }}
|
||||
|
49
charts/gpu-operator/templates/readonlyfs_scc.openshift.yaml
Normal file
49
charts/gpu-operator/templates/readonlyfs_scc.openshift.yaml
Normal file
|
@ -0,0 +1,49 @@
|
|||
{{- if .Values.platform.openshift }}
|
||||
apiVersion: security.openshift.io/v1
|
||||
kind: SecurityContextConstraints
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
annotations:
|
||||
kubernetes.io/description: restricted denies access to all host features and requires
|
||||
pods to be run with a UID, read-only root filesystem and SELinux context that are
|
||||
allocated to the namespace. This SCC is more restrictive than the default
|
||||
restrictive SCC and it is used by default for authenticated users and operators and operands.
|
||||
name: restricted-readonly
|
||||
allowHostDirVolumePlugin: false
|
||||
allowHostIPC: false
|
||||
allowHostNetwork: false
|
||||
allowHostPID: false
|
||||
allowHostPorts: false
|
||||
allowPrivilegeEscalation: true
|
||||
allowPrivilegedContainer: false
|
||||
allowedCapabilities: []
|
||||
defaultAddCapabilities: []
|
||||
fsGroup:
|
||||
type: MustRunAs
|
||||
groups:
|
||||
- system:authenticated
|
||||
priority: 0
|
||||
readOnlyRootFilesystem: true
|
||||
requiredDropCapabilities:
|
||||
- KILL
|
||||
- MKNOD
|
||||
- SETUID
|
||||
- SETGID
|
||||
runAsUser:
|
||||
type: MustRunAsRange
|
||||
seLinuxContext:
|
||||
type: MustRunAs
|
||||
supplementalGroups:
|
||||
type: RunAsAny
|
||||
users:
|
||||
- system:serviceaccount:{{ $.Release.Namespace }}:gpu-operator
|
||||
volumes:
|
||||
- configMap
|
||||
- downwardAPI
|
||||
- emptyDir
|
||||
- persistentVolumeClaim
|
||||
- projected
|
||||
- secret
|
||||
{{- end }}
|
84
charts/gpu-operator/templates/role.yaml
Normal file
84
charts/gpu-operator/templates/role.yaml
Normal file
|
@ -0,0 +1,84 @@
|
|||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
rules:
|
||||
- apiGroups:
|
||||
- rbac.authorization.k8s.io
|
||||
resources:
|
||||
- roles
|
||||
- rolebindings
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- controllerrevisions
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- daemonsets
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- endpoints
|
||||
- pods
|
||||
- pods/eviction
|
||||
- secrets
|
||||
- services
|
||||
- services/finalizers
|
||||
- serviceaccounts
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- monitoring.coreos.com
|
||||
resources:
|
||||
- servicemonitors
|
||||
- prometheusrules
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- create
|
||||
- watch
|
||||
- update
|
||||
- delete
|
15
charts/gpu-operator/templates/rolebinding.yaml
Normal file
15
charts/gpu-operator/templates/rolebinding.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Binds the namespaced Role named gpu-operator to the gpu-operator service
# account in the release namespace.
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: gpu-operator
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: gpu-operator
|
||||
apiGroup: rbac.authorization.k8s.io
|
7
charts/gpu-operator/templates/serviceaccount.yaml
Normal file
7
charts/gpu-operator/templates/serviceaccount.yaml
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Service account named gpu-operator, labelled with the chart's common labels
# plus the gpu-operator component label. No namespace is set in this manifest.
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: gpu-operator
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
95
charts/gpu-operator/templates/upgrade_crd.yaml
Normal file
95
charts/gpu-operator/templates/upgrade_crd.yaml
Normal file
|
@ -0,0 +1,95 @@
|
|||
{{- if .Values.operator.upgradeCRD }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: gpu-operator-upgrade-crd-hook-sa
|
||||
annotations:
|
||||
helm.sh/hook: pre-upgrade
|
||||
helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
|
||||
helm.sh/hook-weight: "0"
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: gpu-operator-upgrade-crd-hook-role
|
||||
annotations:
|
||||
helm.sh/hook: pre-upgrade
|
||||
helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
|
||||
helm.sh/hook-weight: "0"
|
||||
rules:
|
||||
- apiGroups:
|
||||
- apiextensions.k8s.io
|
||||
resources:
|
||||
- customresourcedefinitions
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- patch
|
||||
- update
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: gpu-operator-upgrade-crd-hook-binding
|
||||
annotations:
|
||||
helm.sh/hook: pre-upgrade
|
||||
helm.sh/hook-delete-policy: hook-succeeded,before-hook-creation
|
||||
helm.sh/hook-weight: "0"
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: gpu-operator-upgrade-crd-hook-sa
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: gpu-operator-upgrade-crd-hook-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
---
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: gpu-operator-upgrade-crd
|
||||
namespace: {{ .Release.Namespace }}
|
||||
annotations:
|
||||
"helm.sh/hook": pre-upgrade
|
||||
"helm.sh/hook-weight": "1"
|
||||
"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
name: gpu-operator-upgrade-crd
|
||||
labels:
|
||||
{{- include "gpu-operator.labels" . | nindent 8 }}
|
||||
app.kubernetes.io/component: "gpu-operator"
|
||||
spec:
|
||||
serviceAccountName: gpu-operator-upgrade-crd-hook-sa
|
||||
{{- if .Values.operator.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- range .Values.operator.imagePullSecrets }}
|
||||
- name: {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.operator.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: upgrade-crd
|
||||
image: {{ include "gpu-operator.fullimage" . }}
|
||||
imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies.yaml;
|
||||
kubectl apply -f /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml;
|
||||
{{- if .Values.nfd.enabled }}
|
||||
kubectl apply -f /opt/gpu-operator/nfd-api-crds.yaml;
|
||||
{{- end }}
|
||||
restartPolicy: OnFailure
|
||||
{{- end }}
|
15
charts/gpu-operator/values.yaml
Normal file
15
charts/gpu-operator/values.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
toolkit:
|
||||
env:
|
||||
- name: CONTAINERD_CONFIG
|
||||
value: "/etc/containerd/config.toml.tmpl"
|
||||
- name: CONTAINERD_SOCKET
|
||||
value: "/run/k3s/containerd/containerd.sock"
|
||||
- name: CONTAINERD_RUNTIME_CLASS
|
||||
value: "nvidia"
|
||||
- name: CONTAINERD_SET_AS_DEFAULT
|
||||
value: "true"
|
||||
|
||||
devicePlugin:
|
||||
config:
|
||||
name: time-slicing-config-all
|
||||
default: any
|
602
charts/gpu-operator/values.yaml.bk
Normal file
602
charts/gpu-operator/values.yaml.bk
Normal file
|
@ -0,0 +1,602 @@
|
|||
# Default values for gpu-operator.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
platform:
|
||||
openshift: false
|
||||
|
||||
nfd:
|
||||
enabled: true
|
||||
nodefeaturerules: false
|
||||
|
||||
psa:
|
||||
enabled: false
|
||||
|
||||
cdi:
|
||||
enabled: false
|
||||
default: false
|
||||
|
||||
sandboxWorkloads:
|
||||
enabled: false
|
||||
defaultWorkload: "container"
|
||||
|
||||
hostPaths:
|
||||
# rootFS represents the path to the root filesystem of the host.
|
||||
# This is used by components that need to interact with the host filesystem
|
||||
# and as such this must be a chroot-able filesystem.
|
||||
# Examples include the MIG Manager and Toolkit Container which may need to
|
||||
# stop, start, or restart systemd services
|
||||
rootFS: "/"
|
||||
|
||||
# driverInstallDir represents the root at which driver files including libraries,
|
||||
# config files, and executables can be found.
|
||||
driverInstallDir: "/run/nvidia/driver"
|
||||
|
||||
daemonsets:
|
||||
labels: {}
|
||||
annotations: {}
|
||||
priorityClassName: system-node-critical
|
||||
tolerations:
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
# configuration for controlling update strategy ("OnDelete" or "RollingUpdate") of GPU Operands
|
||||
# note that driver Daemonset is always set with OnDelete to avoid unintended disruptions
|
||||
updateStrategy: "RollingUpdate"
|
||||
# configuration for controlling rolling update of GPU Operands
|
||||
rollingUpdate:
|
||||
# maximum number of nodes to simultaneously apply pod updates on.
|
||||
# can be specified either as number or percentage of nodes. Default 1.
|
||||
maxUnavailable: "1"
|
||||
|
||||
validator:
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: gpu-operator-validator
|
||||
# If version is not specified, then default is to use chart.AppVersion
|
||||
#version: ""
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
args: []
|
||||
resources: {}
|
||||
plugin:
|
||||
env:
|
||||
- name: WITH_WORKLOAD
|
||||
value: "false"
|
||||
|
||||
operator:
|
||||
repository: nvcr.io/nvidia
|
||||
image: gpu-operator
|
||||
# If version is not specified, then default is to use chart.AppVersion
|
||||
#version: ""
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
priorityClassName: system-node-critical
|
||||
defaultRuntime: docker
|
||||
runtimeClass: nvidia
|
||||
use_ocp_driver_toolkit: false
|
||||
# cleanup CRD on chart un-install
|
||||
cleanupCRD: false
|
||||
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
|
||||
# to be passed during helm upgrade.
|
||||
upgradeCRD: true
|
||||
initContainer:
|
||||
image: cuda
|
||||
repository: nvcr.io/nvidia
|
||||
version: 12.6.3-base-ubi9
|
||||
imagePullPolicy: IfNotPresent
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
annotations:
|
||||
openshift.io/scc: restricted-readonly
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 1
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: In
|
||||
values: [""]
|
||||
- weight: 1
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: In
|
||||
values: [""]
|
||||
logging:
|
||||
# Zap time encoding (one of 'epoch', 'millis', 'nano', 'iso8601', 'rfc3339' or 'rfc3339nano')
|
||||
timeEncoding: epoch
|
||||
# Zap Level to configure the verbosity of logging. Can be one of 'debug', 'info', 'error', or any integer value > 0 which corresponds to custom debug levels of increasing verbosity
|
||||
level: info
|
||||
# Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn)
|
||||
# Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error)
|
||||
develMode: false
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 350Mi
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 100Mi
|
||||
|
||||
mig:
|
||||
strategy: single
|
||||
|
||||
driver:
|
||||
enabled: true
|
||||
nvidiaDriverCRD:
|
||||
enabled: false
|
||||
deployDefaultCR: true
|
||||
driverType: gpu
|
||||
nodeSelector: {}
|
||||
useOpenKernelModules: false
|
||||
# use pre-compiled packages for NVIDIA driver installation.
|
||||
# only supported as a tech-preview feature on ubuntu22.04 kernels.
|
||||
usePrecompiled: false
|
||||
repository: nvcr.io/nvidia
|
||||
image: driver
|
||||
version: "550.127.08"
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
startupProbe:
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
# nvidia-smi can take longer than 30s in some cases
|
||||
# ensure enough timeout is set
|
||||
timeoutSeconds: 60
|
||||
failureThreshold: 120
|
||||
rdma:
|
||||
enabled: false
|
||||
useHostMofed: false
|
||||
upgradePolicy:
|
||||
# global switch for automatic upgrade feature
|
||||
# if set to false all other options are ignored
|
||||
autoUpgrade: true
|
||||
# how many nodes can be upgraded in parallel
|
||||
# 0 means no limit, all nodes will be upgraded in parallel
|
||||
maxParallelUpgrades: 1
|
||||
# maximum number of nodes with the driver installed, that can be unavailable during
|
||||
# the upgrade. Value can be an absolute number (ex: 5) or
|
||||
# a percentage of total nodes at the start of upgrade (ex:
|
||||
# 10%). Absolute number is calculated from percentage by rounding
|
||||
# up. By default, a fixed value of 25% is used.
|
||||
maxUnavailable: 25%
|
||||
# options for waiting on pod(job) completions
|
||||
waitForCompletion:
|
||||
timeoutSeconds: 0
|
||||
podSelector: ""
|
||||
# options for gpu pod deletion
|
||||
gpuPodDeletion:
|
||||
force: false
|
||||
timeoutSeconds: 300
|
||||
deleteEmptyDir: false
|
||||
# options for node drain (`kubectl drain`) before the driver reload
|
||||
# this is required only if default GPU pod deletions done by the operator
|
||||
# are not sufficient to re-install the driver
|
||||
drain:
|
||||
enable: false
|
||||
force: false
|
||||
podSelector: ""
|
||||
# It's recommended to set a timeout to avoid an infinite drain in case a non-fatal error keeps happening on retries
|
||||
timeoutSeconds: 300
|
||||
deleteEmptyDir: false
|
||||
manager:
|
||||
image: k8s-driver-manager
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||
version: v0.7.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: "true"
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: "false"
|
||||
- name: DRAIN_USE_FORCE
|
||||
value: "false"
|
||||
- name: DRAIN_POD_SELECTOR_LABEL
|
||||
value: ""
|
||||
- name: DRAIN_TIMEOUT_SECONDS
|
||||
value: "0s"
|
||||
- name: DRAIN_DELETE_EMPTYDIR_DATA
|
||||
value: "false"
|
||||
env: []
|
||||
resources: {}
|
||||
# Private mirror repository configuration
|
||||
repoConfig:
|
||||
configMapName: ""
|
||||
# custom ssl key/certificate configuration
|
||||
certConfig:
|
||||
name: ""
|
||||
# vGPU licensing configuration
|
||||
licensingConfig:
|
||||
configMapName: ""
|
||||
nlsEnabled: true
|
||||
# vGPU topology daemon configuration
|
||||
virtualTopology:
|
||||
config: ""
|
||||
# kernel module configuration for NVIDIA driver
|
||||
kernelModuleConfig:
|
||||
name: ""
|
||||
|
||||
toolkit:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia/k8s
|
||||
image: container-toolkit
|
||||
version: v1.17.3-ubuntu20.04
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
resources: {}
|
||||
installDir: "/usr/local/nvidia"
|
||||
|
||||
devicePlugin:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia
|
||||
image: k8s-device-plugin
|
||||
version: v0.17.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
args: []
|
||||
env:
|
||||
- name: PASS_DEVICE_SPECS
|
||||
value: "true"
|
||||
- name: FAIL_ON_INIT_ERROR
|
||||
value: "true"
|
||||
- name: DEVICE_LIST_STRATEGY
|
||||
value: envvar
|
||||
- name: DEVICE_ID_STRATEGY
|
||||
value: uuid
|
||||
- name: NVIDIA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||
value: all
|
||||
resources: {}
|
||||
# Plugin configuration
|
||||
# Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations (i.e. with create=true).
|
||||
# Use "data" to build an integrated ConfigMap from a set of configurations as
|
||||
# part of this helm chart. An example of setting "data" might be:
|
||||
# config:
|
||||
# name: device-plugin-config
|
||||
# create: true
|
||||
# data:
|
||||
# default: |-
|
||||
# version: v1
|
||||
# flags:
|
||||
# migStrategy: none
|
||||
# mig-single: |-
|
||||
# version: v1
|
||||
# flags:
|
||||
# migStrategy: single
|
||||
# mig-mixed: |-
|
||||
# version: v1
|
||||
# flags:
|
||||
# migStrategy: mixed
|
||||
config:
|
||||
# Create a ConfigMap (default: false)
|
||||
create: false
|
||||
# ConfigMap name (either existing or to create a new one with create=true above)
|
||||
name: ""
|
||||
# Default config name within the ConfigMap
|
||||
default: ""
|
||||
# Data section for the ConfigMap to create (i.e. only applies when create=true)
|
||||
data: {}
|
||||
# MPS related configuration for the plugin
|
||||
mps:
|
||||
# MPS root path on the host
|
||||
root: "/run/nvidia/mps"
|
||||
|
||||
# standalone dcgm hostengine
|
||||
dcgm:
|
||||
# disabled by default to use embedded nv-hostengine by exporter
|
||||
enabled: false
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: dcgm
|
||||
version: 3.3.9-1-ubuntu22.04
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: []
|
||||
env: []
|
||||
resources: {}
|
||||
|
||||
dcgmExporter:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia/k8s
|
||||
image: dcgm-exporter
|
||||
version: 3.3.9-3.6.1-ubuntu22.04
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: DCGM_EXPORTER_LISTEN
|
||||
value: ":9400"
|
||||
- name: DCGM_EXPORTER_KUBERNETES
|
||||
value: "true"
|
||||
- name: DCGM_EXPORTER_COLLECTORS
|
||||
value: "/etc/dcgm-exporter/dcp-metrics-included.csv"
|
||||
resources: {}
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
interval: 15s
|
||||
honorLabels: false
|
||||
additionalLabels: {}
|
||||
relabelings: []
|
||||
# - source_labels:
|
||||
# - __meta_kubernetes_pod_node_name
|
||||
# regex: (.*)
|
||||
# target_label: instance
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
# DCGM Exporter configuration
|
||||
# This block is used to configure DCGM Exporter to emit a customized list of metrics.
|
||||
# Use "name" to either point to an existing ConfigMap or to create a new one with a
|
||||
# list of configurations (i.e. with create=true).
|
||||
# When pointing to an existing ConfigMap, the ConfigMap must exist in the same namespace as the release.
|
||||
# The metrics are expected to be listed under a key called `dcgm-metrics.csv`.
|
||||
# Use "data" to build an integrated ConfigMap from a set of custom metrics as
|
||||
# part of the chart. An example of some custom metrics is shown below. Note that
|
||||
# the contents of "data" must be in CSV format and be valid DCGM Exporter metric configurations.
|
||||
# config:
|
||||
# name: custom-dcgm-exporter-metrics
|
||||
# create: true
|
||||
# data: |-
|
||||
# Format
|
||||
# If line starts with a '#' it is considered a comment
|
||||
# DCGM FIELD, Prometheus metric type, help message
|
||||
|
||||
# Clocks
|
||||
# DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
|
||||
# DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
|
||||
gfd:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia
|
||||
image: k8s-device-plugin
|
||||
version: v0.17.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env:
|
||||
- name: GFD_SLEEP_INTERVAL
|
||||
value: 60s
|
||||
- name: GFD_FAIL_ON_INIT_ERROR
|
||||
value: "true"
|
||||
resources: {}
|
||||
|
||||
migManager:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: k8s-mig-manager
|
||||
version: v0.10.0-ubuntu20.04
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env:
|
||||
- name: WITH_REBOOT
|
||||
value: "false"
|
||||
resources: {}
|
||||
# MIG configuration
|
||||
# Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations (i.e. with create=true).
|
||||
# Use "data" to build an integrated ConfigMap from a set of configurations as
|
||||
# part of this helm chart. An example of setting "data" might be:
|
||||
# config:
|
||||
# name: custom-mig-parted-configs
|
||||
# create: true
|
||||
# data: |-
|
||||
# config.yaml: |-
|
||||
# version: v1
|
||||
# mig-configs:
|
||||
# all-disabled:
|
||||
# - devices: all
|
||||
# mig-enabled: false
|
||||
# custom-mig:
|
||||
# - devices: [0]
|
||||
# mig-enabled: false
|
||||
# - devices: [1]
|
||||
# mig-enabled: true
|
||||
# mig-devices:
|
||||
# "1g.10gb": 7
|
||||
# - devices: [2]
|
||||
# mig-enabled: true
|
||||
# mig-devices:
|
||||
# "2g.20gb": 2
|
||||
# "3g.40gb": 1
|
||||
# - devices: [3]
|
||||
# mig-enabled: true
|
||||
# mig-devices:
|
||||
# "3g.40gb": 1
|
||||
# "4g.40gb": 1
|
||||
config:
|
||||
default: "all-disabled"
|
||||
# Create a ConfigMap (default: false)
|
||||
create: false
|
||||
# ConfigMap name (either existing or to create a new one with create=true above)
|
||||
name: ""
|
||||
# Data section for the ConfigMap to create (i.e. only applies when create=true)
|
||||
data: {}
|
||||
gpuClientsConfig:
|
||||
name: ""
|
||||
|
||||
nodeStatusExporter:
|
||||
enabled: false
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: gpu-operator-validator
|
||||
# If version is not specified, then default is to use chart.AppVersion
|
||||
#version: ""
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
resources: {}
|
||||
|
||||
gds:
|
||||
enabled: false
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: nvidia-fs
|
||||
version: "2.20.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
args: []
|
||||
|
||||
gdrcopy:
|
||||
enabled: false
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: gdrdrv
|
||||
version: "v2.4.1-2"
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
args: []
|
||||
|
||||
vgpuManager:
|
||||
enabled: false
|
||||
repository: ""
|
||||
image: vgpu-manager
|
||||
version: ""
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
resources: {}
|
||||
driverManager:
|
||||
image: k8s-driver-manager
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||
version: v0.7.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: "false"
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: "false"
|
||||
|
||||
vgpuDeviceManager:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: vgpu-device-manager
|
||||
version: v0.2.8
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
config:
|
||||
name: ""
|
||||
default: "default"
|
||||
|
||||
vfioManager:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia
|
||||
image: cuda
|
||||
version: 12.6.3-base-ubi9
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
resources: {}
|
||||
driverManager:
|
||||
image: k8s-driver-manager
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
|
||||
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
|
||||
version: v0.7.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: ENABLE_GPU_POD_EVICTION
|
||||
value: "false"
|
||||
- name: ENABLE_AUTO_DRAIN
|
||||
value: "false"
|
||||
|
||||
kataManager:
|
||||
enabled: false
|
||||
config:
|
||||
artifactsDir: "/opt/nvidia-gpu-operator/artifacts/runtimeclasses"
|
||||
runtimeClasses:
|
||||
- name: kata-nvidia-gpu
|
||||
nodeSelector: {}
|
||||
artifacts:
|
||||
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
|
||||
pullSecret: ""
|
||||
- name: kata-nvidia-gpu-snp
|
||||
nodeSelector:
|
||||
"nvidia.com/cc.capable": "true"
|
||||
artifacts:
|
||||
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.86.10-snp
|
||||
pullSecret: ""
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: k8s-kata-manager
|
||||
version: v0.2.2
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env: []
|
||||
resources: {}
|
||||
|
||||
sandboxDevicePlugin:
|
||||
enabled: true
|
||||
repository: nvcr.io/nvidia
|
||||
image: kubevirt-gpu-device-plugin
|
||||
version: v1.2.10
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
args: []
|
||||
env: []
|
||||
resources: {}
|
||||
|
||||
ccManager:
|
||||
enabled: false
|
||||
defaultMode: "off"
|
||||
repository: nvcr.io/nvidia/cloud-native
|
||||
image: k8s-cc-manager
|
||||
version: v0.1.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullSecrets: []
|
||||
env:
|
||||
- name: CC_CAPABLE_DEVICE_IDS
|
||||
value: "0x2339,0x2331,0x2330,0x2324,0x2322,0x233d"
|
||||
resources: {}
|
||||
|
||||
node-feature-discovery:
|
||||
enableNodeFeatureApi: true
|
||||
priorityClassName: system-node-critical
|
||||
gc:
|
||||
enable: true
|
||||
replicaCount: 1
|
||||
serviceAccount:
|
||||
name: node-feature-discovery
|
||||
create: false
|
||||
worker:
|
||||
serviceAccount:
|
||||
name: node-feature-discovery
|
||||
# disable creation to avoid duplicate serviceaccount creation by master spec below
|
||||
create: false
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Equal"
|
||||
value: ""
|
||||
effect: "NoSchedule"
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
config:
|
||||
sources:
|
||||
pci:
|
||||
deviceClassWhitelist:
|
||||
- "02"
|
||||
- "0200"
|
||||
- "0207"
|
||||
- "0300"
|
||||
- "0302"
|
||||
deviceLabelFields:
|
||||
- vendor
|
||||
master:
|
||||
serviceAccount:
|
||||
name: node-feature-discovery
|
||||
create: true
|
||||
config:
|
||||
extraLabelNs: ["nvidia.com"]
|
||||
# noPublish: false
|
||||
# resourceLabels: ["nvidia.com/feature-1","nvidia.com/feature-2"]
|
||||
# enableTaints: false
|
||||
# labelWhiteList: "nvidia.com/gpu"
|
15
resources/gpu-slice/configmap.yaml
Normal file
15
resources/gpu-slice/configmap.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: time-slicing-config-all
|
||||
namespace: gpu-operator
|
||||
data:
|
||||
any: |-
|
||||
version: v1
|
||||
flags:
|
||||
migStrategy: none
|
||||
sharing:
|
||||
timeSlicing:
|
||||
resources:
|
||||
- name: nvidia.com/gpu
|
||||
replicas: 4
|
Loading…
Reference in a new issue