Commit fe313655 by frank

Add Rancher-Monitoring Chart

(+) Only use for Rancher 2.0 Monitoring and Alerting (+) Support Grafana to proxy with authorization bearer token to Prometheus-Auth agent (+) Support Prometheus web to proxy with authorization bearer token to Prometheus-Auth agent (+) Rich metrics for Kubernetes and Rancher Co-authored-by: aiwantaozi <michelia.feng@gmail.com> Co-authored-by: orangedeng <jxfa0043379@hotmail.com>
parent 0d9b4023
system-library system-charts
============ ============
Rancher 2.0 system library charts. Rancher 2.0 system library charts.
......
# Placeholder ignore file: the directory itself stays tracked while its
# contents do not.
# Ignore everything in this directory
*
# Except this file, so the otherwise empty directory can still be committed
!.gitignore
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# macOS Finder metadata
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
# Chart metadata for the umbrella rancher-monitoring chart.
apiVersion: v1
name: rancher-monitoring
version: 0.0.1
appVersion: "0.23.2"
description: Provides monitoring for Kubernetes which maintaining by Rancher 2.
engine: gotpl
home: https://github.com/coreos/prometheus-operator
icon: https://coreos.com/sites/default/files/inline-images/Overview-prometheus_0.png
keywords:
  - operator
  - prometheus
maintainers:
  - name: thxCode
    email: frank@rancher.com
sources:
  - https://github.com/coreos/prometheus-operator
# rancher-monitoring
Installs [prometheus-operator](https://github.com/coreos/prometheus-operator) to create/configure/manage Prometheus clusters atop Kubernetes.
> **Tip**: Only use for Rancher Monitoring!!!
## Introduction
This chart bootstraps a [prometheus-operator](https://github.com/coreos/prometheus-operator) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
### Security
Alertmanager, Node exporter, Kube-state exporter, Grafana and Prometheus in the same [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) use the same [ServiceAccount](https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/) as Prometheus, which is named like `prometheus-{{ .Release.Name }}`. The Operator uses a separate one.
## Prerequisites
- Rancher 2.1+
# Chart metadata for the alertmanager sub-chart.
apiVersion: v1
name: alertmanager
version: 0.0.1
description: Creates Alertmanager CRD instance for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
# Alertmanager custom resource, named after the Helm release.
#
# Rendering notes:
# - When .Values.labels is set it replaces (not extends) the default pod
#   labels under spec.podMetadata.
# - The persistence storage class is read through `default` so that an unset
#   `persistence.storageClass` no longer feeds a nil value into `ne`; Go
#   template `and` is not guaranteed to short-circuit on older Helm builds.
# - imagePullSecrets is only rendered when `image.pullSecrets` is provided,
#   instead of emitting `null` for the undefined default.
apiVersion: {{ template "operator_api_version" . }}
kind: Alertmanager
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- end }}
  name: {{ .Release.Name }}
spec:
  podMetadata:
    labels:
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 6 }}
{{- else }}
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
{{- end }}
  baseImage: "{{ .Values.image.repository }}"
{{- if .Values.externalUrl }}
  externalUrl: "{{ .Values.externalUrl }}"
{{- end }}
{{- if .Values.nodeSelector }}
  nodeSelector:
{{ toYaml .Values.nodeSelector | indent 4 }}
{{- end }}
  paused: {{ .Values.paused }}
  replicas: {{ .Values.replicaCount }}
  logLevel: {{ .Values.logLevel }}
  resources:
{{ toYaml .Values.resources | indent 4 }}
  retention: "{{ .Values.retention }}"
{{- if .Values.routePrefix }}
  routePrefix: "{{ .Values.routePrefix }}"
{{- end }}
{{- if .Values.secrets }}
  secrets:
{{ toYaml .Values.secrets | indent 4 }}
{{- end }}
{{- if .Values.enabledRBAC }}
  serviceAccountName: {{ .Values.serviceAccountName }}
{{- end }}
{{- if or .Values.storageSpec .Values.persistence.enabled }}
  storage:
    volumeClaimTemplate:
      spec:
{{- if .Values.storageSpec }}
{{ toYaml .Values.storageSpec | indent 8 }}
{{- else }}
{{- $storageClass := default "default" .Values.persistence.storageClass }}
{{- if ne "default" $storageClass }}
        storageClassName: {{ $storageClass }}
{{- end }}
        accessModes:
        - {{ default "ReadWriteOnce" .Values.persistence.accessMode }}
        resources:
          requests:
            storage: {{ .Values.persistence.size | quote }}
{{- end }}
{{- end }}
  version: "{{ .Values.image.tag }}"
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchLabels:
              app: {{ template "app.name" . }}
              alertmanager: {{ .Release.Name }}
{{- if .Values.tolerations }}
  tolerations:
{{ toYaml .Values.tolerations | indent 4 }}
{{- end }}
{{- if .Values.image.pullSecrets }}
  imagePullSecrets:
{{ toYaml .Values.image.pullSecrets | indent 4 }}
{{- end }}
{{- if .Values.sidecarsSpec }}
  containers:
{{ toYaml .Values.sidecarsSpec | indent 4 }}
{{- end }}
# ClusterIP Service in front of the Alertmanager pods (port 9093 -> "web").
# The pod selector mirrors the pod labels chosen in the Alertmanager resource:
# .Values.labels when set, otherwise the default app/chart/release labels.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
  name: expose-alertmanager-metrics
spec:
  ports:
  - name: http
    port: 9093
    targetPort: web
  selector:
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- else }}
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
{{- end }}
  type: ClusterIP
\ No newline at end of file
# ConfigMap holding the nginx.conf used by the nginx proxy Deployment.
# The server listens on port 80 and forwards every request to
# http://alertmanager-operated:9093/.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    component: nginx
  name: {{ template "app.nginx.fullname" . }}
data:
  nginx.conf: |-
    user nginx;
    worker_processes auto;
    error_log /dev/null warn;
    pid /var/run/nginx.pid;
    events {
      worker_connections 1024;
    }
    http {
      include /etc/nginx/mime.types;
      log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)';
      server {
        listen 80;
        access_log off;
        gzip on;
        gzip_min_length 1k;
        gzip_comp_level 2;
        gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png;
        gzip_vary on;
        gzip_disable "MSIE [1-6]\.";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        location / {
          proxy_pass http://alertmanager-operated:9093/;
        }
      }
    }
\ No newline at end of file
# Single-replica nginx Deployment that runs with the nginx.conf mounted from
# the chart's nginx ConfigMap at /nginx/.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    component: nginx
  name: {{ template "app.nginx.fullname" . }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      component: nginx
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
        component: nginx
    spec:
      volumes:
      - name: alertmanager-nginx
        configMap:
          name: {{ template "app.nginx.fullname" . }}
          # 438 decimal == 0666 octal
          defaultMode: 438
          items:
          - key: nginx.conf
            path: nginx.conf
            mode: 438
      containers:
      - name: nginx
        image: nginx:1.15.2
        # Run in the foreground with the mounted configuration file.
        args:
        - nginx
        - -g
        - daemon off;
        - -c
        - /nginx/nginx.conf
        ports:
        - name: http
          containerPort: 80
          protocol: TCP
        volumeMounts:
        - name: alertmanager-nginx
          mountPath: /nginx/
\ No newline at end of file
{{- if not .Values.configFromSecret }}
# Secret carrying the Alertmanager configuration; rendered only when
# `configFromSecret` is empty. `config` is stored base64-encoded under
# alertmanager.yaml and every entry of `templates` becomes an extra key.
apiVersion: v1
kind: Secret
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
data:
  alertmanager.yaml: {{ toYaml .Values.config | b64enc | quote }}
{{- range $fileName, $content := .Values.templates }}
  {{ $fileName }}: {{ $content | b64enc | quote }}
{{- end }}
{{- end }}
# ClusterIP Service on port 80 that targets the pods labelled
# `component: nginx` (the nginx proxy Deployment of this chart).
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    kubernetes.io/cluster-service: "true"
  name: access-alertmanager
spec:
  ports:
  - name: http
    port: 80
    targetPort: http
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    component: nginx
  type: ClusterIP
# ServiceMonitor for Alertmanager: selects Services in the release namespace
# that carry the app/chart/release labels plus `monitoring.cattle.io: "true"`
# and scrapes their `http` port every 30s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  # Spelling corrected from "altermanager".
  jobLabel: alertmanager
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  endpoints:
  - port: http
    interval: 30s
## When true, the Alertmanager spec sets serviceAccountName (see below)
##
enabledRBAC: true
## Name of an already existing ServiceAccount
##
serviceAccountName: ""
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## The name of a secret in the same kubernetes namespace which contains the Alertmanager config
## If defined this will be used instead of the `config` block values.
## The name of the secret must be alertmanager-{{ .Release.Name }} and its data must contain, at least, a key called `alertmanager.yaml`
## that contains the configuration as value.
##
configFromSecret: ""
## Alertmanager configuration directives
## Ref: https://prometheus.io/docs/alerting/configuration/
##
config: {}
#
# An example config:
#   global:
#     resolve_timeout: 5m
#   route:
#     group_by: ['job']
#     group_wait: 30s
#     group_interval: 5m
#     repeat_interval: 12h
#     receiver: 'null'
#     routes:
#     - match:
#         alertname: DeadMansSwitch
#       receiver: 'null'
#   receivers:
#   - name: 'null'
## Alertmanager template files to include
#
templates: {}
#
# An example template:
#   template_1.tmpl: |-
#       {{ define "cluster" }}{{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }}{{ end }}
#
#       {{ define "slack.myorg.text" }}
#       {{- $root := . -}}
#       {{ range .Alerts }}
#         *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
#         *Cluster:* {{ template "cluster" $root }}
#         *Description:* {{ .Annotations.description }}
#         *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
#         *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
#         *Details:*
#           {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
#           {{ end }}
## External URL at which Alertmanager will be reachable
##
externalUrl: ""
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
## Alertmanager container image
##
image:
  repository: quay.io/prometheus/alertmanager
  tag: v0.15.2
## Labels to be added to the Alertmanager
##
# labels: {}
## Node labels for Alertmanager pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Tolerations for use with node taints (a list, hence the empty-list default)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
#  - key: "key"
#    operator: "Equal"
#    value: "value"
#    effect: "NoSchedule"
## If true, the Operator won't process any Alertmanager configuration changes
##
paused: false
## Number of Alertmanager replicas desired
##
replicaCount: 1
## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# requests:
#   memory: 400Mi
## How long Alertmanager retains its data
##
retention: 24h
## Prefix used to register routes, overriding externalUrl route.
## Useful for proxies that rewrite URLs.
##
routePrefix: ""
## List of Secrets in the same namespace as the Alertmanager
## object, which shall be mounted into the Alertmanager Pods.
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
##
secrets: []
service:
  ## Maintains session affinity. Should be set to ClientIP for HA setup
  ## Only options are ClientIP and None. Do not leave blank.
  sessionAffinity: None
  ## Annotations to be added to the Service
  ##
  annotations: {}
  ## Cluster-internal IP address for Alertmanager Service
  ##
  clusterIP: ""
  ## List of external IP addresses at which the Alertmanager Service will be available
  ##
  externalIPs: []
  ## Labels to be added to the Service
  ##
  labels: {}
  ## External IP address to assign to Alertmanager Service
  ## Only used if service.type is 'LoadBalancer' and supported by cloud provider
  ##
  loadBalancerIP: ""
  ## List of client IPs allowed to access Alertmanager Service
  ## Only used if service.type is 'LoadBalancer' and supported by cloud provider
  ##
  loadBalancerSourceRanges: []
  ## Port to expose on each node
  ## Only used if service.type is 'NodePort'
  ##
  # nodePort: 30903
  ## Service type
  ##
  type: ClusterIP
## Alertmanager log level
##
logLevel: "info"
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
#   storageClassName: gluster
#   accessModes: ["ReadWriteOnce"]
#   resources:
#     requests:
#       storage: 50Gi
#   selector: {}
## Easy way to create persistent data
##
persistence: {}
#   enabled: true
#   storageClass: gluster
#   accessMode: "ReadWriteOnce"
#   size: 50Gi
## Extra containers appended to the Alertmanager pods
##
sidecarsSpec: []
#   - name: sidecar
#     image: registry/name:tag
# Chart metadata for the exporter-coredns sub-chart.
apiVersion: v1
name: exporter-coredns
version: 0.0.1
description: Creates ServiceMonitor CRD of coredns for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
{{- if .Values.endpoints }}
# Rendered only when custom `endpoints` are supplied: a headless Service and
# a same-named Endpoints object listing those IPs on the metrics port.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: coredns
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
subsets:
- addresses:
{{- range $ip := .Values.endpoints }}
  - ip: {{ $ip }}
{{- end }}
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
{{- end }}
# ServiceMonitor scraping the `metrics` port of Services labelled
# `k8s-app: coredns` every 15s, authenticating with the service account token.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: coredns
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      k8s-app: coredns
  endpoints:
  - port: metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints: list of IP addresses. When non-empty, the chart renders
## a headless Service and an Endpoints object for them.
##
endpoints: []
## Port on which the metrics are served
##
ports:
  metrics:
    port: 9153
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-fluentd sub-chart.
apiVersion: v1
name: exporter-fluentd
version: 0.0.1
description: Creates Fluentd Metrics Exporter instance for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# ServiceMonitor scraping the `metrics` port of Services labelled
# `k8s-app: fluentd` in the cattle-logging namespace every 15s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: fluentd
  namespaceSelector:
    matchNames:
    - cattle-logging
  selector:
    matchLabels:
      k8s-app: fluentd
  endpoints:
  - port: metrics
    interval: 15s
    honorLabels: true
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## NOTE(review): a "skip verification" comment used to sit here without any
## matching key; this chart's ServiceMonitor does not read such a value.
##
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-kube-controller-manager sub-chart.
apiVersion: v1
name: exporter-kube-controller-manager
version: 0.0.1
description: Creates ServiceMonitor CRD of controller manager for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
{{- if .Values.endpoints }}
# Rendered only when custom `endpoints` are supplied: a headless Service and
# a same-named Endpoints object listing those IPs on the metrics port.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: kube-controller-manager
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
subsets:
- addresses:
{{- range $ip := .Values.endpoints }}
  - ip: {{ $ip }}
{{- end }}
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
{{- end }}
# ServiceMonitor scraping the `metrics` port of Services labelled
# `k8s-app: kube-controller-manager` every 15s. The TLS block always sets the
# service account CA and adds insecureSkipVerify when the value is enabled.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: kube-controller-manager
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      k8s-app: kube-controller-manager
  endpoints:
  - port: metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
{{- end }}
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## When true, the ServiceMonitor sets `insecureSkipVerify: true` in its
## tlsConfig, i.e. certificate validation of the scrape target is skipped.
##
insecureSkipVerify: true
## Custom endpoints: list of IP addresses. When non-empty, the chart renders
## a headless Service and an Endpoints object for them.
##
endpoints: []
## Port on which the metrics are served
##
ports:
  metrics:
    port: 10252
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-kube-dns sub-chart.
apiVersion: v1
name: exporter-kube-dns
version: 0.0.1
description: Creates ServiceMonitor CRD of kube-dns for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
{{- if .Values.endpoints }}
# Rendered only when custom `endpoints` are supplied: a headless Service and
# a same-named Endpoints object exposing the dnsmasq and skydns metrics ports.
# The Service is labelled `k8s-app: kube-dns` (previously the copy-pasted
# `coredns`) so that this chart targets kube-dns rather than CoreDNS; the
# ServiceMonitor selector further down is changed in step.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: kube-dns
  name: {{ template "app.dnsname" . }}
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: dnsmasq-metrics
    port: {{ .Values.ports.metrics.dnsmasq.port }}
    protocol: TCP
    targetPort: {{ .Values.ports.metrics.dnsmasq.port }}
  - name: skydns-metrics
    port: {{ .Values.ports.metrics.skydns.port }}
    protocol: TCP
    targetPort: {{ .Values.ports.metrics.skydns.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
  - ip: {{ . }}
{{- end }}
  ports:
  - name: dnsmasq-metrics
    port: {{ .Values.ports.metrics.dnsmasq.port }}
    protocol: TCP
  - name: skydns-metrics
    port: {{ .Values.ports.metrics.skydns.port }}
    protocol: TCP
{{- end }}
# ServiceMonitor (separate template file) scraping the dnsmasq and skydns
# metrics ports of Services labelled `k8s-app: kube-dns` every 15s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  jobLabel: kube-dns
  selector:
    matchLabels:
      k8s-app: kube-dns
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  endpoints:
  - port: dnsmasq-metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  - port: skydns-metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints: list of IP addresses. When non-empty, the chart renders
## a headless Service and an Endpoints object for them.
##
endpoints: []
## Ports on which the dnsmasq and skydns metrics are served
##
ports:
  metrics:
    dnsmasq:
      port: 10054
    skydns:
      port: 10055
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-kube-etcd sub-chart.
apiVersion: v1
name: exporter-kube-etcd
version: 0.0.1
description: Creates ServiceMonitor CRD of etcd for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
{{- if .Values.endpoints }}
# Rendered only when custom `endpoints` are supplied: a headless Service and
# a same-named Endpoints object listing those IPs on the metrics port.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: etcd-server
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
subsets:
- addresses:
{{- range $ip := .Values.endpoints }}
  - ip: {{ $ip }}
{{- end }}
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
{{- end }}
# ServiceMonitor scraping the `metrics` port of Services labelled
# `k8s-app: etcd-server` every 15s. When `ports.metrics.scheme` is "https" the
# endpoint switches to https and gets a tlsConfig built from caFile plus the
# optional certFile / keyFile / insecureSkipVerify values.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: kube-etcd
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      k8s-app: etcd-server
  endpoints:
  - port: metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if eq .Values.ports.metrics.scheme "https" }}
    scheme: https
    tlsConfig:
      caFile: {{ .Values.caFile }}
{{- if .Values.certFile }}
      certFile: {{ .Values.certFile }}
{{- end }}
{{- if .Values.keyFile }}
      keyFile: {{ .Values.keyFile }}
{{- end }}
{{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
{{- end }}
{{- end }}
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## When true (and the metrics scheme is https), the ServiceMonitor sets
## `insecureSkipVerify: true` in its tlsConfig, i.e. certificate validation
## of the scrape target is skipped.
##
insecureSkipVerify: true
## TLS configuration for the ServiceMonitor: caFile is always rendered for
## https; certFile and keyFile are appended only when set.
##
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
certFile: ""
keyFile: ""
## Custom endpoints: list of IP addresses. When non-empty, the chart renders
## a headless Service and an Endpoints object for them.
##
endpoints: []
## Scheme and port on which the metrics are served
##
ports:
  metrics:
    scheme: "https"
    port: 4001
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-kube-scheduler sub-chart.
apiVersion: v1
name: exporter-kube-scheduler
version: 0.0.1
description: Creates ServiceMonitor CRD of scheduler for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
{{- if .Values.endpoints }}
# Rendered only when custom `endpoints` are supplied: a headless Service and
# a same-named Endpoints object listing those IPs on the metrics port.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: kube-scheduler
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  name: {{ template "app.dnsname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
subsets:
- addresses:
{{- range $ip := .Values.endpoints }}
  - ip: {{ $ip }}
{{- end }}
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
{{- end }}
\ No newline at end of file
# Recording rules for the scheduler: 0.99 / 0.9 / 0.5 quantiles of the e2e
# scheduling, scheduling-algorithm and binding latency histograms, converted
# from microseconds to seconds (division by 1e+06).
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-scheduler.rules
    rules:
    - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
      expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.99"
    - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
      expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.9"
    - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
      expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.5"
    - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
      expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.99"
    - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
      expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.9"
    - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
      expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.5"
    - record: cluster:scheduler_binding_latency_seconds:quantile
      expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.99"
    - record: cluster:scheduler_binding_latency_seconds:quantile
      expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.9"
    - record: cluster:scheduler_binding_latency_seconds:quantile
      expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket) BY (le, cluster)) / 1e+06
      labels:
        quantile: "0.5"
# ServiceMonitor scraping the `metrics` port of Services labelled
# `k8s-app: kube-scheduler` every 15s with the service account token.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: kube-scheduler
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      k8s-app: kube-scheduler
  endpoints:
  - port: metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints: list of IP addresses. When non-empty, the chart renders
## a headless Service and an Endpoints object for them.
##
endpoints: []
## Port on which the metrics are served
##
ports:
  metrics:
    port: 10251
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
prometheusRule:
  ## Custom Labels to be added to PrometheusRule
  ##
  labels: {}
# Chart metadata for the exporter-kube-state sub-chart.
apiVersion: v1
name: exporter-kube-state
version: 0.0.1
description: Creates Kube-state Exporter instance for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
# Deployment of the kube-state exporter container (listens on 8080, probed on
# `/`). nodeSelector, serviceAccountName and tolerations are rendered only
# when the corresponding values are set.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
{{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
{{- end }}
      containers:
      - name: kube-state
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
        ports:
        - name: http
          protocol: TCP
          containerPort: 8080
        resources:
          requests:
            cpu: 100m
            memory: 130Mi
          limits:
            cpu: 100m
            memory: 200Mi
        readinessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 30
          timeoutSeconds: 5
        livenessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 30
          timeoutSeconds: 30
{{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
{{- end }}
{{- if .Values.tolerations }}
      tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
{{- end }}
# ClusterIP Service for the kube-state exporter pods: exposes
# `ports.metrics.port` as the `metrics` port, forwarding to container port 8080.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
  name: expose-kubernetes-metrics
spec:
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: 8080
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
  type: ClusterIP
# ServiceMonitor for the kube-state exporter: selects Services in the release
# namespace carrying the app/chart/release labels plus
# `monitoring.cattle.io: "true"` and scrapes their `metrics` port every 15s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: kube-state
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  endpoints:
  - port: metrics
    interval: 15s
    honorLabels: true
## When true, the Deployment's pod spec sets serviceAccountName (see below)
##
enabledRBAC: true
## Name of an already existing ServiceAccount
##
serviceAccountName: ""
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Port on which the exporter's Service exposes the metrics
##
ports:
  metrics:
    port: 8080
# Default values for kube-state-metrics.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1
image:
  repository: quay.io/coreos/kube-state-metrics
  tag: v1.4.0
## Node Selector to constrain pods to run on particular nodes
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
nodeSelector: {}
## Tolerations for use with node taints (a list, hence the empty-list default)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
#  - key: "key"
#    operator: "Equal"
#    value: "value"
#    effect: "NoSchedule"
serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
# Chart metadata for the exporter-kubelets sub-chart.
apiVersion: v1
name: exporter-kubelets
version: 0.0.1
description: Creates ServiceMonitor CRD of kublets for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
# ServiceMonitor for Services labelled `k8s-app: kubelet`. Four endpoints are
# declared: the https metrics port (default path and /metrics/cadvisor, both
# with token auth and a tlsConfig) plus the plain `http-metrics` and
# `cadvisor` ports.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "charts.exporter-kubelets.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
spec:
  jobLabel: kubelet
  # NOTE(review): `any: true` is set together with `matchNames`; confirm which
  # one is intended, since selecting all namespaces makes the list redundant.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      k8s-app: kubelet
  endpoints:
  - port: https-metrics
    scheme: https
    interval: 15s
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
{{- end }}
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  - port: https-metrics
    scheme: https
    path: /metrics/cadvisor
    interval: 30s
    honorLabels: true
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
{{- end }}
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  - port: http-metrics
    interval: 15s
  - port: cadvisor
    interval: 30s
    honorLabels: true
## API group of the CRD
##
apiGroup: monitoring.coreos.com

## Certificate verification is skipped until it is understood why validating
## the kubelet certificate fails on API server nodes.
##
insecureSkipVerify: true

serviceMonitor:
  ## Extra labels to put on the ServiceMonitor
  ##
  labels: {}
apiVersion: v1
description: Creates ServiceMonitor CRD of apiserver for Kubernetes which is maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: exporter-kubernetes
version: 0.0.1
# Recording rules that pre-aggregate container resource usage per pod,
# per namespace and per cluster, plus API server request latency quantiles.
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  groups:
    - name: kubernetes.rules
      rules:
        # Per-pod aggregates.
        - record: pod_name:container_memory_usage_bytes:sum
          expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name)
        - record: pod_name:container_spec_cpu_shares:sum
          expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name)
        - record: pod_name:container_cpu_usage:sum
          expr: >-
            sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
            BY (pod_name)
        - record: pod_name:container_fs_usage_bytes:sum
          expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name)
        # Per-namespace aggregates.
        - record: namespace:container_memory_usage_bytes:sum
          expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace)
        - record: namespace:container_spec_cpu_shares:sum
          expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace)
        - record: namespace:container_cpu_usage:sum
          expr: >-
            sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m]))
            BY (namespace)
        # Cluster-wide usage relative to machine capacity.
        - record: cluster:memory_usage:ratio
          expr: >-
            sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY (cluster)
            / sum(machine_memory_bytes) BY (cluster)
        - record: cluster:container_spec_cpu_shares:ratio
          expr: >-
            sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000
            / sum(machine_cpu_cores)
        - record: cluster:container_cpu_usage:ratio
          expr: >-
            sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
            / sum(machine_cpu_cores)
        # API server latency quantiles. All quantiles share one record name
        # and are told apart by the "quantile" label.
        - record: apiserver_latency_seconds:quantile
          expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
          labels:
            quantile: "0.99"
        - record: apiserver_latency_seconds:quantile
          expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
          labels:
            quantile: "0.9"
        # Deprecated alias: the 0.9 quantile used to be recorded only under
        # this inconsistently spelled name. It is kept so that existing
        # queries against the old series keep working.
        - record: apiserver_latency:quantile_seconds
          expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
          labels:
            quantile: "0.9"
        - record: apiserver_latency_seconds:quantile
          expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
          labels:
            quantile: "0.5"
# ServiceMonitor that scrapes the Kubernetes API server through the Service
# labelled component=apiserver,provider=kubernetes in the "default" namespace.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
{{- end }}
spec:
  jobLabel: kubernetes
  namespaceSelector:
    matchNames:
      - "default"
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
    # HTTPS scrape authenticated with the pod's ServiceAccount token.
    - port: https
      scheme: https
      interval: 15s
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      tlsConfig:
        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
        insecureSkipVerify: true
{{- end }}
## API group of the CRD
##
apiGroup: monitoring.coreos.com

## Certificate verification is skipped until it is understood why validating
## the certificate for the kubelet on API server nodes fails.
##
insecureSkipVerify: true

serviceMonitor:
  ## Extra labels to put on the ServiceMonitor
  ##
  labels: {}

prometheusRule:
  ## Extra labels to put on the PrometheusRule
  ##
  labels: {}
\ No newline at end of file
apiVersion: v1
description: Creates Node Exporter instance for Kubernetes which is maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: exporter-node
version: 0.0.1
\ No newline at end of file
# DaemonSet that runs the node exporter on the host network and host PID
# namespace, publishing the metrics port on each host.
apiVersion: {{ template "daemonset_api_version" . }}
kind: DaemonSet
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      containers:
      - name: exporter-node
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
        args:
          - --web.listen-address=0.0.0.0:{{ .Values.ports.metrics.port }}
{{- if and .Values.container .Values.container.args }}
{{ toYaml .Values.container.args | indent 10 }}
{{- end }}
        ports:
          - name: http
            containerPort: {{ .Values.ports.metrics.port }}
            hostPort: {{ .Values.ports.metrics.port }}
        resources:
{{ toYaml .Values.resources | indent 12 }}
{{- if and .Values.container .Values.container.volumeMounts }}
        volumeMounts:
{{ toYaml .Values.container.volumeMounts | indent 10 }}
{{- end }}
{{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
{{- end }}
      # "tolerations" must appear only once in the pod spec: a duplicate
      # mapping key is invalid YAML and is resolved as last-wins or rejected,
      # depending on the parser. User-supplied tolerations take precedence;
      # the built-in list is the fallback when none are configured.
      tolerations:
{{- if .Values.tolerations }}
{{ toYaml .Values.tolerations | indent 8 }}
{{- else }}
        - operator: "Exists"
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
        - key: "node-role.kubernetes.io/etcd"
          operator: "Exists"
        - key: "node-role.kubernetes.io/controlplane"
          operator: "Exists"
{{- end }}
{{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
{{- end }}
      hostNetwork: true
      hostPID: true
{{- if and .Values.container .Values.container.volumes }}
      volumes:
{{ toYaml .Values.container.volumes | indent 6 }}
{{- end }}
# Recording rules that aggregate node exporter metrics per instance and for
# the whole cluster.
#
# Metric names follow node_exporter v0.16.0 and later, which is this chart's
# default image tag (node_cpu became node_cpu_seconds_total, filesystem
# sizes gained a _bytes suffix, network byte counters gained _total).
# If image.tag is pinned below v0.16.0, the pre-0.16 names apply instead.
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  groups:
    - name: node.rules
      rules:
        - record: instance:node_cpu:rate:sum
          expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
        - record: instance:node_filesystem_usage:sum
          expr: >-
            sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}))
            BY (instance)
        - record: instance:node_network_receive_bytes:rate:sum
          expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
        - record: instance:node_network_transmit_bytes:rate:sum
          expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
        # Busy CPU rate divided by the number of CPUs of the instance.
        - record: instance:node_cpu:ratio
          expr: >-
            sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode)
            / ON(instance) GROUP_LEFT()
            count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
        - record: cluster:node_cpu:sum_rate5m
          expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
        # Reads the series recorded by the rule directly above. The previous
        # expression referenced cluster:node_cpu:rate5m, which no rule in
        # this group records.
        - record: cluster:node_cpu:ratio
          expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
{{- $metricsPort := .Values.ports.metrics.port -}}
# ClusterIP Service in front of the node exporter pods. Its "metrics" port
# and its labels are what the exporter-node ServiceMonitor selects on.
apiVersion: v1
kind: Service
metadata:
  name: expose-node-metrics
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
spec:
  type: ClusterIP
  ports:
    - name: metrics
      protocol: TCP
      port: {{ $metricsPort }}
      targetPort: {{ $metricsPort }}
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
# ServiceMonitor that scrapes the "metrics" port of the node exporter
# Service in the release namespace.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
{{- end }}
spec:
  jobLabel: node
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  endpoints:
    - port: metrics
      interval: 15s
## When true, the DaemonSet pod spec sets serviceAccountName to the value below
##
enabledRBAC: true

## Name of an already existing ServiceAccount
##
serviceAccountName: ''

## API group of the CRD
##
apiGroup: monitoring.coreos.com

## Port the exporter listens on; the same port is published on the host
##
ports:
  metrics:
    port: 9100

# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1

image:
  repository: quay.io/prometheus/node-exporter
  tag: v0.16.0

resources:
  limits:
    cpu: 200m
    memory: 50Mi
  requests:
    cpu: 100m
    memory: 30Mi

## Extra container arguments plus the host paths mounted into the container,
## so the exporter reads /proc and /sys of the host under /host.
##
container:
  args:
    - --path.procfs=/host/proc
    - --path.sysfs=/host/sys
  volumes:
    - name: proc
      hostPath:
        path: /proc
    - name: sys
      hostPath:
        path: /sys
  volumeMounts:
    - name: proc
      mountPath: /host/proc
      readOnly: true
    - name: sys
      mountPath: /host/sys
      readOnly: true

## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations:
  - effect: NoSchedule
    operator: Exists

## Node Selector to constrain pods to run on particular nodes
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
nodeSelector: {}

serviceMonitor:
  ## Extra labels to put on the ServiceMonitor
  ##
  labels: {}

prometheusRule:
  ## Extra labels to put on the PrometheusRule
  ##
  labels: {}
apiVersion: v1
description: Creates Grafana instance for Kubernetes which is maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: grafana
version: 0.0.1
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"},{"type":"panel","id":"table","name":"Table","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.","editable":true,"gnetId":1621,"graphTooltip":0,"id":null,"iteration":1543396027075,"links":[],"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":34,"panels":[],"title":"Total usage","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":1},"height":"180px","id":6,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster CPU usage (2m avg)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":1},"height":"180px","id":4,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster memory usage","transparent":false,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":16,"y":1},"height":"180px","id":7,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"","metric":"","refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster filesystem usage","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":0,"y":6},"height":"1px","id":11,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":" 
cores","postfixFontSize":"30%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":4,"y":6},"height":"1px","id":12,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":" cores","postfixFontSize":"30%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":8,"y":6},"height":"1px","id":9,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"20%","prefix":"","prefixFontSize":"20%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":12,"y":6},"height":"1px","id":10,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum 
(machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":16,"y":6},"height":"1px","id":13,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":20,"y":6},"height":"1px","id":14,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":0,"y":9},"id":2051,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[2m])) by (kubernetes_io_hostname) / sum (machine_cpu_cores) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster CPU usage (2m 
avg)","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":8,"y":9},"id":2052,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (container_memory_working_set_bytes{id=\"/\"}) by (kubernetes_io_hostname) / sum (machine_memory_bytes) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster memory 
usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":16,"y":9},"id":2053,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\"}) by (kubernetes_io_hostname) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\"}) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster filesystem 
usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":15},"id":2022,"panels":[],"title":"Node","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":16},"id":2024,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_info{node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Number Of Nodes","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":16},"id":2025,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_status_condition{condition=\"OutOfDisk\", node=~\"$Node\", status=\"true\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"1","title":"Nodes Out of Disk","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":16},"id":2026,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_spec_unschedulable{node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"1","title":"Nodes 
Unavailable","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":2014,"panels":[],"title":"Deployments","type":"row"},{"columns":[{"text":"Current","value":"current"}],"datasource":"${DS_PROMETHEUS}","fontSize":"100%","gridPos":{"h":5,"w":6,"x":0,"y":20},"id":2016,"links":[],"pageSize":null,"scroll":true,"showHeader":true,"sort":{"col":1,"desc":true},"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"date"},{"alias":"","colorMode":"row","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":0,"pattern":"Metric","thresholds":["0","0",".9"],"type":"string","unit":"none"},{"alias":"","colorMode":"row","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"pattern":"Value","thresholds":["0","1"],"type":"number","unit":"none"}],"targets":[{"expr":"kube_deployment_status_replicas{namespace=~\".*\"}","format":"time_series","instant":true,"interval":"","intervalFactor":1,"legendFormat":"{{ deployment }}","refId":"A"}],"title":"Deployment Replicas - Up To Date","transform":"timeseries_to_rows","type":"table"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":6,"y":20},"id":2018,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 
189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":12,"y":20},"id":2019,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas_updated{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas - Updated","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":18,"y":20},"id":2020,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas_unavailable{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas - Unavailable","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":25},"id":2045,"panels":[],"title":"Jobs","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":26},"id":2047,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_succeeded{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs Succeeded","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":26},"id":2048,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_active{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs Active","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":26},"id":2049,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_failed{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs 
Failed","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":2028,"panels":[],"title":"Pods","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":0,"y":30},"id":2030,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Running\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Running","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":12,"y":30},"id":2031,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 
0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Pending\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Pending","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":33},"id":2032,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Failed\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Failed","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":33},"id":2033,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Succeeded\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Succeeded","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":33},"id":2034,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Unknown\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Unknown","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":2036,"panels":[],"title":"Containers","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":0,"y":37},"id":2038,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_running{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Running","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":37},"id":2039,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_waiting{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers 
Waiting","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":37},"id":2040,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_terminated{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Terminated","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":37},"id":2041,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"expr":"sum(delta(kube_pod_container_status_restarts{namespace=\"kube-system\"}[30m]))","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Restarts (Last 30 Minutes)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":0,"y":40},"id":2043,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_resource_requests_cpu_cores{namespace=~\".*\", node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"CPU Cores Requested by Containers","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":12,"y":40},"id":2042,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_resource_requests_memory_bytes{namespace=~\".*\", node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Memory Requested By Containers","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":43},"id":33,"panels":[],"title":"Network I/O pressure","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":5,"w":24,"x":0,"y":44},"height":"200px","id":32,"isNew":true,"legend":{"alignAsTable":false,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"Received","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"Sent","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network I/O 
pressure","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":49},"id":35,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":44},"height":"","id":17,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":50},"id":36,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":45},"height":"","id":23,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{systemd_service_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (systemd_service_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"{{ systemd_service_name }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"System services CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"System services CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":51},"id":37,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":10},"height":"","id":24,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"pod: {{ pod_name }} | {{ container_name }}","metric":"container_cpu","refId":"A","step":10},{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"container_cpu","refId":"B","step":10},{"expr":"sum (rate (container_cpu_usage_seconds_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","interval":"10s","intervalFactor":1,"legendFormat":"rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"container_cpu","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers CPU usage (2m 
avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers CPU usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":52},"id":38,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":11},"id":20,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"{{ id }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"All processes CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":53},"id":39,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":15},"id":25,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":54},"id":40,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":13},"id":26,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{systemd_service_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ systemd_service_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"System services memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"System services memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":55},"id":41,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":27,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"pod: {{ pod_name }} | {{ container_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10},{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)","interval":"10s","intervalFactor":1,"legendFormat":"docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"container_memory_usage:sort_desc","refId":"B","step":10},{"expr":"sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)","interval":"10s","intervalFactor":1,"legendFormat":"rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"container_memory_usage:sort_desc","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers memory 
usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers memory usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":42,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":15},"id":28,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"{{ id }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"All processes memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":57},"id":43,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":16},"id":16,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"-> {{ pod_name }}","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"<- {{ pod_name }}","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods network 
I/O","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":58},"id":44,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":17},"id":30,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> pod: {{ pod_name }} | {{ container_name }}","metric":"network","refId":"B","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- pod: {{ pod_name }} | {{ container_name }}","metric":"network","refId":"D","step":10},{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name 
}})","metric":"network","refId":"C","step":10},{"expr":"sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"network","refId":"E","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"network","refId":"F","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers network I/O","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":59},"id":45,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":18},"id":29,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate 
(container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"-> {{ id }}","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"<- {{ id }}","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"All processes network I/O","type":"row"}],"refresh":"1m","schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":null,"multi":false,"name":"Node","options":[],"query":"label_values(kubernetes_io_hostname)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-30m","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Cluster","uid":"icjpCppik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Etcd Dashboard for Prometheus metrics scraper","editable":true,"gnetId":3070,"graphTooltip":0,"id":null,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":0,"y":0},"id":44,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_has_leader)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"0,1","title":"Etcd has a leader?","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"YES","value":"1"},{"op":"=","text":"NO","value":"0"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 
129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":8,"y":0},"id":42,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_leader_changes_seen_total)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"The number of leader changes seen","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":43,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_leader_changes_seen_total)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"The total number of failed proposals 
seen","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":23,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"RPC Rate","metric":"grpc_server_started_total","refId":"A","step":60},{"expr":"sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"RPC Failed Rate","metric":"grpc_server_handled_total","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"RPC 
Rate","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":41,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})","format":"time_series","intervalFactor":2,"legendFormat":"Watch Streams","metric":"grpc_server_handled_total","refId":"A","step":60},{"expr":"sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})","format":"time_series","intervalFactor":2,"legendFormat":"Lease Streams","metric":"grpc_server_handled_total","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Active 
Streams","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":8,"x":0,"y":14},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"etcd_debugging_mvcc_db_total_size_in_bytes","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"{{instance}} DB Size","metric":"","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"DB 
Size","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":8,"x":8,"y":14},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{instance}} WAL fsync","metric":"etcd_disk_wal_fsync_duration_seconds_bucket","refId":"A","step":120},{"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} DB fsync","metric":"etcd_disk_backend_commit_duration_seconds_bucket","refId":"B","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Sync 
Duration","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":8,"x":16,"y":14},"id":29,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Resident Memory","metric":"process_resident_memory_bytes","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":5,"gridPos":{"h":7,"w":6,"x":0,"y":21},"id":22,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets
":[{"expr":"rate(etcd_network_client_grpc_received_bytes_total[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Client Traffic In","metric":"etcd_network_client_grpc_received_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Client Traffic In","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":5,"gridPos":{"h":7,"w":6,"x":6,"y":21},"id":21,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(etcd_network_client_grpc_sent_bytes_total[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Client Traffic Out","metric":"etcd_network_client_grpc_sent_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Client Traffic 
Out","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":6,"x":12,"y":21},"id":20,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Peer Traffic In","metric":"etcd_network_peer_received_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Peer Traffic 
In","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":6,"x":18,"y":21},"id":16,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"{{instance}} Peer Traffic Out","metric":"etcd_network_peer_sent_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Peer Traffic 
Out","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_server_proposals_failed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Failure Rate","metric":"etcd_server_proposals_failed_total","refId":"A","step":60},{"expr":"sum(etcd_server_proposals_pending)","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Pending Total","metric":"etcd_server_proposals_pending","refId":"B","step":60},{"expr":"sum(rate(etcd_server_proposals_committed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Commit Rate","metric":"etcd_server_proposals_committed_total","refId":"C","step":60},{"expr":"sum(rate(etcd_server_proposals_applied_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Apply Rate","refId":"D","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Raft 
Proposals","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":0,"editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":19,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"changes(etcd_server_leader_changes_seen_total[1d])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Total Leader Elections Per Day","metric":"etcd_server_leader_changes_seen_total","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Total Leader Elections Per Day","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"proposals_committed_total records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. 
Several healthy members of an etcd cluster may have different total committed proposals at once. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that member is slow or unhealthy.\n\nproposals_applied_total records the total number of consensus proposals applied. The etcd server applies every committed proposal asynchronously. The difference between proposals_committed_total and proposals_applied_total should usually be small (within a few thousands even under high load). If the difference between them continues to rise, it indicates that the etcd server is overloaded. This might happen when applying expensive queries like heavy range queries or large txn operations.","fill":1,"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":2,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":false,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_server_proposals_committed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"total number of consensus proposals committed","metric":"","refId":"A","step":60},{"expr":"sum(rate(etcd_server_proposals_applied_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"total number of consensus proposals applied","metric":"","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"The total number of consensus proposals 
committed","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"indicates how many proposals are queued to commit. Rising pending proposals suggests there is a high client load or the member cannot commit proposals.","fill":1,"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(etcd_server_proposals_pending)","format":"time_series","intervalFactor":2,"legendFormat":"Proposals pending","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Proposals 
pending","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":42},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":" The latency distributions of fsync called by wal","refId":"A","step":30},{"expr":"sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The latency distributions of commit called by backend","refId":"B","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disks 
operations","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":49},"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_client_grpc_received_bytes_total[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total number of bytes received by grpc clients","refId":"A","step":30},{"expr":"sum(rate(etcd_network_client_grpc_sent_bytes_total[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total number of bytes sent to grpc clients","refId":"B","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"Abnormally high snapshot duration (snapshot_save_total_duration_seconds) indicates disk issues and might cause the cluster to be 
unstable.","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total latency distributions of save called by snapshot","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Snapshot duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Etcd","uid":"8naNKoYik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"-","editable":true,"gnetId":5508,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":0,"y":0},"id":6,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(instance) (rate(apiserver_request_count{code!~\"2..\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Error Rate","refId":"A","step":60},{"expr":"sum by(instance) (rate(apiserver_request_count[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Request Rate","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Rates","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":12,"y":0},"id":7,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Latency","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":0,"y":7},"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(nginx_requests_total[1m])","format":"time_series","intervalFactor":1,"legendFormat":"Request Rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Nginx Ingress Request Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":12,"y":7},"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"service_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service 
Depth","refId":"A"},{"expr":"volumes_depth","format":"time_series","intervalFactor":1,"legendFormat":"Volumes Depth","refId":"B"},{"expr":"replicationmanager_depth","format":"time_series","intervalFactor":1,"legendFormat":"Replication Manager Depth","refId":"C"},{"expr":"statefulset_depth","format":"time_series","intervalFactor":1,"legendFormat":"StatefulSet Depth","refId":"D"},{"expr":"serviceaccount_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service Account Depth","refId":"E"},{"expr":"endpoint_depth","format":"time_series","intervalFactor":1,"legendFormat":"Endpoint Depth","refId":"F"},{"expr":"deployment_depth","format":"time_series","intervalFactor":1,"legendFormat":"Deployment Depth","refId":"G"},{"expr":"daemonset_depth","format":"time_series","intervalFactor":1,"legendFormat":"DaemonSet Depth","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Controller Manager Queue Depth","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Components","uid":"Ld4acTYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [CPU resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu) in the cluster.\nFor comparison the total [allocatable CPU cores](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_cpu_cores) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable CPU Cores","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by 
(instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested CPU Cores","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Cores","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"CPU Cores","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":0},"hideTimeOverride":false,"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance)) / min(sum(kube_node_status_allocatable_cpu_cores) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"CPU Cores","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [memory resource 
requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) in the cluster.\nFor comparison the total [allocatable memory](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":8},"id":3,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable Memory","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested Memory","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Memory","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":8},"hideTimeOverride":false,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance)) / min(sum(kube_node_status_allocatable_memory_bytes) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"Memory","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Resource Requests","uid":"0MdTILxik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A quick dashboard for displaying Fluentd metrics.","editable":true,"gnetId":3522,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_queue_length","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_queue_length","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer queue 
length","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_total_queued_size","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_total_queued_size","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer total queued size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(245, 54, 54, 0.9)","rgba(45, 170, 3, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":12,"x":0,"y":14},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"fluentd_up","intervalFactor":2,"refId":"A","step":40}],"thresholds":"0,1","title":"Fluentd Up","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":8,"w":12,"x":12,"y":14},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_retry_count{pluginCategory=\"output\",pluginId=\"apache_log\"}","intervalFactor":2,"metric":"fluentd_retry_count","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd retry count (apache)","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"2017-10-20T13:00:11.189Z","to":"2017-10-20T13:38:24.045Z"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Rancher 
Components","uid":"wDHD1TYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Kubernetes DaemonSet Overview","editable":true,"gnetId":6615,"graphTooltip":1,"id":null,"iteration":1543396055929,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":8,"x":0,"y":0},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_daemonset_status_desired_number_scheduled{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","instant":false,"interval":"","intervalFactor":2,"legendFormat":"","refId":"A","step":600}],"thresholds":"","title":"Desired 
Replicas","transparent":false,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":0},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_daemonset_status_number_available{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":0},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_daemonset_metadata_generation{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","decimals":2,"format":"s","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":3,"x":0,"y":3},"id":11,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"time() - max(kube_daemonset_created{daemonset=~\"$daemonset_name\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"DaemonSet Create Time","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":7,"x":3,"y":3},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":7,"x":10,"y":3},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_working_set_bytes{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\", 
container_name!=\"POD\"})","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":6,"w":7,"x":17,"y":3},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 
0.22)"},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":13,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (pod_name) (rate(container_cpu_usage_seconds_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod_name}}","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"short","label":"","logBase":1,"min":"0","show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":15,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (pod_name) (container_memory_working_set_bytes{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\", 
container_name!=\"POD\"})","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","refId":"A","step":15}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":0,"editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":16},"id":1,"isNew":true,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"avg(kube_daemonset_status_number_ready{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","intervalFactor":1,"legendFormat":"Ready","refId":"A"},{"expr":"avg(kube_daemonset_status_number_available{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Available","refId":"C"},{"expr":"avg(kube_daemonset_status_number_unavailable{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, 
pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Unavailable","refId":"B"},{"expr":"avg(kube_daemonset_status_number_misscheduled{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Misscheduled","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas Status","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":0,"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":"30s","schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"daemonset_namespace","options":[],"query":"label_values(kube_daemonset_metadata_generation, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"DaemonSet","multi":false,"name":"daemonset_name","options":[],"query":"label_values(kube_daemonset_metadata_generation{namespace=\"$daemonset_namespace\"}, 
daemonset)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-30m","to":"now"},"timepicker":{"hidden":false,"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"DaemonSet","uid":"gekRLzHiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396071820,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":0},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 
129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":0},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"GB","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":5,"w":8,"x":16,"y":0},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + 
sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":6,"x":0,"y":5},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"metric":"kube_deployment_spec_replicas","refId":"A","step":600}],"thresholds":"","title":"Desired Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":5},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":5},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Observed Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 
0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":5},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":8},"id":1,"isNew":true,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"max(kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"current 
replicas","refId":"A","step":30},{"expr":"min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"available","refId":"B","step":30},{"expr":"max(kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"unavailable","refId":"C","step":30},{"expr":"min(kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"updated","refId":"D","step":30},{"expr":"max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"desired","refId":"E","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"deployment_namespace","options":[],"query":"label_values(kube_deployment_metadata_generation, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Deployment","multi":false,"name":"deployment_name","options":[],"query":"label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, 
deployment)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"deployment","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Deployment","uid":"kZdoIYxik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396157762,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})","interval":"10s","intervalFactor":1,"legendFormat":"Current: {{ container_name }}","metric":"container_memory_usage_bytes","refId":"A","step":15},{"expr":"kube_pod_container_resource_requests_memory_bytes{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_memory_bytes","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_memory_bytes{pod=\"$pod\", 
container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container }}","metric":"kube_pod_container_resource_limits_memory_bytes","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))","intervalFactor":2,"legendFormat":"{{ container_name }}","refId":"A","step":30},{"expr":"kube_pod_container_resource_requests_cpu_cores{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_cpu_cores","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_cpu_cores{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container 
}}","metric":"kube_pod_container_resource_limits_cpu_cores","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{pod_name=\"$pod\"}[1m])))","intervalFactor":2,"legendFormat":"{{ pod_name }}","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network I/O","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(kube_pod_info, 
namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Pod","multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Container","multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{namespace=~\"$namespace\", pod=\"$pod\"}, container)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Pods","uid":"XSOTSYxiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396179999,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":0},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 
0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":0},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"GB","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_usage_bytes{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}) / 1024^3","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":5,"w":8,"x":16,"y":0},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m])) + 
sum(rate(container_network_receive_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":6,"x":0,"y":5},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"metric":"kube_statefulset_replicas","refId":"A","step":600}],"thresholds":"","title":"Desired Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":5},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":5},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_status_observed_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Observed Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 
0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":5},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_metadata_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":8},"id":1,"isNew":true,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, 
pod)","intervalFactor":2,"legendFormat":"available","refId":"B","step":30},{"expr":"max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"desired","refId":"E","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"statefulset_namespace","options":[],"query":"label_values(kube_statefulset_metadata_generation, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"StatefulSet","multi":false,"name":"statefulset_name","options":[],"query":"label_values(kube_statefulset_metadata_generation{namespace=\"$statefulset_namespace\"}, statefulset)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"statefulset","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"StatefulSet","uid":"TDdTILbiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
# ConfigMap consumed by the grafana-watcher sidecar: bundles the dashboard
# JSON files shipped with the chart plus the Prometheus datasource definition.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.dashboards.fullname" . }}
data:
# Cluster dashboards (dashboards/c_*.json) are only bundled when level is
# "cluster"; the dashboards/w_*.json files are always bundled.
{{- if eq .Values.level "cluster" }}
{{ (.Files.Glob "dashboards/c_*.json").AsConfig | indent 2 }}
{{- end }}
{{ (.Files.Glob "dashboards/w_*.json").AsConfig | indent 2 }}
  prometheus-datasource.json: |+
    {
      "access": "proxy",
      "basicAuth": false,
      "editable": false,
      "isDefault": true,
      "name": "Rancher-Monitoring",
      "type": "prometheus",
      "url": "{{ .Values.prometheusDatasourceURL }}"
    }
# Grafana Deployment. One pod with three containers (grafana, grafana-watcher,
# grafana-proxy) and two init containers that prepare a patched copy of the
# Prometheus datasource plugin.json in a shared emptyDir.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      initContainers:
      # Step 1: the copy hook is mounted over /run.sh in the Grafana image
      # (presumably that image's entrypoint script; verify) and copies the
      # stock plugin.json into the shared /host volume.
      - name: grafana-init-plugin-json-copy
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        volumeMounts:
        - name: grafana-static-hooks
          mountPath: /run.sh
          subPath: copy-datasource-plugin-json.sh
        - name: grafana-static-contents
          mountPath: /host
      # Step 2: the tools image runs the modify hook, which writes
      # /host/grafana/plugin.json from the copy made in step 1.
      - name: grafana-init-plugin-json-modify
        image: {{ .Values.image.inits.tools.repository }}:{{ .Values.image.inits.tools.tag }}
        command:
        - /usr/bin/modify-datasource-plugin-json.sh
        volumeMounts:
        - name: grafana-static-hooks
          mountPath: /usr/bin/modify-datasource-plugin-json.sh
          subPath: modify-datasource-plugin-json.sh
        - name: grafana-static-contents
          mountPath: /host
      containers:
      - name: grafana
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        env:
        - name: GF_AUTH_BASIC_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_SECURITY_ADMIN_USER
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: user
        - name: GF_SECURITY_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: password
        {{- if .Values.extraVars }}
{{ toYaml .Values.extraVars | indent 8 }}
        {{- end }}
        volumeMounts:
        - name: grafana-storage
          mountPath: /var/lib/grafana
        # Overlay the patched plugin.json produced by the init containers.
        - name: grafana-static-contents
          mountPath: /usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json
          subPath: grafana/plugin.json
        {{- if .Values.mountGrafanaConfig }}
        - name: grafana-config
          mountPath: /etc/grafana
        {{- end }}
        ports:
        - name: web
          containerPort: 3000
          protocol: TCP
        readinessProbe:
          httpGet:
            path: /api/health
            port: 3000
          periodSeconds: 1
          timeoutSeconds: 1
          successThreshold: 1
          failureThreshold: 10
        {{- if .Values.resources }}
        resources:
{{ toYaml .Values.resources | indent 12 }}
        {{- end }}
      # Sidecar that watches the dashboard directories and talks to Grafana on
      # 127.0.0.1:3000 using the admin credentials from the Secret.
      - name: grafana-watcher
        image: {{ .Values.grafanaWatcher.repository }}:{{ .Values.grafanaWatcher.tag }}
        args:
        - '--grafana-url=http://127.0.0.1:3000'
        - '--watch-dir=/var/grafana-dashboards'
        {{- range .Values.dashboardConfigmaps }}
        - '--watch-dir=/var/additional-dashboards/{{ . }}'
        {{- end }}
        env:
        - name: GRAFANA_USER
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: user
        - name: GRAFANA_PASSWORD
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: password
        {{- if .Values.grafanaWatcher.resources }}
        resources:
{{ toYaml .Values.grafanaWatcher.resources | indent 12 }}
        {{- end }}
        volumeMounts:
        - name: grafana-dashboards
          mountPath: /var/grafana-dashboards
        {{- range .Values.dashboardConfigmaps }}
        - name: {{ . }}
          mountPath: /var/additional-dashboards/{{ . }}
        {{- end }}
      # nginx front end on port 80, configured from the nginx ConfigMap.
      - name: grafana-proxy
        image: {{ .Values.grafanaProxy.repository }}:{{ .Values.grafanaProxy.tag }}
        args:
        - nginx
        - -g
        - daemon off;
        - -c
        - /nginx/nginx.conf
        ports:
        - name: http
          containerPort: 80
          protocol: TCP
        volumeMounts:
        - mountPath: /nginx/
          name: grafana-nginx
      {{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
      {{- end }}
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
      {{- end }}
      volumes:
      - name: grafana-static-hooks
        configMap:
          name: {{ template "app.hooks.fullname" . }}
          defaultMode: 0777
      # Scratch space shared between the init containers and grafana.
      - name: grafana-static-contents
        emptyDir: {}
      # Persistent when storageSpec or persistence.enabled is set, otherwise
      # an emptyDir.
      - name: grafana-storage
      {{- if or .Values.storageSpec .Values.persistence.enabled }}
        persistentVolumeClaim:
          claimName: {{ template "app.fullname" . }}
      {{- else }}
        emptyDir: {}
      {{- end }}
      - name: grafana-nginx
        configMap:
          defaultMode: 438
          items:
          - key: nginx.conf
            mode: 438
            path: nginx.conf
          name: {{ template "app.nginx.fullname" . }}
      - name: grafana-dashboards
        configMap:
          name: {{ template "app.dashboards.fullname" . }}
      {{- range .Values.dashboardConfigmaps }}
      - name: {{ . }}
        configMap:
          name: {{ . }}
      {{- end }}
# ConfigMap carrying the two shell hooks run by the Grafana init containers.
# copy-datasource-plugin-json.sh copies the stock Prometheus plugin.json to
# /host/grafana/raw-plugin.json; modify-datasource-plugin-json.sh uses jq to
# append a "routes" entry for <prometheusDatasourceURL>/api/v1 that carries an
# Authorization header built from the pod's service account token, and writes
# the result to /host/grafana/plugin.json. Both scripts exit 1 when their
# input file or the /host directory is missing.
# NOTE(review): the Deployment mounts the resulting plugin.json under
# Grafana's public/ directory, so the embedded bearer token may be readable by
# anyone able to fetch that static file - confirm this exposure is acceptable.
# NOTE(review): the token is read once at init time; presumably it goes stale
# if the service account token is rotated - verify.
apiVersion: v1
kind: ConfigMap
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.hooks.fullname" . }}
data:
copy-datasource-plugin-json.sh: |-
#!/bin/bash
srcpath="/usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json"
dstpath="/host/grafana/raw-plugin.json"
if [[ -f $srcpath ]] && [[ -d /host ]]; then
mkdir -p /host/grafana
cp -f $srcpath $dstpath
cat $srcpath
exit 0
fi
exit 1
modify-datasource-plugin-json.sh: |-
#!/bin/sh
srcpath="/host/grafana/raw-plugin.json"
dstpath="/host/grafana/plugin.json"
if [ -f $srcpath ] && [ -d /host ]; then
mkdir -p /host/grafana
token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
cat $srcpath | K8S_BEARERTOKEN="Bearer $token" jq 'to_entries | . + [{"key":"routes","value":[{"path":"api/v1","url":"{{ .Values.prometheusDatasourceURL }}/api/v1","headers":[{"name":"Authorization","content":env.K8S_BEARERTOKEN}]}]}] | from_entries' > $dstpath
cat $dstpath
exit 0
fi
exit 1
# ClusterIP Service exposing Grafana's "web" port (3000). It carries the
# monitoring.cattle.io label that the chart's ServiceMonitor selects on.
apiVersion: v1
kind: Service
metadata:
  name: expose-grafana-metrics
  labels:
    monitoring.cattle.io: "true"
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: ClusterIP
  ports:
  - name: web
    port: 3000
    targetPort: web
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
\ No newline at end of file
# ConfigMap with the nginx.conf used by the grafana-proxy container.
# The single server listens on 80 and proxies to Grafana on localhost:3000.
# sub_filter rules rewrite absolute URLs in responses into relative ones:
# "/d..." dashboard URLs in /api/search JSON, and appSubUrl, "url":"/..." and
# "/avatar/" references in HTML served from "/". /api/dashboards is passed
# through without rewriting. Access logging is off and error_log is sent to
# /dev/null.
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "app.nginx.fullname" . }}
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
component: nginx
data:
nginx.conf: |-
user nginx;
worker_processes auto;
error_log /dev/null warn;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)';
server {
listen 80;
access_log off;
gzip on;
gzip_min_length 1k;
gzip_comp_level 2;
gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png;
gzip_vary on;
gzip_disable "MSIE [1-6]\.";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
location /api/dashboards {
proxy_pass http://localhost:3000;
}
location /api/search {
proxy_pass http://localhost:3000;
sub_filter_types application/json;
sub_filter_once off;
sub_filter '"url":"/d' '"url":"d';
}
location / {
proxy_pass http://localhost:3000/;
sub_filter_types text/html;
sub_filter_once off;
sub_filter '"appSubUrl":""' '"appSubUrl":"."';
sub_filter '"url":"/' '"url":"./';
sub_filter ':"/avatar/' ':"avatar/';
}
}
}
{{- if or .Values.storageSpec .Values.persistence.enabled -}}
# PersistentVolumeClaim for Grafana data; rendered only when storageSpec or
# persistence.enabled is set. A non-empty storageSpec is used verbatim as the
# claim spec; otherwise the spec is assembled from the persistence.* values
# (accessMode defaults to ReadWriteOnce, and storageClassName is omitted when
# storageClass is unset or equals "default").
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.fullname" . }}
spec:
{{- if .Values.storageSpec }}
{{ toYaml .Values.storageSpec | indent 2 }}
{{- else }}
accessModes:
- {{ default "ReadWriteOnce" .Values.persistence.accessMode }}
{{ if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }}
storageClassName: {{ .Values.persistence.storageClass }}
{{ end }}
resources:
requests:
storage: {{ .Values.persistence.size | quote }}
{{- end }}
{{- end -}}
# Opaque Secret with the Grafana admin credentials, base64-encoded.
# When adminPassword is empty a random 10-character alphanumeric password is
# generated at render time.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
data:
  user: {{ .Values.adminUser | b64enc | quote }}
  password: {{ .Values.adminPassword | default (randAlphaNum 10) | b64enc | quote }}
# ClusterIP Service in front of the grafana-proxy container's "http" port
# (80), with client-IP session affinity.
apiVersion: v1
kind: Service
metadata:
  name: access-grafana
  labels:
    kubernetes.io/cluster-service: "true"
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: ClusterIP
  sessionAffinity: ClientIP
  ports:
  - name: http
    port: 80
    targetPort: http
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
# ServiceMonitor scraping the "web" port every 30s on Services in the release
# namespace that carry this chart's labels plus monitoring.cattle.io=true.
# Extra labels from serviceMonitor.labels are appended to the metadata labels.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
    {{- end }}
spec:
  jobLabel: grafana
  endpoints:
  - port: web
    interval: 30s
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
## Default values for the Grafana sub-chart.

## Monitoring level. The dashboards ConfigMap bundles the dashboards/c_*.json
## files only when this is "cluster".
##
level: cluster

## When true, the Deployment sets serviceAccountName on the pod.
##
enabledRBAC: true

## Name of an already existing ServiceAccount to run the pod with
##
serviceAccountName: ""

enabledPSP: true

## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"

## Node labels for Grafana pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}

## Tolerations for use with node taints (a list)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
  # - key: "key"
  #   operator: "Equal"
  #   value: "value"
  #   effect: "NoSchedule"

serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}

## Pass extra environment variables to the Grafana container (a list).
##
# extraVars:
# - name: EXTRA_VAR_1
#   value: extra-var-value-1
# - name: EXTRA_VAR_2
#   value: extra-var-value-2
extraVars: []

## Grafana admin credentials stored in the chart's Secret. If adminPassword is
## empty, the Secret template generates a random 10-character password.
## Override the default password for anything beyond local testing.
##
adminUser: "admin"
adminPassword: "admin"

## Grafana Docker image
##
image:
  repository: grafana/grafana
  tag: "5.3.0"
  ## Image used by the init container that patches the datasource plugin.json
  ##
  inits:
    tools:
      repository: maiwj/curl
      tag: "7.56.1-r0"

## Full PersistentVolumeClaim spec; when non-empty it is used verbatim.
##
storageSpec: {}
#   storageClassName: default
#   accessModes:
#     - ReadWriteOnce
#   resources:
#     requests:
#       storage: 2Gi
#   selector: {}

## Easy way to create persistent data
##
persistence: {}
#   enabled: true
#   storageClass: gluster
#   accessMode: "ReadWriteOnce"
#   size: 50Gi

## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
  # limits:
  #   memory: 200Mi
  #   cpu: 200m
  # requests:
  #   memory: 100Mi
  #   cpu: 100m

## A list of additional configmaps that contain -dashboard.json and/or -datasource.json files
## that should be imported into grafana.
dashboardConfigmaps: []

## URL of the Prometheus endpoint used by the generated Grafana datasource
##
prometheusDatasourceURL: ""

## nginx image for the grafana-proxy container
##
grafanaProxy:
  repository: nginx
  tag: "1.15.2"

## Image for the grafana-watcher sidecar
##
grafanaWatcher:
  repository: quay.io/coreos/grafana-watcher
  tag: v0.0.8

  ## Resource limits & requests
  ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
  resources: {}
    # requests:
    #   memory: "16Mi"
    #   cpu: "50m"
    # limits:
    #   memory: "32Mi"
    #   cpu: "100m"
# Chart metadata for the metric-expression-cluster chart.
name: metric-expression-cluster
version: 0.0.1
apiVersion: v1
engine: gotpl
description: Creates Metrics CRD of Rancher monitoring graph
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# API server request latency: avg of latencies_sum / latencies_count, divided
# by 1e+06. The summary groups by instance, the details variant by instance
# and verb.
# NOTE(review): the name says milliseconds - confirm the unit of
# apiserver_request_latencies_* so that dividing by 1e+06 really yields ms.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-latency-milliseconds-avg
labels:
app: metric-expression
component: apiserver
details: "false"
level: cluster
metric: request-latency-milliseconds-avg
source: rancher-monitoring
spec:
expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count)
by (instance) /1e+06
legendFormat: '[[instance]]'
description: apiserver request latency milliseconds avg
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-latency-milliseconds-avg-details
labels:
app: metric-expression
component: apiserver
details: "true"
level: cluster
metric: request-latency-milliseconds-avg
source: rancher-monitoring
spec:
expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count)
by (instance, verb) /1e+06
legendFormat: '[[verb]]([[instance]])'
description: apiserver request latency milliseconds avg
---
# API server request rate: 5m rate of apiserver_request_count summed per
# instance; the details variant additionally splits by response code.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-count-sum-rate
labels:
app: metric-expression
component: apiserver
details: "false"
graph: request-count
level: cluster
metric: request-count-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(apiserver_request_count[5m])) by (instance)
legendFormat: '[[instance]]'
description: apiserver request count sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-count-sum-rate-details
labels:
app: metric-expression
component: apiserver
details: "true"
graph: request-count
level: cluster
metric: request-count-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(apiserver_request_count[5m])) by (instance,
code)
legendFormat: '[[code]]([[instance]])'
description: apiserver request count sum rate
---
# API server error rate: same as the request-count metric but restricted to
# response codes not matching "2.."; the details variant also splits by code.
# NOTE(review): "$instance" is presumably a placeholder substituted by the
# consumer of these expressions - verify; the other apiserver expressions do
# not filter on it.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-error-count-sum-rate
labels:
app: metric-expression
component: apiserver
details: "false"
graph: request-count
level: cluster
metric: request-error-count-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m]))
by (instance)
legendFormat: '[[instance]]'
description: apiserver request error count sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: apiserver-request-error-count-sum-rate-details
labels:
app: metric-expression
component: apiserver
details: "true"
graph: request-count
level: cluster
metric: request-error-count-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m]))
by (instance, code)
legendFormat: '[[code]]([[instance]])'
description: apiserver request error count sum rate
---
# Cluster disk reads: 5m rate of node_disk_read_bytes_total, scaled by
# 8 / 1024. The summary aggregates over everything (empty "by ()" grouping);
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-disk-io-reads-bytes-sum-rate
labels:
app: metric-expression
component: cluster
details: "false"
graph: disk-io
level: cluster
metric: disk-io-reads-bytes-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(node_disk_read_bytes_total[5m])) by
() * 8 / 1024
legendFormat: Read
description: cluster disk io reads bytes sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-disk-io-reads-bytes-sum-rate-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: disk-io
level: cluster
metric: disk-io-reads-bytes-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(node_disk_read_bytes_total[5m])) by
(instance) * 8 / 1024
legendFormat: Read([[instance]])
description: cluster disk io reads bytes sum rate
---
# Cluster network transmit throughput: 5m rate of
# node_network_transmit_bytes_total, scaled by 8 / 1024, excluding devices
# matching lo, veth*, docker*, flannel*, cali* and cbr*. The details variant
# groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-bytes-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-io
level: cluster
metric: network-transmit-bytes-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
* 8 / 1024
legendFormat: Transmit
description: cluster network transmit bytes sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-bytes-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-io
level: cluster
metric: network-transmit-bytes-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance) * 8 / 1024
legendFormat: Transmit([[instance]])
description: cluster network transmit bytes sum rate
---
# 5-minute load average: sum of node_load5 divided by the number of
# node_cpu_seconds_total{mode="system"} series; the details variant computes
# the same ratio per instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-5
labels:
app: metric-expression
component: cluster
details: "false"
graph: cpu-load
level: cluster
metric: cpu-load-5
source: rancher-monitoring
spec:
expression: sum(node_load5) / count(node_cpu_seconds_total{mode="system"})
legendFormat: Load5
description: cluster cpu load 5
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-5-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: cpu-load
level: cluster
metric: cpu-load-5
source: rancher-monitoring
spec:
expression: sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"})
by (instance)
legendFormat: Load5([[instance]])
description: cluster cpu load 5
---
# 1-minute load average: sum of node_load1 divided by the number of
# node_cpu_seconds_total{mode="system"} series; the details variant computes
# the same ratio per instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-1
labels:
app: metric-expression
component: cluster
details: "false"
graph: cpu-load
level: cluster
metric: cpu-load-1
source: rancher-monitoring
spec:
expression: sum(node_load1) / count(node_cpu_seconds_total{mode="system"})
legendFormat: Load1
description: cluster cpu load 1
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-1-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: cpu-load
level: cluster
metric: cpu-load-1
source: rancher-monitoring
spec:
expression: sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"})
by (instance)
legendFormat: Load1([[instance]])
description: cluster cpu load 1
---
# Cluster disk writes: 5m rate of node_disk_written_bytes_total, scaled by
# 8 / 1024; the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-disk-io-writes-bytes-sum-rate
labels:
app: metric-expression
component: cluster
details: "false"
graph: disk-io
level: cluster
metric: disk-io-writes-bytes-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(node_disk_written_bytes_total[5m]))
* 8 / 1024
legendFormat: Write
description: cluster disk io writes bytes sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-disk-io-writes-bytes-sum-rate-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: disk-io
level: cluster
metric: disk-io-writes-bytes-sum-rate
source: rancher-monitoring
spec:
expression: sum(rate(node_disk_written_bytes_total[5m]))
by (instance) * 8 / 1024
legendFormat: Write([[instance]])
description: cluster disk io writes bytes sum rate
---
# Filesystem usage: (size - free) / size over node_filesystem_* series whose
# device is not "rootfs"; the details variant computes it per instance.
# The result is a ratio; nothing in the expression multiplies it by 100.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-fs-usage-percent
labels:
app: metric-expression
component: cluster
details: "false"
level: cluster
metric: fs-usage-percent
source: rancher-monitoring
spec:
expression: (sum(node_filesystem_size_bytes{device!="rootfs"})
- sum(node_filesystem_free_bytes{device!="rootfs"})
) / sum(node_filesystem_size_bytes{device!="rootfs"})
legendFormat: Disk usage
description: cluster fs usage percent
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-fs-usage-percent-details
labels:
app: metric-expression
component: cluster
details: "true"
level: cluster
metric: fs-usage-percent
source: rancher-monitoring
spec:
expression: (sum(node_filesystem_size_bytes{device!="rootfs"})
by (instance) - sum(node_filesystem_free_bytes{device!="rootfs"})
by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs"})
by (instance)
legendFormat: '[[instance]]'
description: cluster fs usage percent
---
# Network receive errors: 5m rate of node_network_receive_errs_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-errors-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-receive-errors-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Receive errors
description: cluster network receive errors sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-errors-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-receive-errors-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Receive errors([[instance]])
description: cluster network receive errors sum
---
# 15-minute load average: sum of node_load15 divided by the number of
# node_cpu_seconds_total{mode="system"} series; the details variant computes
# the same ratio per instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-15
labels:
app: metric-expression
component: cluster
details: "false"
graph: cpu-load
level: cluster
metric: cpu-load-15
source: rancher-monitoring
spec:
expression: sum(node_load15) / count(node_cpu_seconds_total{mode="system"})
legendFormat: Load15
description: cluster cpu load 15
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-load-15-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: cpu-load
level: cluster
metric: cpu-load-15
source: rancher-monitoring
spec:
expression: sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"})
by (instance)
legendFormat: Load15([[instance]])
description: cluster cpu load 15
---
# Cluster network receive throughput: 5m rate of
# node_network_receive_bytes_total, scaled by 8 / 1024, excluding devices
# matching lo, veth*, docker*, flannel*, cali* and cbr*. The details variant
# groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-bytes-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-io
level: cluster
metric: network-receive-bytes-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
* 8 / 1024
legendFormat: Receive
description: cluster network receive bytes sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-bytes-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-io
level: cluster
metric: network-receive-bytes-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance) * 8 / 1024
legendFormat: Receive([[instance]])
description: cluster network receive bytes sum rate
---
# Network packets received: 5m rate of node_network_receive_packets_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-packets-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-receive-packets-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Receive packets
description: cluster network receive packets sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-packets-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-receive-packets-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Receive packets([[instance]])
description: cluster network receive packets sum
---
# Network transmit errors: 5m rate of node_network_transmit_errs_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-errors-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-transmit-errors-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Transmit errors
description: cluster network transmit errors sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-errors-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-transmit-errors-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Transmit errors([[instance]])
description: cluster network transmit errors sum
---
# Dropped received packets: 5m rate of node_network_receive_drop_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-packets-dropped-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-receive-packets-dropped-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Receive dropped
description: cluster network receive packets dropped sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-receive-packets-dropped-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-receive-packets-dropped-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Receive dropped([[instance]])
description: cluster network receive packets dropped sum
---
# Dropped transmitted packets: 5m rate of node_network_transmit_drop_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-packets-dropped-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-transmit-packets-dropped-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Transmit dropped
description: cluster network transmit packets dropped sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-packets-dropped-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-transmit-packets-dropped-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Transmit dropped([[instance]])
description: cluster network transmit packets dropped sum
---
# Network packets transmitted: 5m rate of node_network_transmit_packets_total,
# excluding devices matching lo, veth*, docker*, flannel*, cali* and cbr*;
# the details variant groups by instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-packets-sum
labels:
app: metric-expression
component: cluster
details: "false"
graph: network-packet
level: cluster
metric: network-transmit-packets-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
legendFormat: Transmit packets
description: cluster network transmit packets sum
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-network-transmit-packets-sum-details
labels:
app: metric-expression
component: cluster
details: "true"
graph: network-packet
level: cluster
metric: network-transmit-packets-sum
source: rancher-monitoring
spec:
expression: sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
by (instance)
legendFormat: Transmit packets([[instance]])
description: cluster network transmit packets sum
---
# CPU usage as a ratio: 1 minus the average irate (5m window) of
# node_cpu_seconds_total{mode="idle"}; the details variant averages per
# instance. Despite "sum-rate" in the name, the expression uses avg(irate()).
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-usage-seconds-sum-rate
labels:
app: metric-expression
component: cluster
details: "false"
level: cluster
metric: cpu-usage-seconds-sum-rate
source: rancher-monitoring
spec:
expression: 1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])))
legendFormat: CPU usage
description: cluster cpu usage seconds sum rate
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-cpu-usage-seconds-sum-rate-details
labels:
app: metric-expression
component: cluster
details: "true"
level: cluster
metric: cpu-usage-seconds-sum-rate
source: rancher-monitoring
spec:
expression: 1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))
legendFormat: '[[instance]]'
description: cluster cpu usage seconds sum rate
---
# Memory usage as a ratio: 1 - MemAvailable / MemTotal summed over all nodes;
# the details variant computes it per instance. The result is a ratio;
# nothing in the expression multiplies it by 100.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-memory-usage-percent
labels:
app: metric-expression
component: cluster
details: "false"
level: cluster
metric: memory-usage-percent
source: rancher-monitoring
spec:
expression: 1 - sum(node_memory_MemAvailable_bytes)
/ sum(node_memory_MemTotal_bytes)
legendFormat: Memory usage
description: cluster memory usage percent
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: cluster-memory-usage-percent-details
labels:
app: metric-expression
component: cluster
details: "true"
level: cluster
metric: memory-usage-percent
source: rancher-monitoring
spec:
expression: 1 - sum(node_memory_MemAvailable_bytes) by (instance)
/ sum(node_memory_MemTotal_bytes) by (instance)
legendFormat: '[[instance]]'
description: cluster memory usage percent
---
# 5m rate of CFS-throttled CPU seconds for the containers matched by the
# $namespace / $podName / $containerName placeholders, per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-cfs-throttled-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU cfs throttled
  description: container cpu cfs throttled seconds sum rate
---
# Details variant of the CFS-throttled CPU rate; same query as the summary,
# with the container_name rendered into the legend.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-cfs-throttled-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: 'CPU cfs throttled([[container_name]])'
  description: container cpu cfs throttled seconds sum rate
---
# 5m rate of container_cpu_usage_seconds_total for the selected containers,
# summed per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU usage
  description: container cpu usage seconds sum rate
---
# Details variant of the container CPU usage rate; the legend carries the
# container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: 'CPU usage([[container_name]])'
  description: container cpu usage seconds sum rate
---
# 5m rate of container_cpu_system_seconds_total for the selected containers,
# summed per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-system-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU system seconds
  description: container cpu system seconds sum rate
---
# Details variant of the container system-CPU rate; the legend carries the
# container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-system-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: 'CPU system seconds([[container_name]])'
  description: container cpu system seconds sum rate
---
# 5m rate of container_cpu_user_seconds_total for the selected containers,
# summed per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-user-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU user seconds
  description: container cpu user seconds sum rate
---
# Details variant of the container user-CPU rate; the legend carries the
# container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-user-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: 'CPU user seconds([[container_name]])'
  description: container cpu user seconds sum rate
---
# Working-set bytes divided by the memory limit, per container_name.
# label_join copies the limit series' "container" label into "container_name"
# so both sides of the division share the grouping label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-percent
  labels:
    app: metric-expression
    component: container
    details: 'false'
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace",
    pod_name=~"$podName", container_name=~"$containerName"})
    by (container_name)
    /
    sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName", container=~"$containerName"},"container_name", "", "container"))
    by (container_name)
  legendFormat: Memory
  description: container memory usage percent
---
# Details variant of working-set bytes over memory limit; same query as the
# summary, with the container_name rendered into the legend.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-percent-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace",
    pod_name=~"$podName", container_name=~"$containerName"})
    by (container_name)
    /
    sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName", container=~"$containerName"},"container_name", "", "container"))
    by (container_name)
  legendFormat: 'Memory([[container_name]])'
  description: container memory usage percent
---
# Working-set bytes of the selected containers per container_name, excluding
# series whose "name" label matches POD.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-bytes-sum
  labels:
    app: metric-expression
    component: container
    details: 'false'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD",
    namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: Memory usage
  description: container memory usage bytes sum
---
# Details variant of container working-set bytes; the legend carries the
# container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-bytes-sum-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD",
    namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: 'Memory usage([[container_name]])'
  description: container memory usage bytes sum
---
# container_fs_usage_bytes of the selected containers, summed per
# container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-fs-bytes-sum
  labels:
    app: metric-expression
    component: container
    details: 'false'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace",
    pod_name=~"$podName", container_name=~"$containerName"})
    by (container_name)
  legendFormat: Filesystem usage
  description: container fs bytes sum
---
# Details variant of container filesystem usage; the legend carries the
# container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-fs-bytes-sum-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace",
    pod_name=~"$podName", container_name=~"$containerName"})
    by (container_name)
  legendFormat: 'Filesystem usage([[container_name]])'
  description: container fs bytes sum
---
# 5m rate of bytes written per container_name, multiplied by 8 / 1024
# (presumably bytes/s to Kibit/s; confirm against the unit the UI displays).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
    * 8 / 1024
  legendFormat: Write
  description: container disk io writes bytes sum rate
---
# Details variant of the container write rate (same 8 / 1024 scaling); the
# legend carries the container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
    * 8 / 1024
  legendFormat: 'Write([[container_name]])'
  description: container disk io writes bytes sum rate
---
# 5m rate of bytes read per container_name, multiplied by 8 / 1024
# (presumably bytes/s to Kibit/s; confirm against the unit the UI displays).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: container
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
    * 8 / 1024
  legendFormat: Read
  description: container disk io reads bytes sum rate
---
# Details variant of the container read rate (same 8 / 1024 scaling); the
# legend carries the container_name of each series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
    * 8 / 1024
  legendFormat: 'Read([[container_name]])'
  description: container disk io reads bytes sum rate
---
# Sum of the volumes_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-volumes-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(volumes_depth)
  legendFormat: Volumes depth
  description: controllermanager volumes depth
---
# volumes_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-volumes-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(volumes_depth) by (instance)
  legendFormat: 'Volumes depth([[instance]])'
  description: controllermanager volumes depth
---
# NOTE: controllermanager-deployment-depth and
# controllermanager-deployment-depth-details are defined further down in this
# file. The copies that used to sit here reused the same metadata.name (two
# objects with one name collide when the manifest is applied) and carried the
# wrong description "controllermanager deployment adds", so they were dropped
# in favour of the later, correctly described definitions.
# Sum of the replicaset_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-replicaset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(replicaset_depth)
  legendFormat: Replicaset depth
  description: controllermanager replicaset depth
---
# replicaset_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-replicaset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(replicaset_depth) by (instance)
  legendFormat: 'Replicaset depth([[instance]])'
  description: controllermanager replicaset depth
---
# Sum of the service_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-service-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(service_depth)
  legendFormat: Service depth
  description: controllermanager service depth
---
# service_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-service-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(service_depth) by (instance)
  legendFormat: 'Service depth([[instance]])'
  description: controllermanager service depth
---
# Sum of the serviceaccount_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-serviceaccount-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(serviceaccount_depth)
  legendFormat: Serviceaccount depth
  description: controllermanager serviceaccount depth
---
# serviceaccount_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-serviceaccount-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(serviceaccount_depth) by (instance)
  legendFormat: 'Serviceaccount depth([[instance]])'
  description: controllermanager serviceaccount depth
---
# Sum of the endpoint_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-endpoint-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(endpoint_depth)
  legendFormat: Endpoint depth
  description: controllermanager endpoint depth
---
# endpoint_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-endpoint-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(endpoint_depth) by (instance)
  legendFormat: 'Endpoint depth([[instance]])'
  description: controllermanager endpoint depth
---
# Sum of the daemonset_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-daemonset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(daemonset_depth)
  legendFormat: Daemonset depth
  description: controllermanager daemonset depth
---
# daemonset_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-daemonset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(daemonset_depth) by (instance)
  legendFormat: 'Daemonset depth([[instance]])'
  description: controllermanager daemonset depth
---
# Sum of the deployment_depth series across all instances.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: 'false'
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(deployment_depth)
  legendFormat: Deployment depth
  description: controllermanager deployment depth
---
# deployment_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: 'true'
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(deployment_depth) by (instance)
  legendFormat: 'Deployment depth([[instance]])'
  description: controllermanager deployment depth
---
# Sum of the statefulset_depth series across all instances.
# The description previously read "statefulset adds", which contradicted the
# metric name, legend and expression (all of which are about depth).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-statefulset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth)
  legendFormat: Statefulset depth
  description: controllermanager statefulset depth
---
# statefulset_depth summed per instance.
# The description previously read "statefulset adds", which contradicted the
# metric name, legend and expression (all of which are about depth).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-statefulset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth) by (instance)
  legendFormat: Statefulset depth([[instance]])
  description: controllermanager statefulset depth
---
# Total failed etcd proposals, summed over all series.
# The previous expression counted `up` targets of one hard-coded scrape job
# per instance, which measures target presence rather than failed proposals
# and returned one series per instance for a summary graph.
# NOTE(review): etcd_server_proposals_failed_total is assumed to be the
# intended source, based on the metric name and legend; confirm.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-failed-proposal
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    level: cluster
    metric: server-failed-proposal
    source: rancher-monitoring
spec:
  expression: sum(etcd_server_proposals_failed_total)
  legendFormat: Failed proposal
  description: etcd Server failed proposal
---
# Total failed etcd proposals per instance.
# The previous expression counted `up` targets of one hard-coded scrape job,
# which measures target presence rather than failed proposals, and the legend
# did not identify the instance of each series.
# NOTE(review): etcd_server_proposals_failed_total is assumed to be the
# intended source, based on the metric name and legend; confirm.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-failed-proposal-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    level: cluster
    metric: server-failed-proposal
    source: rancher-monitoring
spec:
  expression: sum(etcd_server_proposals_failed_total) by (instance)
  legendFormat: Failed proposal([[instance]])
  description: etcd Server failed proposal
---
# Leader changes seen during the last hour (maximum over all series).
# The previous expression plotted the raw, ever-growing counter, which did not
# match the "per hour" legend or the "increase" in the metric name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-leader-changes-seen-sum-increase
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    level: cluster
    metric: server-leader-changes-seen-sum-increase
    source: rancher-monitoring
spec:
  expression: max(increase(etcd_server_leader_changes_seen_total[1h]))
  legendFormat: Number of leader changes per hour
  description: etcd server leader changes seen sum increase
---
# Leader changes seen during the last hour, per instance.
# Previously this was byte-for-byte the summary query (raw counter, no
# grouping), so the details view added nothing; it now groups by instance
# and names the instance in the legend like the other details metrics.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-leader-changes-seen-sum-increase-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    level: cluster
    metric: server-leader-changes-seen-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    max(increase(etcd_server_leader_changes_seen_total[1h])) by (instance)
  legendFormat: Number of leader changes per hour([[instance]])
  description: etcd server leader changes seen sum increase
---
# 5m rate of gRPC bytes received from clients, summed over all series and
# multiplied by 8 / 1024 (presumably bytes/s to Kibit/s; confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-client-receive-bytes-sum-rate
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: rpc-client-traffic
    level: cluster
    metric: grpc-client-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(etcd_network_client_grpc_received_bytes_total[5m]))
    * 8 / 1024
  legendFormat: Client traffic in
  description: etcd grpc client receive bytes sum rate
---
# Per-instance 5m rate of gRPC bytes received from clients, multiplied by
# 8 / 1024 (presumably bytes/s to Kibit/s; confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-client-receive-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: rpc-client-traffic
    level: cluster
    metric: grpc-client-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(etcd_network_client_grpc_received_bytes_total[5m]))
    by (instance)
    * 8 / 1024
  legendFormat: 'Client traffic in([[instance]])'
  description: etcd grpc client receive bytes sum rate
---
# Sum of etcd_debugging_mvcc_db_total_size_in_bytes over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-db-bytes-sum
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    level: cluster
    metric: db-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(etcd_debugging_mvcc_db_total_size_in_bytes)
  legendFormat: DB size
  description: etcd db bytes sum
---
# etcd_debugging_mvcc_db_total_size_in_bytes summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-db-bytes-sum-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    level: cluster
    metric: db-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(etcd_debugging_mvcc_db_total_size_in_bytes) by (instance)
  legendFormat: 'DB size([[instance]])'
  description: etcd db bytes sum
---
# 5m rate of gRPC bytes sent to clients, summed over all series.
# Scaled by 8 / 1024 so it shares a unit with the "Client traffic in" series
# drawn on the same rpc-client-traffic graph; previously this series was left
# in raw bytes/s while its counterpart was scaled.
# NOTE(review): confirm the unit the UI expects for this graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-client-transmit-bytes-sum-rate
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: rpc-client-traffic
    level: cluster
    metric: grpc-client-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) * 8 / 1024
  legendFormat: Client traffic out
  description: etcd grpc client transmit bytes sum rate
---
# Per-instance 5m rate of gRPC bytes sent to clients.
# Scaled by 8 / 1024 so it shares a unit with the per-instance "Client traffic
# in" series drawn on the same rpc-client-traffic graph; previously this
# series was left in raw bytes/s while its counterpart was scaled.
# NOTE(review): confirm the unit the UI expects for this graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-client-transmit-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: rpc-client-traffic
    level: cluster
    metric: grpc-client-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) by (instance)
    * 8 / 1024
  legendFormat: Client traffic out([[instance]])
  description: etcd grpc client transmit bytes sum rate
---
# Maximum of etcd_server_has_leader over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-leader-sum
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    level: cluster
    metric: server-leader-sum
    source: rancher-monitoring
spec:
  expression: >-
    max(etcd_server_has_leader)
  legendFormat: Has leader
  description: etcd server leader sum
---
# etcd_server_has_leader per instance.
# Previously this was identical to the summary query (no grouping, no
# instance in the legend), so the details view could not show which member
# lacks a leader; it now follows the other details metrics.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-leader-sum-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    level: cluster
    metric: server-leader-sum
    source: rancher-monitoring
spec:
  expression: max(etcd_server_has_leader) by (instance)
  legendFormat: Has leader([[instance]])
  description: etcd server leader sum
---
# Increase of etcd_server_proposals_committed_total over 5m, summed over all
# series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-committed-sum-increase
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: proposal
    level: cluster
    metric: server-proposals-committed-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_committed_total[5m]))
  legendFormat: Proposal commit rate
  description: etcd server proposals committed sum increase
---
# Increase of etcd_server_proposals_committed_total over 5m, per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-committed-sum-increase-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: proposal
    level: cluster
    metric: server-proposals-committed-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_committed_total[5m]))
    by (instance)
  legendFormat: 'Proposal commit rate([[instance]])'
  description: etcd server proposals committed sum increase
---
# Increase of etcd_server_proposals_applied_total over 5m, summed over all
# series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-applied-sum-increase
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: proposal
    level: cluster
    metric: server-proposals-applied-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_applied_total[5m]))
  legendFormat: Proposals applied
  description: etcd server proposals applied sum increase
---
# Increase of etcd_server_proposals_applied_total over 5m, per instance.
# Legend capitalised ("Proposals ...") to match the summary metric and the
# other series on the proposal graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-applied-sum-increase-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: proposal
    level: cluster
    metric: server-proposals-applied-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_applied_total[5m])) by (instance)
  legendFormat: Proposals applied([[instance]])
  description: etcd server proposals applied sum increase
---
# Increase of etcd_server_proposals_failed_total over 5m, summed over all
# series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-failed-sum-increase
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: proposal
    level: cluster
    metric: server-proposals-failed-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_failed_total[5m]))
  legendFormat: Proposals failed
  description: etcd server proposals failed sum increase
---
# Increase of etcd_server_proposals_failed_total over 5m, per instance.
# Legend capitalised ("Proposals ...") to match the summary metric and the
# other series on the proposal graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-failed-sum-increase-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: proposal
    level: cluster
    metric: server-proposals-failed-sum-increase
    source: rancher-monitoring
spec:
  expression: >-
    sum(increase(etcd_server_proposals_failed_total[5m])) by (instance)
  legendFormat: Proposals failed([[instance]])
  description: etcd server proposals failed sum increase
---
# Current number of pending etcd proposals, summed over all series.
# etcd_server_proposals_pending is a gauge (it goes up and down), so wrapping
# it in increase() - a counter-only function that treats every drop as a
# counter reset - produced misleading values. The gauge is now read directly.
# The resource name keeps its "-increase" suffix so existing references to it
# continue to resolve.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-pending-sum-increase
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: proposal
    level: cluster
    metric: server-proposals-pending-sum-increase
    source: rancher-monitoring
spec:
  expression: sum(etcd_server_proposals_pending)
  legendFormat: Proposals pending
  description: etcd server proposals pending sum increase
---
# Current number of pending etcd proposals, per instance.
# etcd_server_proposals_pending is a gauge (it goes up and down), so wrapping
# it in increase() - a counter-only function that treats every drop as a
# counter reset - produced misleading values. The gauge is now read directly.
# The legend is also capitalised to match the summary metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-server-proposals-pending-sum-increase-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: proposal
    level: cluster
    metric: server-proposals-pending-sum-increase
    source: rancher-monitoring
spec:
  expression: sum(etcd_server_proposals_pending) by (instance)
  legendFormat: Proposals pending([[instance]])
  description: etcd server proposals pending sum increase
---
# 0.99 quantile of WAL fsync duration computed from the 5m bucket rates, then
# summed over the resulting series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-disk-wal-fsync-duration-seconds-sum-quantile
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: sync-duration
    level: cluster
    metric: disk-wal-fsync-duration-seconds-sum-quantile
    source: rancher-monitoring
spec:
  expression: >-
    sum(histogram_quantile(0.99,
    rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])))
  legendFormat: WAL fsync
  description: etcd disk wal fsync duration seconds sum quantile
---
# 0.99 quantile of WAL fsync duration computed from the 5m bucket rates,
# grouped by instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-disk-wal-fsync-duration-seconds-sum-quantile-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: sync-duration
    level: cluster
    metric: disk-wal-fsync-duration-seconds-sum-quantile
    source: rancher-monitoring
spec:
  expression: >-
    sum(histogram_quantile(0.99,
    rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])))
    by (instance)
  legendFormat: 'WAL fsync([[instance]])'
  description: etcd disk wal fsync duration seconds sum quantile
---
# Share of gRPC requests that finished with a code other than OK: 5m rate of
# non-OK grpc_server_handled_total divided by the 5m rate of all handled
# requests.
# Legend spelling changed from "Rpc" to "RPC" to match the details variant.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-request-error-percent
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    level: cluster
    metric: grpc-request-error-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(grpc_server_handled_total{grpc_code!="OK"}[5m]))
    / sum(rate(grpc_server_handled_total[5m]))
  legendFormat: RPC failed rate
  description: etcd grpc request error percent
---
# Per-instance share of gRPC requests that finished with a code other than OK
# (5m rates, both sides grouped by instance).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-request-error-percent-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    level: cluster
    metric: grpc-request-error-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(grpc_server_handled_total{grpc_code!="OK"}[5m]))
    by (instance)
    /
    sum(rate(grpc_server_handled_total[5m]))
    by (instance)
  legendFormat: 'RPC failed rate([[instance]])'
  description: etcd grpc request error percent
---
# 0.99 quantile of backend commit duration computed from the 5m bucket rates,
# then summed over the resulting series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-disk-commit-duration-seconds-sum-quantile
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: sync-duration
    level: cluster
    metric: disk-commit-duration-seconds-sum-quantile
    source: rancher-monitoring
spec:
  expression: >-
    sum(histogram_quantile(0.99,
    rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])))
  legendFormat: DB fsync
  description: etcd disk commit duration seconds sum quantile
---
# 0.99 quantile of backend commit duration computed from the 5m bucket rates,
# grouped by instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-disk-commit-duration-seconds-sum-quantile-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: sync-duration
    level: cluster
    metric: disk-commit-duration-seconds-sum-quantile
    source: rancher-monitoring
spec:
  expression: >-
    sum(histogram_quantile(0.99,
    rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])))
    by (instance)
  legendFormat: 'DB fsync([[instance]])'
  description: etcd disk commit duration seconds sum quantile
---
# 0.99 quantile of unary gRPC handling time over all series.
# histogram_quantile needs the `le` bucket label on its input. The previous
# inner sum() had no `by (le)` and collapsed every bucket into one label-less
# series, so the quantile could not be computed. Buckets are now aggregated
# `by (le)` first; the outer sum() became redundant and was dropped.
# A stray trailing double quote was also removed from the legend.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-request-slow-quantile
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    level: cluster
    metric: grpc-request-slow-quantile
    source: rancher-monitoring
spec:
  expression: >-
    histogram_quantile(0.99,
    sum(rate(grpc_server_handling_seconds_bucket{grpc_type="unary"}[5m]))
    by (le))
  legendFormat: Request slow
  description: etcd grpc request slow quantile
---
# 0.99 quantile of unary gRPC handling time, per instance.
# histogram_quantile needs the `le` bucket label on its input. The previous
# inner sum() had no grouping and discarded both `le` and `instance`, so no
# per-instance quantile could be produced. Buckets are now aggregated
# `by (instance, le)`, which yields one quantile series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-grpc-request-slow-quantile-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    level: cluster
    metric: grpc-request-slow-quantile
    source: rancher-monitoring
spec:
  expression: >-
    histogram_quantile(0.99,
    sum(rate(grpc_server_handling_seconds_bucket{grpc_type="unary"}[5m]))
    by (instance, le))
  legendFormat: Request slow([[instance]])
  description: etcd grpc request slow quantile
---
# Started minus handled bidi_stream RPCs of the etcdserverpb.Watch service,
# each side summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-active-watch-stream
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: etcd-stream
    level: cluster
    metric: active-watch-stream
    source: rancher-monitoring
spec:
  expression: >-
    sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
    -
    sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
  legendFormat: Watch streams
  description: Etcd watch stream
---
# Started minus handled bidi_stream RPCs of the etcdserverpb.Watch service,
# both sides grouped by instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-active-watch-stream-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: etcd-stream
    level: cluster
    metric: active-watch-stream
    source: rancher-monitoring
spec:
  expression: >-
    sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
    by (instance)
    -
    sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
    by (instance)
  legendFormat: 'Watch streams([[instance]])'
  description: Etcd watch stream
---
# Started minus handled bidi_stream RPCs of the etcdserverpb.Lease service,
# each side summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-lease-watch-stream
  labels:
    app: metric-expression
    component: etcd
    details: 'false'
    graph: etcd-stream
    level: cluster
    metric: lease-watch-stream
    source: rancher-monitoring
spec:
  expression: >-
    sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
    -
    sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
  legendFormat: Lease watch stream
  description: Etcd lease stream
---
# Started minus handled bidi_stream RPCs of the etcdserverpb.Lease service,
# both sides grouped by instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-lease-watch-stream-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: etcd-stream
    level: cluster
    metric: lease-watch-stream
    source: rancher-monitoring
spec:
  expression: >-
    sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
    by (instance)
    -
    sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
    by (instance)
  legendFormat: 'Lease watch stream([[instance]])'
  description: Etcd lease stream
---
# 5m rate of bytes received from etcd peers, summed over all series and
# multiplied by 8 / 1024 (presumably bytes/s to Kibit/s; confirm).
# The legend previously ended in a stray double quote (`Traffic in"`), which
# is a literal character in a plain YAML scalar and showed up in the UI.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-peer-traffic-in
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: etcd-peer-traffic
    level: cluster
    metric: peer-traffic-in
    source: rancher-monitoring
spec:
  expression: sum(rate(etcd_network_peer_received_bytes_total[5m])) * 8 / 1024
  legendFormat: Traffic in
  description: Etcd peer traffic in
---
# Per-instance 5m rate of bytes received from etcd peers, multiplied by
# 8 / 1024 (presumably bytes/s to Kibit/s; confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-peer-traffic-in-details
  labels:
    app: metric-expression
    component: etcd
    details: 'true'
    graph: etcd-peer-traffic
    level: cluster
    metric: peer-traffic-in
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(etcd_network_peer_received_bytes_total[5m]))
    by (instance)
    * 8 / 1024
  legendFormat: 'Traffic in([[instance]])'
  description: Etcd peer traffic in
---
# 5m rate of bytes sent to etcd peers, summed over all series.
# Two fixes:
#  - the legend ended in a stray double quote (`Traffic out"`), which is a
#    literal character in a plain YAML scalar;
#  - the value is now scaled by 8 / 1024 like the "Traffic in" series on the
#    same etcd-peer-traffic graph, so both lines share one unit.
# NOTE(review): confirm the unit the UI expects for this graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-peer-traffic-out
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: etcd-peer-traffic
    level: cluster
    metric: peer-traffic-out
    source: rancher-monitoring
spec:
  expression: sum(rate(etcd_network_peer_sent_bytes_total[5m])) * 8 / 1024
  legendFormat: Traffic out
  description: Etcd peer traffic out
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
name: etcd-peer-traffic-out-details
labels:
app: metric-expression
component: etcd
details: "true"
graph: etcd-peer-traffic
level: cluster
metric: peer-traffic-out
source: rancher-monitoring
spec:
expression: sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)
legendFormat: Traffic out([[instance]])
description: Etcd peer traffic out
---
# Summed 5m rate of failed etcd server proposals.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-proposal-failure-rate
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: proposal
    level: cluster
    source: rancher-monitoring
spec:
  description: Proposal Failure Rate
  legendFormat: Proposal failure
  expression: >-
    sum(rate(etcd_server_proposals_failed_total[5m]))
---
# Failed-proposal rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-proposal-failure-rate-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: proposal
    level: cluster
    source: rancher-monitoring
spec:
  description: Proposal Failure Rate
  legendFormat: Proposal failure([[instance]])
  expression: >-
    sum(rate(etcd_server_proposals_failed_total[5m])) by (instance)
---
# Summed 5m rate of started unary gRPC calls.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-rpc-rate
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: rpc-rate
    level: cluster
    source: rancher-monitoring
spec:
  description: rpc-rate
  legendFormat: RPC rate
  expression: >-
    sum(rate(grpc_server_started_total{grpc_type="unary"}[5m]))
---
# Started unary gRPC call rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-rpc-rate-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: rpc-rate
    level: cluster
    source: rancher-monitoring
spec:
  description: rpc-rate
  legendFormat: Rpc rate([[instance]])
  expression: >-
    sum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) by (instance)
---
# Summed 5m rate of handled unary gRPC calls whose grpc_code is not "OK".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-rpc-rate-failed
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: rpc-rate
    level: cluster
    source: rancher-monitoring
spec:
  description: rpc-rate
  legendFormat: Rpc failed rate
  expression: >-
    sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m]))
---
# Non-OK unary gRPC call rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-rpc-rate-failed-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: rpc-rate
    level: cluster
    source: rancher-monitoring
spec:
  description: rpc-rate
  legendFormat: Rpc failed rate([[instance]])
  expression: >-
    sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m]))
    by (instance)
---
# Summed 1m rate of the backend commit duration `_sum` series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-latency-distributions-of-commit-called-by-backend
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: disk-operate
    level: cluster
    source: rancher-monitoring
spec:
  description: The latency distributions of commit called by backend
  legendFormat: Commit latency called by backend
  expression: >-
    sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))
---
# Backend commit duration rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-latency-distributions-of-commit-called-by-backend-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: disk-operate
    level: cluster
    source: rancher-monitoring
spec:
  description: The latency distributions of commit called by backend
  legendFormat: Commit latency called by backend([[instance]])
  expression: >-
    sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) by (instance)
---
# Summed 1m rate of the WAL fsync duration `_sum` series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-latency-distributions-of-fsync-called-by-wal
  labels:
    app: metric-expression
    component: etcd
    details: "false"
    graph: disk-operate
    level: cluster
    source: rancher-monitoring
spec:
  description: The latency distributions of fsync called by wal
  legendFormat: Fsync latency called by wal
  expression: >-
    sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))
---
# WAL fsync duration rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: etcd-latency-distributions-of-fsync-called-by-wal-details
  labels:
    app: metric-expression
    component: etcd
    details: "true"
    graph: disk-operate
    level: cluster
    source: rancher-monitoring
spec:
  description: The latency distributions of fsync called by wal
  legendFormat: Fsync latency called by wal([[instance]])
  expression: >-
    sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) by (instance)
---
# Summed 5m rate of fluentd input records.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  legendFormat: Input record number
  expression: >-
    sum(rate(fluentd_input_status_num_records_total[5m]))
---
# Fluentd input record rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  legendFormat: Input record number([[instance]])
  expression: >-
    sum(rate(fluentd_input_status_num_records_total[5m])) by (instance)
---
# Summed 5m rate of fluentd output records.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  legendFormat: Output record number
  expression: >-
    sum(rate(fluentd_output_status_num_records_total[5m]))
---
# Fluentd output record rate split by instance label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  legendFormat: Output record number([[instance]])
  expression: >-
    sum(rate(fluentd_output_status_num_records_total[5m])) by (instance)
---
# Summed 5m rate of fluentd output errors.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  legendFormat: Plugin Output errors
  expression: >-
    sum(rate(fluentd_output_status_num_errors[5m]))
---
# Fluentd output error rate split by the `type` label (not by instance).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  legendFormat: Plugin([[type]])
  expression: >-
    sum(rate(fluentd_output_status_num_errors[5m])) by (type)
---
# Summed 5m rate of the fluentd output buffer queue length series.
# NOTE(review): the metric name suggests a gauge; rate() over a gauge is
# usually unintended - confirm against the fluentd exporter.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: buffer-queue-length
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  description: Fluentd Buffer queue length
  legendFormat: Buffer queue
  expression: >-
    sum(rate(fluentd_output_status_buffer_queue_length[5m]))
---
# Same expression split by instance label; the legend is the instance alone.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: buffer-queue-length-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  description: Fluentd Buffer queue length
  legendFormat: '[[instance]]'
  expression: >-
    sum(rate(fluentd_output_status_buffer_queue_length[5m])) by (instance)
---
# Sum of nginx connections in state "reading".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-reading
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection reading
  legendFormat: Reading connections
  expression: >-
    sum(nginx_connections{state="reading"})
---
# "reading" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-reading-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection reading
  legendFormat: Reading connections([[instance]])
  expression: >-
    sum(nginx_connections{state="reading"}) by (instance)
---
# Sum of nginx connections in state "waiting".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-waiting
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection waiting
  legendFormat: Nginx waiting connection
  expression: >-
    sum(nginx_connections{state="waiting"})
---
# "waiting" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-waiting-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection waiting
  legendFormat: Nginx waiting connection([[instance]])
  expression: >-
    sum(nginx_connections{state="waiting"}) by (instance)
---
# Sum of nginx connections in state "writing".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-writing
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection writing
  legendFormat: Writing connections
  expression: >-
    sum(nginx_connections{state="writing"})
---
# "writing" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-writing-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection writing
  legendFormat: Writing connections([[instance]])
  expression: >-
    sum(nginx_connections{state="writing"}) by (instance)
---
# Sum of nginx connections in state "accepted".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-accepted
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection accepted
  legendFormat: Accepted connections
  expression: >-
    sum(nginx_connections{state="accepted"})
---
# "accepted" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-accepted-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection accepted
  legendFormat: Accepted connections([[instance]])
  expression: >-
    sum(nginx_connections{state="accepted"}) by (instance)
---
# Sum of nginx connections in state "active".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-active
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection active
  legendFormat: Active connections
  expression: >-
    sum(nginx_connections{state="active"})
---
# "active" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-active-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection active
  legendFormat: Active connections([[instance]])
  expression: >-
    sum(nginx_connections{state="active"}) by (instance)
---
# Sum of nginx connections in state "handled".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-handled
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection handled
  legendFormat: Handled connections
  expression: >-
    sum(nginx_connections{state="handled"})
---
# "handled" connections split by instance label. The legend carries
# [[instance]] so the per-instance series can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-handled-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx connection handled
  legendFormat: Handled connections([[instance]])
  expression: >-
    sum(nginx_connections{state="handled"}) by (instance)
---
# Maximum upstream_response_time_seconds_sum per (host, path), sorted
# descending.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    level: cluster
    metric: upstream-response-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: Upstream response seconds(host:[[host]] path:[[path]])
  expression: >-
    sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Details variant; the expression and legend match the summary metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    level: cluster
    metric: upstream-response-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: Upstream response seconds(host:[[host]] path:[[path]])
  expression: >-
    sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Maximum of the le="1" request_duration_seconds bucket per (host, path).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    level: cluster
    metric: request-process-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: Request duration(host:[[host]] path:[[path]])
  expression: >-
    max(request_duration_seconds_bucket{le="1"}) by (host, path)
---
# Details variant; the expression and legend match the summary metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    level: cluster
    metric: request-process-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: Request duration(host:[[host]] path:[[path]])
  expression: >-
    max(request_duration_seconds_bucket{le="1"}) by (host, path)
---
# 5m transmit byte rate for the instances matching $instance, excluding the
# lo/veth/docker/flannel/cali/cbr devices, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-bytes-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-io
    level: cluster
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit bytes sum rate
  legendFormat: Transmit
  expression: >-
    sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    * 8 / 1024
---
# Same transmit rate split by device; the legend is the device name alone.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-io
    level: cluster
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit bytes sum rate
  legendFormat: '[[device]]'
  expression: >-
    sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device) * 8 / 1024
---
# 5m rate of dropped received packets for the instances matching $instance,
# excluding the lo/veth/docker/flannel/cali/cbr devices. The legend says
# "dropped" so it does not collide with the plain receive-packets series
# drawn on the same network-packet graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-packets-dropped-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive packets dropped sum rate
  legendFormat: Receive dropped
  expression: >-
    sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Dropped received packet rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-packets-dropped-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive packets dropped sum rate
  legendFormat: Receive dropped([[device]])
  expression: >-
    sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 5m rate of transmitted packets for the instances matching $instance.
# Applies the same lo/veth/docker/flannel/cali/cbr device exclusion as the
# other node network metrics, so transmit and receive series on the
# network-packet graph cover the same set of devices.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-packets-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit packets sum rate
  legendFormat: Transmit packets
  expression: >-
    sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Transmitted packet rate split by device, with the same device exclusion.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-packets-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit packets sum rate
  legendFormat: Transmit packets([[device]])
  expression: >-
    sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 5m rate of bytes written to disk for the instances matching $instance,
# scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: disk-io
    level: cluster
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node disk io writes bytes sum rate
  legendFormat: Write
  expression: >-
    sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))
    * 8 / 1024
---
# Disk write rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: disk-io
    level: cluster
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node disk io writes bytes sum rate
  legendFormat: Write([[device]])
  expression: >-
    sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))
    by (device) * 8 / 1024
---
# 5m rate of bytes read from disk for the instances matching $instance,
# scaled by 8 / 1024. The grouping clause is present but empty (`by ()`).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: disk-io
    level: cluster
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node disk io reads bytes sum rate
  legendFormat: Read
  expression: >-
    sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m])) by
    () * 8 / 1024
---
# Disk read rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: disk-io
    level: cluster
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node disk io reads bytes sum rate
  legendFormat: Read([[device]])
  expression: >-
    sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m])) by
    (device) * 8 / 1024
---
# Used fraction of filesystem space, (size - free) / size, over all
# non-"rootfs" devices of the instances matching $instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-fs-usage-percent
  labels:
    app: metric-expression
    component: node
    details: "false"
    level: cluster
    metric: fs-usage-percent
    source: rancher-monitoring
spec:
  description: node fs usage percent
  legendFormat: Disk usage
  expression: >-
    (sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})
    - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"})
    ) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})
---
# Same used fraction computed per device; the legend is the device name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-fs-usage-percent-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    level: cluster
    metric: fs-usage-percent
    source: rancher-monitoring
spec:
  description: node fs usage percent
  legendFormat: '[[device]]'
  expression: >-
    (sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})
    by (device) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"})
    by (device)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})
    by (device)
---
# 5m rate of received packets for the instances matching $instance,
# excluding the lo/veth/docker/flannel/cali/cbr devices.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-packets-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive packets sum rate
  legendFormat: Receive packets
  expression: >-
    sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Received packet rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-packets-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive packets sum rate
  legendFormat: Receive packets([[device]])
  expression: >-
    sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 5m rate of transmit errors for the instances matching $instance,
# excluding the lo/veth/docker/flannel/cali/cbr devices.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-errors-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-errors-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit errors sum rate
  legendFormat: Transmit errors
  expression: >-
    sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Transmit error rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-errors-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-errors-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit errors sum rate
  legendFormat: Transmit errors([[device]])
  expression: >-
    sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 1-minute load (node_load1) for the instances matching $instance, divided by
# the number of node_cpu_seconds_total{mode="system"} series.
# The name and metric label say load-1 to match the expression, legend and
# description; "node-cpu-load-5" belongs to the node_load5 metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-1
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-1
    source: rancher-monitoring
spec:
  description: node cpu load 1
  legendFormat: Load1
  expression: >-
    sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# Details variant of the 5-minute load: node_load5 for the instances matching
# $instance, divided by the number of node_cpu_seconds_total{mode="system"}
# series. Expression, legend and description follow the load-5 name and
# metric label of this document.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-5-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-5
    source: rancher-monitoring
spec:
  description: node cpu load 5
  legendFormat: Load5
  expression: >-
    sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# 15-minute load (node_load15) for the instances matching $instance, divided
# by the number of node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-15
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-15
    source: rancher-monitoring
spec:
  description: node cpu load 15
  legendFormat: Load15
  expression: >-
    sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# Details variant; the expression and legend match the summary metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-15-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-15
    source: rancher-monitoring
spec:
  description: node cpu load 15
  legendFormat: Load15
  expression: >-
    sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# One minus the per-instance average irate (5m range) of mode="idle" CPU
# seconds, for the instances matching $instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    level: cluster
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: node cpu usage seconds sum rate
  legendFormat: CPU
  expression: >-
    1 - (avg(irate(node_cpu_seconds_total{mode="idle", instance=~"$instance"}[5m])) by (instance))
---
# Average irate of CPU seconds per mode; the legend is the mode name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    level: cluster
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: node cpu usage seconds sum rate
  legendFormat: '[[mode]]'
  expression: >-
    avg(irate(node_cpu_seconds_total{instance=~"$instance"}[5m]))by (mode)
---
# Used memory fraction, 1 - MemAvailable / MemTotal, summed over the
# instances matching $instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-memory-usage-percent
  labels:
    app: metric-expression
    component: node
    details: "false"
    level: cluster
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: node memory usage percent
  legendFormat: Memory usage
  expression: >-
    1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"})
    / sum(node_memory_MemTotal_bytes{instance=~"$instance"})
---
# Details variant; the expression and legend match the summary metric.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-memory-usage-percent-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    level: cluster
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: node memory usage percent
  legendFormat: Memory usage
  expression: >-
    1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"})
    / sum(node_memory_MemTotal_bytes{instance=~"$instance"})
---
# 5m receive byte rate for the instances matching $instance, excluding the
# lo/veth/docker/flannel/cali/cbr devices, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-bytes-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-io
    level: cluster
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive bytes sum rate
  legendFormat: Receive
  expression: >-
    sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    * 8 / 1024
---
# Receive byte rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-io
    level: cluster
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive bytes sum rate
  legendFormat: Receive([[device]])
  expression: >-
    sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device) * 8 / 1024
---
# 5m rate of receive errors for the instances matching $instance, excluding
# the lo/veth/docker/flannel/cali/cbr devices. The legend says "errors" so it
# does not collide with the plain receive-packets series drawn on the same
# network-packet graph.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-errors-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive errors sum rate
  legendFormat: Receive errors
  expression: >-
    sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Receive error rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-receive-errors-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: node network receive errors sum rate
  legendFormat: Receive errors([[device]])
  expression: >-
    sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 5-minute load (node_load5) for the instances matching $instance, divided by
# the number of node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-5
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-5
    source: rancher-monitoring
spec:
  description: node cpu load 5
  legendFormat: Load5
  expression: >-
    sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# Details variant of the 1-minute load: node_load1 for the instances matching
# $instance, divided by the number of node_cpu_seconds_total{mode="system"}
# series. The expression follows the load-1 name, metric label, legend and
# description of this document.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-cpu-load-1-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-1
    source: rancher-monitoring
spec:
  description: node cpu load 1
  legendFormat: Load1
  expression: >-
    sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
---
# 5m rate of dropped transmitted packets for the instances matching
# $instance, excluding the lo/veth/docker/flannel/cali/cbr devices.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-packets-dropped-sum-rate
  labels:
    app: metric-expression
    component: node
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit packets dropped sum rate
  legendFormat: Transmit dropped
  expression: >-
    sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
---
# Dropped transmitted packet rate split by device.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: node-network-transmit-packets-dropped-sum-rate-details
  labels:
    app: metric-expression
    component: node
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: node network transmit packets dropped sum rate
  legendFormat: Transmit dropped([[device]])
  expression: >-
    sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))
    by (device)
---
# 5m rate of CFS-throttled CPU seconds for containers (non-empty
# container_name) in the pod selected by $namespace / $podName.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-cfs-throttled-seconds-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: "false"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu cfs throttled seconds sum rate
  legendFormat: CPU cfs throttled
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Throttled CPU seconds rate split by container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-cfs-throttled-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: "true"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu cfs throttled seconds sum rate
  legendFormat: CPU cfs throttled([[container_name]])
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Working-set memory of the pod's containers divided by the summed container
# memory limits. Both sides select the pod through the $namespace / $podName
# placeholders so numerator and denominator cover the same pod.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-memory-usage-percent
  labels:
    app: metric-expression
    component: pod
    details: "false"
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: pod memory usage percent
  legendFormat: Memory
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName"},"pod_name", "", "pod"))
---
# Same ratio grouped by container_name on both sides.
# NOTE(review): label_join only adds pod_name to the limits series; whether
# those series carry a container_name label for the grouping to match on is
# not visible here - confirm against kube-state-metrics.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-memory-usage-percent-details
  labels:
    app: metric-expression
    component: pod
    details: "true"
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: pod memory usage percent
  legendFormat: Memory([[container_name]])
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) by (container_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName"},"pod_name", "", "pod")) by (container_name)
---
# Pod filesystem usage, aggregated view: total container_fs_usage_bytes over
# every non-empty container_name matching the selectors.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-fs-bytes-sum
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  description: pod fs bytes sum
  legendFormat: Filesystem usage
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""})
---
# Pod filesystem usage, detail view: container_fs_usage_bytes summed per
# container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-fs-bytes-sum-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  description: pod fs bytes sum
  legendFormat: 'Filesystem usage([[container_name]])'
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) by (container_name)
---
# Pod received packets, aggregated view: one summed 5m rate of
# container_network_receive_packets_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-packets-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive packets sum rate
  legendFormat: Receive packets
  expression: >-
    sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod received packets, detail view: 5m rate of
# container_network_receive_packets_total, one series per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-packets-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive packets sum rate
  legendFormat: 'Receive packets([[container_name]])'
  expression: >-
    sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod transmitted packets, aggregated view: one summed 5m rate of
# container_network_transmit_packets_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-packets-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit packets sum rate
  legendFormat: Transmit packets
  expression: >-
    sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod transmitted packets, detail view: 5m rate of
# container_network_transmit_packets_total, one series per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-packets-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit packets sum rate
  legendFormat: 'Transmit packets([[container_name]])'
  expression: >-
    sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod user-mode CPU time, aggregated view: one summed 5m rate of
# container_cpu_user_seconds_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-user-seconds-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu user seconds sum rate
  legendFormat: CPU user seconds
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod user-mode CPU time, detail view: 5m rate of
# container_cpu_user_seconds_total, one series per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-user-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu user seconds sum rate
  legendFormat: 'CPU user seconds([[container_name]])'
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod disk reads, aggregated view: summed 5m rate of
# container_fs_reads_bytes_total, scaled by * 8 / 1024
# (presumably bytes/s to Kbit/s - confirm against the graph unit).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod disk io reads bytes sum rate
  legendFormat: Read
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Pod disk reads, detail view: 5m rate of container_fs_reads_bytes_total per
# container_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod disk io reads bytes sum rate
  legendFormat: 'Read([[container_name]])'
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name) * 8 / 1024
---
# Pod network receive throughput, aggregated view: summed 5m rate of
# container_network_receive_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-bytes-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-io
    level: project
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive bytes sum rate
  legendFormat: Receive
  expression: >-
    sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Pod network receive throughput, detail view: 5m rate of
# container_network_receive_bytes_total per container_name, scaled by
# * 8 / 1024.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart, matching the other
# detail metrics of this component.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-io
    level: project
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive bytes sum rate
  legendFormat: 'Receive([[container_name]])'
  expression: >-
    sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name) * 8 / 1024
---
# Pod network transmit throughput, aggregated view: summed 5m rate of
# container_network_transmit_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-bytes-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-io
    level: project
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit bytes sum rate
  legendFormat: Transmit
  expression: >-
    sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Pod network transmit throughput, detail view: 5m rate of
# container_network_transmit_bytes_total per container_name, scaled by
# * 8 / 1024.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-io
    level: project
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit bytes sum rate
  legendFormat: 'Transmit([[container_name]])'
  expression: >-
    sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name) * 8 / 1024
---
# Pod dropped inbound packets, aggregated view: one summed 5m rate of
# container_network_receive_packets_dropped_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-packets-dropped-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive packets dropped sum rate
  legendFormat: Receive dropped
  expression: >-
    sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod dropped inbound packets, detail view: 5m rate of
# container_network_receive_packets_dropped_total per container_name.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-packets-dropped-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive packets dropped sum rate
  legendFormat: 'Receive dropped([[container_name]])'
  expression: >-
    sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod memory usage, aggregated view: total container_memory_working_set_bytes
# for matching containers, excluding series whose `name` label matches "POD".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-memory-usage-bytes-sum
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  description: pod memory usage bytes sum
  legendFormat: Memory usage
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""})
---
# Pod memory usage, detail view: container_memory_working_set_bytes summed
# per container_name, excluding series whose `name` label matches "POD".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-memory-usage-bytes-sum-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  description: pod memory usage bytes sum
  legendFormat: 'Memory usage([[container_name]])'
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}) by (container_name)
---
# Pod disk writes, aggregated view: summed 5m rate of
# container_fs_writes_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod disk io writes bytes sum rate
  legendFormat: Write
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Pod disk writes, detail view: 5m rate of container_fs_writes_bytes_total
# per container_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: pod disk io writes bytes sum rate
  legendFormat: 'Write([[container_name]])'
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name) * 8 / 1024
---
# Pod inbound network errors, aggregated view: one summed 5m rate of
# container_network_receive_errors_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-errors-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive errors sum rate
  legendFormat: Receive errors
  expression: >-
    sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod inbound network errors, detail view: 5m rate of
# container_network_receive_errors_total per container_name.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-receive-errors-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: pod network receive errors sum rate
  legendFormat: 'Receive errors([[container_name]])'
  expression: >-
    sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod CPU usage, aggregated view: one summed 5m rate of
# container_cpu_usage_seconds_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  description: pod CPU usage sum rate
  legendFormat: CPU usage
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod CPU usage, detail view: 5m rate of container_cpu_usage_seconds_total,
# one series per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  description: pod CPU usage sum rate
  legendFormat: 'CPU usage([[container_name]])'
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod outbound network errors, aggregated view: one summed 5m rate of
# container_network_transmit_errors_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-errors-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-transmit-errors-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit errors sum rate
  legendFormat: Transmit errors
  expression: >-
    sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod outbound network errors, detail view: 5m rate of
# container_network_transmit_errors_total per container_name.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-errors-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-transmit-errors-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit errors sum rate
  legendFormat: 'Transmit errors([[container_name]])'
  expression: >-
    sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod dropped outbound packets, aggregated view: one summed 5m rate of
# container_network_transmit_packets_dropped_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-packets-dropped-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: network-packet
    level: project
    metric: network-transmit-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit packets dropped sum rate
  legendFormat: Transmit dropped
  expression: >-
    sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod dropped outbound packets, detail view: 5m rate of
# container_network_transmit_packets_dropped_total per container_name.
# The legend carries [[container_name]] so the per-container series produced
# by the `by (container_name)` grouping can be told apart.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-network-transmit-packets-dropped-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: network-packet
    level: project
    metric: network-transmit-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: pod network transmit packets dropped sum rate
  legendFormat: 'Transmit dropped([[container_name]])'
  expression: >-
    sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Pod system-mode CPU time, aggregated view: one summed 5m rate of
# container_cpu_system_seconds_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-system-seconds-sum-rate
  labels:
    app: metric-expression
    component: pod
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu system seconds sum rate
  legendFormat: CPU system seconds
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Pod system-mode CPU time, detail view: 5m rate of
# container_cpu_system_seconds_total, one series per container_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: pod-cpu-system-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: pod
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  description: pod cpu system seconds sum rate
  legendFormat: 'CPU system seconds([[container_name]])'
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (container_name)
---
# Scheduler end-to-end scheduling latency, aggregated view: 0.99 quantile of
# the microseconds histogram, divided by 1e+06.
# NOTE(review): this view groups by (le, instance) exactly like the details
# variant while its legend has no [[instance]] - confirm that is intended.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile
  labels:
    app: metric-expression
    component: scheduler
    details: 'false'
    level: cluster
    metric: e-2-e-scheduling-latency-seconds-quantile
    source: rancher-monitoring
spec:
  description: scheduler e 2 e scheduling latency seconds quantile
  legendFormat: E2E latency
  expression: >-
    histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le, instance)) / 1e+06
---
# Scheduler end-to-end scheduling latency, detail view: 0.99 quantile of the
# microseconds histogram per instance, divided by 1e+06.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile-details
  labels:
    app: metric-expression
    component: scheduler
    details: 'true'
    level: cluster
    metric: e-2-e-scheduling-latency-seconds-quantile
    source: rancher-monitoring
spec:
  description: scheduler e 2 e scheduling latency seconds quantile
  legendFormat: 'E2E latency([[instance]])'
  expression: >-
    histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le, instance)) / 1e+06
---
# Scheduler preemption attempts, aggregated view: one summed 5m rate of
# scheduler_total_preemption_attempts across all instances.
# The per-instance grouping belongs to the "-details" variant; here it would
# yield several series that all share the same fixed legend.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-total-preemption-attempts
  labels:
    app: metric-expression
    component: scheduler
    details: 'false'
    level: cluster
    metric: total-preemption-attempts
    source: rancher-monitoring
spec:
  description: scheduler total preemption attempts
  legendFormat: Preemption attempts
  expression: >-
    sum(rate(scheduler_total_preemption_attempts[5m]))
---
# Scheduler preemption attempts, detail view: 5m rate of
# scheduler_total_preemption_attempts, one series per instance.
# Grouping by instance is required because the legend interpolates
# [[instance]]; a bare sum() drops that label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-total-preemption-attempts-details
  labels:
    app: metric-expression
    component: scheduler
    details: 'true'
    level: cluster
    metric: total-preemption-attempts
    source: rancher-monitoring
spec:
  description: scheduler total preemption attempts
  legendFormat: 'Preemption attempts([[instance]])'
  expression: >-
    sum(rate(scheduler_total_preemption_attempts[5m])) by (instance)
---
# Unscheduled pods, aggregated view: sum of kube_pod_status_scheduled series
# whose condition label is "false".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler
  labels:
    app: metric-expression
    component: scheduler
    details: 'false'
    level: cluster
    metric: pod-unscheduler
    source: rancher-monitoring
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: >-
    sum(kube_pod_status_scheduled{condition="false"})
---
# Unscheduled pods, detail view: sum of kube_pod_status_scheduled series
# whose condition label is "false".
# NOTE(review): expression and legend are identical to the aggregated
# variant, so this view adds no breakdown - confirm that is intended.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler-details
  labels:
    app: metric-expression
    component: scheduler
    details: 'true'
    level: cluster
    metric: pod-unscheduler
    source: rancher-monitoring
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: >-
    sum(kube_pod_status_scheduled{condition="false"})
---
# Workload disk writes, aggregated view: summed 5m rate of
# container_fs_writes_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload disk io writes bytes sum rate
  legendFormat: Write
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Workload disk writes, detail view: 5m rate of
# container_fs_writes_bytes_total per pod_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload disk io writes bytes sum rate
  legendFormat: 'Write([[pod_name]])'
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name) * 8 / 1024
---
# Workload disk reads, aggregated view: summed 5m rate of
# container_fs_reads_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload disk io reads bytes sum rate
  legendFormat: Read
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Workload disk reads, detail view: 5m rate of
# container_fs_reads_bytes_total per pod_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload disk io reads bytes sum rate
  legendFormat: 'Read([[pod_name]])'
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name) * 8 / 1024
---
# Workload filesystem usage, aggregated view: total container_fs_usage_bytes
# over every non-empty container_name matching the selectors.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-fs-bytes-sum
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  description: workload fs bytes sum
  legendFormat: File usage
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""})
---
# Workload filesystem usage, detail view: container_fs_usage_bytes summed per
# pod_name.
# The legend placeholder must stay quoted: an unquoted leading "[[" is read
# by YAML as a nested flow sequence rather than a string.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-fs-bytes-sum-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  description: workload fs bytes sum
  legendFormat: '[[pod_name]]'
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) by (pod_name)
---
# Workload transmitted packets, aggregated view: one summed 5m rate of
# container_network_transmit_packets_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-packets-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-packet
    level: project
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: workload network transmit packets sum rate
  legendFormat: Transmit packets
  expression: >-
    sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload transmitted packets, detail view: 5m rate of
# container_network_transmit_packets_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-packets-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-packet
    level: project
    metric: network-transmit-packets-sum-rate
    source: rancher-monitoring
spec:
  description: workload network transmit packets sum rate
  legendFormat: 'Transmit packets([[pod_name]])'
  expression: >-
    sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload dropped inbound packets, aggregated view: one summed 5m rate of
# container_network_receive_packets_dropped_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-packets-dropped-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive packets dropped sum rate
  legendFormat: Receive dropped
  expression: >-
    sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload dropped inbound packets, detail view: 5m rate of
# container_network_receive_packets_dropped_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-packets-dropped-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-packets-dropped-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive packets dropped sum rate
  legendFormat: 'Receive dropped([[pod_name]])'
  expression: >-
    sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload CPU usage, aggregated view: one summed 5m rate of
# container_cpu_usage_seconds_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: container-cpu-usage
    level: project
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: workload cpu usage seconds sum rate
  legendFormat: CPU usage
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload CPU usage, detail view: 5m rate of
# container_cpu_usage_seconds_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: container-cpu-usage
    level: project
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: workload cpu usage seconds sum rate
  legendFormat: 'CPU usage([[pod_name]])'
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload system-mode CPU time, aggregated view: one summed 5m rate of
# container_cpu_system_seconds_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-system-seconds-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: container-cpu-usage-details
    level: project
    metric: cpu-system-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: workload cpu system seconds sum rate
  legendFormat: CPU system seconds
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload system-mode CPU time, detail view: 5m rate of
# container_cpu_system_seconds_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-system-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: container-cpu-usage-details
    level: project
    metric: cpu-system-seconds-sum-rate
    source: rancher-monitoring
spec:
  description: workload cpu system seconds sum rate
  legendFormat: 'CPU system seconds([[pod_name]])'
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload network receive throughput, aggregated view: summed 5m rate of
# container_network_receive_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-bytes-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-io
    level: project
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive bytes sum rate
  legendFormat: Receive
  expression: >-
    sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Workload network receive throughput, detail view: 5m rate of
# container_network_receive_bytes_total per pod_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-io
    level: project
    metric: network-receive-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive bytes sum rate
  legendFormat: 'Receive([[pod_name]])'
  expression: >-
    sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name) * 8 / 1024
---
# Workload inbound network errors, aggregated view: one summed 5m rate of
# container_network_receive_errors_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-errors-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive errors sum rate
  legendFormat: Receive errors
  expression: >-
    sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload inbound network errors, detail view: 5m rate of
# container_network_receive_errors_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-errors-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-errors-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive errors sum rate
  legendFormat: 'Receive errors([[pod_name]])'
  expression: >-
    sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload received packets, aggregated view: one summed 5m rate of
# container_network_receive_packets_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-packets-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-packet
    level: project
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive packets sum rate
  legendFormat: Receive packets
  expression: >-
    sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Workload received packets, detail view: 5m rate of
# container_network_receive_packets_total, one series per pod_name.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-receive-packets-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-packet
    level: project
    metric: network-receive-packets-sum-rate
    source: rancher-monitoring
spec:
  description: workload network receive packets sum rate
  legendFormat: 'Receive packets([[pod_name]])'
  expression: >-
    sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload memory usage, aggregated view: total
# container_memory_working_set_bytes for matching containers, excluding
# series whose `name` label matches "POD".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-memory-usage-bytes-sum
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  description: workload memory usage bytes sum
  legendFormat: Memory
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""})
---
# Workload memory usage, detail view: container_memory_working_set_bytes
# summed per pod_name, excluding series whose `name` label matches "POD".
# The legend is just the pod name, quoted so the leading "[[" stays a string.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-memory-usage-bytes-sum-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  description: workload memory usage bytes sum
  legendFormat: '[[pod_name]]'
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}) by (pod_name)
---
# Workload memory usage ratio, aggregated view: summed working-set bytes
# divided by the summed container memory limits.
# NOTE(review): the usage selector filters on $podName while the limits
# selector filters on $InstanceName - confirm the consumer supplies both.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-memory-usage-percent
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: workload memory usage percent
  legendFormat: Usage percent
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$InstanceName"},"pod_name", "", "pod"))
---
# Workload memory usage ratio, detail view: working-set bytes per pod_name
# divided by the memory limits per pod_name; label_join copies the limits
# metric's `pod` label into `pod_name` so both sides share the grouping key.
# NOTE(review): the usage selector filters on $podName while the limits
# selector filters on $InstanceName - confirm the consumer supplies both.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-memory-usage-percent-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  description: workload memory usage percent
  legendFormat: 'Usage percent([[pod_name]])'
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name!=""}) by (pod_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$InstanceName"},"pod_name", "", "pod")) by (pod_name)
---
# Workload network transmit throughput, aggregated view: summed 5m rate of
# container_network_transmit_bytes_total, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-bytes-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-io
    level: project
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload network transmit bytes sum rate
  legendFormat: Transmit
  expression: >-
    sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) * 8 / 1024
---
# Workload network transmit throughput, detail view: 5m rate of
# container_network_transmit_bytes_total per pod_name, scaled by * 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: workload
    details: 'true'
    graph: network-io
    level: project
    metric: network-transmit-bytes-sum-rate
    source: rancher-monitoring
spec:
  description: workload network transmit bytes sum rate
  legendFormat: 'Transmit([[pod_name]])'
  expression: >-
    sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name) * 8 / 1024
---
# Workload outbound network errors, aggregated view: one summed 5m rate of
# container_network_transmit_errors_total.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-errors-sum-rate
  labels:
    app: metric-expression
    component: workload
    details: 'false'
    graph: network-packet
    level: project
    metric: network-transmit-errors-sum-rate
    source: rancher-monitoring
spec:
  description: workload network transmit errors sum rate
  legendFormat: Transmit errors
  expression: >-
    sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Per-pod rate of network transmit errors (grouped by pod_name).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-errors-sum-rate-details
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    graph: network-packet
    metric: network-transmit-errors-sum-rate
    details: "true"
spec:
  description: workload network transmit errors sum rate
  legendFormat: Transmit errors([[pod_name]])
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload-wide rate of dropped transmit packets over a 5m window.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-packets-dropped-sum-rate
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    graph: network-packet
    metric: network-transmit-packets-dropped-sum-rate
    details: "false"
spec:
  description: workload network transmit packets dropped sum rate
  legendFormat: Transmit dropped
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Per-pod rate of dropped transmit packets (grouped by pod_name).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-network-transmit-packets-dropped-sum-rate-details
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    graph: network-packet
    metric: network-transmit-packets-dropped-sum-rate
    details: "true"
spec:
  description: workload network transmit packets dropped sum rate
  legendFormat: Transmit dropped([[pod_name]])
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload-wide rate of user-mode CPU seconds over a 5m window.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-user-seconds-sum-rate
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    graph: container-cpu-usage-details
    metric: cpu-user-seconds-sum-rate
    details: "false"
spec:
  description: workload cpu user seconds sum rate
  legendFormat: CPU user seconds
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Per-pod rate of user-mode CPU seconds (grouped by pod_name).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-user-seconds-sum-rate-details
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    graph: container-cpu-usage-details
    metric: cpu-user-seconds-sum-rate
    details: "true"
spec:
  description: workload cpu user seconds sum rate
  legendFormat: CPU user seconds([[pod_name]])
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Workload-wide rate of CFS-throttled CPU seconds over a 5m window.
# This metric carries no "graph" label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-cfs-throttled-seconds-sum-rate
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    metric: cpu-cfs-throttled-seconds-sum-rate
    details: "false"
spec:
  description: workload cpu cfs throttled seconds sum rate
  legendFormat: CPU cfs throttled
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m]))
---
# Per-pod rate of CFS-throttled CPU seconds (grouped by pod_name).
# This metric carries no "graph" label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: workload-cpu-cfs-throttled-seconds-sum-rate-details
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
    metric: cpu-cfs-throttled-seconds-sum-rate
    details: "true"
spec:
  description: workload cpu cfs throttled seconds sum rate
  legendFormat: CPU cfs throttled([[pod_name]])
  # Folded scalar: the two lines are joined by a single space.
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name!=""}[5m])) by (pod_name)
---
# Cluster graph: apiserver request latency (y-axis in ms), displayed under
# the kube-component resource type.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: apiserver-request-latency
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: apiserver
    cluster-graph: kube-component
spec:
  title: apiserver-request-latency
  priority: 300
  resourceType: apiserver
  displayResourceType: kube-component
  yAxis:
    unit: ms
  # Summary series (details "false") and per-item series (details "true").
  metricsSelector:
    component: apiserver
    metric: request-latency-milliseconds-avg
    details: "false"
  detailsMetricsSelector:
    component: apiserver
    metric: request-latency-milliseconds-avg
    details: "true"
---
# Cluster graph: apiserver request count rate (unit: number), displayed under
# the kube-component resource type.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: apiserver-request-count
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: apiserver
    cluster-graph: kube-component
spec:
  title: apiserver-request-count
  priority: 301
  resourceType: apiserver
  displayResourceType: kube-component
  yAxis:
    unit: number
  metricsSelector:
    component: apiserver
    metric: request-count-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: apiserver
    metric: request-count-sum-rate
    details: "true"
\ No newline at end of file
# Cluster graph: cluster CPU usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-cpu-usage
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-cpu-usage
  priority: 100
  resourceType: cluster
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: cpu-usage-seconds-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: cpu-usage-seconds-sum-rate
    details: "true"
---
# Cluster graph: cluster CPU load; selects by graph label "cpu-load".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-cpu-load
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-cpu-load
  priority: 101
  resourceType: cluster
  yAxis:
    unit: number
  metricsSelector:
    component: cluster
    graph: cpu-load
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: cpu-load
    details: "true"
---
# Cluster graph: cluster memory usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-memory-usage
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-memory-usage
  priority: 102
  resourceType: cluster
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: memory-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: memory-usage-percent
    details: "true"
---
# Cluster graph: cluster filesystem usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-fs-usage-percent
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-fs-usage-percent
  priority: 103
  resourceType: cluster
  thresholds: 10
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: fs-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: fs-usage-percent
    details: "true"
---
# Cluster graph: cluster disk I/O; selects by graph label "disk-io", unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-disk-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-disk-io
  priority: 104
  resourceType: cluster
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: cluster
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: disk-io
    details: "true"
---
# Cluster graph: cluster network I/O; selects by graph label "network-io",
# unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-network-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-network-io
  priority: 105
  resourceType: cluster
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: cluster
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: network-io
    details: "true"
---
# Cluster graph: cluster network packets; selects by graph label
# "network-packet", unit pps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: cluster-network-packet
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: cluster
spec:
  title: cluster-network-packet
  priority: 106
  resourceType: cluster
  thresholds: 10
  yAxis:
    unit: pps
  metricsSelector:
    component: cluster
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: network-packet
    details: "true"
\ No newline at end of file
---
# Cluster graph: controller-manager queue depth. The selectors carry only
# component and details (no metric/graph label).
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-queue-depth
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: controllermanager
    cluster-graph: kube-component
spec:
  title: controllermanager-queue-depth
  priority: 310
  resourceType: controllermanager
  displayResourceType: kube-component
  yAxis:
    unit: number
  metricsSelector:
    component: controllermanager
    details: "false"
  detailsMetricsSelector:
    component: controllermanager
    details: "true"
---
# Cluster graph: etcd server leader sum, rendered as a singlestat.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-server-leader-sum
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-server-leader-sum
  description: etcd server leader sum
  priority: 200
  resourceType: etcd
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-leader-sum
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-leader-sum
    details: "true"
---
# Cluster graph: etcd server failed proposals, rendered as a singlestat.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-server-failed-proposal
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-server-failed-proposal
  description: etcd server failed proposal
  priority: 201
  resourceType: etcd
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-failed-proposal
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-failed-proposal
    details: "true"
---
# Cluster graph: etcd leader changes, rendered as a singlestat.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-leader-change
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-leader-change
  description: etcd leader change
  priority: 202
  resourceType: etcd
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-leader-changes-seen-sum-increase
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-leader-changes-seen-sum-increase
    details: "true"
---
# Cluster graph: etcd gRPC client traffic; selects by graph label
# "rpc-client-traffic", unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-grpc-client
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-grpc-client
  description: etcd grpc client receive/send bytes sum rate
  priority: 203
  resourceType: etcd
  yAxis:
    unit: kbps
  metricsSelector:
    component: etcd
    graph: rpc-client-traffic
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: rpc-client-traffic
    details: "true"
---
# Cluster graph: etcd database size, unit byte. Unlike its etcd siblings this
# graph also carries a "metric" label in its own metadata.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-db-bytes-sum
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
    metric: db-bytes-sum
spec:
  title: etcd-db-bytes-sum
  description: etcd db bytes sum
  priority: 204
  resourceType: etcd
  yAxis:
    unit: byte
  metricsSelector:
    component: etcd
    metric: db-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: db-bytes-sum
    details: "true"
---
# Cluster graph: etcd lease/watch streams; selects by graph label "etcd-stream".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-stream
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-stream
  description: Etcd lease/watch stream
  priority: 205
  resourceType: etcd
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    graph: etcd-stream
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: etcd-stream
    details: "true"
---
# Cluster graph: etcd peer traffic in/out; selects by graph label
# "etcd-peer-traffic", unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-peer-traffic
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-peer-traffic
  description: Etcd peer traffic in/out
  priority: 206
  resourceType: etcd
  yAxis:
    unit: kbps
  metricsSelector:
    component: etcd
    graph: etcd-peer-traffic
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: etcd-peer-traffic
    details: "true"
---
# Cluster graph: etcd raft proposals; selects by graph label "proposal".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-raft-proposals
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-raft-proposals
  description: Etcd raft proposals
  priority: 207
  resourceType: etcd
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    graph: proposal
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: proposal
    details: "true"
---
# Cluster graph: etcd RPC rate; selects by graph label "rpc-rate", unit ops.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-rpc-rate
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-rpc-rate
  description: Etcd rpc-rate
  priority: 208
  resourceType: etcd
  yAxis:
    unit: ops
  metricsSelector:
    component: etcd
    graph: rpc-rate
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: rpc-rate
    details: "true"
---
# Cluster graph: etcd disk operations; selects by graph label "disk-operate",
# unit seconds.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: etcd-disk-operate
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: etcd
spec:
  title: etcd-disk-operate
  description: Etcd disk operate
  priority: 209
  resourceType: etcd
  yAxis:
    unit: seconds
  metricsSelector:
    component: etcd
    graph: disk-operate
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: disk-operate
    details: "true"
---
# Cluster graph: etcd sync duration; selects by graph label "sync-duration",
# unit seconds.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-sync-duration
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  resourceType: etcd
  # The etcd graphs number their priorities sequentially from 200; 209 is
  # already used by etcd-disk-operate, so this graph takes the next slot.
  # NOTE(review): assumes priority is used for ordering - confirm.
  priority: 210
  title: etcd-sync-duration
  description: Etcd sync-duration
  metricsSelector:
    details: "false"
    component: etcd
    graph: sync-duration
  detailsMetricsSelector:
    details: "true"
    component: etcd
    graph: sync-duration
  yAxis:
    unit: seconds
\ No newline at end of file
# Cluster graph: fluentd input records, displayed under the rancher-component
# resource type.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: fluentd-input-record-number
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-input-record-number
  priority: 300
  resourceType: fluentd
  displayResourceType: rancher-component
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: input-record
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: input-record
    details: "true"
---
# Cluster graph: fluentd output records, displayed under the rancher-component
# resource type.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: fluentd-output-record-number
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-output-record-number
  priority: 301
  resourceType: fluentd
  displayResourceType: rancher-component
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: output-record
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: output-record
    details: "true"
---
# Cluster graph: fluentd output errors, displayed under the rancher-component
# resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-output-errors
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  resourceType: fluentd
  displayResourceType: rancher-component
  # 301 is already taken by fluentd-output-record-number; a distinct value
  # avoids a tie between the two graphs.
  # NOTE(review): assumes priority is used for ordering - confirm.
  priority: 302
  title: fluentd-output-errors
  metricsSelector:
    details: "false"
    component: fluentd
    metric: output-errors
  detailsMetricsSelector:
    details: "true"
    component: fluentd
    metric: output-errors
  yAxis:
    unit: number
---
# Cluster graph: fluentd buffer queue length, displayed under the
# rancher-component resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-buffer-queue-length
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  resourceType: fluentd
  displayResourceType: rancher-component
  # 301 is already taken by fluentd-output-record-number; 303 leaves room for
  # the other fluentd graphs and keeps this one last.
  # NOTE(review): assumes priority is used for ordering - confirm.
  priority: 303
  title: fluentd-buffer-queue-length
  metricsSelector:
    details: "false"
    component: fluentd
    metric: buffer-queue-length
  detailsMetricsSelector:
    details: "true"
    component: fluentd
    metric: buffer-queue-length
  yAxis:
    unit: number
\ No newline at end of file
# Cluster graph: ingress controller nginx connections; selects by graph label
# "nginx-connection".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-nginx-connection
  priority: 330
  resourceType: ingresscontroller
  displayResourceType: kube-component
  yAxis:
    unit: number
  metricsSelector:
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
---
# Cluster graph: ingress controller request processing time, unit seconds.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-request-process-time
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-request-process-time
  priority: 331
  resourceType: ingresscontroller
  displayResourceType: kube-component
  yAxis:
    unit: seconds
  metricsSelector:
    component: ingresscontroller
    metric: request-process-seconds
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    metric: request-process-seconds
    details: "true"
---
# Cluster graph: ingress controller upstream response time (seconds),
# rendered as a singlestat.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-upstream-response-seconds
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-upstream-response-seconds
  priority: 332
  resourceType: ingresscontroller
  displayResourceType: kube-component
  graphType: singlestat
  yAxis:
    unit: seconds
  metricsSelector:
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "true"
\ No newline at end of file
# Cluster graph: node CPU usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-cpu-usage
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-cpu-usage
  priority: 500
  resourceType: node
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: cpu-usage-seconds-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: cpu-usage-seconds-sum-rate
    details: "true"
---
# Cluster graph: node CPU load; selects by graph label "cpu-load".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-cpu-load
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-cpu-load
  priority: 501
  resourceType: node
  yAxis:
    unit: number
  metricsSelector:
    component: node
    graph: cpu-load
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: cpu-load
    details: "true"
---
# Cluster graph: node memory usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-memory-usage
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-memory-usage
  priority: 502
  resourceType: node
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: memory-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: memory-usage-percent
    details: "true"
---
# Cluster graph: node filesystem usage, y-axis in percent.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-fs-usage-percent
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-fs-usage-percent
  priority: 503
  resourceType: node
  thresholds: 10
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: fs-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: fs-usage-percent
    details: "true"
---
# Cluster graph: node disk I/O; selects by graph label "disk-io", unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-disk-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-disk-io
  priority: 504
  resourceType: node
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: node
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: disk-io
    details: "true"
---
# Cluster graph: node network I/O; selects by graph label "network-io",
# unit kbps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-network-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-network-io
  priority: 505
  resourceType: node
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: node
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: network-io
    details: "true"
---
# Cluster graph: node network packets; selects by graph label
# "network-packet", unit pps.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: node-network-packet
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: node
spec:
  title: node-network-packet
  priority: 506
  resourceType: node
  thresholds: 10
  yAxis:
    unit: pps
  metricsSelector:
    component: node
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: network-packet
    details: "true"
\ No newline at end of file
# Cluster graph: scheduler end-to-end scheduling latency quantile,
# unit seconds.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-e-2-e-scheduling-latency-seconds-quantile
  priority: 320
  resourceType: scheduler
  displayResourceType: kube-component
  thresholds: 10
  yAxis:
    unit: seconds
  metricsSelector:
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "true"
---
# Cluster graph: scheduler total preemption attempts.
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-total-preemption-attempts
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-total-preemption-attempts
  priority: 321
  resourceType: scheduler
  displayResourceType: kube-component
  thresholds: 10
  yAxis:
    unit: number
  metricsSelector:
    component: scheduler
    metric: total-preemption-attempts
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: total-preemption-attempts
    details: "true"
---
# Cluster graph for the scheduler metric labelled "pod-unscheduler".
kind: ClusterMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-pod-unscheduler
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-pod-unscheduler
  priority: 322
  resourceType: scheduler
  displayResourceType: kube-component
  thresholds: 10
  yAxis:
    unit: number
  metricsSelector:
    component: scheduler
    metric: pod-unscheduler
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: pod-unscheduler
    details: "true"
\ No newline at end of file
# Chart metadata for the project-level metric-expression sub-chart.
apiVersion: v1
name: metric-expression-project
version: 0.0.1
description: Creates Metrics CRD of Rancher monitoring graph
engine: gotpl
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# Project graph: container CPU usage (unit mcpu). The summary and details
# selectors use different graph labels.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: container-cpu-usage
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: container
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: container-cpu-usage
  priority: 800
  resourceType: container
  yAxis:
    unit: mcpu
  metricsSelector:
    component: container
    graph: container-cpu-usage
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: container-cpu-usage-details
    details: "true"
---
# Project graph: container memory usage, unit byte.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: container-memory-usage-bytes-sum
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: container
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: container-memory-usage-bytes-sum
  priority: 801
  resourceType: container
  yAxis:
    unit: byte
  metricsSelector:
    component: container
    metric: memory-usage-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: container
    metric: memory-usage-bytes-sum
    details: "true"
---
# Project graph: container network I/O; selects by graph label "network-io",
# unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: container-network-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: container
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: container-network-io
  priority: 802
  resourceType: container
  yAxis:
    unit: kbps
  metricsSelector:
    component: container
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: network-io
    details: "true"
---
# Project graph: container network packets; selects by graph label
# "network-packet", unit pps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: container-network-packet
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: container
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: container-network-packet
  priority: 803
  resourceType: container
  yAxis:
    unit: pps
  metricsSelector:
    component: container
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: network-packet
    details: "true"
---
# Project graph: container disk I/O; selects by graph label "disk-io",
# unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: container-disk-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: container
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: container-disk-io
  priority: 804
  resourceType: container
  yAxis:
    unit: kbps
  metricsSelector:
    component: container
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: disk-io
    details: "true"
---
# Project graph: pod CPU usage (unit mcpu). The summary and details selectors
# use different graph labels.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: pod-cpu-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: pod
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  resourceType: pod
  priority: 700
  title: pod-cpu-usage
  metricsSelector:
    details: "false"
    component: pod
    graph: container-cpu-usage
  # Key must be spelled exactly "detailsMetricsSelector", as in the sibling
  # graphs; a misspelled key leaves the details selector unset.
  detailsMetricsSelector:
    details: "true"
    component: pod
    graph: container-cpu-usage-details
  yAxis:
    unit: mcpu
---
# Project graph: pod memory usage, unit byte.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: pod-memory-usage-bytes-sum
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: pod
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: pod-memory-usage-bytes-sum
  priority: 701
  resourceType: pod
  yAxis:
    unit: byte
  metricsSelector:
    component: pod
    metric: memory-usage-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: pod
    metric: memory-usage-bytes-sum
    details: "true"
---
# Project graph: pod network I/O; selects by graph label "network-io",
# unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: pod-network-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: pod
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: pod-network-io
  priority: 702
  resourceType: pod
  yAxis:
    unit: kbps
  metricsSelector:
    component: pod
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: pod
    graph: network-io
    details: "true"
---
# Project graph: pod network packets; selects by graph label "network-packet",
# unit pps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: pod-network-packet
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: pod
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: pod-network-packet
  priority: 703
  resourceType: pod
  yAxis:
    unit: pps
  metricsSelector:
    component: pod
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: pod
    graph: network-packet
    details: "true"
---
# Project graph: pod disk I/O; selects by graph label "disk-io", unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: pod-disk-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: pod
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: pod-disk-io
  priority: 704
  resourceType: pod
  yAxis:
    unit: kbps
  metricsSelector:
    component: pod
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: pod
    graph: disk-io
    details: "true"
---
# Project graph: workload CPU usage (unit mcpu). The summary and details
# selectors use different graph labels.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: workload-cpu-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: workload
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  resourceType: workload
  priority: 600
  title: workload-cpu-usage
  metricsSelector:
    details: "false"
    component: workload
    graph: container-cpu-usage
  # Key must be spelled exactly "detailsMetricsSelector". The component is
  # "workload", matching the other workload graphs and the workload "-details"
  # metrics (which are labelled component=workload, details="true").
  detailsMetricsSelector:
    details: "true"
    component: workload
    graph: container-cpu-usage-details
  yAxis:
    unit: mcpu
---
# Project graph: workload memory usage, unit byte.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: workload-memory-usage-bytes-sum
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: workload-memory-usage-bytes-sum
  priority: 601
  resourceType: workload
  yAxis:
    unit: byte
  metricsSelector:
    component: workload
    metric: memory-usage-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: workload
    metric: memory-usage-bytes-sum
    details: "true"
---
# Project graph: workload network I/O; selects by graph label "network-io",
# unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: workload-network-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: workload-network-io
  priority: 602
  resourceType: workload
  yAxis:
    unit: kbps
  metricsSelector:
    component: workload
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: workload
    graph: network-io
    details: "true"
---
# Project graph: workload network packets; selects by graph label
# "network-packet", unit pps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: workload-network-packet
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: workload-network-packet
  priority: 603
  resourceType: workload
  yAxis:
    unit: pps
  metricsSelector:
    component: workload
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: workload
    graph: network-packet
    details: "true"
---
# Project graph: workload disk I/O; selects by graph label "disk-io",
# unit kbps.
kind: ProjectMonitorGraph
apiVersion: management.cattle.io/v3
metadata:
  name: workload-disk-io
  labels:
    source: rancher-monitoring
    app: metric-expression
    level: project
    component: workload
spec:
  # Filled in by the template renderer.
  projectName: {{ .ProjectName }}
  title: workload-disk-io
  priority: 604
  resourceType: workload
  yAxis:
    unit: kbps
  metricsSelector:
    component: workload
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: workload
    graph: disk-io
    details: "true"
---
# Chart metadata for the prometheus sub-chart.
apiVersion: v1
name: prometheus
version: 0.0.1
description: Creates Prometheus CRD instance for Kubernetes which maintaining by Rancher 2.
engine: gotpl
maintainers:
  - name: thxCode
    email: frank@rancher.com
# Hook scripts for the nginx proxy; the script is mounted into the
# nginx-init-auth-add init container and run before nginx starts.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ template "app.hooks.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
data:
  replace-config-by-auth.sh: |-
    #!/bin/sh
    # Renders /host/nginx.conf from /template/nginx.conf, replacing the
    # REPLACE_PARAM_AUTHORIZATION placeholder with "Bearer <token>", where
    # <token> is the pod's service-account token.
    # Exits 0 on success; 1 if the template or /host is missing or sed fails.
    srcpath="/template/nginx.conf"
    dstpath="/host/nginx.conf"
    if [ -f "$srcpath" ] && [ -d /host ]; then
        token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
        # The rendered file embeds the bearer token, so it is deliberately
        # NOT echoed to stdout: that would leak the credential into the
        # init container's logs.
        sed "s/REPLACE_PARAM_AUTHORIZATION/Bearer ${token}/g" "$srcpath" > "$dstpath" || exit 1
        exit 0
    fi
    exit 1
# ClusterIP Service exposing port 9090 and forwarding to the pods' port
# named "web".
kind: Service
apiVersion: v1
metadata:
  name: expose-prometheus-metrics
  labels:
    monitoring.cattle.io: "true"
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 9090
      targetPort: web
  # Use the user-supplied labels when present, otherwise the chart defaults.
  selector:
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- else }}
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
{{- end }}
\ No newline at end of file
# nginx.conf template for the Prometheus proxy. REPLACE_PARAM_AUTHORIZATION is
# a placeholder substituted by the replace-config-by-auth.sh hook script.
kind: ConfigMap
apiVersion: v1
metadata:
  name: {{ template "app.nginx.fullname" . }}
  labels:
    component: nginx
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
data:
  nginx.conf: |-
    user nginx;
    worker_processes auto;
    error_log /dev/null warn;
    pid /var/run/nginx.pid;
    events {
      worker_connections 1024;
    }
    http {
      include /etc/nginx/mime.types;
      log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)';
      server {
        listen 80;
        access_log off;
        gzip on;
        gzip_min_length 1k;
        gzip_comp_level 2;
        gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png;
        gzip_vary on;
        gzip_disable "MSIE [1-6]\.";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Authorization "REPLACE_PARAM_AUTHORIZATION";
        proxy_pass_header Authorization;
        location / {
          proxy_pass http://prometheus-operated:9090/;
          sub_filter_types text/html;
          sub_filter_once off;
          sub_filter 'var PATH_PREFIX = "";' 'var PATH_PREFIX = ".";';
        }
      }
    }
# Single-replica nginx Deployment. An init container renders the nginx config
# (via the mounted hook script) into a shared emptyDir, which the nginx
# container then loads from /nginx/nginx.conf.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  name: {{ template "app.nginx.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    component: nginx
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      component: nginx
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
        component: nginx
    spec:
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      initContainers:
        # Runs the hook script; reads /template, writes /host.
        - name: nginx-init-auth-add
          image: {{ .Values.image.inits.tools.repository }}:{{ .Values.image.inits.tools.tag }}
          command:
            - /usr/bin/replace-config-by-auth.sh
          volumeMounts:
            - name: prometheus-static-hooks
              mountPath: /usr/bin/replace-config-by-auth.sh
              subPath: replace-config-by-auth.sh
            - name: prometheus-static-contents
              mountPath: /host
            - name: prometheus-nginx-template
              mountPath: /template
      containers:
        - name: nginx
          image: nginx:1.15.2
          args:
            - nginx
            - -g
            - daemon off;
            - -c
            - /nginx/nginx.conf
          volumeMounts:
            # Same emptyDir the init container mounted at /host.
            - mountPath: /nginx
              name: prometheus-static-contents
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
      volumes:
        - name: prometheus-static-hooks
          configMap:
            name: {{ template "app.hooks.fullname" . }}
            # Decimal 511 == octal 0777. Written in decimal, like the 438
            # below, so the value does not depend on the parser treating a
            # leading zero as octal.
            defaultMode: 511
        - name: prometheus-static-contents
          emptyDir: {}
        - name: prometheus-nginx-template
          configMap:
            name: {{ template "app.nginx.fullname" . }}
            # Decimal 438 == octal 0666.
            defaultMode: 438
            items:
              - key: nginx.conf
                mode: 438
                path: nginx.conf
# Prometheus custom resource for the prometheus-operator.
# Optional spec fields are rendered only when the matching chart value is set.
# Lines of the form "toYaml ... | indent N" must stay at column 0 because the
# indent function supplies their leading spaces.
apiVersion: {{ template "operator_api_version" . }}
kind: Prometheus
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    {{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
    {{- end }}
  name: {{ .Release.Name }}
spec:
  {{- if .Values.listenLocal }}
  listenLocal: true
  {{- end }}
  podMetadata:
    labels:
      # Custom labels replace the default app/chart/release pod labels.
      {{- if .Values.labels }}
{{ toYaml .Values.labels | indent 6 }}
      {{- else }}
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      {{- end }}
  # Without explicit endpoints, alerts go to alertmanager-operated in the
  # release namespace.
  {{- if .Values.alertingEndpoints }}
  alerting:
    alertmanagers:
{{ toYaml .Values.alertingEndpoints | indent 6 }}
  {{- else }}
  alerting:
    alertmanagers:
      - namespace: {{ .Release.Namespace }}
        name: alertmanager-operated
        port: http
  {{- end }}
  baseImage: "{{ .Values.image.repository }}"
  {{- if .Values.externalLabels }}
  externalLabels:
{{ toYaml .Values.externalLabels | indent 4}}
  {{- end }}
  {{- if .Values.externalUrl }}
  externalUrl: "{{ .Values.externalUrl }}"
  {{- end }}
  {{- if .Values.nodeSelector }}
  nodeSelector:
{{ toYaml .Values.nodeSelector | indent 4 }}
  {{- end }}
  paused: {{ .Values.paused }}
  replicas: {{ .Values.replicaCount }}
  logLevel: {{ .Values.logLevel }}
  resources:
{{ toYaml .Values.resources | indent 4 }}
  retention: "{{ .Values.retention }}"
  {{- if .Values.routePrefix }}
  routePrefix: "{{ .Values.routePrefix }}"
  {{- end }}
  {{- if .Values.secrets }}
  secrets:
{{ toYaml .Values.secrets | indent 4 }}
  {{- end }}
  {{- if .Values.enabledRBAC }}
  serviceAccountName: {{ .Values.serviceAccountName }}
  {{- end }}
  {{- if .Values.serviceMonitorNamespaceSelector }}
  serviceMonitorNamespaceSelector:
{{ toYaml .Values.serviceMonitorNamespaceSelector | indent 4 }}
  {{- end }}
  # Default selector: ServiceMonitors labelled source=rancher-monitoring for
  # this release.
  serviceMonitorSelector:
    {{- if .Values.serviceMonitorsSelector }}
{{ toYaml .Values.serviceMonitorsSelector | indent 4 }}
    {{- else }}
    matchLabels:
      source: rancher-monitoring
      release: {{ .Release.Name }}
    {{- end }}
  {{- if .Values.remoteRead }}
  remoteRead:
{{ toYaml .Values.remoteRead | indent 4 }}
  {{- end }}
  {{- if .Values.remoteWrite }}
  remoteWrite:
{{ toYaml .Values.remoteWrite | indent 4 }}
  {{- end }}
  {{- if .Values.ruleNamespaceSelector }}
  ruleNamespaceSelector:
{{ toYaml .Values.ruleNamespaceSelector | indent 4 }}
  {{- end }}
  # Default selector: rules labelled source=rancher-monitoring for this release.
  ruleSelector:
    {{- if .Values.rulesSelector }}
{{ toYaml .Values.rulesSelector | indent 4 }}
    {{- else }}
    matchLabels:
      source: rancher-monitoring
      release: {{ .Release.Name }}
    {{- end }}
  # A full storageSpec wins over the simplified persistence.* values.
  {{- if or .Values.storageSpec .Values.persistence.enabled }}
  storage:
    volumeClaimTemplate:
      spec:
      {{- if .Values.storageSpec }}
{{ toYaml .Values.storageSpec | indent 8 }}
      {{- else }}
      {{ if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }}
        storageClassName: {{ .Values.persistence.storageClass }}
      {{ end }}
        accessModes:
          - {{ default "ReadWriteOnce" .Values.persistence.accessMode }}
        resources:
          requests:
            storage: {{ .Values.persistence.size | quote }}
      {{- end }}
  {{- end }}
  version: "{{ .Values.image.tag }}"
  # Soft anti-affinity keyed on the node hostname for pods carrying these labels.
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            topologyKey: kubernetes.io/hostname
            labelSelector:
              matchLabels:
                app: {{ template "app.name" . }}
                prometheus: {{ .Release.Name }}
  {{- if .Values.tolerations }}
  tolerations:
{{ toYaml .Values.tolerations | indent 4 }}
  {{- end }}
  imagePullSecrets:
{{ toYaml .Values.image.pullSecrets | indent 4 }}
  # The two references below point at the Secrets named
  # <app.fullname>-additional-scrape-configs / -additional-alertmanager-configs.
  {{- if and .Values.additionalScrapeConfigsEnabled .Values.additionalScrapeConfigs }}
  additionalScrapeConfigs:
    name: {{ template "app.fullname" . }}-additional-scrape-configs
    key: additional-scrape-configs.yaml
  {{- end }}
  {{- if and .Values.additionalAlertManagerConfigsEnabled .Values.additionalAlertManagerConfigs }}
  additionalAlertManagerConfigs:
    name: {{ template "app.fullname" . }}-additional-alertmanager-configs
    key: additional-alertmanager-configs.yaml
  {{- end }}
  {{- if .Values.sidecarsSpec }}
  containers:
{{ toYaml .Values.sidecarsSpec | indent 4 }}
  {{- end }}
{{- if and .Values.additionalScrapeConfigsEnabled .Values.additionalScrapeConfigs }}
# Secret carrying the extra scrape configs, serialized to YAML and base64-encoded.
# Rendered only when the feature flag is on and the list is non-empty.
apiVersion: v1
kind: Secret
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}-additional-scrape-configs
data:
  additional-scrape-configs.yaml: {{ toYaml .Values.additionalScrapeConfigs | b64enc | quote }}
{{- end }}
{{- if and .Values.additionalAlertManagerConfigsEnabled .Values.additionalAlertManagerConfigs }}
---
# Secret carrying the extra Alertmanager configs, encoded the same way.
apiVersion: v1
kind: Secret
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}-additional-alertmanager-configs
data:
  additional-alertmanager-configs.yaml: {{ toYaml .Values.additionalAlertManagerConfigs | b64enc | quote }}
{{- end }}
# ClusterIP Service named access-prometheus.
# Its selector carries component=nginx, so port 80 is routed to the named
# "http" port of the nginx proxy pods rather than to Prometheus directly.
apiVersion: v1
kind: Service
metadata:
  name: access-prometheus
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    kubernetes.io/cluster-service: "true"
spec:
  type: ClusterIP
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
    component: nginx
  ports:
    - name: http
      port: 80
      targetPort: http
# ServiceMonitor for Prometheus itself.
# Selects Services in the release namespace that carry the app/chart/release
# labels plus monitoring.cattle.io="true", and scrapes their "http" port every
# 30 seconds.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
    {{- end }}
  name: {{ template "app.fullname" . }}
spec:
  jobLabel: prometheus
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace | quote }}
  endpoints:
    - port: http
      interval: 30s
enabledRBAC: true
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Alertmanagers to which alerts will be sent
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
##
alertingEndpoints: []
# - name: ""
# namespace: ""
# port: http
# scheme: http
## External labels to add to any time series or alerts when communicating with external systems
##
externalLabels: {}
## External URL at which Prometheus will be reachable
##
externalUrl: ""
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
## Custom labels to be added to Prometheus Rules CRDs
##
additionalRulesLabels: {}
## Prometheus container image
##
image:
## Reference to one or more secrets to be used when pulling images
##
pullSecrets: []
repository: quay.io/prometheus/prometheus
tag: v2.4.3
inits:
tools:
repository: maiwj/curl
tag: 7.56.1-r0
## Labels to be added to the Prometheus
##
# labels: {}
## Node labels for Prometheus pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
## This is a list, so the empty default is [] rather than a map; a user-supplied
## list then overrides a value of the same type.
tolerations: []
# - key: "key"
#   operator: "Equal"
#   value: "value"
#   effect: "NoSchedule"
## If true, the Operator won't process any Prometheus configuration changes
##
paused: false
## Number of Prometheus replicas desired
##
replicaCount: 1
## The remote_read spec configuration for Prometheus (a list of endpoints).
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
remoteRead: []
# remoteRead:
#   - url: http://remote1/read
## The remote_write spec configuration for Prometheus (a list of endpoints).
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotewritespec
remoteWrite: []
# remoteWrite:
#   - url: http://remote1/push
## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# requests:
# memory: 400Mi
## How long to retain metrics
##
retention: 24h
## Prefix used to register routes, overriding externalUrl route.
## Useful for proxies that rewrite URLs.
##
routePrefix: ""
## Namespaces to be selected for PrometheusRules discovery.
## If unspecified, only the namespace of the Prometheus object itself is used.
ruleNamespaceSelector: {}
## Rules CRD selector
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/design.md
##
## 1. If `matchLabels` is used, `rules.additionalLabels` must contain all the labels from
## `matchLabels` in order to be matched by Prometheus
## 2. If `matchExpressions` is used `rules.additionalLabels` must contain at least one label
## from `matchExpressions` in order to be matched by Prometheus
## Ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels
rulesSelector: {}
# rulesSelector: {
# matchExpressions: [{key: prometheus, operator: In, values: [example-rules, example-rules-2]}]
# }
### OR
# rulesSelector: {
# matchLabels: {role: example-rules}
# }
## List of Secrets in the same namespace as the Prometheus
## object, which shall be mounted into the Prometheus Pods.
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
##
secrets: []
serviceMonitorNamespaceSelector: {}
## Service monitors selector
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/design.md
##
serviceMonitorsSelector: {}
logLevel: "info"
## Prometheus StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# storageClassName: gluster
# accessModes: ["ReadWriteOnce"]
# resources:
# requests:
# storage: 50Gi
# selector: {}
## Easy way to create persistent data
##
persistence: {}
# enabled: true
# storageClass: gluster
# accessMode: "ReadWriteOnce"
# size: 50Gi
## Prometheus AdditionalScrapeConfigs
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
##
additionalScrapeConfigsEnabled: false
additionalScrapeConfigs: []
# - job_name: "prometheus"
# static_configs:
# - targets:
# - "localhost:9090"
## Prometheus AdditionalAlertManagerConfigs
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
##
additionalAlertManagerConfigsEnabled: false
additionalAlertManagerConfigs: {}
# static_configs:
# - targets:
# - "localhost:9093"
listenLocal: false
sidecarsSpec: []
# - name: sidecar
# image: registry/name:tag
categories:
- Monitoring
questions:
# Sub-charts bundled under ./charts. Each one is pulled in only when the value
# named by its `condition` is true in the parent chart's values.
dependencies:
  - name: alertmanager
    version: 0.0.1
    condition: alertmanager.enabled
    repository: "file://./charts/alertmanager/"
  - name: exporter-coredns
    version: 0.0.1
    condition: exporter-coredns.enabled
    repository: "file://./charts/exporter-coredns/"
  - name: exporter-kube-controller-manager
    version: 0.0.1
    condition: exporter-kube-controller-manager.enabled
    repository: "file://./charts/exporter-kube-controller-manager/"
  - name: exporter-kube-dns
    version: 0.0.1
    condition: exporter-kube-dns.enabled
    repository: "file://./charts/exporter-kube-dns/"
  - name: exporter-kube-etcd
    version: 0.0.1
    condition: exporter-kube-etcd.enabled
    repository: "file://./charts/exporter-kube-etcd/"
  - name: exporter-kube-scheduler
    version: 0.0.1
    condition: exporter-kube-scheduler.enabled
    repository: "file://./charts/exporter-kube-scheduler/"
  - name: exporter-kube-state
    version: 0.0.1
    condition: exporter-kube-state.enabled
    repository: "file://./charts/exporter-kube-state/"
  - name: exporter-kubelets
    version: 0.0.1
    condition: exporter-kubelets.enabled
    repository: "file://./charts/exporter-kubelets/"
  - name: exporter-kubernetes
    version: 0.0.1
    condition: exporter-kubernetes.enabled
    repository: "file://./charts/exporter-kubernetes/"
  - name: exporter-node
    version: 0.0.1
    condition: exporter-node.enabled
    repository: "file://./charts/exporter-node/"
  - name: grafana
    version: 0.0.1
    condition: grafana.enabled
    repository: "file://./charts/grafana/"
  - name: prometheus
    version: 0.0.1
    condition: prometheus.enabled
    repository: "file://./charts/prometheus/"
  - name: metric-expression-project
    version: 0.0.1
    condition: metric-expression-project.enabled
    repository: "file://./charts/metric-expression-project/"
  - name: metric-expression-cluster
    version: 0.0.1
    condition: metric-expression-cluster.enabled
    repository: "file://./charts/metric-expression-cluster/"
  - name: exporter-fluentd
    version: 0.0.1
    condition: exporter-fluentd.enabled
    repository: "file://./charts/exporter-fluentd/"
\ No newline at end of file
{{/* vim: set filetype=mustache: */}}
{{/* Kubelets exporter name for this release: "exporter-kubelets-<release name>". */}}
{{- define "charts.exporter-kubelets.fullname" -}}
{{- .Release.Name | printf "exporter-kubelets-%s" -}}
{{- end -}}
{{/* Prometheus ServiceAccount name for this release: "prometheus-<release name>". */}}
{{- define "charts.prometheus.serviceaccount.fullname" -}}
{{- .Release.Name | printf "prometheus-%s" -}}
{{- end -}}
{{/* Application name: .Values.nameOverride when set, otherwise the chart name. */}}
{{- define "app.name" -}}
{{- .Values.nameOverride | default .Chart.Name -}}
{{- end -}}
{{/*
Value for the "chart" label: "<app name>-<chart version>", with every "+" in the
version replaced by "_".
*/}}
{{- define "app.version" -}}
{{- printf "%s-%s" (include "app.name" .) (replace "+" "_" .Chart.Version) -}}
{{- end -}}
{{/* Base resource name: "<app name>-<release name>". */}}
{{- define "app.fullname" -}}
{{- printf "%s-%s" (include "app.name" .) .Release.Name -}}
{{- end -}}
{{/* The full name cut to at most 63 characters, with one trailing "-" removed. */}}
{{- define "app.dnsname" -}}
{{- trimSuffix "-" (trunc 63 (include "app.fullname" .)) -}}
{{- end -}}
{{/* Name with a "-psp" suffix: "<app name>-<release name>-psp". */}}
{{- define "app.psp.fullname" -}}
{{- printf "%s-psp" (include "app.fullname" .) -}}
{{- end -}}
{{/* Name with a "-nginx" suffix: "<app name>-<release name>-nginx". */}}
{{- define "app.nginx.fullname" -}}
{{- printf "%s-nginx" (include "app.fullname" .) -}}
{{- end -}}
{{/* Name with a "-dashboards" suffix: "<app name>-<release name>-dashboards". */}}
{{- define "app.dashboards.fullname" -}}
{{- printf "%s-dashboards" (include "app.fullname" .) -}}
{{- end -}}
{{/* Name with a "-hooks" suffix: "<app name>-<release name>-hooks". */}}
{{- define "app.hooks.fullname" -}}
{{- printf "%s-hooks" (include "app.fullname" .) -}}
{{- end -}}
{{/* Name with a "-cleanup" suffix: "<app name>-<release name>-cleanup". */}}
{{- define "app.cleanup.fullname" -}}
{{- printf "%s-cleanup" (include "app.fullname" .) -}}
{{- end -}}
{{/* Cluster version as "<major>.<minor>", taken from .Capabilities.KubeVersion. */}}
{{- define "kube_version" -}}
{{- .Capabilities.KubeVersion.Major }}.{{ .Capabilities.KubeVersion.Minor -}}
{{- end -}}
{{/*
apiVersion of the operator CRDs: "<.Values.apiGroup>/v1".
The fallback is applied to the group before formatting. The earlier form applied
default to the printf result, which is never empty, so an unset or empty
apiGroup rendered "/v1" instead of "monitoring.coreos.com/v1".
*/}}
{{- define "operator_api_version" -}}
{{- printf "%s/v1" (default "monitoring.coreos.com" .Values.apiGroup) -}}
{{- end -}}
{{/* API group of the operator CRDs: the part of operator_api_version before the first "/". */}}
{{- define "operator_api_group" -}}
{{- index (regexSplit "/" (include "operator_api_version" .) 2) 0 | printf "%s" -}}
{{- end -}}
{{/*
apiVersion for Deployments: the newest one the cluster advertises, tried in the
order apps/v1, apps/v1beta2, apps/v1beta1, with extensions/v1beta1 as the last
resort. The apps/v1beta2 branch now returns apps/v1beta2; it used to return
apps/v1beta1, unlike the statefulset and daemonset helpers.
*/}}
{{- define "deployment_api_version" -}}
{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
{{- "apps/v1" -}}
{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
{{- "apps/v1beta2" -}}
{{- else if .Capabilities.APIVersions.Has "apps/v1beta1" -}}
{{- "apps/v1beta1" -}}
{{- else -}}
{{- "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
apiVersion for StatefulSets: apps/v1 if the cluster advertises it, else
apps/v1beta2 if advertised, otherwise apps/v1beta1.
*/}}
{{- define "statefulset_api_version" -}}
{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
{{- "apps/v1" -}}
{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
{{- "apps/v1beta2" -}}
{{- else -}}
{{- "apps/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
apiVersion for DaemonSets: apps/v1 if the cluster advertises it, else
apps/v1beta2 if advertised, otherwise extensions/v1beta1.
*/}}
{{- define "daemonset_api_version" -}}
{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
{{- "apps/v1" -}}
{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
{{- "apps/v1beta2" -}}
{{- else -}}
{{- "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
apiVersion for RBAC objects: rbac.authorization.k8s.io/v1 if the cluster
advertises it, else v1beta1 if advertised, otherwise v1alpha1.
*/}}
{{- define "rbac_api_version" -}}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" -}}
{{- "rbac.authorization.k8s.io/v1" -}}
{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" -}}
{{- "rbac.authorization.k8s.io/v1beta1" -}}
{{- else -}}
{{- "rbac.authorization.k8s.io/v1alpha1" -}}
{{- end -}}
{{- end -}}
{{- if .Values.enabled }}
# Deployment of the prometheus-operator; rendered only when .Values.enabled is true.
# All operator behaviour is set through the command-line flags below, each fed
# from a chart value.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      containers:
        - name: prometheus-operator
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          args:
            - --kubelet-service={{ .Release.Namespace }}/expose-kubelets-metrics
            - --log-format={{ .Values.logFormat }}
            - --log-level={{ .Values.logLevel }}
            - --prometheus-config-reloader={{ .Values.image.prometheusConfigReloader.repository }}:{{ .Values.image.prometheusConfigReloader.tag }}
            - --config-reloader-image={{ .Values.image.configmapReload.repository }}:{{ .Values.image.configmapReload.tag }}
            - --labels=monitoring.cattle.io=true
            - --crd-apigroup={{ template "operator_api_group" . }}
            - --manage-crds={{ .Values.manageCRDs }}
            - --with-validation={{ .Values.withValidation }}
            - --disable-auto-user-group={{ .Values.disableAutoUserGroup }}
          ports:
            - containerPort: 8080
              name: http
          resources:
{{ toYaml .Values.resources | indent 12 }}
      {{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
      {{- end }}
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
      {{- end }}
{{- end }}
{{- if .Values.enabled }}
# ClusterIP Service exposing the operator's "http" port on 8080.
# The monitoring.cattle.io="true" label is what the operator ServiceMonitor
# selects on.
apiVersion: v1
kind: Service
metadata:
  name: expose-operator-metrics
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
spec:
  type: ClusterIP
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
  ports:
    - name: http
      port: 8080
      targetPort: http
{{- end }}
\ No newline at end of file
{{- if .Values.enabled }}
# ServiceMonitor for the operator's own metrics; rendered only when
# .Values.enabled is true. Scrapes the "http" port of matching Services in the
# release namespace every 30 seconds with honorLabels set.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
    {{- end }}
  name: {{ template "app.fullname" . }}
spec:
  jobLabel: prometheus-operator
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace | quote }}
  endpoints:
    - port: http
      interval: 30s
      honorLabels: true
{{- end }}
enabled: false
nameOverride: "prometheus-operator"
enabledRBAC: true
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Prometheus-operator image
##
image:
# Reference to one or more secrets to be used when pulling images
pullSecrets: []
repository: quay.io/coreos/prometheus-operator
tag: v0.23.2
## Prometheus-config-reloader image to use for config and rule reloading
##
prometheusConfigReloader:
repository: quay.io/coreos/prometheus-config-reloader
tag: v0.23.2
## Configmap-reload image to use for reloading configmaps
##
configmapReload:
repository: quay.io/coreos/configmap-reload
tag: v0.0.1
## Node labels for prometheus-operator pod assignment
##
nodeSelector:
beta.kubernetes.io/os: linux
## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
## This is a list, so the empty default is [] rather than a map.
tolerations: []
logFormat: "logfmt"
logLevel: "info"
manageCRDs: false
withValidation: true
disableAutoUserGroup: false
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
## Prometheus-operator resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# limits:
# cpu: 200m
# memory: 100Mi
# requests:
# cpu: 100m
# memory: 50Mi
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
alertmanager:
enabled: false
apiGroup: "monitoring.coreos.com"
image:
repository: quay.io/prometheus/alertmanager
tag: v0.15.2
nodeSelector:
beta.kubernetes.io/os: linux
config:
global:
resolve_timeout: 5m
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
receiver: 'null'
routes:
- match:
alertname: DeadMansSwitch
receiver: 'null'
receivers:
- name: 'null'
persistence:
enabled: false
storageClass: ""
accessMode: "ReadWriteOnce"
size: 50Gi
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
exporter-coredns:
enabled: false
apiGroup: "monitoring.coreos.com"
endpoints: []
ports:
metrics:
port: 9153
exporter-kube-controller-manager:
enabled: false
apiGroup: "monitoring.coreos.com"
endpoints: []
ports:
metrics:
port: 10252
exporter-kube-dns:
enabled: false
apiGroup: "monitoring.coreos.com"
endpoints: []
ports:
metrics:
dnsmasq:
port: 10054
skydns:
port: 10055
exporter-kube-etcd:
enabled: false
apiGroup: "monitoring.coreos.com"
endpoints: []
ports:
metrics:
scheme: "https"
port: 4001
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
certFile: ""
keyFile: ""
exporter-kube-scheduler:
enabled: false
apiGroup: "monitoring.coreos.com"
endpoints: []
ports:
metrics:
port: 10251
exporter-kube-state:
enabled: false
apiGroup: "monitoring.coreos.com"
ports:
metrics:
port: 8080
image:
repository: quay.io/coreos/kube-state-metrics
tag: v1.4.0
nodeSelector:
beta.kubernetes.io/os: linux
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
exporter-kubelets:
enabled: false
apiGroup: "monitoring.coreos.com"
exporter-kubernetes:
enabled: false
apiGroup: "monitoring.coreos.com"
exporter-node:
enabled: false
apiGroup: "monitoring.coreos.com"
image:
repository: quay.io/prometheus/node-exporter
tag: v0.16.0
nodeSelector:
beta.kubernetes.io/os: linux
ports:
metrics:
port: 9100
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
grafana:
enabled: false
level: cluster
apiGroup: "monitoring.coreos.com"
image:
repository: grafana/grafana
tag: 5.3.0
nodeSelector:
beta.kubernetes.io/os: linux
persistence:
enabled: false
storageClass: ""
accessMode: "ReadWriteOnce"
size: 50Gi
adminUser: "admin"
adminPassword: "admin"
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
prometheusDatasourceURL: "http://prometheus-operated:9090"
prometheus:
enabled: false
apiGroup: "monitoring.coreos.com"
image:
## Reference to one or more secrets to be used when pulling images
##
pullSecrets: []
repository: quay.io/prometheus/prometheus
tag: v2.4.3
inits:
tools:
repository: maiwj/curl
tag: 7.56.1-r0
nodeSelector:
beta.kubernetes.io/os: linux
persistence:
enabled: false
storageClass: ""
accessMode: "ReadWriteOnce"
size: 50Gi
alertingEndpoints: []
secrets: []
## Name of an existing ServiceAccount to use
##
serviceAccountName: ""
sidecarsSpec:
- args:
- --log.debug
- agent
- start
- --agent.proxy-url
- http://localhost:9090
- --listen.address
- $(POD_IP):9090
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: maiwj/prometheus-auth:0.1.0
livenessProbe:
failureThreshold: 6
httpGet:
path: /-/healthy
port: web
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 3
name: prometheus-agent
ports:
- containerPort: 9090
name: web
protocol: TCP
readinessProbe:
failureThreshold: 120
httpGet:
path: /-/ready
port: web
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 3
listenLocal: true
serviceMonitorsSelector:
matchExpressions:
additionalScrapeConfigsEnabled: true
additionalScrapeConfigs:
- job_name: 'ingress-nginx-endpoints'
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- ingress-nginx
- kube-system
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- source_labels: [__meta_kubernetes_service_name]
regex: prometheus-operated
action: drop
metric-expression-project:
enabled: false
metric-expression-cluster:
enabled: false
exporter-fluentd:
enabled: false
apiGroup: "monitoring.coreos.com"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment