Commit fe313655 by frank

Add Rancher-Monitoring Chart

(+) Only use for Rancher 2.0 Monitoring and Alerting (+) Support Grafana to proxy with authorization bearer token to Prometheus-Auth agent (+) Support Prometheus web to proxy with authorization bearer token to Prometheus-Auth agent (+) Rich metrics for Kubernetes and Rancher Co-authored-by: aiwantaozi <michelia.feng@gmail.com> Co-authored-by: orangedeng <jxfa0043379@hotmail.com>
parent 0d9b4023
system-library
system-charts
============
Rancher 2.0 system library charts.
......
# Ignore everything in this directory
*
# Except this file
!.gitignore
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
# Helm chart metadata for rancher-monitoring.
# NOTE(review): appVersion presumably tracks the bundled prometheus-operator
# release - confirm before bumping.
apiVersion: v1
description: Provides monitoring for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: rancher-monitoring
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.1
appVersion: "0.23.2"
home: https://github.com/coreos/prometheus-operator
keywords:
- operator
- prometheus
icon: https://coreos.com/sites/default/files/inline-images/Overview-prometheus_0.png
# rancher-monitoring
Installs [prometheus-operator](https://github.com/coreos/prometheus-operator) to create/configure/manage Prometheus clusters atop Kubernetes.
> **Tip**: This chart is intended for Rancher Monitoring only!
## Introduction
This chart bootstraps a [prometheus-operator](https://github.com/coreos/prometheus-operator) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
### Security
Alertmanager, Node exporter, Kube-state exporter, Grafana and Prometheus in the same [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) use the same [ServiceAccount](https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/) as Prometheus, which is named like `prometheus-{{ .Release.Name }}`. The Operator uses a separate one.
## Prerequisites
- Rancher 2.1+
# Helm chart metadata for the alertmanager chart.
apiVersion: v1
description: Creates an Alertmanager CRD instance for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: alertmanager
version: 0.0.1
# Alertmanager custom resource (handled by prometheus-operator).
# Optional spec fields are rendered only when the matching value is set, so an
# unset value never produces an empty/null field in the manifest.
apiVersion: {{ template "operator_api_version" . }}
kind: Alertmanager
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- end }}
  name: {{ .Release.Name }}
spec:
  podMetadata:
    labels:
# Custom labels, when given, replace the default app/chart/release pod labels.
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 6 }}
{{- else }}
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
{{- end }}
  baseImage: "{{ .Values.image.repository }}"
{{- if .Values.externalUrl }}
  externalUrl: "{{ .Values.externalUrl }}"
{{- end }}
{{- if .Values.nodeSelector }}
  nodeSelector:
{{ toYaml .Values.nodeSelector | indent 4 }}
{{- end }}
  paused: {{ .Values.paused }}
  replicas: {{ .Values.replicaCount }}
  logLevel: {{ .Values.logLevel | quote }}
  resources:
{{ toYaml .Values.resources | indent 4 }}
  retention: "{{ .Values.retention }}"
{{- if .Values.routePrefix }}
  routePrefix: "{{ .Values.routePrefix }}"
{{- end }}
{{- if .Values.secrets }}
  secrets:
{{ toYaml .Values.secrets | indent 4 }}
{{- end }}
# Only reference a ServiceAccount when RBAC is enabled AND a name was supplied;
# an empty name would otherwise render "serviceAccountName:" with no value.
{{- if and .Values.enabledRBAC .Values.serviceAccountName }}
  serviceAccountName: {{ .Values.serviceAccountName }}
{{- end }}
{{- if or .Values.storageSpec .Values.persistence.enabled }}
  storage:
    volumeClaimTemplate:
      spec:
# A full storageSpec wins; otherwise the claim is built from the simpler
# persistence.* values.
{{- if .Values.storageSpec }}
{{ toYaml .Values.storageSpec | indent 8 }}
{{- else }}
{{- if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }}
        storageClassName: {{ .Values.persistence.storageClass }}
{{- end }}
        accessModes:
        - {{ default "ReadWriteOnce" .Values.persistence.accessMode }}
        resources:
          requests:
            storage: {{ .Values.persistence.size | quote }}
{{- end }}
{{- end }}
  version: "{{ .Values.image.tag }}"
  # Soft anti-affinity: prefer spreading replicas across different hosts.
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchLabels:
              app: {{ template "app.name" . }}
              alertmanager: {{ .Release.Name }}
{{- if .Values.tolerations }}
  tolerations:
{{ toYaml .Values.tolerations | indent 4 }}
{{- end }}
# image.pullSecrets has no default in values.yaml; skip the field when unset
# instead of rendering "imagePullSecrets: null".
{{- if .Values.image.pullSecrets }}
  imagePullSecrets:
{{ toYaml .Values.image.pullSecrets | indent 4 }}
{{- end }}
{{- if .Values.sidecarsSpec }}
  containers:
{{ toYaml .Values.sidecarsSpec | indent 4 }}
{{- end }}
apiVersion: v1
kind: Service
metadata:
name: expose-alertmanager-metrics
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
monitoring.cattle.io: "true"
spec:
type: ClusterIP
selector:
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- else }}
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
release: {{ .Release.Name }}
{{- end }}
ports:
- name: http
port: 9093
targetPort: web
\ No newline at end of file
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "app.nginx.fullname" . }}
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
component: nginx
data:
nginx.conf: |-
user nginx;
worker_processes auto;
error_log /dev/null warn;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)';
server {
listen 80;
access_log off;
gzip on;
gzip_min_length 1k;
gzip_comp_level 2;
gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png;
gzip_vary on;
gzip_disable "MSIE [1-6]\.";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
location / {
proxy_pass http://alertmanager-operated:9093/;
}
}
}
\ No newline at end of file
# nginx Deployment for Alertmanager: one replica running nginx with the
# nginx.conf taken from the ConfigMap named by "app.nginx.fullname".
# Pods carry component: nginx, which the access-alertmanager Service selects on.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
name: {{ template "app.nginx.fullname" . }}
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
component: nginx
spec:
replicas: 1
selector:
matchLabels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
release: {{ .Release.Name }}
component: nginx
template:
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
release: {{ .Release.Name }}
component: nginx
spec:
containers:
- name: nginx
image: nginx:1.15.2
# Run nginx in the foreground (-g "daemon off;") and load the mounted
# config (-c) from /nginx/nginx.conf.
args:
- nginx
- -g
- daemon off;
- -c
- /nginx/nginx.conf
volumeMounts:
- mountPath: /nginx/
name: alertmanager-nginx
ports:
- name: http
containerPort: 80
protocol: TCP
volumes:
- name: alertmanager-nginx
configMap:
# 438 is the decimal form of octal 0666 (the API takes file modes as decimal).
defaultMode: 438
items:
- key: nginx.conf
mode: 438
path: nginx.conf
name: {{ template "app.nginx.fullname" . }}
\ No newline at end of file
{{- /*
Alertmanager configuration Secret.
Rendered only when .Values.configFromSecret is empty. The "alertmanager.yaml"
key holds the base64-encoded YAML dump of .Values.config, and every entry of
.Values.templates is added as its own base64-encoded key.
*/ -}}
{{- if not .Values.configFromSecret }}
apiVersion: v1
kind: Secret
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.fullname" . }}
data:
alertmanager.yaml: {{ toYaml .Values.config | b64enc | quote }}
{{- range $key, $val := .Values.templates }}
{{ $key }}: {{ $val | b64enc | quote }}
{{- end }}
{{- end }}
apiVersion: v1
kind: Service
metadata:
name: access-alertmanager
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
kubernetes.io/cluster-service: "true"
spec:
type: ClusterIP
selector:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
release: {{ .Release.Name }}
component: nginx
ports:
- name: http
port: 80
targetPort: http
# ServiceMonitor for Alertmanager's own metrics.
# The selector matches the labels set on the expose-alertmanager-metrics
# Service, and the endpoint port name "http" is that Service's port name.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  # Was misspelled "altermanager".
  # NOTE(review): jobLabel names a Service label to take the job name from; the
  # matched Service defines no such label, so confirm the intended job naming.
  jobLabel: alertmanager
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  endpoints:
  - port: http
    interval: 30s
enabledRBAC: true
## Already exist ServiceAccount
##
serviceAccountName: ""
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## The name of a secret in the same kubernetes namespace which contains the Alertmanager config
## If defined this will be used instead of the `config` block values.
## The name of the secret must be alertmanager-{{ .Release.Name }} and its data must contain, at least, a key called `alertmanager.yaml`
## that contains the configuration as value.
##
configFromSecret: ""
## Alertmanager configuration directives
## Ref: https://prometheus.io/docs/alerting/configuration/
##
config: {}
#
# An example config:
# global:
# resolve_timeout: 5m
# route:
# group_by: ['job']
# group_wait: 30s
# group_interval: 5m
# repeat_interval: 12h
# receiver: 'null'
# routes:
# - match:
# alertname: DeadMansSwitch
# receiver: 'null'
# receivers:
# - name: 'null'
## Alertmanager template files to include
#
templates: {}
#
# An example template:
# template_1.tmpl: |-
# {{ define "cluster" }}{{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }}{{ end }}
#
# {{ define "slack.myorg.text" }}
# {{- $root := . -}}
# {{ range .Alerts }}
# *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
# *Cluster:* {{ template "cluster" $root }}
# *Description:* {{ .Annotations.description }}
# *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
# *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
# *Details:*
# {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
# {{ end }}
## External URL at which Alertmanager will be reachable
##
externalUrl: ""
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
## Alertmanager container image
##
image:
repository: quay.io/prometheus/alertmanager
tag: v0.15.2
## Labels to be added to the Alertmanager
##
# labels: {}
## Node labels for Alertmanager pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
## This is a list (see the example below), so the empty default is [] and not {}.
##
tolerations: []
# - key: "key"
#   operator: "Equal"
#   value: "value"
#   effect: "NoSchedule"
## If true, the Operator won't process any Alertmanager configuration changes
##
paused: false
## Number of Alertmanager replicas desired
##
replicaCount: 1
## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# requests:
# memory: 400Mi
## How long Alertmanager retains its data
##
retention: 24h
## Prefix used to register routes, overriding externalUrl route.
## Useful for proxies that rewrite URLs.
##
routePrefix: ""
## List of Secrets in the same namespace as the Alertmanager
## object, which shall be mounted into the Alertmanager Pods.
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
##
secrets: []
service:
## Maintains session affinity. Should be set to ClientIP for HA setup
## Only options are ClientIP and None. Do not leave blank.
sessionAffinity: None
## Annotations to be added to the Service
##
annotations: {}
## Cluster-internal IP address for Alertmanager Service
##
clusterIP: ""
## List of external IP addresses at which the Alertmanager Service will be available
##
externalIPs: []
## Labels to be added to the Service
##
labels: {}
## External IP address to assign to Alertmanager Service
## Only used if service.type is 'LoadBalancer' and supported by cloud provider
##
loadBalancerIP: ""
## List of client IPs allowed to access Alertmanager Service
## Only used if service.type is 'LoadBalancer' and supported by cloud provider
##
loadBalancerSourceRanges: []
## Port to expose on each node
## Only used if service.type is 'NodePort'
##
# nodePort: 30903
## Service type
##
type: ClusterIP
logLevel: "info"
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# storageClassName: gluster
# accessModes: ["ReadWriteOnce"]
# resources:
# requests:
# storage: 50Gi
# selector: {}
## Easy way to create persistent data
##
persistence: {}
# enabled: true
# storageClass: gluster
# accessMode: "ReadWriteOnce"
# size: 50Gi
sidecarsSpec: []
# - name: sidecar
# image: registry/name:tag
# Helm chart metadata for the exporter-coredns chart.
apiVersion: v1
description: Creates a ServiceMonitor CRD of coredns for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-coredns
version: 0.0.1
{{- if .Values.endpoints }}
apiVersion: v1
kind: Service
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
k8s-app: coredns
name: {{ template "app.dnsname" . }}
spec:
type: ClusterIP
clusterIP: None
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
- ip: {{ . }}
{{- end }}
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
{{- end }}
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
jobLabel: coredns
selector:
matchLabels:
k8s-app: coredns
namespaceSelector:
any: true
matchNames:
- "kube-system"
- {{ .Release.Namespace | quote }}
endpoints:
- port: metrics
interval: 15s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints
##
endpoints: []
ports:
metrics:
port: 9153
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
# Helm chart metadata for the exporter-fluentd chart.
apiVersion: v1
description: Creates a Fluentd Metrics Exporter instance for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: aiwantaozi
  email: michelia.feng@gmail.com
name: exporter-fluentd
version: 0.0.1
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
jobLabel: fluentd
selector:
matchLabels:
k8s-app: fluentd
namespaceSelector:
matchNames:
- cattle-logging
endpoints:
- port: metrics
interval: 15s
honorLabels: true
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Skip verification until we have resolved why the certificate validation
## for the kubelet on API server nodes fail.
##
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
# Helm chart metadata for the exporter-kube-controller-manager chart.
apiVersion: v1
description: Creates a ServiceMonitor CRD of controller manager for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-kube-controller-manager
version: 0.0.1
{{- if .Values.endpoints }}
apiVersion: v1
kind: Service
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
k8s-app: kube-controller-manager
name: {{ template "app.dnsname" . }}
spec:
type: ClusterIP
clusterIP: None
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
- ip: {{ . }}
{{- end }}
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
{{- end }}
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
jobLabel: kube-controller-manager
selector:
matchLabels:
k8s-app: kube-controller-manager
namespaceSelector:
any: true
matchNames:
- "kube-system"
- {{ .Release.Namespace | quote }}
endpoints:
- port: metrics
interval: 15s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
insecureSkipVerify: true
{{- end }}
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Skip verification until we have resolved why the certificate validation
## for the kubelet on API server nodes fail.
##
insecureSkipVerify: true
## Custom endpoints
##
endpoints: []
ports:
metrics:
port: 10252
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
# Helm chart metadata for the exporter-kube-dns chart.
apiVersion: v1
description: Creates a ServiceMonitor CRD of kube-dns for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-kube-dns
version: 0.0.1
{{- if .Values.endpoints }}
# Headless Service plus hand-maintained Endpoints, rendered only when custom
# endpoint IPs are supplied. The k8s-app label must equal the ServiceMonitor
# selector below; it previously read "coredns", copied from exporter-coredns.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    k8s-app: kube-dns
  name: {{ template "app.dnsname" . }}
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: dnsmasq-metrics
    port: {{ .Values.ports.metrics.dnsmasq.port }}
    protocol: TCP
    targetPort: {{ .Values.ports.metrics.dnsmasq.port }}
  - name: skydns-metrics
    port: {{ .Values.ports.metrics.skydns.port }}
    protocol: TCP
    targetPort: {{ .Values.ports.metrics.skydns.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  # Same name as the Service above so the two objects are paired.
  name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
  - ip: {{ . }}
{{- end }}
  ports:
  - name: dnsmasq-metrics
    port: {{ .Values.ports.metrics.dnsmasq.port }}
    protocol: TCP
  - name: skydns-metrics
    port: {{ .Values.ports.metrics.skydns.port }}
    protocol: TCP
{{- end }}
# ServiceMonitor scraping the dnsmasq and skydns metrics ports of Services
# labelled k8s-app: kube-dns.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
  name: {{ template "app.fullname" . }}
spec:
  jobLabel: kube-dns
  selector:
    matchLabels:
      k8s-app: kube-dns
  # NOTE(review): "any: true" selects every namespace, which presumably makes
  # matchNames redundant - confirm which of the two is intended.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  endpoints:
  - port: dnsmasq-metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  - port: skydns-metrics
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints
##
endpoints: []
ports:
metrics:
dnsmasq:
port: 10054
skydns:
port: 10055
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
# Helm chart metadata for the exporter-kube-etcd chart.
apiVersion: v1
description: Creates a ServiceMonitor CRD of etcd for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-kube-etcd
version: 0.0.1
{{- if .Values.endpoints }}
apiVersion: v1
kind: Service
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
k8s-app: etcd-server
name: {{ template "app.dnsname" . }}
spec:
type: ClusterIP
clusterIP: None
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
- ip: {{ . }}
{{- end }}
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
{{- end }}
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
jobLabel: kube-etcd
selector:
matchLabels:
k8s-app: etcd-server
namespaceSelector:
any: true
matchNames:
- "kube-system"
- {{ .Release.Namespace | quote }}
endpoints:
- port: metrics
interval: 15s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if eq .Values.ports.metrics.scheme "https" }}
scheme: https
tlsConfig:
caFile: {{ .Values.caFile }}
{{- if .Values.certFile }}
certFile: {{ .Values.certFile }}
{{- end }}
{{- if .Values.keyFile }}
keyFile: {{ .Values.keyFile }}
{{- end}}
{{- if .Values.insecureSkipVerify }}
insecureSkipVerify: true
{{- end }}
{{- end }}
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Skip verification until we have resolved why the certificate validation
## for the kubelet on API server nodes fail.
##
insecureSkipVerify: true
## TLS configuration for the service monitor; certFile and keyFile default to none and are appended only if passed
##
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
certFile: ""
keyFile: ""
## Custom endpoints
##
endpoints: []
ports:
metrics:
scheme: "https"
port: 4001
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
# Helm chart metadata for the exporter-kube-scheduler chart.
apiVersion: v1
description: Creates a ServiceMonitor CRD of scheduler for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-kube-scheduler
version: 0.0.1
{{- if .Values.endpoints }}
apiVersion: v1
kind: Service
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
k8s-app: kube-scheduler
name: {{ template "app.dnsname" . }}
spec:
type: ClusterIP
clusterIP: None
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
targetPort: {{ .Values.ports.metrics.port }}
---
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "app.dnsname" . }}
subsets:
- addresses:
{{- range .Values.endpoints }}
- ip: {{ . }}
{{- end }}
ports:
- name: metrics
port: {{ .Values.ports.metrics.port }}
protocol: TCP
{{- end }}
\ No newline at end of file
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
groups:
- name: kube-scheduler.rules
rules:
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.99"
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.9"
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.5"
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.99"
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.9"
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.5"
- record: cluster:scheduler_binding_latency_seconds:quantile
expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.99"
- record: cluster:scheduler_binding_latency_seconds:quantile
expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.9"
- record: cluster:scheduler_binding_latency_seconds:quantile
expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket)
BY (le, cluster)) / 1e+06
labels:
quantile: "0.5"
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
labels:
app: {{ template "app.name" . }}
chart: {{ template "app.version" . }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
source: rancher-monitoring
{{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
{{- end }}
name: {{ template "app.fullname" . }}
spec:
jobLabel: kube-scheduler
selector:
matchLabels:
k8s-app: kube-scheduler
namespaceSelector:
any: true
matchNames:
- "kube-system"
- {{ .Release.Namespace | quote }}
endpoints:
- port: metrics
interval: 15s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"
## Custom endpoints
##
endpoints: []
ports:
metrics:
port: 10251
serviceMonitor:
## Custom Labels to be added to ServiceMonitor
##
labels: {}
prometheusRule:
## Custom Labels to be added to PrometheusRule
##
labels: {}
# Helm chart metadata for the exporter-kube-state chart.
apiVersion: v1
description: Creates a Kube-state Exporter instance for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
  email: frank@rancher.com
name: exporter-kube-state
version: 0.0.1
# Deployment running the kube-state exporter container. The container serves
# on port 8080, which is also the target of both HTTP probes.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      # Optional scheduling constraint; omitted when the value is empty.
      {{- with .Values.nodeSelector }}
      nodeSelector:
{{ toYaml . | indent 8 }}
      {{- end }}
      containers:
      - name: kube-state
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
        ports:
        - name: http
          containerPort: 8080
          protocol: TCP
        livenessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 30
          timeoutSeconds: 30
        readinessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 30
          timeoutSeconds: 5
        resources:
          limits:
            cpu: 100m
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 130Mi
      # The ServiceAccount is only referenced when RBAC is enabled.
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
{{ toYaml . | indent 8 }}
      {{- end }}
# ClusterIP Service in front of the kube-state pods. It forwards the
# configurable "metrics" port to container port 8080 and carries the
# monitoring.cattle.io label.
apiVersion: v1
kind: Service
metadata:
  name: expose-kubernetes-metrics
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
spec:
  type: ClusterIP
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: 8080
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
# ServiceMonitor for the kube-state exporter: selects the release's Service
# (marked monitoring.cattle.io=true) in the release namespace and scrapes its
# "metrics" port every 15s, keeping the labels exposed by the target.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
    {{- end }}
spec:
  jobLabel: kube-state
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  endpoints:
  - port: metrics
    interval: 15s
    honorLabels: true
## When true, the Deployment sets serviceAccountName to the value below
##
enabledRBAC: true

## Name of an already existing ServiceAccount
##
serviceAccountName: ""

## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"

## Port exposed by the exporter's metrics Service
##
ports:
  metrics:
    port: 8080

# Default values for kube-state-metrics.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1

image:
  repository: quay.io/coreos/kube-state-metrics
  tag: v1.4.0

## Node Selector to constrain pods to run on particular nodes
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
nodeSelector: {}

## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
## Tolerations are a list in the pod spec, so the empty default is a list
## (not a map); this keeps the default and user overrides the same type.
tolerations: []
#  - key: "key"
#    operator: "Equal"
#    value: "value"
#    effect: "NoSchedule"

serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}
apiVersion: v1
description: Creates the ServiceMonitor CRD of kubelets for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: exporter-kubelets
version: 0.0.1
# ServiceMonitor for kubelets: selects Services labelled k8s-app=kubelet.
# The https-metrics port is scraped twice (default path and
# /metrics/cadvisor) with the service-account CA and bearer token; the plain
# http-metrics and cadvisor ports are listed as well.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "charts.exporter-kubelets.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
    {{- end }}
spec:
  jobLabel: kubelet
  selector:
    matchLabels:
      k8s-app: kubelet
  # NOTE(review): "any: true" and "matchNames" are both set; confirm which of
  # the two the operator is expected to honour here.
  namespaceSelector:
    any: true
    matchNames:
    - "kube-system"
    - {{ .Release.Namespace | quote }}
  endpoints:
  # Kubelet metrics over HTTPS.
  - port: https-metrics
    scheme: https
    interval: 15s
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      {{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
      {{- end }}
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  # cAdvisor metrics served by the kubelet over HTTPS.
  - port: https-metrics
    scheme: https
    path: /metrics/cadvisor
    interval: 30s
    honorLabels: true
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      {{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
      {{- end }}
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  # Plain-HTTP ports.
  - port: http-metrics
    interval: 15s
  - port: cadvisor
    interval: 30s
    honorLabels: true
## API group of the monitoring CRDs
##
apiGroup: "monitoring.coreos.com"

## Skip TLS verification of the HTTPS kubelet endpoints until it is resolved
## why certificate validation for the kubelet on API server nodes fails.
##
insecureSkipVerify: true

serviceMonitor:
  ## Custom labels added to the ServiceMonitor metadata
  ##
  labels: {}
apiVersion: v1
description: Creates the ServiceMonitor CRD of the apiserver for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: exporter-kubernetes
version: 0.0.1
# Recording rules for container and API-server metrics.
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
    {{- end }}
  name: {{ template "app.fullname" . }}
spec:
  groups:
  - name: kubernetes.rules
    rules:
    # Per-pod aggregates.
    - record: pod_name:container_memory_usage_bytes:sum
      expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
        (pod_name)
    - record: pod_name:container_spec_cpu_shares:sum
      expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name)
    - record: pod_name:container_cpu_usage:sum
      expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
        BY (pod_name)
    - record: pod_name:container_fs_usage_bytes:sum
      expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name)
    # Per-namespace aggregates.
    - record: namespace:container_memory_usage_bytes:sum
      expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace)
    - record: namespace:container_spec_cpu_shares:sum
      expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace)
    - record: namespace:container_cpu_usage:sum
      expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m]))
        BY (namespace)
    # Cluster-wide ratios against machine capacity.
    - record: cluster:memory_usage:ratio
      expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
        (cluster) / sum(machine_memory_bytes) BY (cluster)
    - record: cluster:container_spec_cpu_shares:ratio
      expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000
        / sum(machine_cpu_cores)
    - record: cluster:container_cpu_usage:ratio
      expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
        / sum(machine_cpu_cores)
    # API-server request latency quantiles, divided by 1e+06. All three
    # quantiles share one record name and differ only in the "quantile" label.
    - record: apiserver_latency_seconds:quantile
      expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) /
        1e+06
      labels:
        quantile: "0.99"
    # Previously recorded as "apiserver_latency:quantile_seconds", which split
    # the 0.9 quantile into a separate series name.
    - record: apiserver_latency_seconds:quantile
      expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) /
        1e+06
      labels:
        quantile: "0.9"
    - record: apiserver_latency_seconds:quantile
      expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) /
        1e+06
      labels:
        quantile: "0.5"
# ServiceMonitor for the API server: selects the Service labelled
# component=apiserver, provider=kubernetes in the "default" namespace and
# scrapes its "https" port every 15s with the service-account CA and token.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
    {{- end }}
spec:
  jobLabel: kubernetes
  namespaceSelector:
    matchNames:
    - "default"
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
  - port: https
    scheme: https
    interval: 15s
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      {{- if .Values.insecureSkipVerify }}
      insecureSkipVerify: true
      {{- end }}
## API group of the monitoring CRDs
##
apiGroup: "monitoring.coreos.com"

## Skip TLS certificate verification of the scraped endpoint.
## NOTE(review): the original note referred to certificate validation failing
## for the kubelet on API server nodes; confirm it still applies to this chart.
##
insecureSkipVerify: true

serviceMonitor:
  ## Custom labels added to the ServiceMonitor metadata
  ##
  labels: {}

prometheusRule:
  ## Custom labels added to the PrometheusRule metadata
  ##
  labels: {}
\ No newline at end of file
apiVersion: v1
description: Creates a Node Exporter instance for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: exporter-node
version: 0.0.1
\ No newline at end of file
# DaemonSet running the node exporter on the host network and PID namespace.
# The listen port is configurable and is also published as a hostPort.
apiVersion: {{ template "daemonset_api_version" . }}
kind: DaemonSet
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      # Exactly one "tolerations" key is rendered. Previously a hard-coded list
      # and the values-driven list were both emitted, producing a duplicate
      # mapping key in which the later (values-driven) entry won. That
      # precedence is kept: values override, built-in list is the fallback.
      tolerations:
      {{- if .Values.tolerations }}
{{ toYaml .Values.tolerations | indent 8 }}
      {{- else }}
        - operator: "Exists"
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
        - key: "node-role.kubernetes.io/etcd"
          operator: "Exists"
        - key: "node-role.kubernetes.io/controlplane"
          operator: "Exists"
      {{- end }}
      containers:
        - name: exporter-node
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          args:
          - --web.listen-address=0.0.0.0:{{ .Values.ports.metrics.port }}
          {{- if and .Values.container .Values.container.args }}
{{ toYaml .Values.container.args | indent 10 }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.ports.metrics.port }}
              hostPort: {{ .Values.ports.metrics.port }}
          resources:
{{ toYaml .Values.resources | indent 12 }}
          {{- if and .Values.container .Values.container.volumeMounts }}
          volumeMounts:
{{ toYaml .Values.container.volumeMounts | indent 10 }}
          {{- end }}
      # The ServiceAccount is only referenced when RBAC is enabled.
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      {{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
      {{- end }}
      hostNetwork: true
      hostPID: true
      {{- if and .Values.container .Values.container.volumes }}
      volumes:
{{ toYaml .Values.container.volumes | indent 6 }}
      {{- end }}
# Recording rules for node-exporter CPU, filesystem and network metrics.
apiVersion: {{ template "operator_api_version" . }}
kind: PrometheusRule
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.prometheusRule.labels }}
{{ toYaml .Values.prometheusRule.labels | indent 4 }}
    {{- end }}
  name: {{ template "app.fullname" . }}
spec:
  groups:
  - name: node.rules
    rules:
    # Per-instance aggregates.
    - record: instance:node_cpu:rate:sum
      expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m]))
        BY (instance)
    - record: instance:node_filesystem_usage:sum
      expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
        BY (instance)
    - record: instance:node_network_receive_bytes:rate:sum
      expr: sum(rate(node_network_receive_bytes[3m])) BY (instance)
    - record: instance:node_network_transmit_bytes:rate:sum
      expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
    - record: instance:node_cpu:ratio
      expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance)
        GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
    # Cluster-wide aggregates.
    - record: cluster:node_cpu:sum_rate5m
      expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m]))
    # Must read the series recorded just above; the former reference to
    # "cluster:node_cpu:rate5m" matched no recorded series in this group.
    - record: cluster:node_cpu:ratio
      expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu) BY (instance, cpu))
# ClusterIP Service in front of the node exporter pods. Service port and
# target port both use the configured metrics port.
apiVersion: v1
kind: Service
metadata:
  name: expose-node-metrics
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
spec:
  type: ClusterIP
  ports:
  - name: metrics
    protocol: TCP
    port: {{ .Values.ports.metrics.port }}
    targetPort: {{ .Values.ports.metrics.port }}
  selector:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    release: {{ .Release.Name }}
# ServiceMonitor for the node exporter: selects the release's Service (marked
# monitoring.cattle.io=true) in the release namespace and scrapes its
# "metrics" port every 15s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ template "app.fullname" . }}
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
    {{- end }}
spec:
  jobLabel: node
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  endpoints:
  - port: metrics
    interval: 15s
## When true, the DaemonSet sets serviceAccountName to the value below
##
enabledRBAC: true

## Name of an already existing ServiceAccount
##
serviceAccountName: ""

## API group of the monitoring CRDs
##
apiGroup: "monitoring.coreos.com"

## Port the exporter listens on; also used as hostPort and Service port
##
ports:
  metrics:
    port: 9100

# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1

image:
  repository: quay.io/prometheus/node-exporter
  tag: v0.16.0

resources:
  limits:
    cpu: 200m
    memory: 50Mi
  requests:
    cpu: 100m
    memory: 30Mi

## Extra container arguments plus the host paths they rely on
##
container:
  args:
  - --path.procfs=/host/proc
  - --path.sysfs=/host/sys
  volumes:
  - name: proc
    hostPath:
      path: /proc
  - name: sys
    hostPath:
      path: /sys
  volumeMounts:
  - name: proc
    mountPath: /host/proc
    readOnly: true
  - name: sys
    mountPath: /host/sys
    readOnly: true

## Tolerations for use with node taints
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations:
- effect: NoSchedule
  operator: Exists

## Node Selector to constrain pods to run on particular nodes
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
nodeSelector: {}

serviceMonitor:
  ## Custom labels added to the ServiceMonitor metadata
  ##
  labels: {}

prometheusRule:
  ## Custom labels added to the PrometheusRule metadata
  ##
  labels: {}
apiVersion: v1
description: Creates a Grafana instance for Kubernetes, maintained by Rancher 2.
engine: gotpl
maintainers:
- name: thxCode
email: frank@rancher.com
name: grafana
version: 0.0.1
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"-","editable":true,"gnetId":5508,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":0,"y":0},"id":6,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(instance) (rate(apiserver_request_count{code!~\"2..\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Error Rate","refId":"A","step":60},{"expr":"sum by(instance) (rate(apiserver_request_count[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Request Rate","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Rates","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":12,"y":0},"id":7,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Latency","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":0,"y":7},"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(nginx_requests_total[1m])","format":"time_series","intervalFactor":1,"legendFormat":"Request Rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Nginx Ingress Request Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":12,"y":7},"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"service_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service 
Depth","refId":"A"},{"expr":"volumes_depth","format":"time_series","intervalFactor":1,"legendFormat":"Volumes Depth","refId":"B"},{"expr":"replicationmanager_depth","format":"time_series","intervalFactor":1,"legendFormat":"Replication Manager Depth","refId":"C"},{"expr":"statefulset_depth","format":"time_series","intervalFactor":1,"legendFormat":"StatefulSet Depth","refId":"D"},{"expr":"serviceaccount_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service Account Depth","refId":"E"},{"expr":"endpoint_depth","format":"time_series","intervalFactor":1,"legendFormat":"Endpoint Depth","refId":"F"},{"expr":"deployment_depth","format":"time_series","intervalFactor":1,"legendFormat":"Deployment Depth","refId":"G"},{"expr":"daemonset_depth","format":"time_series","intervalFactor":1,"legendFormat":"DaemonSet Depth","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Controller Manager Queue Depth","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Components","uid":"Ld4acTYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [CPU resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu) in the cluster.\nFor comparison the total [allocatable CPU cores](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_cpu_cores) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable CPU Cores","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by 
(instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested CPU Cores","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Cores","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"CPU Cores","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":0},"hideTimeOverride":false,"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance)) / min(sum(kube_node_status_allocatable_cpu_cores) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"CPU Cores","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [memory resource 
requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) in the cluster.\nFor comparison the total [allocatable memory](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":8},"id":3,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable Memory","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested Memory","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Memory","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":8},"hideTimeOverride":false,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance)) / min(sum(kube_node_status_allocatable_memory_bytes) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"Memory","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Resource Requests","uid":"0MdTILxik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A quick dashboard for displaying Fluentd metrics.","editable":true,"gnetId":3522,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_queue_length","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_queue_length","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer queue 
length","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_total_queued_size","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_total_queued_size","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer total queued size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(245, 54, 54, 0.9)","rgba(45, 170, 3, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":12,"x":0,"y":14},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"fluentd_up","intervalFactor":2,"refId":"A","step":40}],"thresholds":"0,1","title":"Fluentd Up","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":8,"w":12,"x":12,"y":14},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_retry_count{pluginCategory=\"output\",pluginId=\"apache_log\"}","intervalFactor":2,"metric":"fluentd_retry_count","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd retry count (apache)","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"2017-10-20T13:00:11.189Z","to":"2017-10-20T13:38:24.045Z"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Rancher 
Components","uid":"wDHD1TYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396157762,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})","interval":"10s","intervalFactor":1,"legendFormat":"Current: {{ container_name }}","metric":"container_memory_usage_bytes","refId":"A","step":15},{"expr":"kube_pod_container_resource_requests_memory_bytes{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_memory_bytes","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_memory_bytes{pod=\"$pod\", 
container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container }}","metric":"kube_pod_container_resource_limits_memory_bytes","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))","intervalFactor":2,"legendFormat":"{{ container_name }}","refId":"A","step":30},{"expr":"kube_pod_container_resource_requests_cpu_cores{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_cpu_cores","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_cpu_cores{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container 
}}","metric":"kube_pod_container_resource_limits_cpu_cores","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{pod_name=\"$pod\"}[1m])))","intervalFactor":2,"legendFormat":"{{ pod_name }}","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network I/O","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(kube_pod_info, 
namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Pod","multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Container","multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{namespace=~\"$namespace\", pod=\"$pod\"}, container)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Pods","uid":"XSOTSYxiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false}
\ No newline at end of file
# ConfigMap bundling the chart's Grafana dashboard JSON files together with the
# "Rancher-Monitoring" Prometheus datasource definition.
#
# - dashboards/c_*.json are included only when .Values.level is "cluster".
# - dashboards/w_*.json are always included.
# - prometheus-datasource.json points at .Values.prometheusDatasourceURL and is
#   flagged as the default datasource ("isDefault"; the key previously carried
#   a stray colon -- "isDefault:" -- so the flag was never recognised).
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.dashboards.fullname" . }}
data:
{{- if eq .Values.level "cluster" }}
{{ (.Files.Glob "dashboards/c_*.json").AsConfig | indent 2 }}
{{- end }}
{{ (.Files.Glob "dashboards/w_*.json").AsConfig | indent 2 }}
  prometheus-datasource.json: |+
    {
      "access": "proxy",
      "basicAuth": false,
      "editable": false,
      "isDefault": true,
      "name": "Rancher-Monitoring",
      "type": "prometheus",
      "url": "{{ .Values.prometheusDatasourceURL }}"
    }
# Grafana Deployment (single replica).
#
# Init containers:
#   1. grafana-init-plugin-json-copy   - runs the copy hook (mounted over
#      /run.sh) to copy the stock Prometheus datasource plugin.json into the
#      shared emptyDir mounted at /host.
#   2. grafana-init-plugin-json-modify - runs the modify hook that writes the
#      patched plugin.json into the same emptyDir.
# Containers:
#   - grafana         : Grafana itself; the patched plugin.json is mounted over
#                       the stock one via subPath.
#   - grafana-watcher : watches the dashboard directories and talks to Grafana
#                       on http://127.0.0.1:3000.
#   - grafana-proxy   : nginx started with the chart's /nginx/nginx.conf.
apiVersion: {{ template "deployment_api_version" . }}
kind: Deployment
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      initContainers:
      - name: grafana-init-plugin-json-copy
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        volumeMounts:
        # The copy hook replaces the image's /run.sh.
        - name: grafana-static-hooks
          mountPath: /run.sh
          subPath: copy-datasource-plugin-json.sh
        - name: grafana-static-contents
          mountPath: /host
      - name: grafana-init-plugin-json-modify
        image: {{ .Values.image.inits.tools.repository }}:{{ .Values.image.inits.tools.tag }}
        command:
        - /usr/bin/modify-datasource-plugin-json.sh
        volumeMounts:
        - name: grafana-static-hooks
          mountPath: /usr/bin/modify-datasource-plugin-json.sh
          subPath: modify-datasource-plugin-json.sh
        - name: grafana-static-contents
          mountPath: /host
      containers:
      - name: grafana
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        env:
        - name: GF_AUTH_BASIC_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_SECURITY_ADMIN_USER
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: user
        - name: GF_SECURITY_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: password
        {{- if .Values.extraVars }}
{{ toYaml .Values.extraVars | indent 8 }}
        {{- end }}
        volumeMounts:
        - name: grafana-storage
          mountPath: /var/lib/grafana
        - name: grafana-static-contents
          mountPath: /usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json
          subPath: grafana/plugin.json
        # NOTE(review): no volume named "grafana-config" is declared under
        # "volumes" below, so enabling mountGrafanaConfig presumably needs one
        # to be added -- confirm before turning this on.
        {{- if .Values.mountGrafanaConfig }}
        - name: grafana-config
          mountPath: /etc/grafana
        {{- end }}
        ports:
        - name: web
          containerPort: 3000
          protocol: TCP
        readinessProbe:
          httpGet:
            path: /api/health
            port: 3000
          periodSeconds: 1
          timeoutSeconds: 1
          successThreshold: 1
          failureThreshold: 10
        {{- if .Values.resources }}
        resources:
{{ toYaml .Values.resources | indent 12 }}
        {{- end }}
      - name: grafana-watcher
        image: {{ .Values.grafanaWatcher.repository }}:{{ .Values.grafanaWatcher.tag }}
        args:
        - '--grafana-url=http://127.0.0.1:3000'
        - '--watch-dir=/var/grafana-dashboards'
        {{- range .Values.dashboardConfigmaps }}
        - '--watch-dir=/var/additional-dashboards/{{ . }}'
        {{- end }}
        env:
        - name: GRAFANA_USER
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: user
        - name: GRAFANA_PASSWORD
          valueFrom:
            secretKeyRef:
              name: {{ template "app.fullname" . }}
              key: password
        {{- if .Values.grafanaWatcher.resources }}
        resources:
{{ toYaml .Values.grafanaWatcher.resources | indent 12 }}
        {{- end }}
        volumeMounts:
        - name: grafana-dashboards
          mountPath: /var/grafana-dashboards
        {{- range .Values.dashboardConfigmaps }}
        - name: {{ . }}
          mountPath: /var/additional-dashboards/{{ . }}
        {{- end }}
      - name: grafana-proxy
        image: {{ .Values.grafanaProxy.repository }}:{{ .Values.grafanaProxy.tag }}
        args:
        - nginx
        - -g
        - daemon off;
        - -c
        - /nginx/nginx.conf
        ports:
        - name: http
          containerPort: 80
          protocol: TCP
        volumeMounts:
        - mountPath: /nginx/
          name: grafana-nginx
      {{- if .Values.nodeSelector }}
      nodeSelector:
      # The selector map must be nested deeper than the "nodeSelector" key;
      # it was previously rendered with "indent 4", which is shallower than
      # the key itself (tolerations below already used "indent 8").
{{ toYaml .Values.nodeSelector | indent 8 }}
      {{- end }}
      {{- if .Values.enabledRBAC }}
      serviceAccountName: {{ .Values.serviceAccountName }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
      {{- end }}
      volumes:
      # Hook scripts; mode 0777 so they can be executed directly.
      - name: grafana-static-hooks
        configMap:
          name: {{ template "app.hooks.fullname" . }}
          defaultMode: 0777
      # Scratch space shared between the init containers and grafana.
      - name: grafana-static-contents
        emptyDir: {}
      - name: grafana-storage
      {{- if or .Values.storageSpec .Values.persistence.enabled }}
        persistentVolumeClaim:
          claimName: {{ template "app.fullname" . }}
      {{- else }}
        emptyDir: {}
      {{- end }}
      - name: grafana-nginx
        configMap:
          defaultMode: 438
          items:
          - key: nginx.conf
            mode: 438
            path: nginx.conf
          name: {{ template "app.nginx.fullname" . }}
      - name: grafana-dashboards
        configMap:
          name: {{ template "app.dashboards.fullname" . }}
      {{- range .Values.dashboardConfigmaps }}
      - name: {{ . }}
        configMap:
          name: {{ . }}
      {{- end }}
# ConfigMap with the two shell hooks run by the Grafana init containers.
#
# copy-datasource-plugin-json.sh
#   Copies Grafana's stock Prometheus datasource plugin.json to
#   /host/grafana/raw-plugin.json. Exits 1 when the source file or /host is
#   missing.
# modify-datasource-plugin-json.sh
#   Reads raw-plugin.json and writes /host/grafana/plugin.json with an extra
#   "routes" entry that proxies "api/v1" to
#   {{ .Values.prometheusDatasourceURL }}/api/v1 with an Authorization header
#   built from the pod's ServiceAccount token. Exits 1 when the input is
#   missing or jq fails.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.hooks.fullname" . }}
data:
  copy-datasource-plugin-json.sh: |-
    #!/bin/bash
    srcpath="/usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json"
    dstpath="/host/grafana/raw-plugin.json"
    if [[ -f $srcpath ]] && [[ -d /host ]]; then
      mkdir -p /host/grafana
      cp -f $srcpath $dstpath
      cat $srcpath
      exit 0
    fi
    exit 1
  modify-datasource-plugin-json.sh: |-
    #!/bin/sh
    srcpath="/host/grafana/raw-plugin.json"
    dstpath="/host/grafana/plugin.json"
    if [ -f $srcpath ] && [ -d /host ]; then
      mkdir -p /host/grafana
      token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
      # Fail the init container instead of leaving an empty plugin.json behind
      # when jq is missing or rejects the input.
      if ! K8S_BEARERTOKEN="Bearer $token" jq 'to_entries | . + [{"key":"routes","value":[{"path":"api/v1","url":"{{ .Values.prometheusDatasourceURL }}/api/v1","headers":[{"name":"Authorization","content":env.K8S_BEARERTOKEN}]}]}] | from_entries' "$srcpath" > "$dstpath"; then
        echo "failed to add the Prometheus proxy route to $srcpath" >&2
        exit 1
      fi
      # Do not print the generated file: it embeds the ServiceAccount bearer
      # token, which would otherwise end up in the container log.
      echo "wrote $dstpath"
      exit 0
    fi
    exit 1
# ClusterIP Service publishing the Grafana pod's "web" port on 3000.
# It carries the monitoring.cattle.io="true" label in addition to the common
# app/chart/heritage/release labels.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    monitoring.cattle.io: "true"
  name: expose-grafana-metrics
spec:
  type: ClusterIP
  ports:
  - name: web
    port: 3000
    targetPort: web
  selector:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    release: {{ .Release.Name }}
\ No newline at end of file
# nginx configuration for the grafana-proxy sidecar.
#
# The single server listens on port 80 and forwards to Grafana on
# localhost:3000:
#   /api/dashboards - passed through unchanged
#   /api/search     - JSON responses have '"url":"/d' rewritten to '"url":"d'
#   /               - HTML responses have appSubUrl and absolute "url"/avatar
#                     paths rewritten to relative ones
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    component: nginx
  name: {{ include "app.nginx.fullname" . }}
data:
  nginx.conf: |-
    user nginx;
    worker_processes auto;
    error_log /dev/null warn;
    pid /var/run/nginx.pid;
    events {
      worker_connections 1024;
    }
    http {
      include /etc/nginx/mime.types;
      log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)';
      server {
        listen 80;
        access_log off;
        gzip on;
        gzip_min_length 1k;
        gzip_comp_level 2;
        gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png;
        gzip_vary on;
        gzip_disable "MSIE [1-6]\.";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        location /api/dashboards {
          proxy_pass http://localhost:3000;
        }
        location /api/search {
          proxy_pass http://localhost:3000;
          sub_filter_types application/json;
          sub_filter_once off;
          sub_filter '"url":"/d' '"url":"d';
        }
        location / {
          proxy_pass http://localhost:3000/;
          sub_filter_types text/html;
          sub_filter_once off;
          sub_filter '"appSubUrl":""' '"appSubUrl":"."';
          sub_filter '"url":"/' '"url":"./';
          sub_filter ':"/avatar/' ':"avatar/';
        }
      }
    }
{{- /*
PersistentVolumeClaim for Grafana's data directory.
Rendered only when .Values.storageSpec is non-empty or
.Values.persistence.enabled is set. A non-empty storageSpec is used verbatim
as the claim spec; otherwise the spec is assembled from the persistence
values (accessMode defaults to ReadWriteOnce, storageClassName is omitted
when storageClass is unset or "default").
*/ -}}
{{- if or .Values.storageSpec .Values.persistence.enabled -}}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "app.fullname" . }}
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
spec:
{{- if .Values.storageSpec }}
{{ toYaml .Values.storageSpec | indent 2 }}
{{- else }}
  accessModes:
    - {{ .Values.persistence.accessMode | default "ReadWriteOnce" }}
  {{- if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }}
  storageClassName: {{ .Values.persistence.storageClass }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.persistence.size | quote }}
{{- end }}
{{- end -}}
# Opaque Secret with the Grafana admin credentials (keys "user" and
# "password", both base64-encoded).
# When .Values.adminPassword is empty, a 10-character random alphanumeric
# password is generated at render time instead.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: {{ include "app.fullname" . }}
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
data:
  user: {{ .Values.adminUser | b64enc | quote }}
  password: {{ .Values.adminPassword | default (randAlphaNum 10) | b64enc | quote }}
# ClusterIP Service in front of the grafana-proxy container: port 80 is
# forwarded to the pod's named "http" port, with client-IP session affinity.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    kubernetes.io/cluster-service: "true"
  name: access-grafana
spec:
  type: ClusterIP
  sessionAffinity: ClientIP
  ports:
  - name: http
    port: 80
    targetPort: http
  selector:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    release: {{ .Release.Name }}
# ServiceMonitor that scrapes the "web" port every 30s on Services in the
# release namespace carrying the app/chart/release labels plus
# monitoring.cattle.io="true".
# Extra labels from .Values.serviceMonitor.labels are appended to the
# ServiceMonitor's own labels.
apiVersion: {{ include "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  name: {{ include "app.fullname" . }}
  labels:
    app: {{ include "app.name" . }}
    chart: {{ include "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
{{- with .Values.serviceMonitor.labels }}
{{ toYaml . | indent 4 }}
{{- end }}
spec:
  jobLabel: grafana
  endpoints:
  - port: web
    interval: 30s
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace | quote }}
  selector:
    matchLabels:
      app: {{ include "app.name" . }}
      chart: {{ include "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
## Default values for the Grafana sub-chart.

## Monitoring level; cluster-scoped dashboards (dashboards/c_*.json) are only
## shipped when this is "cluster".
##
level: cluster

## When true, the Deployment sets serviceAccountName to the value below.
##
enabledRBAC: true

## Name of an already existing ServiceAccount
##
serviceAccountName: ""

enabledPSP: true

## CRD apiGroup
##
apiGroup: "monitoring.coreos.com"

## Node labels for Grafana pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}

## Tolerations for use with node taints (a list, see the example below)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
  # - key: "key"
  #   operator: "Equal"
  #   value: "value"
  #   effect: "NoSchedule"

serviceMonitor:
  ## Custom Labels to be added to ServiceMonitor
  ##
  labels: {}

## Pass extra environment variables to the Grafana container.
##
# extraVars:
# - name: EXTRA_VAR_1
#   value: extra-var-value-1
# - name: EXTRA_VAR_2
#   value: extra-var-value-2
extraVars: []

## Grafana admin credentials stored in the chart's Secret.
## An empty adminPassword makes the Secret template generate a random one.
##
adminUser: "admin"
adminPassword: "admin"

## Grafana Docker image
##
image:
  repository: grafana/grafana
  tag: 5.3.0
  ## Image used by the init container that patches the datasource plugin.json
  ##
  inits:
    tools:
      repository: maiwj/curl
      tag: 7.56.1-r0

## Full PersistentVolumeClaim spec; when non-empty it is used verbatim.
##
storageSpec: {}
#   storageClassName: default
#   accessModes:
#     - ReadWriteOnce
#   resources:
#     requests:
#       storage: 2Gi
#   selector: {}

## Easy way to create persistent data
##
persistence: {}
#   enabled: true
#   storageClass: gluster
#   accessMode: "ReadWriteOnce"
#   size: 50Gi

## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
  # limits:
  #   memory: 200Mi
  #   cpu: 200m
  # requests:
  #   memory: 100Mi
  #   cpu: 100m

## A list of additional configmaps that contain -dashboard.json and/or -datasource.json files
## that should be imported into grafana.
dashboardConfigmaps: []

## URL of the Prometheus endpoint used by the bundled datasource.
##
prometheusDatasourceURL: ""

## nginx sidecar image
##
grafanaProxy:
  repository: nginx
  tag: 1.15.2

## grafana-watcher sidecar image
##
grafanaWatcher:
  repository: quay.io/coreos/grafana-watcher
  tag: v0.0.8

  ## Resource limits & requests
  ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
  resources: {}
    # requests:
    #   memory: "16Mi"
    #   cpu: "50m"
    # limits:
    #   memory: "32Mi"
    #   cpu: "100m"
# Chart metadata for metric-expression-cluster.
name: metric-expression-cluster
version: 0.0.1
apiVersion: v1
engine: gotpl
description: Creates Metrics CRD of Rancher monitoring graph
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# MonitorMetric definitions for the apiserver component.
# Each metric comes as a pair: a summary (details: "false") and a finer
# breakdown (details: "true") that adds a grouping label to the expression.

# Average request latency per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-latency-milliseconds-avg
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    level: cluster
    metric: request-latency-milliseconds-avg
    source: rancher-monitoring
spec:
  description: apiserver request latency milliseconds avg
  expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance) /1e+06
  legendFormat: '[[instance]]'
---
# Average request latency per instance and verb.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-latency-milliseconds-avg-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    level: cluster
    metric: request-latency-milliseconds-avg
    source: rancher-monitoring
spec:
  description: apiserver request latency milliseconds avg
  expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance, verb) /1e+06
  legendFormat: '[[verb]]([[instance]])'
---
# 5-minute request rate per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-count-sum-rate
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    graph: request-count
    level: cluster
    metric: request-count-sum-rate
    source: rancher-monitoring
spec:
  description: apiserver request count sum rate
  expression: sum(rate(apiserver_request_count[5m])) by (instance)
  legendFormat: '[[instance]]'
---
# 5-minute request rate per instance and response code.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-count-sum-rate-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    graph: request-count
    level: cluster
    metric: request-count-sum-rate
    source: rancher-monitoring
spec:
  description: apiserver request count sum rate
  expression: sum(rate(apiserver_request_count[5m])) by (instance, code)
  legendFormat: '[[code]]([[instance]])'
---
# 5-minute rate of requests whose response code is not 2xx, per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-error-count-sum-rate
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    graph: request-count
    level: cluster
    metric: request-error-count-sum-rate
    source: rancher-monitoring
spec:
  description: apiserver request error count sum rate
  expression: 'sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m])) by (instance)'
  legendFormat: '[[instance]]'
---
# 5-minute rate of non-2xx requests per instance and response code.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-error-count-sum-rate-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    graph: request-count
    level: cluster
    metric: request-error-count-sum-rate
    source: rancher-monitoring
spec:
  description: apiserver request error count sum rate
  expression: 'sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m])) by (instance, code)'
  legendFormat: '[[code]]([[instance]])'
---
# MonitorMetric definitions for the controllermanager component.
# Every *_depth series gets a cluster-wide sum (details: "false") and a
# per-instance sum (details: "true").
# controllermanager-deployment-depth and its -details variant are defined
# exactly once here: two objects of the same kind cannot share a name.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-volumes-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: sum(volumes_depth)
  legendFormat: Volumes depth
  description: controllermanager volumes depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-volumes-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: sum(volumes_depth) by (instance)
  legendFormat: Volumes depth([[instance]])
  description: controllermanager volumes depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-deployment-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: sum(deployment_depth)
  legendFormat: Deployment depth
  description: controllermanager deployment depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-deployment-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: sum(deployment_depth) by (instance)
  legendFormat: Deployment depth([[instance]])
  description: controllermanager deployment depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-replicaset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  expression: sum(replicaset_depth)
  legendFormat: Replicaset depth
  description: controllermanager replicaset depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-replicaset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  expression: sum(replicaset_depth) by (instance)
  legendFormat: Replicaset depth([[instance]])
  description: controllermanager replicaset depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-service-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  expression: sum(service_depth)
  legendFormat: Service depth
  description: controllermanager service depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-service-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  expression: sum(service_depth) by (instance)
  legendFormat: Service depth([[instance]])
  description: controllermanager service depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-serviceaccount-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  expression: sum(serviceaccount_depth)
  legendFormat: Serviceaccount depth
  description: controllermanager serviceaccount depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-serviceaccount-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  expression: sum(serviceaccount_depth) by (instance)
  legendFormat: Serviceaccount depth([[instance]])
  description: controllermanager serviceaccount depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-endpoint-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  expression: sum(endpoint_depth)
  legendFormat: Endpoint depth
  description: controllermanager endpoint depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-endpoint-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  expression: sum(endpoint_depth) by (instance)
  legendFormat: Endpoint depth([[instance]])
  description: controllermanager endpoint depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-daemonset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  expression: sum(daemonset_depth)
  legendFormat: Daemonset depth
  description: controllermanager daemonset depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-daemonset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  expression: sum(daemonset_depth) by (instance)
  legendFormat: Daemonset depth([[instance]])
  description: controllermanager daemonset depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-statefulset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth)
  legendFormat: Statefulset depth
  description: controllermanager statefulset depth
---
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-statefulset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth) by (instance)
  legendFormat: Statefulset depth([[instance]])
  description: controllermanager statefulset depth
---
# MonitorMetric definitions for the fluentd component.
# Each metric is a 5-minute rate, summed overall (details: "false") or
# grouped by a label (details: "true").

# Input records: overall.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  expression: sum(rate(fluentd_input_status_num_records_total[5m]))
  legendFormat: Input record number
---
# Input records: per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  expression: sum(rate(fluentd_input_status_num_records_total[5m])) by (instance)
  legendFormat: Input record number([[instance]])
---
# Output records: overall.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  expression: sum(rate(fluentd_output_status_num_records_total[5m]))
  legendFormat: Output record number
---
# Output records: per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  expression: sum(rate(fluentd_output_status_num_records_total[5m])) by (instance)
  legendFormat: Output record number([[instance]])
---
# Output errors: overall.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  expression: sum(rate(fluentd_output_status_num_errors[5m]))
  legendFormat: Plugin Output errors
---
# Output errors: per plugin type.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  expression: sum(rate(fluentd_output_status_num_errors[5m])) by (type)
  legendFormat: Plugin([[type]])
---
# Buffer queue length: overall.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: buffer-queue-length
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  description: Fluentd Buffer queue length
  expression: sum(rate(fluentd_output_status_buffer_queue_length[5m]))
  legendFormat: Buffer queue
---
# Buffer queue length: per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: buffer-queue-length-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  description: Fluentd Buffer queue length
  expression: sum(rate(fluentd_output_status_buffer_queue_length[5m])) by (instance)
  legendFormat: '[[instance]]'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "reading" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-reading
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection reading
  legendFormat: Reading connections
  expression: 'sum(nginx_connections{state="reading"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "reading", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-reading-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection reading
  legendFormat: Reading connections([[instance]])
  expression: 'sum(nginx_connections{state="reading"}) by (instance)'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "waiting" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-waiting
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection waiting
  legendFormat: Nginx waiting connection
  expression: 'sum(nginx_connections{state="waiting"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "waiting", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-waiting-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection waiting
  legendFormat: Nginx waiting connection([[instance]])
  expression: 'sum(nginx_connections{state="waiting"}) by (instance)'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "writing" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-writing
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection writing
  legendFormat: Writing connections
  expression: 'sum(nginx_connections{state="writing"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "writing", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-writing-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection writing
  legendFormat: Writing connections([[instance]])
  expression: 'sum(nginx_connections{state="writing"}) by (instance)'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "accepted" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-accepted
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection accepted
  legendFormat: Accepted connections
  expression: 'sum(nginx_connections{state="accepted"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "accepted", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-accepted-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection accepted
  legendFormat: Accepted connections([[instance]])
  expression: 'sum(nginx_connections{state="accepted"}) by (instance)'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "active" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-active
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection active
  legendFormat: Active connections
  expression: 'sum(nginx_connections{state="active"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "active", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-active-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection active
  legendFormat: Active connections([[instance]])
  expression: 'sum(nginx_connections{state="active"}) by (instance)'
---
# Ingress controller metric for the "nginx-connection" graph: sum of
# nginx_connections in state "handled" (aggregate, details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-handled
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
spec:
  description: ingresscontroller nginx connection handled
  legendFormat: Handled connections
  expression: 'sum(nginx_connections{state="handled"})'
---
# Details variant for the "nginx-connection" graph: nginx_connections in
# state "handled", summed per `instance`.
# The legend includes [[instance]] so the per-instance series can be told
# apart; without it every series would carry the same label.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-connection-handled-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
spec:
  description: ingresscontroller nginx connection handled
  legendFormat: Handled connections([[instance]])
  expression: 'sum(nginx_connections{state="handled"}) by (instance)'
---
# Ingress controller metric: maximum upstream_response_time_seconds_sum
# per (host, path), sorted in descending order (details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "false"
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: 'Upstream response seconds(host:[[host]] path:[[path]])'
  expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Details variant of the upstream-response-seconds metric. The expression
# and legend are the same as the details: "false" variant.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "true"
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: 'Upstream response seconds(host:[[host]] path:[[path]])'
  expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Ingress controller metric: maximum of the le="1" bucket of
# request_duration_seconds per (host, path) (details: "false").
# NOTE(review): a histogram bucket holds an observation count, while the
# description and legend speak of a duration - confirm this is intended.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    metric: request-process-seconds
    details: "false"
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: 'Request duration(host:[[host]] path:[[path]])'
  expression: 'max(request_duration_seconds_bucket{le="1"}) by (host, path)'
---
# Details variant of the request-process-seconds metric. The expression
# and legend are the same as the details: "false" variant.
# NOTE(review): a histogram bucket holds an observation count, while the
# description and legend speak of a duration - confirm this is intended.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    metric: request-process-seconds
    details: "true"
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: 'Request duration(host:[[host]] path:[[path]])'
  expression: 'max(request_duration_seconds_bucket{le="1"}) by (host, path)'
---
# Scheduler metric: 0.99 quantile of the end-to-end scheduling latency
# histogram, divided by 1e+06 to convert microseconds to seconds
# (details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "false"
spec:
  description: scheduler e 2 e scheduling latency seconds quantile
  legendFormat: E2E latency
  # Folded scalar: the lines below are joined with single spaces.
  expression: >-
    histogram_quantile(0.99,
    sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le, instance)) / 1e+06
---
# Details variant of the scheduler end-to-end latency metric: the same
# 0.99 quantile expression, with the `instance` label shown in the legend.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "true"
spec:
  description: scheduler e 2 e scheduling latency seconds quantile
  legendFormat: E2E latency([[instance]])
  # Folded scalar: the lines below are joined with single spaces.
  expression: >-
    histogram_quantile(0.99,
    sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le, instance)) / 1e+06
---
# Scheduler preemption attempts, aggregate series (details: "false"):
# 5m rate summed across everything, matching the instance-free legend.
# The grouping was previously swapped with the details variant below.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-total-preemption-attempts
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: total-preemption-attempts
    details: "false"
spec:
  description: Scheduler total preemption attempts
  legendFormat: Preemption attempts
  expression: sum(rate(scheduler_total_preemption_attempts[5m]))
---
# Scheduler preemption attempts, details variant (details: "true"):
# the same 5m rate grouped by `instance`, which the legend template
# [[instance]] requires in order to render a value.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-total-preemption-attempts-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: total-preemption-attempts
    details: "true"
spec:
  description: Scheduler total preemption attempts
  legendFormat: Preemption attempts([[instance]])
  expression: sum(rate(scheduler_total_preemption_attempts[5m])) by (instance)
---
# Scheduler metric: sum of kube_pod_status_scheduled series with
# condition="false" (details: "false").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: pod-unscheduler
    details: "false"
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: 'sum(kube_pod_status_scheduled{condition="false"})'
---
# Details variant of the pod-unscheduler metric. The expression and
# legend are the same as the details: "false" variant.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler-details
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    metric: pod-unscheduler
    details: "true"
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: 'sum(kube_pod_status_scheduled{condition="false"})'
---
# Cluster graph for apiserver request latency (y-axis unit: ms).
# Both selectors match on component/metric labels and differ only in the
# `details` label value.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: apiserver-request-latency
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: apiserver
    cluster-graph: kube-component
spec:
  title: apiserver-request-latency
  resourceType: apiserver
  displayResourceType: kube-component
  priority: 300
  yAxis:
    unit: ms
  metricsSelector:
    component: apiserver
    metric: request-latency-milliseconds-avg
    details: "false"
  detailsMetricsSelector:
    component: apiserver
    metric: request-latency-milliseconds-avg
    details: "true"
---
# Cluster graph for apiserver request count (y-axis unit: number).
# Both selectors match on component/metric labels and differ only in the
# `details` label value.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: apiserver-request-count
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: apiserver
    cluster-graph: kube-component
spec:
  title: apiserver-request-count
  resourceType: apiserver
  displayResourceType: kube-component
  priority: 301
  yAxis:
    unit: number
  metricsSelector:
    component: apiserver
    metric: request-count-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: apiserver
    metric: request-count-sum-rate
    details: "true"
\ No newline at end of file
# Cluster graph for CPU usage (y-axis unit: percent).
# Selects metrics labelled component=cluster,
# metric=cpu-usage-seconds-sum-rate.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-cpu-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-cpu-usage
  resourceType: cluster
  priority: 100
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: cpu-usage-seconds-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: cpu-usage-seconds-sum-rate
    details: "true"
---
# Cluster graph for CPU load (y-axis unit: number).
# Selects metrics by the `graph: cpu-load` label rather than a single
# `metric` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-cpu-load
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-cpu-load
  resourceType: cluster
  priority: 101
  yAxis:
    unit: number
  metricsSelector:
    component: cluster
    graph: cpu-load
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: cpu-load
    details: "true"
---
# Cluster graph for memory usage (y-axis unit: percent).
# Selects metrics labelled component=cluster, metric=memory-usage-percent.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-memory-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-memory-usage
  resourceType: cluster
  priority: 102
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: memory-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: memory-usage-percent
    details: "true"
---
# Cluster graph for filesystem usage (y-axis unit: percent).
# Selects metrics labelled component=cluster, metric=fs-usage-percent.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-fs-usage-percent
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-fs-usage-percent
  resourceType: cluster
  priority: 103
  thresholds: 10
  yAxis:
    unit: percent
  metricsSelector:
    component: cluster
    metric: fs-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: cluster
    metric: fs-usage-percent
    details: "true"
---
# Cluster graph for disk I/O (y-axis unit: kbps).
# Selects metrics by the `graph: disk-io` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-disk-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-disk-io
  resourceType: cluster
  priority: 104
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: cluster
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: disk-io
    details: "true"
---
# Cluster graph for network I/O (y-axis unit: kbps).
# Selects metrics by the `graph: network-io` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-network-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-network-io
  resourceType: cluster
  priority: 105
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: cluster
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: network-io
    details: "true"
---
# Cluster graph for network packets (y-axis unit: pps).
# Selects metrics by the `graph: network-packet` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: cluster-network-packet
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: cluster
spec:
  title: cluster-network-packet
  resourceType: cluster
  priority: 106
  thresholds: 10
  yAxis:
    unit: pps
  metricsSelector:
    component: cluster
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: cluster
    graph: network-packet
    details: "true"
\ No newline at end of file
---
# Cluster graph for controller-manager queue depth (y-axis unit: number).
# The selectors match on `component` and `details` only; no `metric` or
# `graph` label is specified.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: controllermanager-queue-depth
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: controllermanager
    cluster-graph: kube-component
spec:
  title: controllermanager-queue-depth
  resourceType: controllermanager
  displayResourceType: kube-component
  priority: 310
  yAxis:
    unit: number
  metricsSelector:
    component: controllermanager
    details: "false"
  detailsMetricsSelector:
    component: controllermanager
    details: "true"
---
# Etcd graph for the server leader sum, rendered as a singlestat
# (y-axis unit: number).
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-server-leader-sum
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-server-leader-sum
  description: etcd server leader sum
  resourceType: etcd
  priority: 200
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-leader-sum
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-leader-sum
    details: "true"
---
# Etcd graph for failed server proposals, rendered as a singlestat
# (y-axis unit: number).
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-server-failed-proposal
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-server-failed-proposal
  description: etcd server failed proposal
  resourceType: etcd
  priority: 201
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-failed-proposal
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-failed-proposal
    details: "true"
---
# Etcd graph for leader changes, rendered as a singlestat
# (y-axis unit: number).
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-leader-change
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-leader-change
  description: etcd leader change
  resourceType: etcd
  priority: 202
  graphType: singlestat
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    metric: server-leader-changes-seen-sum-increase
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: server-leader-changes-seen-sum-increase
    details: "true"
---
# Etcd graph for gRPC client traffic (y-axis unit: kbps).
# Selects metrics by the `graph: rpc-client-traffic` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-grpc-client
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-grpc-client
  description: etcd grpc client receive/send bytes sum rate
  resourceType: etcd
  priority: 203
  yAxis:
    unit: kbps
  metricsSelector:
    component: etcd
    graph: rpc-client-traffic
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: rpc-client-traffic
    details: "true"
---
# Etcd graph for database size (y-axis unit: byte).
# Unlike the other graph objects in this chart, this one also carries a
# `metric` label on the graph itself.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-db-bytes-sum
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
    metric: db-bytes-sum
spec:
  title: etcd-db-bytes-sum
  description: etcd db bytes sum
  resourceType: etcd
  priority: 204
  yAxis:
    unit: byte
  metricsSelector:
    component: etcd
    metric: db-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: etcd
    metric: db-bytes-sum
    details: "true"
---
# Etcd graph for lease/watch streams (y-axis unit: number).
# Selects metrics by the `graph: etcd-stream` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-stream
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-stream
  description: Etcd lease/watch stream
  resourceType: etcd
  priority: 205
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    graph: etcd-stream
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: etcd-stream
    details: "true"
---
# Etcd graph for peer traffic in/out (y-axis unit: kbps).
# Selects metrics by the `graph: etcd-peer-traffic` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-peer-traffic
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-peer-traffic
  description: Etcd peer traffic in/out
  resourceType: etcd
  priority: 206
  yAxis:
    unit: kbps
  metricsSelector:
    component: etcd
    graph: etcd-peer-traffic
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: etcd-peer-traffic
    details: "true"
---
# Etcd graph for raft proposals (y-axis unit: number).
# Selects metrics by the `graph: proposal` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-raft-proposals
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-raft-proposals
  description: Etcd raft proposals
  resourceType: etcd
  priority: 207
  yAxis:
    unit: number
  metricsSelector:
    component: etcd
    graph: proposal
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: proposal
    details: "true"
---
# Etcd graph for RPC rate (y-axis unit: ops).
# Selects metrics by the `graph: rpc-rate` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-rpc-rate
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-rpc-rate
  description: Etcd rpc-rate
  resourceType: etcd
  priority: 208
  yAxis:
    unit: ops
  metricsSelector:
    component: etcd
    graph: rpc-rate
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: rpc-rate
    details: "true"
---
# Etcd graph for disk operations (y-axis unit: seconds).
# Selects metrics by the `graph: disk-operate` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-disk-operate
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-disk-operate
  description: Etcd disk operate
  resourceType: etcd
  priority: 209
  yAxis:
    unit: seconds
  metricsSelector:
    component: etcd
    graph: disk-operate
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: disk-operate
    details: "true"
---
# Etcd graph for sync duration (y-axis unit: seconds).
# Selects metrics by the `graph: sync-duration` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: etcd-sync-duration
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: etcd
spec:
  title: etcd-sync-duration
  description: Etcd sync-duration
  resourceType: etcd
  # Priority 210 continues the 200..209 sequence of the other etcd graphs;
  # it was 209, which collided with etcd-disk-operate.
  priority: 210
  yAxis:
    unit: seconds
  metricsSelector:
    component: etcd
    graph: sync-duration
    details: "false"
  detailsMetricsSelector:
    component: etcd
    graph: sync-duration
    details: "true"
\ No newline at end of file
# Fluentd graph for input records (y-axis unit: number), displayed under
# the rancher-component resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-input-record-number
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-input-record-number
  resourceType: fluentd
  displayResourceType: rancher-component
  priority: 300
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: input-record
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: input-record
    details: "true"
---
# Fluentd graph for output records (y-axis unit: number), displayed under
# the rancher-component resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-output-record-number
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-output-record-number
  resourceType: fluentd
  displayResourceType: rancher-component
  priority: 301
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: output-record
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: output-record
    details: "true"
---
# Fluentd graph for output errors (y-axis unit: number), displayed under
# the rancher-component resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-output-errors
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-output-errors
  resourceType: fluentd
  displayResourceType: rancher-component
  # Priority 302 follows fluentd-output-record-number (301); it was 301,
  # the same value as that graph.
  priority: 302
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: output-errors
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: output-errors
    details: "true"
---
# Fluentd graph for buffer queue length (y-axis unit: number), displayed
# under the rancher-component resource type.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: fluentd-buffer-queue-length
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: fluentd
    cluster-graph: rancher-component
spec:
  title: fluentd-buffer-queue-length
  resourceType: fluentd
  displayResourceType: rancher-component
  # Priority 303 gives this graph a distinct value; it was 301, the same
  # value used by fluentd-output-record-number.
  priority: 303
  yAxis:
    unit: number
  metricsSelector:
    component: fluentd
    metric: buffer-queue-length
    details: "false"
  detailsMetricsSelector:
    component: fluentd
    metric: buffer-queue-length
    details: "true"
\ No newline at end of file
# Ingress controller graph for nginx connections (y-axis unit: number).
# Selects every metric labelled `graph: nginx-connection`, i.e. the
# per-state nginx connection metrics.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: ingresscontroller-nginx-connection
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-nginx-connection
  resourceType: ingresscontroller
  displayResourceType: kube-component
  priority: 330
  yAxis:
    unit: number
  metricsSelector:
    component: ingresscontroller
    graph: nginx-connection
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    graph: nginx-connection
    details: "true"
---
# Ingress controller graph for request processing time
# (y-axis unit: seconds).
# Selects metrics labelled metric=request-process-seconds.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: ingresscontroller-request-process-time
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-request-process-time
  resourceType: ingresscontroller
  displayResourceType: kube-component
  priority: 331
  yAxis:
    unit: seconds
  metricsSelector:
    component: ingresscontroller
    metric: request-process-seconds
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    metric: request-process-seconds
    details: "true"
---
# Ingress controller graph for upstream response time, rendered as a
# singlestat (y-axis unit: seconds).
# Selects metrics labelled metric=upstream-response-seconds.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: ingresscontroller-upstream-response-seconds
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
spec:
  title: ingresscontroller-upstream-response-seconds
  resourceType: ingresscontroller
  displayResourceType: kube-component
  priority: 332
  graphType: singlestat
  yAxis:
    unit: seconds
  metricsSelector:
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "false"
  detailsMetricsSelector:
    component: ingresscontroller
    metric: upstream-response-seconds
    details: "true"
\ No newline at end of file
# Node graph for CPU usage (y-axis unit: percent).
# Selects metrics labelled component=node,
# metric=cpu-usage-seconds-sum-rate.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-cpu-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-cpu-usage
  resourceType: node
  priority: 500
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: cpu-usage-seconds-sum-rate
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: cpu-usage-seconds-sum-rate
    details: "true"
---
# Node graph for CPU load (y-axis unit: number).
# Selects metrics by the `graph: cpu-load` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-cpu-load
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-cpu-load
  resourceType: node
  priority: 501
  yAxis:
    unit: number
  metricsSelector:
    component: node
    graph: cpu-load
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: cpu-load
    details: "true"
---
# Node graph for memory usage (y-axis unit: percent).
# Selects metrics labelled component=node, metric=memory-usage-percent.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-memory-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-memory-usage
  resourceType: node
  priority: 502
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: memory-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: memory-usage-percent
    details: "true"
---
# Node graph for filesystem usage (y-axis unit: percent).
# Selects metrics labelled component=node, metric=fs-usage-percent.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-fs-usage-percent
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-fs-usage-percent
  resourceType: node
  priority: 503
  thresholds: 10
  yAxis:
    unit: percent
  metricsSelector:
    component: node
    metric: fs-usage-percent
    details: "false"
  detailsMetricsSelector:
    component: node
    metric: fs-usage-percent
    details: "true"
---
# Node graph for disk I/O (y-axis unit: kbps).
# Selects metrics by the `graph: disk-io` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-disk-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-disk-io
  resourceType: node
  priority: 504
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: node
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: disk-io
    details: "true"
---
# Node graph for network I/O (y-axis unit: kbps).
# Selects metrics by the `graph: network-io` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-network-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-network-io
  resourceType: node
  priority: 505
  thresholds: 10
  yAxis:
    unit: kbps
  metricsSelector:
    component: node
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: network-io
    details: "true"
---
# Node graph for network packets (y-axis unit: pps).
# Selects metrics by the `graph: network-packet` label.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: node-network-packet
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: node
spec:
  title: node-network-packet
  resourceType: node
  priority: 506
  thresholds: 10
  yAxis:
    unit: pps
  metricsSelector:
    component: node
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: node
    graph: network-packet
    details: "true"
\ No newline at end of file
# Scheduler graph for end-to-end scheduling latency
# (y-axis unit: seconds).
# Selects metrics labelled
# metric=e-2-e-scheduling-latency-seconds-quantile.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-e-2-e-scheduling-latency-seconds-quantile
  resourceType: scheduler
  displayResourceType: kube-component
  priority: 320
  thresholds: 10
  yAxis:
    unit: seconds
  metricsSelector:
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: e-2-e-scheduling-latency-seconds-quantile
    details: "true"
---
# Scheduler graph for preemption attempts (y-axis unit: number).
# Selects metrics labelled metric=total-preemption-attempts.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: scheduler-total-preemption-attempts
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-total-preemption-attempts
  resourceType: scheduler
  displayResourceType: kube-component
  priority: 321
  thresholds: 10
  yAxis:
    unit: number
  metricsSelector:
    component: scheduler
    metric: total-preemption-attempts
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: total-preemption-attempts
    details: "true"
---
# Scheduler graph for pods that failed scheduling (y-axis unit: number).
# Selects metrics labelled metric=pod-unscheduler.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: scheduler-pod-unscheduler
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: scheduler
    cluster-graph: kube-component
spec:
  title: scheduler-pod-unscheduler
  resourceType: scheduler
  displayResourceType: kube-component
  priority: 322
  thresholds: 10
  yAxis:
    unit: number
  metricsSelector:
    component: scheduler
    metric: pod-unscheduler
    details: "false"
  detailsMetricsSelector:
    component: scheduler
    metric: pod-unscheduler
    details: "true"
\ No newline at end of file
# Chart metadata for the project-level metric-expression chart.
apiVersion: v1
name: metric-expression-project
version: 0.0.1
description: Creates Metrics CRD of Rancher monitoring graph
engine: gotpl
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# Project graph for container CPU usage (y-axis unit: mcpu).
# Note that the two selectors use different `graph` label values
# (container-cpu-usage vs container-cpu-usage-details).
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: container-cpu-usage
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: container
spec:
  # Quoted so the templated value is always a YAML string, both before
  # rendering (a bare `{{` starts a flow mapping) and after it.
  projectName: "{{ .ProjectName }}"
  title: container-cpu-usage
  resourceType: container
  priority: 800
  yAxis:
    unit: mcpu
  metricsSelector:
    component: container
    graph: container-cpu-usage
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: container-cpu-usage-details
    details: "true"
---
# Project graph for container memory usage (y-axis unit: byte).
# Selects metrics labelled component=container,
# metric=memory-usage-bytes-sum.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: container-memory-usage-bytes-sum
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: container
spec:
  # Quoted so the templated value is always a YAML string, both before
  # rendering (a bare `{{` starts a flow mapping) and after it.
  projectName: "{{ .ProjectName }}"
  title: container-memory-usage-bytes-sum
  resourceType: container
  priority: 801
  yAxis:
    unit: byte
  metricsSelector:
    component: container
    metric: memory-usage-bytes-sum
    details: "false"
  detailsMetricsSelector:
    component: container
    metric: memory-usage-bytes-sum
    details: "true"
---
# Project graph for container network I/O (y-axis unit: kbps).
# Selects metrics by the `graph: network-io` label.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: container-network-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: container
spec:
  # Quoted so the templated value is always a YAML string, both before
  # rendering (a bare `{{` starts a flow mapping) and after it.
  projectName: "{{ .ProjectName }}"
  title: container-network-io
  resourceType: container
  priority: 802
  yAxis:
    unit: kbps
  metricsSelector:
    component: container
    graph: network-io
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: network-io
    details: "true"
---
# Project graph for container network packets (y-axis unit: pps).
# Selects metrics by the `graph: network-packet` label.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: container-network-packet
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: container
spec:
  # Quoted so the templated value is always a YAML string, both before
  # rendering (a bare `{{` starts a flow mapping) and after it.
  projectName: "{{ .ProjectName }}"
  title: container-network-packet
  resourceType: container
  priority: 803
  yAxis:
    unit: pps
  metricsSelector:
    component: container
    graph: network-packet
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: network-packet
    details: "true"
---
# Project graph for container disk I/O (y-axis unit: kbps).
# Selects metrics by the `graph: disk-io` label.
apiVersion: management.cattle.io/v3
kind: ProjectMonitorGraph
metadata:
  name: container-disk-io
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: project
    component: container
spec:
  # Quoted so the templated value is always a YAML string, both before
  # rendering (a bare `{{` starts a flow mapping) and after it.
  projectName: "{{ .ProjectName }}"
  title: container-disk-io
  resourceType: container
  priority: 804
  yAxis:
    unit: kbps
  metricsSelector:
    component: container
    graph: disk-io
    details: "false"
  detailsMetricsSelector:
    component: container
    graph: disk-io
    details: "true"
---
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment