Commit 296f2ecc by aiwantaozi Committed by Craig Jellick

Support monitoring in windows

Problem: Windows container/node metrics cannot be collected. Solution: 1. Deploy the Windows node exporter. 2. Add a Windows metrics Service, Endpoints, and ServiceMonitor. 3. Add metric relabel rules to unify the Windows and Linux expressions. Issues: https://github.com/rancher/rancher/issues/20513 https://github.com/rancher/rancher/issues/20076 https://github.com/rancher/rancher/issues/20122
parent 40a9e605
......@@ -55,6 +55,9 @@ spec:
{{- if .Values.insecureSkipVerify }}
insecureSkipVerify: true
{{- end }}
metricRelabelings:
- action: labeldrop
regex: (^id$|^image$|^name$|^cpu$)
relabelings:
- sourceLabels:
- __meta_kubernetes_pod_host_ip
......@@ -68,8 +71,34 @@ spec:
action: replace
regex: (.+)
replacement: $1
- port: https-metrics
scheme: https
path: /metrics/resource/v1alpha1
honorLabels: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.insecureSkipVerify }}
insecureSkipVerify: true
{{- end }}
metricRelabelings:
- action: replace
regex: (.+)
replacement: $1
sourceLabels:
- container
targetLabel: container_name
- action: replace
regex: (.+)
replacement: $1
sourceLabels:
- pod
targetLabel: pod_name
{{- else }}
- port: http-metrics
metricRelabelings:
- action: labeldrop
regex: (^id$|^image$|^name$|^cpu$)
relabelings:
- sourceLabels:
- __meta_kubernetes_pod_host_ip
......@@ -99,4 +128,20 @@ spec:
action: replace
regex: (.+)
replacement: $1
- port: http-metrics
path: /metrics/resource/v1alpha1
honorLabels: true
metricRelabelings:
- action: replace
regex: (.+)
replacement: $1
sourceLabels:
- container
targetLabel: container_name
- action: replace
regex: (.+)
replacement: $1
sourceLabels:
- pod
targetLabel: pod_name
{{- end }}
# Chart metadata for the Windows node exporter sub-chart.
apiVersion: v1
name: exporter-node-windows
version: 0.0.1
description: Windows Node Exporter service/endpoint and service monitor.
engine: gotpl
maintainers:
  - name: michelia
    email: michelia@rancher.com
\ No newline at end of file
# DaemonSet that runs the exporter container on every Windows node.
# Pods are pinned to Windows through the OS node label and tolerate all taints.
apiVersion: {{ template "daemonset_api_version" . }}
kind: DaemonSet
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
spec:
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "app.name" . }}
        chart: {{ template "app.version" . }}
        release: {{ .Release.Name }}
    spec:
      containers:
        - name: exporter-node
          image: {{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}
          {{- if .Values.args }}
          # Rendered through toYaml: printing the list directly would emit Go's
          # "[a b]" form, which YAML parses as a single-element list.
          args:
{{ toYaml .Values.args | indent 12 }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.ports.metrics.port }}
          env:
            # Quoted because environment variable values must be strings.
            - name: LISTEN_PORT
              value: "{{ .Values.ports.metrics.port }}"
            {{- if .Values.enabledCollectors }}
            - name: ENABLED_COLLECTORS
              value: {{ .Values.enabledCollectors | quote }}
            {{- end }}
            {{- if .Values.extraEnv }}
{{ toYaml .Values.extraEnv | indent 12 }}
            {{- end }}
          resources:
{{ toYaml .Values.resources | indent 12 }}
          volumeMounts:
            - name: wins-pipe
              mountPath: \\.\pipe\rancher_wins
      {{- if .Values.enabledRBAC }}
      # Falls back to the chart's full name when no service account is supplied.
      serviceAccountName: {{ default (include "app.fullname" .) .Values.serviceAccountName }}
      {{- end }}
      tolerations:
        - operator: Exists
      nodeSelector:
        # The OS label key depends on the cluster version: the beta key is used
        # below Kubernetes 1.14, the stable key otherwise.
        {{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion }}
        beta.kubernetes.io/os: windows
        {{- else }}
        kubernetes.io/os: windows
        {{- end }}
        # Extra selectors are given as "key=value" strings; an entry without "="
        # selects on the key with an empty value. Values are quoted so that
        # inputs such as true or 1 remain strings.
        {{- range .Values.nodeSelectors }}
        {{- $pair := regexSplit "=" . 2 }}
        {{- if eq 2 (len $pair) }}
        {{ index $pair 0 }}: {{ index $pair 1 | quote }}
        {{- else }}
        {{ index $pair 0 }}: ""
        {{- end }}
        {{- end }}
      volumes:
        # Host named pipe mounted into the container at the same path.
        - name: wins-pipe
          hostPath:
            path: \\.\pipe\rancher_wins
{{- if .Values.endpoints }}
# Endpoints object listing every address from .Values.endpoints on the metrics
# port. Rendered only when that list is non-empty.
apiVersion: v1
kind: Endpoints
metadata:
  name: expose-node-metrics-windows
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
subsets:
  - addresses:
      {{- range $ip := .Values.endpoints }}
      - ip: {{ $ip }}
      {{- end }}
    ports:
      - name: {{ .Values.ports.metrics.name }}
        port: {{ .Values.ports.metrics.port }}
        protocol: {{ .Values.ports.metrics.protocol }}
{{- end }}
\ No newline at end of file
{{- if and .Values.enabledRBAC (not .Values.serviceAccountName) }}
# RBAC objects for the exporter. Rendered only when RBAC is enabled and no
# existing service account name was supplied.
apiVersion: {{ template "rbac_api_version" . }}
kind: ClusterRole
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
rules:
  # Permission to create TokenReview and SubjectAccessReview objects.
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
# Emitted only when pull secrets are configured, so an unset value does not
# render as "imagePullSecrets: null".
{{- if .Values.image.pullSecrets }}
imagePullSecrets:
{{ toYaml .Values.image.pullSecrets | indent 2 }}
{{- end }}
---
# Binds the ClusterRole above to the ServiceAccount in the release namespace.
apiVersion: {{ template "rbac_api_version" . }}
kind: ClusterRoleBinding
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
  name: {{ template "app.fullname" . }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ template "app.fullname" . }}
subjects:
  - kind: ServiceAccount
    name: {{ template "app.fullname" . }}
    namespace: {{ .Release.Namespace }}
{{- end }}
\ No newline at end of file
# Service with clusterIP set to None and no pod selector, exposing the metrics
# port. It carries the .Values.apiGroup label with the value "true".
apiVersion: v1
kind: Service
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    {{ .Values.apiGroup }}: "true"
  name: expose-node-metrics-windows
spec:
  clusterIP: None
  type: ClusterIP
  ports:
    - name: {{ .Values.ports.metrics.name }}
      protocol: {{ .Values.ports.metrics.protocol }}
      port: {{ .Values.ports.metrics.port }}
      targetPort: {{ .Values.ports.metrics.port }}
# ServiceMonitor that scrapes the named metrics port of Services in the release
# namespace carrying the app/chart/release labels and the apiGroup label.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
  name: {{ template "app.fullname" . }}
spec:
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      {{ .Values.apiGroup }}: "true"
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace | quote }}
  endpoints:
    - port: {{ .Values.ports.metrics.name }}
      metricRelabelings:
        # Copy the volume and nic label values into a common device label.
        # The source labels are joined with the separator before matching, so
        # the separator has to be the ";" that the regex expects; with an empty
        # separator the pattern could never match.
        - sourceLabels: [volume, nic]
          separator: ';'
          regex: (.*);(.*)
          targetLabel: device
          action: replace
          replacement: $1$2
        # Set mode to "system" on wmi_cs_logical_processors samples.
        - sourceLabels: [__name__]
          regex: wmi_cs_logical_processors
          targetLabel: mode
          action: replace
          replacement: 'system'
# Recording rules that republish wmi_* series under node_* metric names.
# Each record name must appear only once per group.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    role: windows-node-recording-rules
    source: rancher-monitoring
  name: windows-node-recording-rules
  namespace: cattle-prometheus
spec:
  groups:
    - name: windows-cpu-recording.rules
      rules:
        - record: node_cpu_seconds_total
          expr: wmi_cpu_time_total
        # The node_load* series are the processor queue length averaged over
        # the 1, 5 and 15 minute windows.
        - record: node_load1
          expr: avg_over_time(wmi_system_processor_queue_length[1m])
        - record: node_load5
          expr: avg_over_time(wmi_system_processor_queue_length[5m])
        - record: node_load15
          expr: avg_over_time(wmi_system_processor_queue_length[15m])
    - name: windows-memory-recording.rules
      rules:
        - record: node_memory_MemAvailable_bytes
          expr: wmi_os_physical_memory_free_bytes
        - record: node_memory_MemTotal_bytes
          expr: wmi_cs_physical_memory_bytes
    - name: windows-network-io-recording.rules
      rules:
        - record: node_network_receive_bytes_total
          expr: wmi_net_bytes_received_total
        - record: node_network_transmit_bytes_total
          expr: wmi_net_bytes_sent_total
    - name: windows-network-packet-recording.rules
      rules:
        - record: node_network_receive_packets_total
          expr: wmi_net_packets_received_total
        - record: node_network_transmit_packets_total
          expr: wmi_net_packets_sent_total
        - record: node_network_receive_drop_total
          expr: wmi_net_packets_received_discarded
        - record: node_network_receive_errs_total
          expr: wmi_net_packets_received_errors
        - record: node_network_transmit_drop_total
          expr: wmi_net_packets_outbound_discarded
    - name: windows-disk-io-recording.rules
      rules:
        - record: node_disk_written_bytes_total
          expr: wmi_logical_disk_write_bytes_total
        - record: node_disk_read_bytes_total
          expr: wmi_logical_disk_read_bytes_total
    - name: windows-file-usage-recording.rules
      rules:
        - record: node_filesystem_size_bytes
          expr: wmi_logical_disk_size_bytes
        - record: node_filesystem_free_bytes
          expr: wmi_logical_disk_free_bytes
\ No newline at end of file
# When true, the DaemonSet pods are given a serviceAccountName, and the
# ClusterRole, ServiceAccount and ClusterRoleBinding are created unless
# serviceAccountName is set.
enabledRBAC: true
# For more details, see https://github.com/martinlindhe/wmi_exporter
# enabledCollectors: "net,os,service,system,cpu,cs,logical_disk"
\ No newline at end of file
......@@ -20,7 +20,7 @@ rules:
resources:
- subjectaccessreviews
verbs:
- creat
- create
---
apiVersion: v1
......
......@@ -49,6 +49,11 @@ dependencies:
condition: exporter-node.enabled
repository: "file://./charts/exporter-node/"
- name: exporter-node-windows
version: 0.0.1
condition: exporter-node-windows.enabled
repository: "file://./charts/exporter-node-windows/"
- name: grafana
version: 0.0.1
condition: grafana.enabled
......
......@@ -164,6 +164,21 @@ exporter-node:
##
serviceAccountName: ""
# Settings for the Windows node exporter sub-chart.
exporter-node-windows:
  # Referenced by the dependency condition exporter-node-windows.enabled.
  enabled: false
  # Used as a label key on the metrics Service and in the ServiceMonitor selector.
  apiGroup: "monitoring.coreos.com"
  # Addresses listed in the Endpoints object; that object is skipped when empty.
  endpoints: []
  # Extra DaemonSet node selectors, each written as a "key=value" string.
  nodeSelectors: []
  image:
    repository: rancher/wmi_exporter-package
    tag: v0.0.1
  ports:
    metrics:
      # Port name shared by the Service, Endpoints and ServiceMonitor.
      name: windows-metrics
      # NOTE(review): scheme is not read by any template visible here - confirm where it is consumed.
      scheme: https
      port: 9796
      protocol: TCP
exporter-kube-state:
enabled: false
apiGroup: "monitoring.coreos.com"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment