- 需求1:多个 Prometheus 集群的指标数据相互割裂,无法聚合到同一个看板中使用。
- 需求2:多个 Prometheus 集群的告警规则分散,需要集中管理。
-
部署 kube-prometheus-stack
-
View CodenameOverride: "" namespaceOverride: "" kubeTargetVersionOverride: "" kubeVersionOverride: "" fullnameOverride: "" commonLabels: {} crds:enabled: trueupgradeJob:enabled: falseforceConflicts: falseimage:busybox:registry: docker.iorepository: busyboxtag: "latest"sha: ""pullPolicy: IfNotPresentkubectl:registry: registry.k8s.iorepository: kubectltag: "" # defaults to the Kubernetes versionsha: ""pullPolicy: IfNotPresentenv: {}resources: {}extraVolumes: []extraVolumeMounts: []nodeSelector: {}affinity: {}tolerations: []topologySpreadConstraints: []labels: {}annotations: {}podLabels: {}podAnnotations: {}serviceAccount:create: falsename: "prometheus-k8s"annotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-rolelabels: {}automountServiceAccountToken: truecontainerSecurityContext:allowPrivilegeEscalation: falsereadOnlyRootFilesystem: truecapabilities:drop:- ALLpodSecurityContext:fsGroup: 65534runAsGroup: 65534runAsNonRoot: truerunAsUser: 65534seccompProfile:type: RuntimeDefault customRules: {} defaultRules:create: truerules:alertmanager: falseetcd: trueconfigReloaders: truegeneral: truek8sContainerCpuUsageSecondsTotal: truek8sContainerMemoryCache: truek8sContainerMemoryRss: truek8sContainerMemorySwap: truek8sContainerResource: truek8sContainerMemoryWorkingSetBytes: truek8sPodOwner: truekubeApiserverAvailability: truekubeApiserverBurnrate: truekubeApiserverHistogram: truekubeApiserverSlos: truekubeControllerManager: truekubelet: truekubeProxy: truekubePrometheusGeneral: truekubePrometheusNodeRecording: truekubernetesApps: truekubernetesResources: truekubernetesStorage: truekubernetesSystem: truekubeSchedulerAlerting: truekubeSchedulerRecording: truekubeStateMetrics: truenetwork: truenode: truenodeExporterAlerting: truenodeExporterRecording: trueprometheus: trueprometheusOperator: truewindows: trueappNamespacesOperator: "=~"appNamespacesTarget: ".*"keepFiringFor: ""labels: {}annotations: {}additionalRuleLabels: 
{}additionalRuleAnnotations: {}additionalRuleGroupLabels:alertmanager: {}etcd: {}configReloaders: {}general: {}k8sContainerCpuUsageSecondsTotal: {}k8sContainerMemoryCache: {}k8sContainerMemoryRss: {}k8sContainerMemorySwap: {}k8sContainerResource: {}k8sPodOwner: {}kubeApiserverAvailability: {}kubeApiserverBurnrate: {}kubeApiserverHistogram: {}kubeApiserverSlos: {}kubeControllerManager: {}kubelet: {}kubeProxy: {}kubePrometheusGeneral: {}kubePrometheusNodeRecording: {}kubernetesApps: {}kubernetesResources: {}kubernetesStorage: {}kubernetesSystem: {}kubeSchedulerAlerting: {}kubeSchedulerRecording: {}kubeStateMetrics: {}network: {}node: {}nodeExporterAlerting: {}nodeExporterRecording: {}prometheus: {}prometheusOperator: {}additionalRuleGroupAnnotations:alertmanager: {}etcd: {}configReloaders: {}general: {}k8sContainerCpuUsageSecondsTotal: {}k8sContainerMemoryCache: {}k8sContainerMemoryRss: {}k8sContainerMemorySwap: {}k8sContainerResource: {}k8sPodOwner: {}kubeApiserverAvailability: {}kubeApiserverBurnrate: {}kubeApiserverHistogram: {}kubeApiserverSlos: {}kubeControllerManager: {}kubelet: {}kubeProxy: {}kubePrometheusGeneral: {}kubePrometheusNodeRecording: {}kubernetesApps: {}kubernetesResources: {}kubernetesStorage: {}kubernetesSystem: {}kubeSchedulerAlerting: {}kubeSchedulerRecording: {}kubeStateMetrics: {}network: {}node: {}nodeExporterAlerting: {}nodeExporterRecording: {}prometheus: {}prometheusOperator: {}additionalAggregationLabels: []runbookUrl: "https://runbooks.prometheus-operator.dev/runbooks"node:fsSelector: 'fstype!=""'disabled: {} additionalPrometheusRulesMap: {} global:rbac:create: truepspEnabled: falsecreateAggregateClusterRoles: falseimageRegistry: ""imagePullSecrets: [] windowsMonitoring:enabled: false prometheus-windows-exporter:prometheus:monitor:enabled: truejobLabel: jobLabelreleaseLabel: truepodLabels:jobLabel: windows-exporterconfig: |-collectors:enabled: '[defaults],memory,container' alertmanager:enabled: falsenamespaceOverride: ""annotations: 
{}additionalLabels: {}apiVersion: v2enableFeatures: []forceDeployDashboards: falsenetworkPolicy:enabled: falsepolicyTypes:- Ingressgateway:namespace: ""podLabels: {}additionalIngress: []egress:enabled: falserules: []enableClusterRules: truemonitoringRules:prometheus: trueconfigReloader: trueserviceAccount:create: truename: "prometheus-k8s"annotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-roleautomountServiceAccountToken: truepodDisruptionBudget:enabled: falseminAvailable: 1unhealthyPodEvictionPolicy: AlwaysAllowconfig:global:resolve_timeout: 5minhibit_rules:- source_matchers:- 'severity = critical'target_matchers:- 'severity =~ warning|info'equal:- 'namespace'- 'alertname'- source_matchers:- 'severity = warning'target_matchers:- 'severity = info'equal:- 'namespace'- 'alertname'- source_matchers:- 'alertname = InfoInhibitor'target_matchers:- 'severity = info'equal:- 'namespace'- target_matchers:- 'alertname = InfoInhibitor'route:group_by: ['namespace']group_wait: 30sgroup_interval: 5mrepeat_interval: 12hreceiver: 'null'routes:- receiver: 'null'matchers:- alertname = "Watchdog"receivers:- name: 'null'templates:- '/etc/alertmanager/config/*.tmpl'stringConfig: ""tplConfig: falsetemplateFiles: {}ingress:enabled: falseingressClassName: ""annotations: {}labels: {}hosts: []paths: []tls: []route:main:enabled: falseapiVersion: gateway.networking.k8s.io/v1kind: HTTPRouteannotations: {}labels: {}hostnames: []parentRefs: []httpsRedirect: falsematches:- path:type: PathPrefixvalue: /filters: []additionalRules: []secret:annotations: {}ingressPerReplica:enabled: falseingressClassName: ""annotations: {}labels: {}hostPrefix: ""hostDomain: ""paths: []tlsSecretName: ""tlsSecretPerReplica:enabled: falseprefix: "alertmanager"service:enabled: trueannotations: {}labels: {}clusterIP: ""ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"port: 9093targetPort: 9093nodePort: 30903additionalPorts: []externalIPs: 
[]loadBalancerIP: ""loadBalancerSourceRanges: []externalTrafficPolicy: ClustersessionAffinity: NonesessionAffinityConfig:clientIP:timeoutSeconds: 10800type: ClusterIPservicePerReplica:enabled: falseannotations: {}port: 9093targetPort: 9093nodePort: 30904loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPserviceMonitor:selfMonitor: trueinterval: ""additionalLabels: {}sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""scheme: ""enableHttp2: truetlsConfig: {}bearerTokenFile:metricRelabelings: []relabelings: []additionalEndpoints: []alertmanagerSpec:persistentVolumeClaimRetentionPolicy: {}podMetadata: {}serviceName:image:registry: quay.iorepository: prometheus/alertmanagertag: v0.28.1sha: ""pullPolicy: IfNotPresentuseExistingSecret: falsesecrets: []automountServiceAccountToken: trueconfigMaps: []web: {}alertmanagerConfigSelector: {}alertmanagerConfigNamespaceSelector: {}alertmanagerConfiguration: {}alertmanagerConfigMatcherStrategy: {}additionalArgs: []logFormat: logfmtlogLevel: inforeplicas: 1retention: 7dstorage:volumeClaimTemplate:spec:storageClassName: "efs-sc"accessModes: ["ReadWriteMany"]resources:requests:storage: 200GiexternalUrl:routePrefix: /scheme: ""tlsConfig: {}paused: falsenodeSelector: {}resources: {}podAntiAffinity: "soft"podAntiAffinityTopologyKey: kubernetes.io/hostnameaffinity: {}tolerations: []topologySpreadConstraints: []securityContext:runAsGroup: 2000runAsNonRoot: truerunAsUser: 1000fsGroup: 2000seccompProfile:type: RuntimeDefaultlistenLocal: falsecontainers: []volumes: []volumeMounts: []initContainers: []priorityClassName: ""additionalPeers: []portName: "http-web"clusterAdvertiseAddress: falseclusterGossipInterval: ""clusterPeerTimeout: ""clusterPushpullInterval: ""clusterLabel: ""forceEnableClusterMode: falseminReadySeconds: 0additionalConfig: {}additionalConfigString: ""extraSecret:annotations: {}data: {} grafana:enabled: falsenamespaceOverride: ""forceDeployDatasources: 
falseforceDeployDashboards: falsedefaultDashboardsEnabled: trueoperator:dashboardsConfigMapRefEnabled: falseannotations: {}matchLabels: {}resyncPeriod: 10mfolder: GeneraldefaultDashboardsTimezone: utcdefaultDashboardsEditable: truedefaultDashboardsInterval: 1madminUser: adminadminPassword: prom-operatorrbac:pspEnabled: falseingress:enabled: falseannotations: {}labels: {}hosts: []path: /tls: []serviceAccount:create: trueautoMount: truesidecar:dashboards:enabled: truelabel: grafana_dashboardlabelValue: "1"searchNamespace: ALLenableNewTablePanelSyntax: falseannotations: {}multicluster:global:enabled: falseetcd:enabled: falseprovider:allowUiUpdates: falsedatasources:enabled: truedefaultDatasourceEnabled: trueisDefaultDatasource: truename: Prometheusuid: prometheusannotations: {}httpMethod: POSTcreatePrometheusReplicasDatasources: falseprometheusServiceName: prometheus-operatedlabel: grafana_datasourcelabelValue: "1"exemplarTraceIdDestinations: {}alertmanager:enabled: truename: Alertmanageruid: alertmanagerhandleGrafanaManagedAlerts: falseimplementation: prometheusextraConfigmapMounts: []deleteDatasources: []additionalDataSources: []prune: falseservice:portName: http-webipFamilies: []ipFamilyPolicy: ""serviceMonitor:enabled: truepath: "/metrics"labels: {}interval: ""scheme: httptlsConfig: {}scrapeTimeout: 30srelabelings: [] kubernetesServiceMonitors:enabled: true kubeApiServer:enabled: truetlsConfig:serverName: kubernetesinsecureSkipVerify: falseserviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""jobLabel: componentselector:matchLabels:component: apiserverprovider: kubernetesmetricRelabelings:- action: dropregex: (etcd_request|apiserver_request_slo|apiserver_request_sli|apiserver_request)_duration_seconds_bucket;(0\.15|0\.2|0\.3|0\.35|0\.4|0\.45|0\.6|0\.7|0\.8|0\.9|1\.25|1\.5|1\.75|2|3|3\.5|4|4\.5|6|7|8|9|15|20|40|45|50)(\.0)?sourceLabels:- __name__- lerelabelings: 
[]additionalLabels: {}targetLabels: [] kubelet:enabled: truenamespace: kube-systemserviceMonitor:enabled: truekubelet: trueattachMetadata:node: falseinterval: ""honorLabels: truehonorTimestamps: truetrackTimestampsStaleness: truesampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""https: trueinsecureSkipVerify: trueprobes: trueresource: falseresourcePath: "/metrics/resource/v1alpha1"resourceInterval: 10scAdvisor: truecAdvisorInterval: 10scAdvisorMetricRelabelings:- sourceLabels: [__name__]action: dropregex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)'- sourceLabels: [__name__]action: dropregex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)'- sourceLabels: [__name__]action: dropregex: 'container_memory_(mapped_file|swap)'- sourceLabels: [__name__]action: dropregex: 'container_(file_descriptors|tasks_state|threads_max)'- sourceLabels: [__name__]action: dropregex: 'container_spec.*'- sourceLabels: [id, pod]action: dropregex: '.+;'probesMetricRelabelings: []cAdvisorRelabelings:- action: replacesourceLabels: [__metrics_path__]targetLabel: metrics_pathprobesRelabelings:- action: replacesourceLabels: [__metrics_path__]targetLabel: metrics_pathresourceRelabelings:- action: replacesourceLabels: [__metrics_path__]targetLabel: metrics_pathmetricRelabelings:- action: dropsourceLabels: [__name__, le]regex: (csi_operations|storage_operation_duration)_seconds_bucket;(0.25|2.5|15|25|120|600)(\.0)?relabelings:- action: replacesourceLabels: [__metrics_path__]targetLabel: metrics_pathadditionalLabels: {}targetLabels: [] kubeControllerManager:enabled: trueendpoints: []service:enabled: trueport: nulltargetPort: nullipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 
0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""port: http-metricsjobLabel: jobLabelselector: {}https: nullinsecureSkipVerify: nullserverName: nullmetricRelabelings: []relabelings: []additionalLabels: {}targetLabels: [] coreDns:enabled: trueservice:enabled: trueport: 9153targetPort: 9153ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""port: http-metricsjobLabel: jobLabelselector: {}metricRelabelings: []relabelings: []additionalLabels: {}targetLabels: [] kubeDns:enabled: falseservice:dnsmasq:port: 10054targetPort: 10054skydns:port: 10055targetPort: 10055ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:interval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""jobLabel: jobLabelselector: {}metricRelabelings: []relabelings: []dnsmasqMetricRelabelings: []dnsmasqRelabelings: []additionalLabels: {}targetLabels: [] kubeEtcd:enabled: trueendpoints: []service:enabled: trueport: 2381targetPort: 2381ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""scheme: httpinsecureSkipVerify: falseserverName: ""caFile: ""certFile: ""keyFile: ""port: http-metricsjobLabel: jobLabelselector: {}metricRelabelings: []relabelings: []additionalLabels: {}targetLabels: [] kubeScheduler:enabled: trueendpoints: []service:enabled: trueport: nulltargetPort: nullipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""https: nullport: http-metricsjobLabel: 
jobLabelselector: {}insecureSkipVerify: nullserverName: nullmetricRelabelings: []relabelings: []additionalLabels: {}targetLabels: [] kubeProxy:enabled: trueendpoints: []service:enabled: trueport: 10249targetPort: 10249ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"serviceMonitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""port: http-metricsjobLabel: jobLabelselector: {}https: falsemetricRelabelings: []relabelings: []additionalLabels: {}targetLabels: [] kubeStateMetrics:enabled: true kube-state-metrics:namespaceOverride: ""rbac:create: truereleaseLabel: trueprometheusScrape: falseprometheus:monitor:enabled: trueinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0scrapeTimeout: ""proxyUrl: ""honorLabels: truemetricRelabelings: []relabelings: []selfMonitor:enabled: false nodeExporter:enabled: trueoperatingSystems:linux:enabled: trueaix:enabled: truedarwin:enabled: trueforceDeployDashboards: false prometheus-node-exporter:namespaceOverride: ""podLabels:jobLabel: node-exporterreleaseLabel: trueextraArgs:- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs|erofs)$service:portName: http-metricsipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"labels:jobLabel: node-exporterprometheus:monitor:enabled: truejobLabel: jobLabelinterval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0scrapeTimeout: ""proxyUrl: ""metricRelabelings: []relabelings: []rbac:pspEnabled: false prometheusOperator:enabled: truefullnameOverride: ""revisionHistoryLimit: 10strategy: 
{}tls:enabled: truetlsMinVersion: VersionTLS13internalPort: 10250livenessProbe:enabled: truefailureThreshold: 10initialDelaySeconds: 60periodSeconds: 30successThreshold: 1timeoutSeconds: 30readinessProbe:enabled: truefailureThreshold: 10initialDelaySeconds: 60periodSeconds: 30successThreshold: 1timeoutSeconds: 30admissionWebhooks:failurePolicy: ""timeoutSeconds: 30enabled: trueserviceAccount:create: truename: "kube-prom-stack-kube-prome-admission"annotations: {}resources:requests:cpu: 500mmemory: 500Milimits:cpu: 2048mmemory: 4096MicaBundle: ""annotations: {}namespaceSelector: {}objectSelector: {}matchConditions: {}mutatingWebhookConfiguration:annotations: {}validatingWebhookConfiguration:annotations: {}deployment:enabled: falsereplicas: 1strategy: {}podDisruptionBudget:enabled: falseminAvailable: 1unhealthyPodEvictionPolicy: AlwaysAllowrevisionHistoryLimit: 10tls:enabled: truetlsMinVersion: VersionTLS13internalPort: 10250serviceAccount:annotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-roleautomountServiceAccountToken: truecreate: truename: "prometheus-k8s"service:annotations: {}labels: {}clusterIP: ""ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"nodePort: 31080nodePortTls: 31443additionalPorts: []loadBalancerIP: ""loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPexternalIPs: []labels: {}annotations: {}podLabels: {}podAnnotations: {}image:registry: quay.iorepository: prometheus-operator/admission-webhooktag: ""sha: ""pullPolicy: IfNotPresentresources:limits:cpu: 200mmemory: 200Mirequests:cpu: 100mmemory: 100MihostNetwork: falsenodeSelector: {}tolerations: []affinity: {}dnsConfig: {}securityContext:fsGroup: 65534runAsGroup: 65534runAsNonRoot: truerunAsUser: 65534seccompProfile:type: RuntimeDefaultcontainerSecurityContext:allowPrivilegeEscalation: falsereadOnlyRootFilesystem: truecapabilities:drop:- ALLautomountServiceAccountToken: truepatch:enabled: 
trueimage:registry: registry.k8s.iorepository: ingress-nginx/kube-webhook-certgentag: v1.6.0 # latest tag: https://github.com/kubernetes/ingress-nginx/blob/main/images/kube-webhook-certgen/TAGsha: ""pullPolicy: IfNotPresentresources: {}priorityClassName: ""ttlSecondsAfterFinished: 60annotations: {}podAnnotations: {}nodeSelector: {}affinity: {}tolerations: []securityContext:runAsGroup: 2000runAsNonRoot: truerunAsUser: 2000seccompProfile:type: RuntimeDefaultserviceAccount:create: truename: "prometheus-k8s"annotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-roleautomountServiceAccountToken: truecreateSecretJob:securityContext:allowPrivilegeEscalation: falsereadOnlyRootFilesystem: truecapabilities:drop:- ALLpatchWebhookJob:securityContext:allowPrivilegeEscalation: falsereadOnlyRootFilesystem: truecapabilities:drop:- ALLcertManager:enabled: falserootCert:duration: "" # default to be 5yrevisionHistoryLimit:admissionCert:duration: "" # default to be 1yrevisionHistoryLimit:namespaces: {}denyNamespaces: []alertmanagerInstanceNamespaces: []alertmanagerConfigNamespaces: []prometheusInstanceNamespaces: []thanosRulerInstanceNamespaces: []networkPolicy:enabled: falseflavor: kubernetesserviceAccount:create: truename: "prometheus-k8s"automountServiceAccountToken: trueannotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-role terminationGracePeriodSeconds: 60lifecycle:preStop:exec:command:- "/bin/sh"- "-c"- "kill -TERM $(pidof prometheus); while [ -f /data/lock ]; do sleep 1; done"service:annotations: {}labels: {}clusterIP: ""ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"nodePort: 30080nodePortTls: 30443additionalPorts: []loadBalancerIP: ""loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPexternalIPs: []labels: {}annotations: {}podLabels: {}podAnnotations: {}podDisruptionBudget:enabled: falseminAvailable: 1unhealthyPodEvictionPolicy: 
AlwaysAllowkubeletService:enabled: truenamespace: kube-systemselector: ""name: ""kubeletEndpointsEnabled: truekubeletEndpointSliceEnabled: falseextraArgs: []serviceMonitor:selfMonitor: trueadditionalLabels: {}interval: ""sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0scrapeTimeout: ""metricRelabelings: []relabelings: []resources: {}env:GOGC: "30"hostNetwork: falsenodeSelector: {}tolerations: []affinity: {}dnsConfig: {}securityContext:fsGroup: 65534runAsGroup: 65534runAsNonRoot: truerunAsUser: 65534seccompProfile:type: RuntimeDefaultcontainerSecurityContext:allowPrivilegeEscalation: falsereadOnlyRootFilesystem: truecapabilities:drop:- ALLverticalPodAutoscaler:enabled: falsecontrolledResources: []maxAllowed: {}minAllowed: {}updatePolicy:updateMode: Autoimage:registry: quay.iorepository: prometheus-operator/prometheus-operatortag: ""sha: ""pullPolicy: IfNotPresentprometheusConfigReloader:image:registry: quay.iorepository: prometheus-operator/prometheus-config-reloadertag: ""sha: ""enableProbe: falseresources: {}thanosImage:registry: quay.iorepository: thanos/thanostag: v0.39.2sha: ""prometheusInstanceSelector: ""alertmanagerInstanceSelector: ""thanosRulerInstanceSelector: ""secretFieldSelector: "type!=kubernetes.io/dockercfg,type!=kubernetes.io/service-account-token,type!=helm.sh/release.v1"automountServiceAccountToken: trueextraVolumes: []extraVolumeMounts: [] prometheus:enabled: truelivenessProbe:httpGet:path: /-/healthyport: webinitialDelaySeconds: 60timeoutSeconds: 30periodSeconds: 30failureThreshold: 10successThreshold: 1readinessProbe:httpGet:path: /-/readyport: webinitialDelaySeconds: 60timeoutSeconds: 30periodSeconds: 30failureThreshold: 10successThreshold: 1agentMode: falseannotations: {}additionalLabels: {}networkPolicy:enabled: falseflavor: kubernetesserviceAccount:create: truename: prometheus-k8sannotations:eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/infra-prometheus-roleautomountServiceAccountToken: 
truethanosService:enabled: falseannotations: {}labels: {}externalTrafficPolicy: Clustertype: ClusterIPipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"portName: grpcport: 10901targetPort: "grpc"httpPortName: httphttpPort: 10902targetHttpPort: "http"clusterIP: "None"nodePort: 30901httpNodePort: 30902thanosServiceMonitor:enabled: falseinterval: ""additionalLabels: {}scheme: ""tlsConfig: {}bearerTokenFile:metricRelabelings: []relabelings: []thanosServiceExternal:enabled: falseannotations: {}labels: {}loadBalancerIP: ""loadBalancerSourceRanges: []portName: grpcport: 10901targetPort: "grpc"httpPortName: httphttpPort: 10902targetHttpPort: "http"externalTrafficPolicy: Clustertype: LoadBalancernodePort: 30901httpNodePort: 30902service:enabled: trueannotations: {}labels: {}clusterIP: ""ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"port: 9090targetPort: 9090reloaderWebPort: 8080externalIPs: []nodePort: 30090loadBalancerIP: ""loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPadditionalPorts: []publishNotReadyAddresses: falsesessionAffinity: NonesessionAffinityConfig:clientIP:timeoutSeconds: 10800servicePerReplica:enabled: falseannotations: {}port: 9090targetPort: 9090nodePort: 30091loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"podDisruptionBudget:enabled: falseminAvailable: 1unhealthyPodEvictionPolicy: AlwaysAllowthanosIngress:enabled: falseingressClassName: ""annotations: {}labels: {}servicePort: 10901nodePort: 30901hosts: []paths: []tls: []extraSecret:annotations: {}data: {}ingress:enabled: falseingressClassName: ""annotations: {}labels: {}hosts: []paths: []tls: []route:main:enabled: falseapiVersion: gateway.networking.k8s.io/v1kind: HTTPRouteannotations: {}labels: {}hostnames: []parentRefs: []httpsRedirect: falsematches:- path:type: PathPrefixvalue: /filters: 
[]additionalRules: []ingressPerReplica:enabled: falseingressClassName: ""annotations: {}labels: {}hostPrefix: ""hostDomain: ""paths: []tlsSecretName: ""tlsSecretPerReplica:enabled: falseprefix: "prometheus"serviceMonitor:selfMonitor: trueinterval: ""additionalLabels: {}sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0scheme: ""tlsConfig: {}bearerTokenFile:metricRelabelings: []relabelings: []additionalEndpoints: []prometheusSpec:persistentVolumeClaimRetentionPolicy: {}extraArgs:- "--storage.tsdb.wal-replay-concurrency=16" # 增加 WAL 重放并发度- "--storage.tsdb.allow-overlapping-blocks" # 允许时间重叠块- "--storage.tsdb.wal-compression" # 启用 WAL 压缩- "--storage.tsdb.head-chunks-write-queue-size=300000" # 增加写入队列- "--storage.tsdb.wal-segment-size=500mb" # 增大 WAL 段大小disableCompaction: falseautomountServiceAccountToken: trueapiserverConfig: {}additionalArgs: []scrapeFailureLogFile: ""scrapeInterval: ""scrapeTimeout: ""scrapeClasses: []podTargetLabels: []evaluationInterval: ""listenLocal: falseenableOTLPReceiver: falseenableAdminAPI: falseversion: ""web: {}exemplars: {}enableFeatures: []otlp: {}serviceName:image:registry: quay.iorepository: prometheus/prometheustag: v3.5.0sha: ""pullPolicy: IfNotPresenttolerations: []topologySpreadConstraints: []alertingEndpoints: []externalLabels:prometheus_replica: "aws-jp-prod-ltp-infra-eks-prome"cluster: "aws-jp-prod-ltp-infra-eks"prometheus_instance: "aws-jp-prod-ltp-infra-eks-prome"enableRemoteWriteReceiver: falsereplicaExternalLabelName: ""replicaExternalLabelNameClear: falseprometheusExternalLabelName: ""prometheusExternalLabelNameClear: falseexternalUrl: ""nodeSelector: {}secrets: []configMaps: []query: {}ruleNamespaceSelector: {}ruleSelectorNilUsesHelmValues: trueruleSelector: {}serviceMonitorSelectorNilUsesHelmValues: false serviceMonitorSelector: {}serviceMonitorNamespaceSelector: {}podMonitorSelectorNilUsesHelmValues: truepodMonitorSelector: {}podMonitorNamespaceSelector: 
{}probeSelectorNilUsesHelmValues: trueprobeSelector: {}probeNamespaceSelector: {}scrapeConfigSelectorNilUsesHelmValues: truescrapeConfigSelector: {}scrapeConfigNamespaceSelector: {}retention: 30dretentionSize: "500GB"tsdb:outOfOrderTimeWindow: 0swalCompression: truepaused: falsereplicas: 1shards: 1logLevel: infologFormat: logfmtroutePrefix: /podMetadata: {}podAntiAffinity: "soft"podAntiAffinityTopologyKey: kubernetes.io/hostnameaffinity: {}remoteRead: []additionalRemoteRead: []remoteWrite:- url: "https://mimir.abc.com/api/v1/push"writeRelabelConfigs:- targetLabel: k8sreplacement: "aws-jp-prod-ltp-infra-eks" # 覆盖k8s标签- targetLabel: clusterreplacement: "aws-jp-prod-ltp-infra-eks" # 覆盖cluster标签- targetLabel: prometheus_replicareplacement: "aws-jp-prod-ltp-infra-eks-prome"- targetLabel: prometheusreplacement: "aws-jp-prod-ltp-infra-eks-prome"queueConfig:maxSamplesPerSend: 5000maxShards: 300capacity: 1000000minShards: 50batchSendDeadline: 5sminBackoff: 200msmaxBackoff: 5sretryOnRateLimit: trueadditionalRemoteWrite: []remoteWriteDashboards: falseresources:requests:memory: 1200Micpu: 500mlimits:memory: 4096Micpu: 4096mstorageSpec:volumeClaimTemplate:spec:storageClassName: "gp3"accessModes: ["ReadWriteOnce"]resources:requests:storage: 500Givolumes: []volumeMounts: []additionalScrapeConfigs:- job_name: 'flink-pushgateway'honor_labels: true # 关键配置!static_configs:- targets: ['pushgateway:9091']labels:env: prod- job_name: 'aws-ec2-nodes'ec2_sd_configs:- region: ap-northeast-1port: 9100- region: ap-southeast-1port: 9100- region: ap-east-1port: 9100relabel_configs:- source_labels: [__meta_ec2_tag_aws_eks_cluster_name]regex: .+action: drop- source_labels: [__meta_ec2_tag_eks_cluster_name]regex: .+action: drop- source_labels: [__meta_ec2_tag_ec2_sd]regex: "0"action: drop- source_labels: [__meta_ec2_instance_id]target_label: instance- source_labels: [__meta_ec2_private_ip]target_label: PrivateIpAddress # 内网IP- source_labels: [__meta_ec2_public_ip]target_label: PublicIp # 公网IP- 
source_labels: [__meta_ec2_instance_type]target_label: InstanceType # 实例类型- source_labels: [__meta_ec2_availability_zone]target_label: AvailabilityZone # 可用区- source_labels: [__meta_ec2_region]target_label: Region # 区域- source_labels: [__meta_ec2_state]target_label: Status # 实例状态- action: labelmapregex: __meta_ec2_tag_(.+)additionalScrapeConfigsSecret: {}additionalPrometheusSecretsAnnotations: {}additionalAlertManagerConfigs: []additionalAlertManagerConfigsSecret: {}additionalAlertRelabelConfigs: []additionalAlertRelabelConfigsSecret: {}securityContext:runAsGroup: 2000runAsNonRoot: truerunAsUser: 1000fsGroup: 2000seccompProfile:type: RuntimeDefaultpriorityClassName: ""thanos: {}containers: []initContainers: []portName: "http-web"arbitraryFSAccessThroughSMs: falseoverrideHonorLabels: falseoverrideHonorTimestamps: falseignoreNamespaceSelectors: falseenforcedNamespaceLabel: ""prometheusRulesExcludedFromEnforce: []excludedFromEnforcement: []queryLogFile: falsesampleLimit: falseenforcedKeepDroppedTargets: 0enforcedSampleLimit: falseenforcedTargetLimit: falseenforcedLabelLimit: falseenforcedLabelNameLengthLimit: falseenforcedLabelValueLengthLimit: falseallowOverlappingBlocks: falsenameValidationScheme: ""minReadySeconds: 0hostNetwork: falsehostAliases: []tracingConfig: {}serviceDiscoveryRole: ""additionalConfig: {}additionalConfigString: ""maximumStartupDurationSeconds: 0scrapeProtocols: []additionalRulesForClusterRole: []additionalServiceMonitors: []additionalPodMonitors: [] thanosRuler:enabled: falseannotations: {}serviceAccount:create: truename: ""annotations: {}podDisruptionBudget:enabled: falseminAvailable: 1unhealthyPodEvictionPolicy: AlwaysAllowingress:enabled: falseingressClassName: ""annotations: {}labels: {}hosts: []paths: []tls: []route:main:enabled: falseapiVersion: gateway.networking.k8s.io/v1kind: HTTPRouteannotations: {}labels: {}hostnames: []parentRefs: []httpsRedirect: falsematches:- path:type: PathPrefixvalue: /filters: []additionalRules: 
[]service:enabled: trueannotations: {}labels: {}clusterIP: ""ipDualStack:enabled: falseipFamilies: ["IPv6", "IPv4"]ipFamilyPolicy: "PreferDualStack"port: 10902targetPort: 10902nodePort: 30905additionalPorts: []externalIPs: []loadBalancerIP: ""loadBalancerSourceRanges: []externalTrafficPolicy: Clustertype: ClusterIPserviceMonitor:selfMonitor: trueinterval: ""additionalLabels: {}sampleLimit: 0targetLimit: 0labelLimit: 0labelNameLengthLimit: 0labelValueLengthLimit: 0proxyUrl: ""scheme: ""tlsConfig: {}bearerTokenFile:metricRelabelings: []relabelings: []additionalEndpoints: []thanosRulerSpec:podMetadata: {}serviceName:image:registry: quay.iorepository: thanos/thanostag: v0.39.2sha: ""ruleNamespaceSelector: {}ruleSelectorNilUsesHelmValues: trueruleSelector: {}logFormat: logfmtlogLevel: inforeplicas: 1retention: 720hevaluationInterval: ""storage: {}alertmanagersConfig:existingSecret: {}secret: {}externalPrefix:externalPrefixNilUsesHelmValues: trueroutePrefix: /objectStorageConfig:existingSecret: {}secret: {}alertDropLabels: []queryEndpoints: []queryConfig:existingSecret: {}secret: {}labels: {}paused: falseadditionalArgs: []nodeSelector: {}resources: {}podAntiAffinity: "soft"podAntiAffinityTopologyKey: kubernetes.io/hostnameaffinity: {}tolerations: []topologySpreadConstraints: []securityContext:runAsGroup: 2000runAsNonRoot: truerunAsUser: 1000fsGroup: 2000seccompProfile:type: RuntimeDefaultlistenLocal: falsecontainers: []volumes: []volumeMounts: []initContainers: []priorityClassName: ""portName: "web"web: {}additionalConfig: {}additionalConfigString: ""extraSecret:annotations: {}data: {} cleanPrometheusOperatorObjectNames: false extraManifests: null -
部署 Grafana Mimir
-
# Helm values for the Grafana Mimir (mimir-distributed) chart.
# Single-tenant setup, blocks stored in S3, external MinIO/alertmanager/ruler disabled.

global:
  serviceAccountName: mimir-serviceaccount
  namespace: mimir # replace with the actual namespace

serviceAccount:
  create: true
  name: mimir-serviceaccount
  annotations:
    # IAM role assumed by the Mimir pods (IRSA) to reach the S3 bucket below.
    eks.amazonaws.com/role-arn: arn:aws:iam::122345678:role/infra-mimir-role

minio:
  enabled: false

mimir:
  disableCachingValidation: true
  structuredConfig:
    multitenancy_enabled: false # disable multi-tenancy
    common:
      storage:
        backend: s3
        s3:
          endpoint: s3.ap-northeast-1.amazonaws.com
          bucket_name: aws-jp-prod-mimir
          region: ap-northeast-1
          signature_version: v4 # signature version compatible with AWS S3
    blocks_storage:
      backend: s3
      tsdb:
        # Intended by the author as "keep data for 1 year".
        # NOTE(review): Mimir documents this key as the ingester-local TSDB block
        # retention; long-term retention is normally set through
        # limits.compactor_blocks_retention_period — confirm before relying on it.
        retention_period: 8760h
        block_ranges_period: [2h, 12h, 24h, 168h, 672h] # block aggregation ranges
        wal_compression_enabled: true
    usage_stats:
      enabled: false
    compactor:
      compaction_concurrency: 4 # compaction concurrency
      block_ranges: [2h, 12h, 24h, 168h, 672h] # matches the block storage ranges
    limits:
      ingestion_rate: 400000 # per-second ingestion rate limit
      ingestion_burst_size: 2000000 # ingestion burst limit
      max_global_series_per_user: 5000000 # global series limit
      max_global_series_per_metric: 800000 # per-metric series limit
      max_query_lookback: 744h
    ingester:
      ring:
        heartbeat_timeout: 1m
        heartbeat_period: 15s
  # NOTE(review): the original paste lost its indentation; this section appears to
  # have been nested (not top-level). The chart reads `runtimeConfig` as a top-level
  # value, so verify where it is meant to live before expecting it to take effect.
  runtimeConfig:
    overrides:
      anonymous: # settings for the "anonymous" tenant (single-tenant mode)
        ingestion_rate: 400000
        ingestion_burst_size: 2000000
        max_global_series_per_user: 5000000
        max_global_series_per_metric: 800000
        max_query_lookback: 744h
    ingester_limits:
      max_ingestion_rate: 400000
      max_series: 500000 # maximum number of series per ingester
    distributor_limits:
      max_ingestion_rate: 300000
      max_inflight_push_requests: 30000 # maximum concurrent push requests

alertmanager:
  enabled: false

distributor:
  enabled: true
  replicas: 2 # redundant replicas
  resources:
    requests: { cpu: 2024m, memory: 4096Mi }
    limits: { cpu: 4000m, memory: 8Gi }
  podDisruptionBudget:
    maxUnavailable: 1

ingester:
  enabled: true
  replicas: 6 # greater than replication_factor (3)
  podManagementPolicy: Parallel
  resources:
    requests: { cpu: 500m, memory: 8Gi }
    limits: { cpu: 4096m, memory: 16Gi }
  zone:
    enabled: true
    zones:
      - name: zone-a
        nodeSelector:
          topology.kubernetes.io/zone: ap-northeast-1a
        replicas: 2
      - name: zone-b
        nodeSelector:
          topology.kubernetes.io/zone: ap-northeast-1b
        replicas: 2
      - name: zone-c
        nodeSelector:
          topology.kubernetes.io/zone: ap-northeast-1c
        replicas: 2
  # NOTE(review): the chart's documented scheduling knob is `affinity`; confirm that
  # a component-level `podAntiAffinity` key is honoured by the chart version in use.
  podAntiAffinity:
    requiredDuringSchedulingIgnoredDuringExecution: # changed from preferred to required
      - labelSelector:
          matchExpressions:
            - key: app.kubernetes.io/component
              operator: In
              values: ["ingester"]
        topologyKey: "kubernetes.io/hostname"
  extraPorts:
    - name: http-metrics
      containerPort: 8080
      protocol: TCP
  startupProbe:
    httpGet:
      path: /ready
      port: http-metrics
    initialDelaySeconds: 300
    periodSeconds: 15
    timeoutSeconds: 30
    failureThreshold: 10
  livenessProbe:
    httpGet:
      path: /ready
      port: http-metrics
    initialDelaySeconds: 600
    periodSeconds: 15
    timeoutSeconds: 30
    failureThreshold: 3
    successThreshold: 1
  readinessProbe:
    httpGet:
      path: /ready
      port: http-metrics
    initialDelaySeconds: 600
    periodSeconds: 15
    timeoutSeconds: 30
    failureThreshold: 3
    successThreshold: 1
  podDisruptionBudget:
    minAvailable: 5 # use minAvailable instead of maxUnavailable

querier:
  enabled: true
  replicas: 3
  resources:
    requests: { cpu: 500m, memory: 2048Mi }
    limits: { cpu: 2000m, memory: 4Gi }
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/component
                operator: In
                values: ["querier"]
          topologyKey: "kubernetes.io/hostname"
  podDisruptionBudget:
    maxUnavailable: 1

# The chart key is `query_frontend` (underscore, like `store_gateway` below).
# The hyphenated spelling `query-frontend` is not a chart value and is silently
# ignored by Helm, leaving the query-frontend on chart defaults.
query_frontend:
  enabled: true
  replicas: 3
  resources:
    requests: { cpu: 500m, memory: 2048Mi }
    limits: { cpu: 4000m, memory: 16Gi }
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/component
                operator: In
                values: ["query-frontend"]
          topologyKey: "kubernetes.io/hostname"
  podDisruptionBudget:
    maxUnavailable: 1

compactor:
  enabled: true
  replicas: 3
  podDisruptionBudget:
    maxUnavailable: 1
  podManagementPolicy: Parallel
  strategy:
    type: RollingUpdate
  resources:
    requests: { cpu: 500m, memory: 2048Mi }
    limits: { cpu: 4000m, memory: 4Gi }
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/component
                operator: In
                values: ["compactor"]
          topologyKey: "kubernetes.io/hostname"

store_gateway:
  replicas: 3
  podManagementPolicy: Parallel
  resources:
    requests: { cpu: 1000m, memory: 4Gi } # enough memory to load historical blocks
    limits: { cpu: 4000m, memory: 8Gi }
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/component
                operator: In
                values: ["store-gateway"]
          topologyKey: "kubernetes.io/hostname"

# Legacy nginx front end is switched off; the `gateway` section below is used instead.
nginx:
  enabled: false
  image:
    registry: public.ecr.aws
    repository: nginx/nginx-unprivileged
    tag: 1.27-alpine
    pullPolicy: IfNotPresent
  resources:
    requests: { cpu: 100m, memory: 128Mi }
    limits: { cpu: 500m, memory: 512Mi }

gateway:
  enabled: true
  enabledNonEnterprise: true
  replicas: 2
  autoscaling:
    enabled: true
    minReplicas: 2
    maxReplicas: 4
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 2
      maxSurge: 15%
  resources:
    requests: { cpu: 1000m, memory: 2048Mi }
    limits: { cpu: 2000m, memory: 4096Mi }

ruler:
  enabled: false

# Shared image and security settings for the memcached-based caches below.
memcached:
  image:
    repository: memcached
    tag: 1.6.38-alpine
    pullPolicy: IfNotPresent
  podSecurityContext: {}
  priorityClassName: null
  containerSecurityContext:
    readOnlyRootFilesystem: true
    capabilities:
      drop: [ALL]
    allowPrivilegeEscalation: false

index-cache:
  enabled: true
  replicas: 3
  port: 11211
  allocatedMemory: 2048
  maxItemMemory: 5
  connectionLimit: 16384
  podDisruptionBudget:
    maxUnavailable: 1
  podManagementPolicy: Parallel
  terminationGracePeriodSeconds: 30
  statefulStrategy:
    type: RollingUpdate
  extraArgs: {}
  resources:
    requests: { cpu: 100m, memory: 2048Mi }
    limits: { cpu: 1000m, memory: 3096Mi }

metadata-cache:
  enabled: true
  replicas: 3
  port: 11211
  allocatedMemory: 1024
  maxItemMemory: 5
  connectionLimit: 16384
  podDisruptionBudget:
    maxUnavailable: 1
  podManagementPolicy: Parallel
  terminationGracePeriodSeconds: 30
  statefulStrategy:
    type: RollingUpdate
  extraArgs: {}
  resources:
    requests: { cpu: 100m, memory: 1024Mi }
    limits: { cpu: 1000m, memory: 3096Mi }

results-cache:
  enabled: true
  replicas: 3
  port: 11211
  allocatedMemory: 2048
  maxItemMemory: 5
  connectionLimit: 16384
  podDisruptionBudget:
    maxUnavailable: 1
  podManagementPolicy: Parallel
  terminationGracePeriodSeconds: 30
  statefulStrategy:
    type: RollingUpdate
  extraArgs: {}
  resources:
    requests: { cpu: 100m, memory: 2048Mi }
    limits: { cpu: 1000m, memory: 3096Mi }
部署 N9e夜莺
-
# values-n9e.yaml
# Helm values for the Nightingale (n9e) chart: the n9e center runs in-cluster with an
# internal Redis, while MySQL, Prometheus and categraf are external.

expose:
  # Only the section matching `type` is used; the others keep their defaults.
  type: clusterIP
  tls:
    enabled: false
    certSource: auto
    auto:
      commonName: ""
    secret:
      secretName: ""
  ingress:
    hosts:
      web: n9e.ltpin.con
    controller: default
    kubeVersionOverride: ""
    annotations: {}
    nightingale:
      annotations: {}
  clusterIP:
    name: n9e
    annotations: {}
    ports:
      httpPort: 80
      httpsPort: 443
  nodePort:
    name: nightingale
    ports:
      http:
        port: 80
        nodePort: 30007
      https:
        port: 443
        nodePort: 30009
  loadBalancer:
    name: nightingale
    IP: ""
    ports:
      httpPort: 80
      httpsPort: 443
    annotations: {}
    sourceRanges: []

externalURL: http://hello.n9e.info

ipFamily:
  ipv6:
    enabled: false
  ipv4:
    enabled: true

persistence:
  enabled: true
  resourcePolicy: "keep"
  persistentVolumeClaim:
    database:
      existingClaim: ""
      storageClass: "efs-sc"
      subPath: ""
      accessMode: ReadWriteOnce
      size: 50Gi
    redis:
      existingClaim: ""
      storageClass: "efs-sc"
      subPath: ""
      accessMode: ReadWriteOnce
      size: 50Gi
    prometheus:
      existingClaim: ""
      storageClass: ""
      subPath: ""
      accessMode: ReadWriteOnce
      size: 4Gi

imagePullPolicy: IfNotPresent

imagePullSecrets:

updateStrategy:
  type: RollingUpdate

logLevel: info

caSecretName: ""

# NOTE(review): this is a placeholder value; replace it before production use.
secretKey: "not-a-secure-key"

nginx:
  image:
    repository: docker.io/library/nginx
    tag: stable-alpine
  serviceAccountName: ""
  automountServiceAccountToken: false
  replicas: 2
  resources:
    requests:
      memory: 200Mi
      cpu: 100m
    limits:
      memory: 512Mi
      cpu: 1000m
  nodeSelector: {}
  tolerations: []
  affinity: {}
  podAnnotations: {}
  priorityClassName:

database:
  # Select the external MySQL explicitly. Without `type`, the chart default
  # (internal) applies and the `external` connection settings below are not used.
  type: external
  external:
    host: "infra-mysql.prod.internal.123.com"
    port: "3306"
    name: "n9e_v6"
    username: "root"
    # NOTE(review): plaintext credential in a values file; consider sourcing it
    # from a secret instead.
    password: "123456789"
    sslmode: "disable"
  maxIdleConns: 100
  maxOpenConns: 900
  podAnnotations: {}

redis:
  type: internal
  internal:
    serviceAccountName: ""
    automountServiceAccountToken: false
    image:
      repository: 123456789.dkr.ecr.ap-northeast-1.amazonaws.com/sretools
      tag: redis6.2
    resources:
      requests:
        memory: 200Mi
        cpu: 100m
      limits:
        memory: 512Mi
        cpu: 1000m
    nodeSelector: {}
    tolerations: []
    affinity: {}
    priorityClassName:
  # Unused while `type` is internal.
  external:
    addr: "192.168.0.2:6379"
    sentinelMasterSet: ""
    username: ""
    password: ""
    mode: "standalone"
  podAnnotations: {}

prometheus:
  type: external
  external:
    host: "kube-prom-stack-kube-prome-prometheus.monitoring"
    port: "9090"

categraf:
  type: external

n9e:
  type: internal
  internal:
    replicas: 1
    serviceAccountName: ""
    automountServiceAccountToken: false
    image:
      repository: flashcatcloud/nightingale
      tag: 8.2.2
    resources:
      requests:
        memory: 500Mi
        cpu: 200m
      limits:
        memory: 2048Mi
        cpu: 2000m
    nodeSelector: { }
    tolerations: [ ]
    affinity: { }
    priorityClassName:
    ibexEnable: false
    ibexPort: "20090"
  # Unused while `type` is internal.
  external:
    port: "8080"
    ibexEnable: false
    ibexPort: "20090"
  podAnnotations: { }
# ingress-n9e.yaml
# ALB Ingress that publishes the Nightingale center service on n9e.123.com.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: n9e-ingress
  namespace: n9e
  annotations:
    # Listener ports: HTTP 80 and HTTPS 443.
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # Automatically redirect HTTP traffic to HTTPS.
    alb.ingress.kubernetes.io/ssl-redirect: '443'
    # ARN of the AWS (ACM) certificate served on the HTTPS listener.
    alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:ap-northeast-1:123456789:certificate/11111111111
    # NOTE(review): no target-type annotation is set; confirm the controller's
    # default target type is compatible with the backend Service's type.
spec:
  ingressClassName: alb
  rules:
    - host: n9e.123.com
      http:
        paths:
          # Route every path under "/" to the n9e center service on port 80.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: n9e-nightingale-center
                port:
                  number: 80
Lark通知模版{{/* Message body: renders a "Recovered" section when $event.IsRecovered is true, otherwise a "Triggered" section; the cluster line is skipped when $event.Cate is "host". */}}{{ if $event.IsRecovered }} {{- if ne $event.Cate "host"}} **告警集群:** {{$event.Cluster}}{{end}} **级别状态:** S{{$event.Severity}} Recovered **告警名称:** {{$event.RuleName}} **事件标签:** {{range $i, $tag := $event.TagsJSON}} - {{$tag}} {{end}} **恢复时间:** {{timeformat $event.LastEvalTime}} {{$time_duration := sub now.Unix $event.FirstTriggerTime }}{{if $event.IsRecovered}}{{$time_duration = sub $event.LastEvalTime $event.FirstTriggerTime }}{{end}}**持续时长**: {{humanizeDurationInterface $time_duration}} **告警描述:** **服务已恢复** {{- else }} {{- if ne $event.Cate "host"}} **告警集群:** {{$event.Cluster}}{{end}} **级别状态:** S{{$event.Severity}} Triggered **告警名称:** {{$event.RuleName}} **事件标签:** {{range $i, $tag := $event.TagsJSON}} - {{$tag}} {{end}} **触发时间:** {{timeformat $event.TriggerTime}} **发送时间:** {{timestamp}} **触发时值:** {{$event.TriggerValue}} {{/* NOTE(review): this is the else branch of "if $event.IsRecovered", so the IsRecovered check below looks unreachable here; the duration is then now.Unix minus FirstTriggerTime. */}}{{$time_duration := sub now.Unix $event.FirstTriggerTime }}{{if $event.IsRecovered}}{{$time_duration = sub $event.LastEvalTime $event.FirstTriggerTime }}{{end}}**持续时长**: {{humanizeDurationInterface $time_duration}} {{if $event.RuleNote }}**告警描述:** **{{$event.RuleNote}}**{{end}} {{- end -}} {{/* $domain is the base URL for the three links below: event details, a mute form pre-filled from the event, and the metric explorer with the rule's PromQL. */}}{{$domain := "https://n9e.123.com" }} [事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}}) ---title----- {{/* Title: a recovered/alert marker chosen by $event.IsRecovered, followed by the rule name. Presumably the "---title-----" marker separates the body template from the title template; confirm. */}}{{if $event.IsRecovered}}✅ 恢复{{else}}⚠️ 告警{{end}} - {{$event.RuleName}} -
