List of example alerts

Set the label `metoro.io/alert: "true"` in your ConfigMap, as shown in the example below.
# Example ConfigMap bundling four timeseries alert definitions under the
# alert.yaml key. The metoro.io/alert: "true" label is set on the ConfigMap.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. Each `type: X` / `X:` pair is kept as
# siblings, and `actions` is attached to the evaluation rule. Confirm both
# against the Metoro alert schema before applying.
kind: ConfigMap
apiVersion: v1
metadata:
  name: alert-config
  labels:
    metoro.io/alert: "true"
data:
  # Literal block scalar: everything indented below is stored verbatim as the
  # string value of alert.yaml (the embedded comments included).
  alert.yaml: |
    alerts:
      # 1) CPU usage relative to the CPU limit for a single service.
      #    Fires when 5 of the last 5 datapoints breach the threshold.
      - metadata:
          id: "cpu-usage-alert-001"
          name: "High CPU Usage"
          description: "Alert when CPU usage exceeds 80% for 5 minutes"
        type: timeseries
        timeseries:
          expression:
            metoroQLTimeseries:
              # Folded scalar: the three lines join with single spaces into
              # one query string.
              # NOTE(review): usage / 60 / limit looks like a 0-1 ratio, yet
              # the threshold is 80 and the description says 80%. Confirm the
              # units; if it is a ratio, multiply by 100 or use 0.8.
              query: >-
                sum(container_resources_cpu_usage_seconds_total{service_name="/k8s/default/myimportantservice"})
                / 60
                / sum(container_resources_cpu_limit_cores{service_name="/k8s/default/myimportantservice"})
              # presumably seconds per datapoint - TODO confirm
              bucketSize: 60
          evaluationRules:
            - name: "warning"
              type: static
              static:
                operators:
                  - operator: greaterThan
                    threshold: 80
                persistenceSettings:
                  datapointsToAlarm: 5
                  datapointsInEvaluationWindow: 5
                missingDatapointBehavior: notBreaching

      # 2) Count of logs with log_level="error".
      #    NOTE(review): as written, each of 15 consecutive datapoints must
      #    exceed 100, which is stricter than the description's "100 in 15
      #    minutes". Confirm which behavior is intended.
      - metadata:
          id: "error-log-alert-001"
          name: "High Error Rate"
          description: "Alert when error logs exceed 100 in 15 minutes"
        type: timeseries
        timeseries:
          expression:
            metoroQLTimeseries:
              query: count(logs{log_level="error"})
              bucketSize: 60
          evaluationRules:
            - name: "critical"
              type: static
              static:
                operators:
                  - operator: greaterThan
                    threshold: 100
                persistenceSettings:
                  datapointsToAlarm: 15
                  datapointsInEvaluationWindow: 15
                missingDatapointBehavior: notBreaching

      # 3) 0.99 quantile of trace duration across all traces.
      #    NOTE(review): threshold 2 assumes the quantile is reported in
      #    seconds. Confirm the unit.
      - metadata:
          id: "high-latency-alert-001"
          name: "High Latency"
          description: "Alert when HTTP request duration exceeds 2 seconds for 5 minutes"
        type: timeseries
        timeseries:
          expression:
            metoroQLTimeseries:
              query: trace_duration_quantile(0.99, traces)
              bucketSize: 60
          evaluationRules:
            - name: "warning"
              type: static
              static:
                operators:
                  - operator: greaterThan
                    threshold: 2
                persistenceSettings:
                  datapointsToAlarm: 5
                  datapointsInEvaluationWindow: 5
                missingDatapointBehavior: notBreaching

      # 4) Same latency condition as alert 3, with Slack and email
      #    notification destinations attached.
      - metadata:
          id: "latency-with-notifications-001"
          name: "High Latency with Notifications"
          description: "Alert when HTTP request duration exceeds 2 seconds for 5 minutes with notifications"
        type: timeseries
        timeseries:
          expression:
            metoroQLTimeseries:
              query: trace_duration_quantile(0.99, traces)
              bucketSize: 60
          evaluationRules:
            - name: "Warning"
              type: static
              static:
                operators:
                  - operator: greaterThan
                    threshold: 2
                persistenceSettings:
                  datapointsToAlarm: 5
                  datapointsInEvaluationWindow: 5
                missingDatapointBehavior: notBreaching
              # NOTE(review): actions are placed on the evaluation rule here;
              # verify the expected level against the alert schema.
              actions:
                - type: slack
                  slackDestination:
                    channel: "alerts-critical"
                - type: email
                  emailDestination:
                    emails:
                      - "oncall@example.com"
                      - "sre-team@example.com"
Was this page helpful?