Spring Boot 可观测性
1. 可观测性概述
1.1 三大支柱
| 支柱 | 说明 | 工具 |
|---|---|---|
| 日志(Logs) | 记录事件和错误 | Logback、ELK |
| 指标(Metrics) | 量化系统状态 | Micrometer、Prometheus |
| 追踪(Traces) | 请求链路追踪 | Zipkin、Jaeger |
1.2 Micrometer 简介
Micrometer 是一个度量门面,为 Java 应用提供与供应商无关的指标收集接口。
2. Actuator 监控
2.1 添加依赖
xml
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
2.2 配置端点
yaml
# Actuator endpoint exposure and per-endpoint settings.
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus,loggers,env
      base-path: /actuator
  endpoint:
    health:
      show-details: always
      probes:
        enabled: true
    loggers:
      enabled: true
    prometheus:
      enabled: true
  info:
    env:
      enabled: true
    java:
      enabled: true
    os:
      enabled: true
2.3 常用端点
| 端点 | 说明 |
|---|---|
| /actuator/health | 健康检查 |
| /actuator/info | 应用信息 |
| /actuator/metrics | 指标列表 |
| /actuator/metrics/{name} | 具体指标 |
| /actuator/prometheus | Prometheus 格式 |
| /actuator/loggers | 日志配置 |
| /actuator/env | 环境变量 |
2.4 自定义健康检查
java
/**
 * Health indicator that reports UP when a JDBC connection can be borrowed from
 * the injected {@link DataSource} and passes {@link Connection#isValid(int)}.
 */
@Component
public class DatabaseHealthIndicator implements HealthIndicator {
    /** Maximum time, in seconds, to wait for the driver's validity check. */
    private static final int VALIDATION_TIMEOUT_SECONDS = 1;

    private final DataSource dataSource;

    /**
     * Constructor injection: the field is final, so it must be assigned here.
     *
     * @param dataSource the data source whose connectivity is probed
     */
    public DatabaseHealthIndicator(DataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Borrows one connection, validates it and returns it to the pool.
     *
     * @return UP with descriptive details when the connection is valid; DOWN
     *         when validation fails or the connection cannot be obtained (the
     *         SQL exception is attached in the latter case)
     */
    @Override
    public Health health() {
        // try-with-resources guarantees the connection is released on every path.
        try (Connection connection = dataSource.getConnection()) {
            if (!connection.isValid(VALIDATION_TIMEOUT_SECONDS)) {
                return Health.down().build();
            }
            return Health.up()
                    .withDetail("database", "MySQL")
                    .withDetail("validationQuery", "SELECT 1")
                    .build();
        } catch (SQLException e) {
            return Health.down()
                    .withException(e)
                    .build();
        }
    }
}
/**
 * Health indicator that sends a PING to Redis through the injected
 * {@link RedisTemplate} and reports the reply.
 */
@Component
public class RedisHealthIndicator implements HealthIndicator {
    private final RedisTemplate<String, Object> redisTemplate;

    /**
     * Constructor injection: the field is final, so it must be assigned here.
     *
     * @param redisTemplate template used to reach the Redis server
     */
    public RedisHealthIndicator(RedisTemplate<String, Object> redisTemplate) {
        this.redisTemplate = redisTemplate;
    }

    /**
     * @return UP with the PING reply under the {@code redis} detail key, or
     *         DOWN with the exception attached when the command fails
     */
    @Override
    public Health health() {
        try {
            // execute(...) obtains a connection and releases it when the callback
            // returns; fetching one from the factory directly leaked it on every check.
            String pong = redisTemplate.execute(
                    (org.springframework.data.redis.core.RedisCallback<String>)
                            connection -> connection.ping());
            return Health.up()
                    .withDetail("redis", pong)
                    .build();
        } catch (Exception e) {
            // Boundary catch: any failure while probing means the dependency is down.
            return Health.down()
                    .withException(e)
                    .build();
        }
    }
}
3. 指标收集
3.1 添加依赖
xml
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-registry-prometheus</artifactId>
</dependency>
3.2 自定义指标
java
/**
 * Order service instrumented with three meters: a counter of created orders,
 * a timer around order creation, and a gauge of pending orders.
 *
 * NOTE(review): {@code orderRepository} and {@code doCreateOrder} are used but
 * not declared in this snippet; they are assumed to be provided elsewhere.
 */
@Service
public class OrderService {
    private final Counter orderCounter;
    private final Timer orderTimer;
    private final Gauge orderGauge;

    /**
     * Registers the meters against the given registry.
     *
     * @param registry the registry the three meters are registered with
     */
    public OrderService(MeterRegistry registry) {
        this.orderCounter = Counter.builder("orders.created")
                .description("订单创建数量")
                .tag("type", "online")
                .register(registry);
        this.orderTimer = Timer.builder("orders.processing.time")
                .description("订单处理时间")
                .register(registry);
        // The supplier is invoked each time the gauge value is read.
        this.orderGauge = Gauge.builder("orders.pending", this::getPendingOrders)
                .description("待处理订单数")
                .register(registry);
    }

    /**
     * Creates an order while recording the elapsed time.
     *
     * @param request the order to create
     * @return the created order
     */
    public Order createOrder(OrderRequest request) {
        return orderTimer.record(() -> {
            Order order = doCreateOrder(request);
            // Count only after creation succeeded, so failed attempts do not
            // inflate orders.created.
            orderCounter.increment();
            return order;
        });
    }

    /** @return the number of orders currently in PENDING status */
    private int getPendingOrders() {
        return orderRepository.countByStatus(OrderStatus.PENDING);
    }
}
3.3 指标类型
java
@Configuration
public class MetricsConfig {
@Bean
public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
return registry -> registry.config()
.commonTags("application", "myapp")
.commonTags("env", "prod");
}
}
/**
 * Demonstrates the four basic Micrometer meter types: counter, gauge, timer
 * and distribution summary.
 */
@Service
public class MetricsService {
    private final MeterRegistry registry;

    /**
     * Backing value of the queue.size gauge. It is a field because Micrometer
     * references gauge state objects weakly: a method-local value could be
     * garbage-collected once the method returns, leaving the gauge without data.
     */
    private final AtomicInteger queueSize = new AtomicInteger(0);

    /**
     * Constructor injection: the field is final, so it must be assigned here.
     *
     * @param registry the registry all meters are registered with
     */
    public MetricsService(MeterRegistry registry) {
        this.registry = registry;
    }

    /** Increments the api.requests counter for GET /users. */
    public void recordCounter() {
        Counter counter = Counter.builder("api.requests")
                .tag("endpoint", "/users")
                .tag("method", "GET")
                .description("API 请求计数")
                .register(registry);
        counter.increment();
    }

    /** Publishes the queue.size gauge and sets its current value to 100. */
    public void recordGauge() {
        // Registering the same meter id again returns the existing gauge, so
        // repeated calls keep observing the one long-lived queueSize object.
        Gauge.builder("queue.size", queueSize, AtomicInteger::get)
                .description("队列大小")
                .register(registry);
        queueSize.set(100);
    }

    /** Times a unit of work with the operation.duration timer. */
    public void recordTimer() {
        Timer timer = Timer.builder("operation.duration")
                .description("操作耗时")
                .register(registry);
        timer.record(() -> {
            // perform the operation
        });
    }

    /** Records a 1024-byte sample in the response.size distribution summary. */
    public void recordSummary() {
        DistributionSummary summary = DistributionSummary.builder("response.size")
                .description("响应大小")
                .baseUnit("bytes")
                .register(registry);
        summary.record(1024);
    }
}
3.4 AOP 指标
java
/**
 * Aspect that times every method annotated with {@link Timed} and records the
 * duration under the timer name given by the annotation.
 */
@Aspect
@Component
public class MetricsAspect {
    private final MeterRegistry registry;

    /**
     * Constructor injection: the field is final, so it must be assigned here.
     *
     * @param registry the registry timers are registered with
     */
    public MetricsAspect(MeterRegistry registry) {
        this.registry = registry;
    }

    /**
     * Times the intercepted invocation.
     *
     * <p>The annotation is bound through the pointcut rather than looked up by
     * reflection: with interface-based proxies the reflected method can be the
     * interface method, which does not carry the annotation, and the lookup
     * would return {@code null}.
     *
     * @param joinPoint the intercepted invocation
     * @param timed the annotation instance present on the intercepted method
     * @return whatever the intercepted method returns
     * @throws Throwable whatever the intercepted method throws, rethrown
     *         after the duration is recorded under {@code <name>.error}
     */
    @Around("@annotation(timed)")
    public Object timeMethod(ProceedingJoinPoint joinPoint, Timed timed) throws Throwable {
        Timer.Sample sample = Timer.start(registry);
        try {
            Object result = joinPoint.proceed();
            sample.stop(Timer.builder(timed.value())
                    .description(timed.description())
                    .register(registry));
            return result;
        } catch (Throwable t) {
            // proceed() may throw Errors as well as Exceptions; time both.
            sample.stop(Timer.builder(timed.value() + ".error")
                    .register(registry));
            throw t;
        }
    }
}
/**
 * Marks a method whose execution time should be recorded.
 * Retained at runtime and applicable to methods only.
 *
 * NOTE(review): Micrometer ships an annotation with the same simple name
 * (io.micrometer.core.annotation.Timed) - check imports where both are on
 * the classpath.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Timed {
/** Timer name; required. */
String value();
/** Optional timer description; defaults to the empty string. */
String description() default "";
}
/**
 * Example service whose lookup method is timed through the {@code @Timed}
 * annotation.
 */
@Service
public class UserService {

    /**
     * Looks a user up by id.
     *
     * @param id the user id
     * @return the matching user, or {@code null} when none exists
     */
    @Timed(value = "user.find", description = "查找用户耗时")
    public User findById(Long id) {
        User match = userRepository.findById(id).orElse(null);
        return match;
    }
}
4. 分布式追踪
4.1 添加依赖
xml
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-tracing-bridge-brave</artifactId>
</dependency>
<dependency>
<groupId>io.zipkin.reporter2</groupId>
<artifactId>zipkin-reporter-brave</artifactId>
</dependency>
4.2 配置追踪
yaml
# Tracing: sample every request and report spans to a local Zipkin.
management:
  tracing:
    enabled: true
    sampling:
      probability: 1.0
  zipkin:
    tracing:
      endpoint: http://localhost:9411/api/v2/spans
spring:
  application:
    name: myapp
4.3 自定义 Span
java
/**
 * Order service showing manual span creation with {@link Tracer} and
 * annotation-driven spans with {@code @NewSpan} / {@code @SpanTag}.
 *
 * NOTE(review): {@code paymentService}, {@code orderRepository} and
 * {@code doCreateOrder} are used but not declared in this snippet; they are
 * assumed to be provided elsewhere.
 */
@Service
public class OrderService {
    private final Tracer tracer;

    /**
     * Constructor injection: the field is final, so it must be assigned here.
     *
     * @param tracer tracer used to create spans manually
     */
    public OrderService(Tracer tracer) {
        this.tracer = tracer;
    }

    /**
     * Creates an order inside a manually managed {@code create-order} span.
     *
     * @param request the order to create
     * @return the created order
     */
    public Order createOrder(OrderRequest request) {
        Span span = tracer.nextSpan().name("create-order");
        try (Tracer.SpanInScope ws = tracer.withSpan(span.start())) {
            span.tag("order.type", request.getType());
            Order order = doCreateOrder(request);
            // Emit the event only once the order actually exists.
            span.event("order.created");
            span.tag("order.id", order.getId().toString());
            return order;
        } catch (RuntimeException e) {
            // Attach the failure to the span so it is visible in the trace.
            span.error(e);
            throw e;
        } finally {
            span.end();
        }
    }

    /**
     * Processes the payment of an order inside a {@code process-payment} span
     * created through the annotation.
     *
     * @param orderId id of the order to pay
     * @return the payment result
     */
    @NewSpan("process-payment")
    public Payment processPayment(Long orderId) {
        return paymentService.process(orderId);
    }

    /**
     * Loads an order inside an annotation-created span tagged with the source
     * and the order id.
     *
     * <p>{@code @SpanTag} is a parameter annotation, so the order id tag is
     * declared on the {@code orderId} parameter rather than on the method.
     *
     * @param source where the lookup originates from
     * @param orderId id of the order to load
     * @return the order, or {@code null} when none exists
     */
    @NewSpan
    public Order getOrder(@SpanTag("order.source") String source,
                          @SpanTag("order.id") Long orderId) {
        return orderRepository.findById(orderId).orElse(null);
    }
}
4.4 追踪日志
yaml
# Prefix every log line with the application name, trace id and span id.
logging:
  pattern:
    level: "%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}]"
java
/** Minimal service used to show a log statement emitted inside a traced request. */
@Slf4j
@Service
public class TracedService {

    /** Writes a single INFO log line. */
    public void tracedMethod() {
        final String message = "这条日志会包含 traceId 和 spanId";
        log.info(message);
    }
}
5. 日志管理
5.1 日志配置
yaml
logging:
  level:
    root: INFO
    com.example: DEBUG
  file:
    name: logs/application.log
  # Rotation settings live under logging.logback.rollingpolicy.*; the older
  # logging.file.max-size / logging.file.max-history keys are obsolete.
  logback:
    rollingpolicy:
      max-file-size: 10MB
      max-history: 30
  pattern:
    file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] [%X{traceId:-},%X{spanId:-}] %-5level %logger{36} - %msg%n"
    console: "%d{yyyy-MM-dd HH:mm:ss.SSS} %highlight(%-5level) [%thread] [%X{traceId:-},%X{spanId:-}] %cyan(%logger{36}) - %msg%n"
5.2 结构化日志
xml
<dependency>
<groupId>net.logstash.logback</groupId>
<artifactId>logstash-logback-encoder</artifactId>
<version>7.4</version>
</dependency>
xml
<!-- logback-spring.xml -->
<configuration>
<appender name="JSON" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/application.json</file>
<encoder class="net.logstash.logback.encoder.LogstashEncoder">
<includeMdcKeyName>traceId</includeMdcKeyName>
<includeMdcKeyName>spanId</includeMdcKeyName>
<customFields>{"app":"myapp"}</customFields>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>logs/application.%d{yyyy-MM-dd}.json</fileNamePattern>
<maxHistory>30</maxHistory>
</rollingPolicy>
</appender>
<root level="INFO">
<appender-ref ref="JSON"/>
</root>
</configuration>
5.3 MDC 日志
java
/**
 * Servlet filter that makes sure a {@code traceId} entry is present in the
 * logging MDC for the duration of each request.
 */
@Component
public class TraceFilter implements Filter {
    /** MDC key under which the trace id is stored. */
    private static final String TRACE_ID_KEY = "traceId";

    /**
     * Adds a generated trace id to the MDC when none is present, runs the rest
     * of the chain, then removes only the entry this filter added.
     *
     * @param request the incoming request
     * @param response the outgoing response
     * @param chain the remaining filter chain
     * @throws IOException propagated from the chain
     * @throws ServletException propagated from the chain
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response,
                         FilterChain chain) throws IOException, ServletException {
        // Do not clobber an id that another component (for example a tracing
        // bridge) has already placed in the MDC.
        if (MDC.get(TRACE_ID_KEY) != null) {
            chain.doFilter(request, response);
            return;
        }
        MDC.put(TRACE_ID_KEY, UUID.randomUUID().toString().replace("-", ""));
        try {
            chain.doFilter(request, response);
        } finally {
            // Remove only our own key; MDC.clear() would also wipe entries
            // owned by other filters running on this thread.
            MDC.remove(TRACE_ID_KEY);
        }
    }
}
/** Shows how to attach request-scoped context to log lines through the MDC. */
@Slf4j
@Service
public class MdcService {

    /**
     * Logs one message with {@code userId} and {@code operation} present in
     * the MDC, then removes both keys again.
     */
    public void logWithContext() {
        final String userKey = "userId";
        final String operationKey = "operation";

        MDC.put(userKey, "12345");
        MDC.put(operationKey, "create-order");
        try {
            log.info("创建订单");
        } finally {
            // Remove exactly the keys added above, whether or not logging threw.
            MDC.remove(userKey);
            MDC.remove(operationKey);
        }
    }
}
6. Prometheus 集成
6.1 配置 Prometheus
yaml
# prometheus.yml
scrape_configs:
  - job_name: 'spring-boot'
    metrics_path: '/actuator/prometheus'
    static_configs:
      - targets: ['localhost:8080']
6.2 Grafana 仪表盘
java
/**
 * Adds the {@code application} and {@code instance} common tags to all meters
 * so dashboards can filter by application and host.
 */
@Configuration
public class GrafanaConfig {
    /** Tag value used when the local host name cannot be resolved. */
    private static final String UNKNOWN_HOST = "unknown";

    /**
     * @return a customizer that tags every meter with the application name and
     *         the local host name
     */
    @Bean
    public MeterRegistryCustomizer<MeterRegistry> commonTags() {
        // Resolved outside the lambda: getLocalHost() throws a checked
        // exception, which the customizer's functional method cannot propagate.
        String instance = resolveHostName();
        return registry -> registry.config()
                .commonTags("application", "myapp")
                .commonTags("instance", instance);
    }

    /** Returns the local host name, or a fixed fallback when resolution fails. */
    private static String resolveHostName() {
        try {
            return InetAddress.getLocalHost().getHostName();
        } catch (java.net.UnknownHostException e) {
            // A missing host name must not prevent the application from starting.
            return UNKNOWN_HOST;
        }
    }
}
6.3 告警规则
yaml
# alert.rules.yml
groups:
  - name: spring-boot-alerts
    rules:
      - alert: HighErrorRate
        # Share of 5xx responses among all responses, so that the 0.1
        # threshold matches the "10%" stated in the description.
        expr: |
          sum(rate(http_server_requests_seconds_count{status=~"5.."}[5m]))
            /
          sum(rate(http_server_requests_seconds_count[5m])) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "高错误率"
          description: "错误率超过 10%"
      - alert: HighLatency
        # Buckets are aggregated by "le" before computing the quantile. The
        # _bucket series exist only when percentile histograms are enabled
        # for http.server.requests.
        expr: histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket[5m]))) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "高延迟"
          description: "P95 延迟超过 1 秒"
7. 健康检查
7.1 Kubernetes 探针
yaml
# Enable the liveness and readiness health groups.
management:
  endpoint:
    health:
      probes:
        enabled: true
  health:
    livenessstate:
      enabled: true
    readinessstate:
      enabled: true
yaml
# Kubernetes Deployment
livenessProbe:
  httpGet:
    path: /actuator/health/liveness
    port: 8080
  initialDelaySeconds: 30
  periodSeconds: 10
readinessProbe:
  httpGet:
    path: /actuator/health/readiness
    port: 8080
  initialDelaySeconds: 10
  periodSeconds: 5
7.2 自定义健康状态
java
/**
 * Health indicator driven by a flag that other components flip once the
 * application is able to serve traffic.
 */
@Component
public class ReadinessHealthIndicator implements HealthIndicator {
    /** Volatile so a write from one thread is visible to the thread reading it. */
    private volatile boolean ready = false;

    /** @return UP when the flag is set, DOWN otherwise */
    @Override
    public Health health() {
        Health.Builder status = ready ? Health.up() : Health.down();
        return status.build();
    }

    /**
     * @param ready {@code true} to report UP from now on, {@code false} to report DOWN
     */
    public void setReady(boolean ready) {
        this.ready = ready;
    }
}
8. 小结
本章学习了 Spring Boot 可观测性的核心内容:
| 内容 | 要点 |
|---|---|
| Actuator | 健康检查、端点配置 |
| 指标收集 | Counter、Gauge、Timer |
| 分布式追踪 | Span、Trace、Zipkin |
| 日志管理 | 结构化日志、MDC |
| Prometheus | 指标暴露、Grafana |
| 健康检查 | 自定义健康指示器、K8s 探针 |
下一章将学习 Spring Boot 部署。