实现链路追踪
问题
如何用 Go 实现一个简易的分布式链路追踪系统?
答案
核心概念
- Trace:一次完整请求的调用链
- Span:一个操作单元(一次 RPC、一次 DB 查询)
- TraceID:贯穿整个调用链的唯一标识
- SpanID:每个 Span 的唯一标识
- ParentSpanID:父 Span
手写简易链路追踪
// Span describes one unit of work (for example an RPC or a DB query) inside a
// trace. It is serialized to JSON using the tags below.
type Span struct {
	// TraceID is shared by every span that belongs to the same call chain.
	TraceID string `json:"trace_id"`
	// SpanID identifies this span.
	SpanID string `json:"span_id"`
	// ParentSpanID is the SpanID of the parent span; it is left empty on a
	// root span and then omitted from the JSON output.
	ParentSpanID string `json:"parent_span_id,omitempty"`
	// Operation is the name given to the span when it was started.
	Operation string `json:"operation"`
	// StartTime is the moment the span was started.
	StartTime time.Time `json:"start_time"`
	// Duration is the elapsed time since StartTime, filled in by Finish.
	Duration time.Duration `json:"duration"`
	// Tags holds free-form string attributes attached to the span.
	Tags map[string]string `json:"tags,omitempty"`
	// Status is the outcome of the span: "ok" or "error".
	Status string `json:"status"`
}
// traceKey is the unexported context key type under which the current *Span
// is stored; using a private type keeps the key from colliding with context
// values set by other packages.
type traceKey struct{}
// SpanFromContext returns the *Span stored in ctx under traceKey, or nil when
// ctx carries no span.
func SpanFromContext(ctx context.Context) *Span {
	// A failed type assertion leaves current as the nil *Span, which is
	// exactly the "no span" result.
	current, _ := ctx.Value(traceKey{}).(*Span)
	return current
}
// StartSpan begins a new span named operation.
//
// When ctx already carries a span, the new span joins that span's trace and
// records it as its parent; otherwise the new span is a root span and gets a
// freshly generated TraceID. The returned context carries the new span, so
// nested StartSpan calls made with it become children of this span.
func StartSpan(ctx context.Context, operation string) (context.Context, *Span) {
	upstream := SpanFromContext(ctx)

	child := new(Span)
	child.SpanID = generateID()
	child.Operation = operation
	child.StartTime = time.Now()
	child.Tags = map[string]string{}

	if upstream == nil {
		// Root span: no trace exists yet, so mint a new TraceID.
		child.TraceID = generateID()
	} else {
		child.TraceID, child.ParentSpanID = upstream.TraceID, upstream.SpanID
	}

	return context.WithValue(ctx, traceKey{}, child), child
}
// Finish ends the span: it records the time elapsed since StartTime in
// Duration and hands the span to report for delivery to the collector.
//
// A span whose Status was never set is reported as "ok", so that spans which
// only mark failures (via SetError) do not reach the collector with an empty
// status.
func (s *Span) Finish() {
	s.Duration = time.Since(s.StartTime)
	if s.Status == "" {
		s.Status = "ok"
	}
	// Report to the collector.
	report(s)
}
// SetTag attaches the key/value pair to the span, overwriting any previous
// value stored under key.
func (s *Span) SetTag(key, value string) {
	// Spans built as plain struct literals (rather than through StartSpan)
	// have a nil Tags map; writing to a nil map panics, so create it lazily.
	if s.Tags == nil {
		s.Tags = make(map[string]string)
	}
	s.Tags[key] = value
}
// SetError marks the span as failed: Status becomes "error" and the error
// message is stored under the "error" tag. A nil err is ignored and leaves
// the span untouched.
func (s *Span) SetError(err error) {
	if err == nil {
		// Calling err.Error() on a nil interface would panic, and a nil
		// error is not a failure.
		return
	}
	s.Status = "error"
	// Spans built as plain struct literals have a nil Tags map; writing to a
	// nil map panics, so create it lazily.
	if s.Tags == nil {
		s.Tags = make(map[string]string)
	}
	s.Tags["error"] = err.Error()
}
// generateID returns a 16-character lowercase hexadecimal identifier built
// from 8 bytes obtained from rand.Read.
func generateID() string {
	var raw [8]byte
	// The result of rand.Read is discarded; the function has no way to
	// surface an error to its callers.
	_, _ = rand.Read(raw[:])
	return hex.EncodeToString(raw[:])
}
HTTP 中间件注入
// TracingMiddleware returns a gin handler that records one server-side span
// per request.
//
// If the request carries an X-Trace-ID header, the span joins that trace and
// uses the X-Span-ID header as its parent; otherwise StartSpan creates a new
// root trace. The span is placed on the request context so downstream handlers
// can create child spans from it. After the handler chain returns, the
// response status is tagged and any status >= 400 marks the span as "error".
//
// The span is finished in a deferred function so that it is still reported
// when a downstream handler panics; in that case the span is marked "error"
// and no http.status tag is written, because the status held by the writer at
// that point does not reflect what the client will eventually receive.
func TracingMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		ctx := c.Request.Context()
		if traceID := c.GetHeader("X-Trace-ID"); traceID != "" {
			// Placeholder for the caller's span: it only supplies the
			// TraceID and parent SpanID and is never reported itself.
			upstream := &Span{TraceID: traceID, SpanID: c.GetHeader("X-Span-ID")}
			ctx = context.WithValue(ctx, traceKey{}, upstream)
		}

		ctx, span := StartSpan(ctx, c.FullPath())
		span.SetTag("http.method", c.Request.Method)
		span.SetTag("http.url", c.Request.URL.String())
		c.Request = c.Request.WithContext(ctx)

		// completed stays false if c.Next() unwinds through a panic; the
		// panic itself is not recovered here and keeps propagating.
		completed := false
		defer func() {
			if completed {
				status := c.Writer.Status()
				span.SetTag("http.status", strconv.Itoa(status))
				if status >= 400 {
					span.Status = "error"
				} else {
					span.Status = "ok"
				}
			} else {
				span.Status = "error"
			}
			span.Finish()
		}()

		c.Next()
		completed = true
	}
}
// HTTPCallWithTrace performs an HTTP request to a downstream service inside a
// new client span and propagates the trace through the X-Trace-ID and
// X-Span-ID request headers.
//
// The span is finished when this function returns, i.e. once the response
// headers have arrived or the call has failed; time spent reading the body is
// not included. On success the caller owns resp and must close resp.Body.
// Errors from building or sending the request are recorded on the span and
// returned unchanged.
func HTTPCallWithTrace(ctx context.Context, method, url string, body io.Reader) (*http.Response, error) {
	ctx, span := StartSpan(ctx, "HTTP "+method+" "+url)
	defer span.Finish()

	// An invalid method or unparsable URL yields a nil request; bail out
	// instead of dereferencing it below.
	req, err := http.NewRequestWithContext(ctx, method, url, body)
	if err != nil {
		span.SetError(err)
		return nil, err
	}

	// Inject the current span's identifiers so the downstream service can
	// attach its own span as a child of this one.
	req.Header.Set("X-Trace-ID", span.TraceID)
	req.Header.Set("X-Span-ID", span.SpanID)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		span.SetError(err)
		return nil, err
	}
	span.SetTag("http.status", strconv.Itoa(resp.StatusCode))
	return resp, nil
}
数据库 Span
// DBQueryWithTrace runs a raw SQL query through db inside a new span and scans
// the result into dest.
//
// The span is tagged with the database type and the statement text, and is
// finished when the function returns. A query error is recorded on the span
// and returned to the caller.
func DBQueryWithTrace(ctx context.Context, db *gorm.DB, dest interface{}, query string, args ...interface{}) error {
	ctx, span := StartSpan(ctx, "MySQL "+query)
	defer span.Finish()

	span.SetTag("db.type", "mysql")
	span.SetTag("db.statement", query)

	// Run the query with the span-carrying context.
	result := db.WithContext(ctx).Raw(query, args...).Scan(dest)
	if result.Error == nil {
		return nil
	}
	span.SetError(result.Error)
	return result.Error
}
使用 OpenTelemetry(生产推荐)
import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/trace"
)
// HandleRequest shows the OpenTelemetry equivalent of the hand-written tracer:
// it starts a span named "HandleRequest" on the "my-service" tracer, calls the
// downstream service with the span-carrying context, and records any error on
// the span before ending it.
//
// codes refers to go.opentelemetry.io/otel/codes, which must be imported
// alongside otel.
func HandleRequest(ctx context.Context) {
	tracer := otel.Tracer("my-service")
	ctx, span := tracer.Start(ctx, "HandleRequest")
	defer span.End()

	// The span travels inside ctx, so passing ctx on is all that is needed
	// for the downstream call to join this trace. The call's result value is
	// not needed here; binding it to a named variable without using it would
	// not compile.
	if _, err := callDownstream(ctx); err != nil {
		span.RecordError(err)
		span.SetStatus(codes.Error, err.Error())
	}
}
常见面试问题
Q1: TraceID 如何在微服务间传递?
答案:
- HTTP:通过 Header(X-Trace-ID、W3C traceparent)
- gRPC:通过 Metadata
- 消息队列:通过消息属性/Header
- 关键:每个服务的中间件负责提取和注入
Q2: 链路追踪对性能有影响吗?
答案:
- Span 创建开销极小(纳秒级)
- 主要开销在上报:用异步批量上报减少影响
- 可以配置采样率(如 10%)降低数据量