Back to Journal
System Design

Complete Guide to Saga Pattern Implementation with Go

A comprehensive guide to implementing the saga pattern in Go, covering architecture, code examples, and production-ready patterns.

Muneer Puthiya Purayil 18 min read

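Go's concurrency model and explicit error handling make it well-suited for implementing the saga pattern. This guide covers building a production-grade saga orchestrator in Go — from the core state machine and step execution to persistent storage, compensation handling, and observability. The saga package examples are complete; the order and handler examples assume service interfaces and request types (InventoryService, PaymentService, CreateOrderRequest, and so on) that are defined elsewhere in your application.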

Core Types

Start with the types that define saga structure and state.

go
1package saga
2 
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "time"
8)
9 
10type StepStatus string
11type FlowStatus string
12 
13const (
14 StepPending StepStatus = "pending"
15 StepRunning StepStatus = "running"
16 StepCompleted StepStatus = "completed"
17 StepFailed StepStatus = "failed"
18 StepCompensated StepStatus = "compensated"
19 
20 FlowRunning FlowStatus = "running"
21 FlowCompleted FlowStatus = "completed"
22 FlowCompensating FlowStatus = "compensating"
23 FlowFailed FlowStatus = "failed"
24)
25 
26type SagaState struct {
27 ID string `json:"id" db:"id"`
28 SagaType string `json:"saga_type" db:"saga_type"`
29 Status FlowStatus `json:"status" db:"status"`
30 CurrentStep int `json:"current_step" db:"current_step"`
31 Context json.RawMessage `json:"context" db:"context"`
32 CompletedSteps []string `json:"completed_steps" db:"completed_steps"`
33 Error string `json:"error,omitempty" db:"error"`
34 Version int `json:"version" db:"version"`
35 CreatedAt time.Time `json:"created_at" db:"created_at"`
36 UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
37}
38 
39type StepDefinition[T any] struct {
40 Name string
41 Execute func(ctx context.Context, sagaCtx *T) error
42 Compensate func(ctx context.Context, sagaCtx *T) error
43 Timeout time.Duration
44 RetryPolicy *RetryPolicy
45}
46 
47type RetryPolicy struct {
48 MaxAttempts int
49 InitialBackoff time.Duration
50 Multiplier float64
51 MaxBackoff time.Duration
52}
53 

Saga Orchestrator

The orchestrator manages step execution, state persistence, and compensation.

go
1package saga
2 
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "log/slog"
8 "math"
9 "time"
10 
11 "github.com/google/uuid"
12)
13 
14type Store interface {
15 Save(ctx context.Context, state *SagaState) error
16 Get(ctx context.Context, id string) (*SagaState, error)
17 SaveWithOptimisticLock(ctx context.Context, state *SagaState) error
18}
19 
20type Orchestrator[T any] struct {
21 steps []StepDefinition[T]
22 store Store
23 logger *slog.Logger
24}
25 
26func NewOrchestrator[T any](store Store, logger *slog.Logger, steps ...StepDefinition[T]) *Orchestrator[T] {
27 return &Orchestrator[T]{
28 steps: steps,
29 store: store,
30 logger: logger,
31 }
32}
33 
34func (o *Orchestrator[T]) Execute(ctx context.Context, sagaType string, sagaCtx *T) (string, error) {
35 ctxBytes, err := json.Marshal(sagaCtx)
36 if err != nil {
37 return "", fmt.Errorf("marshal saga context: %w", err)
38 }
39 
40 state := &SagaState{
41 ID: uuid.New().String(),
42 SagaType: sagaType,
43 Status: FlowRunning,
44 CurrentStep: 0,
45 Context: ctxBytes,
46 CompletedSteps: make([]string, 0),
47 Version: 1,
48 CreatedAt: time.Now(),
49 UpdatedAt: time.Now(),
50 }
51 
52 if err := o.store.Save(ctx, state); err != nil {
53 return "", fmt.Errorf("save initial state: %w", err)
54 }
55 
56 o.logger.Info("saga started",
57 "saga_id", state.ID,
58 "saga_type", sagaType,
59 "steps", len(o.steps),
60 )
61 
62 if err := o.executeSteps(ctx, state, sagaCtx); err != nil {
63 return state.ID, err
64 }
65 
66 return state.ID, nil
67}
68 
69func (o *Orchestrator[T]) executeSteps(ctx context.Context, state *SagaState, sagaCtx *T) error {
70 for i := state.CurrentStep; i < len(o.steps); i++ {
71 step := o.steps[i]
72 
73 o.logger.Info("executing step",
74 "saga_id", state.ID,
75 "step", step.Name,
76 "index", i,
77 )
78 
79 if err := o.executeStepWithRetry(ctx, step, sagaCtx); err != nil {
80 o.logger.Error("step failed",
81 "saga_id", state.ID,
82 "step", step.Name,
83 "error", err,
84 )
85 
86 state.Status = FlowCompensating
87 state.Error = err.Error()
88 state.UpdatedAt = time.Now()
89 _ = o.store.SaveWithOptimisticLock(ctx, state)
90 
91 compErr := o.compensate(ctx, state, sagaCtx)
92 if compErr != nil {
93 state.Status = FlowFailed
94 state.Error = fmt.Sprintf("execute: %s; compensate: %s", err, compErr)
95 } else {
96 state.Status = FlowFailed
97 }
98 state.UpdatedAt = time.Now()
99 _ = o.store.SaveWithOptimisticLock(ctx, state)
100 
101 return fmt.Errorf("saga failed at step %s: %w", step.Name, err)
102 }
103 
104 state.CurrentStep = i + 1
105 state.CompletedSteps = append(state.CompletedSteps, step.Name)
106 state.UpdatedAt = time.Now()
107 
108 // Re-marshal context in case the step modified it
109 ctxBytes, _ := json.Marshal(sagaCtx)
110 state.Context = ctxBytes
111 
112 if err := o.store.SaveWithOptimisticLock(ctx, state); err != nil {
113 return fmt.Errorf("persist state after step %s: %w", step.Name, err)
114 }
115 }
116 
117 state.Status = FlowCompleted
118 state.UpdatedAt = time.Now()
119 _ = o.store.SaveWithOptimisticLock(ctx, state)
120 
121 o.logger.Info("saga completed", "saga_id", state.ID)
122 return nil
123}
124 
125func (o *Orchestrator[T]) executeStepWithRetry(ctx context.Context, step StepDefinition[T], sagaCtx *T) error {
126 policy := step.RetryPolicy
127 if policy == nil {
128 policy = &RetryPolicy{MaxAttempts: 1}
129 }
130 
131 var lastErr error
132 for attempt := 0; attempt < policy.MaxAttempts; attempt++ {
133 if attempt > 0 {
134 backoff := time.Duration(float64(policy.InitialBackoff) * math.Pow(policy.Multiplier, float64(attempt-1)))
135 if backoff > policy.MaxBackoff {
136 backoff = policy.MaxBackoff
137 }
138 time.Sleep(backoff)
139 }
140 
141 stepCtx := ctx
142 if step.Timeout > 0 {
143 var cancel context.CancelFunc
144 stepCtx, cancel = context.WithTimeout(ctx, step.Timeout)
145 defer cancel()
146 }
147 
148 lastErr = step.Execute(stepCtx, sagaCtx)
149 if lastErr == nil {
150 return nil
151 }
152 
153 o.logger.Warn("step attempt failed",
154 "step", step.Name,
155 "attempt", attempt+1,
156 "max_attempts", policy.MaxAttempts,
157 "error", lastErr,
158 )
159 }
160 
161 return lastErr
162}
163 
164func (o *Orchestrator[T]) compensate(ctx context.Context, state *SagaState, sagaCtx *T) error {
165 var compensationErrors []error
166 
167 // Compensate in reverse order
168 for i := len(state.CompletedSteps) - 1; i >= 0; i-- {
169 stepName := state.CompletedSteps[i]
170 step := o.findStep(stepName)
171 if step == nil {
172 continue
173 }
174 
175 o.logger.Info("compensating step",
176 "saga_id", state.ID,
177 "step", stepName,
178 )
179 
180 compCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
181 if err := step.Compensate(compCtx, sagaCtx); err != nil {
182 o.logger.Error("compensation failed",
183 "saga_id", state.ID,
184 "step", stepName,
185 "error", err,
186 )
187 compensationErrors = append(compensationErrors, fmt.Errorf("compensate %s: %w", stepName, err))
188 }
189 cancel()
190 }
191 
192 if len(compensationErrors) > 0 {
193 return fmt.Errorf("compensation errors: %v", compensationErrors)
194 }
195 return nil
196}
197 
198func (o *Orchestrator[T]) findStep(name string) *StepDefinition[T] {
199 for i := range o.steps {
200 if o.steps[i].Name == name {
201 return &o.steps[i]
202 }
203 }
204 return nil
205}
206 

PostgreSQL State Store

go
1package saga
2 
3import (
4 "context"
5 "database/sql"
6 "encoding/json"
7 "fmt"
8 "time"
9 
10 "github.com/lib/pq"
11)
12 
13type PostgresStore struct {
14 db *sql.DB
15}
16 
17func NewPostgresStore(db *sql.DB) *PostgresStore {
18 return &PostgresStore{db: db}
19}
20 
21func (s *PostgresStore) Save(ctx context.Context, state *SagaState) error {
22 query := `
23 INSERT INTO saga_state (id, saga_type, status, current_step, context, completed_steps, error, version, created_at, updated_at)
24 VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
25 ON CONFLICT (id) DO UPDATE SET
26 status = EXCLUDED.status,
27 current_step = EXCLUDED.current_step,
28 context = EXCLUDED.context,
29 completed_steps = EXCLUDED.completed_steps,
30 error = EXCLUDED.error,
31 version = EXCLUDED.version,
32 updated_at = EXCLUDED.updated_at
33 `
34 
35 _, err := s.db.ExecContext(ctx, query,
36 state.ID,
37 state.SagaType,
38 state.Status,
39 state.CurrentStep,
40 state.Context,
41 pq.Array(state.CompletedSteps),
42 state.Error,
43 state.Version,
44 state.CreatedAt,
45 state.UpdatedAt,
46 )
47 return err
48}
49 
50func (s *PostgresStore) Get(ctx context.Context, id string) (*SagaState, error) {
51 query := `
52 SELECT id, saga_type, status, current_step, context, completed_steps, error, version, created_at, updated_at
53 FROM saga_state WHERE id = $1
54 `
55 
56 state := &SagaState{}
57 var completedSteps []string
58 
59 err := s.db.QueryRowContext(ctx, query, id).Scan(
60 &state.ID,
61 &state.SagaType,
62 &state.Status,
63 &state.CurrentStep,
64 &state.Context,
65 pq.Array(&completedSteps),
66 &state.Error,
67 &state.Version,
68 &state.CreatedAt,
69 &state.UpdatedAt,
70 )
71 if err == sql.ErrNoRows {
72 return nil, nil
73 }
74 if err != nil {
75 return nil, err
76 }
77 
78 state.CompletedSteps = completedSteps
79 return state, nil
80}
81 
82func (s *PostgresStore) SaveWithOptimisticLock(ctx context.Context, state *SagaState) error {
83 query := `
84 UPDATE saga_state
85 SET status = $1, current_step = $2, context = $3, completed_steps = $4,
86 error = $5, version = version + 1, updated_at = $6
87 WHERE id = $7 AND version = $8
88 `
89 
90 result, err := s.db.ExecContext(ctx, query,
91 state.Status,
92 state.CurrentStep,
93 state.Context,
94 pq.Array(state.CompletedSteps),
95 state.Error,
96 time.Now(),
97 state.ID,
98 state.Version,
99 )
100 if err != nil {
101 return err
102 }
103 
104 rows, err := result.RowsAffected()
105 if err != nil {
106 return err
107 }
108 if rows == 0 {
109 return fmt.Errorf("optimistic lock conflict for saga %s", state.ID)
110 }
111 
112 state.Version++
113 return nil
114}
115 

Example: Order Fulfillment Saga

go
1package order
2 
import (
	"context"
	"fmt"
	"log/slog"
	"time"

	"yourapp/internal/saga"
)
10 
11type OrderContext struct {
12 OrderID string `json:"order_id"`
13 CustomerID string `json:"customer_id"`
14 Items []Item `json:"items"`
15 TotalAmount float64 `json:"total_amount"`
16 PaymentMethod string `json:"payment_method"`
17 ShippingAddr Address `json:"shipping_address"`
18 ReservationID string `json:"reservation_id,omitempty"`
19 ChargeID string `json:"charge_id,omitempty"`
20 ShipmentID string `json:"shipment_id,omitempty"`
21 TrackingNumber string `json:"tracking_number,omitempty"`
22}
23 
24type Item struct {
25 ProductID string `json:"product_id"`
26 Quantity int `json:"quantity"`
27 Price float64 `json:"price"`
28}
29 
30type Address struct {
31 Street string `json:"street"`
32 City string `json:"city"`
33 State string `json:"state"`
34 ZipCode string `json:"zip_code"`
35 Country string `json:"country"`
36}
37 
38func NewOrderFulfillmentSaga(
39 store saga.Store,
40 logger *slog.Logger,
41 inventorySvc InventoryService,
42 paymentSvc PaymentService,
43 shippingSvc ShippingService,
44 notifySvc NotificationService,
45) *saga.Orchestrator[OrderContext] {
46 return saga.NewOrchestrator[OrderContext](
47 store,
48 logger,
49 saga.StepDefinition[OrderContext]{
50 Name: "reserve_inventory",
51 Execute: func(ctx context.Context, oc *OrderContext) error {
52 reservation, err := inventorySvc.Reserve(ctx, ReserveRequest{
53 OrderID: oc.OrderID,
54 Items: oc.Items,
55 IdempotencyKey: fmt.Sprintf("%s-reserve", oc.OrderID),
56 })
57 if err != nil {
58 return fmt.Errorf("reserve inventory: %w", err)
59 }
60 oc.ReservationID = reservation.ID
61 return nil
62 },
63 Compensate: func(ctx context.Context, oc *OrderContext) error {
64 if oc.ReservationID == "" {
65 return nil
66 }
67 return inventorySvc.Release(ctx, ReleaseRequest{
68 ReservationID: oc.ReservationID,
69 IdempotencyKey: fmt.Sprintf("%s-release", oc.OrderID),
70 })
71 },
72 Timeout: 5 * time.Second,
73 RetryPolicy: &saga.RetryPolicy{
74 MaxAttempts: 3,
75 InitialBackoff: 100 * time.Millisecond,
76 Multiplier: 2.0,
77 MaxBackoff: 2 * time.Second,
78 },
79 },
80 saga.StepDefinition[OrderContext]{
81 Name: "charge_payment",
82 Execute: func(ctx context.Context, oc *OrderContext) error {
83 charge, err := paymentSvc.Charge(ctx, ChargeRequest{
84 OrderID: oc.OrderID,
85 Amount: oc.TotalAmount,
86 PaymentMethod: oc.PaymentMethod,
87 IdempotencyKey: fmt.Sprintf("%s-charge", oc.OrderID),
88 })
89 if err != nil {
90 return fmt.Errorf("charge payment: %w", err)
91 }
92 oc.ChargeID = charge.ID
93 return nil
94 },
95 Compensate: func(ctx context.Context, oc *OrderContext) error {
96 if oc.ChargeID == "" {
97 return nil
98 }
99 return paymentSvc.Refund(ctx, RefundRequest{
100 ChargeID: oc.ChargeID,
101 IdempotencyKey: fmt.Sprintf("%s-refund", oc.OrderID),
102 })
103 },
104 Timeout: 30 * time.Second,
105 RetryPolicy: &saga.RetryPolicy{
106 MaxAttempts: 2,
107 InitialBackoff: time.Second,
108 Multiplier: 2.0,
109 MaxBackoff: 5 * time.Second,
110 },
111 },
112 saga.StepDefinition[OrderContext]{
113 Name: "create_shipment",
114 Execute: func(ctx context.Context, oc *OrderContext) error {
115 shipment, err := shippingSvc.Create(ctx, CreateShipmentRequest{
116 OrderID: oc.OrderID,
117 Address: oc.ShippingAddr,
118 Items: oc.Items,
119 IdempotencyKey: fmt.Sprintf("%s-ship", oc.OrderID),
120 })
121 if err != nil {
122 return fmt.Errorf("create shipment: %w", err)
123 }
124 oc.ShipmentID = shipment.ID
125 oc.TrackingNumber = shipment.TrackingNumber
126 return nil
127 },
128 Compensate: func(ctx context.Context, oc *OrderContext) error {
129 if oc.ShipmentID == "" {
130 return nil
131 }
132 return shippingSvc.Cancel(ctx, oc.ShipmentID)
133 },
134 Timeout: 10 * time.Second,
135 },
136 saga.StepDefinition[OrderContext]{
137 Name: "send_confirmation",
138 Execute: func(ctx context.Context, oc *OrderContext) error {
139 return notifySvc.SendOrderConfirmation(ctx, oc.CustomerID, oc.OrderID, oc.TrackingNumber)
140 },
141 Compensate: func(_ context.Context, _ *OrderContext) error {
142 return nil // Notifications don't need compensation
143 },
144 Timeout: 5 * time.Second,
145 },
146 )
147}
148 

Need a second opinion on your system design architecture?

I run free 30-minute strategy calls for engineering teams tackling this exact problem.

Book a Free Call

HTTP Handler

go
1package handler
2 
3import (
4 "encoding/json"
5 "net/http"
6 
7 "yourapp/internal/order"
8 "yourapp/internal/saga"
9)
10 
11type OrderHandler struct {
12 saga *saga.Orchestrator[order.OrderContext]
13}
14 
15func (h *OrderHandler) CreateOrder(w http.ResponseWriter, r *http.Request) {
16 var req CreateOrderRequest
17 if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
18 http.Error(w, "invalid request body", http.StatusBadRequest)
19 return
20 }
21 
22 orderCtx := &order.OrderContext{
23 OrderID: req.OrderID,
24 CustomerID: req.CustomerID,
25 Items: req.Items,
26 TotalAmount: req.TotalAmount,
27 PaymentMethod: req.PaymentMethodID,
28 ShippingAddr: req.ShippingAddress,
29 }
30 
31 sagaID, err := h.saga.Execute(r.Context(), "order-fulfillment", orderCtx)
32 if err != nil {
33 w.WriteHeader(http.StatusInternalServerError)
34 json.NewEncoder(w).Encode(map[string]string{
35 "error": err.Error(),
36 "saga_id": sagaID,
37 })
38 return
39 }
40 
41 w.WriteHeader(http.StatusCreated)
42 json.NewEncoder(w).Encode(map[string]string{
43 "saga_id": sagaID,
44 "order_id": orderCtx.OrderID,
45 "tracking_number": orderCtx.TrackingNumber,
46 "status": "completed",
47 })
48}
49 

Testing Sagas in Go

Go's interface-based design makes it straightforward to test sagas with mock services.

go
1package saga_test
2 
3import (
4 "context"
5 "errors"
6 "log/slog"
7 "os"
8 "testing"
9 
10 "yourapp/internal/saga"
11)
12 
13type InMemoryStore struct {
14 states map[string]*saga.SagaState
15}
16 
17func NewInMemoryStore() *InMemoryStore {
18 return &InMemoryStore{states: make(map[string]*saga.SagaState)}
19}
20 
21func (s *InMemoryStore) Save(_ context.Context, state *saga.SagaState) error {
22 s.states[state.ID] = state
23 return nil
24}
25 
26func (s *InMemoryStore) Get(_ context.Context, id string) (*saga.SagaState, error) {
27 state, ok := s.states[id]
28 if !ok {
29 return nil, nil
30 }
31 return state, nil
32}
33 
34func (s *InMemoryStore) SaveWithOptimisticLock(_ context.Context, state *saga.SagaState) error {
35 existing, ok := s.states[state.ID]
36 if ok && existing.Version != state.Version {
37 return errors.New("optimistic lock conflict")
38 }
39 state.Version++
40 s.states[state.ID] = state
41 return nil
42}
43 
44type TestContext struct {
45 Value string `json:"value"`
46 Step1Ran bool `json:"step1_ran"`
47 Step2Ran bool `json:"step2_ran"`
48 Comp1Ran bool `json:"comp1_ran"`
49}
50 
51func TestSagaHappyPath(t *testing.T) {
52 store := NewInMemoryStore()
53 logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
54 
55 orch := saga.NewOrchestrator[TestContext](
56 store, logger,
57 saga.StepDefinition[TestContext]{
58 Name: "step1",
59 Execute: func(_ context.Context, tc *TestContext) error {
60 tc.Step1Ran = true
61 return nil
62 },
63 Compensate: func(_ context.Context, tc *TestContext) error {
64 tc.Comp1Ran = true
65 return nil
66 },
67 },
68 saga.StepDefinition[TestContext]{
69 Name: "step2",
70 Execute: func(_ context.Context, tc *TestContext) error {
71 tc.Step2Ran = true
72 return nil
73 },
74 Compensate: func(_ context.Context, _ *TestContext) error {
75 return nil
76 },
77 },
78 )
79 
80 ctx := &TestContext{Value: "test"}
81 id, err := orch.Execute(context.Background(), "test-saga", ctx)
82 
83 if err != nil {
84 t.Fatalf("expected no error, got %v", err)
85 }
86 if !ctx.Step1Ran || !ctx.Step2Ran {
87 t.Fatal("expected both steps to run")
88 }
89 
90 state, _ := store.Get(context.Background(), id)
91 if state.Status != saga.FlowCompleted {
92 t.Fatalf("expected completed, got %s", state.Status)
93 }
94}
95 
96func TestSagaCompensation(t *testing.T) {
97 store := NewInMemoryStore()
98 logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
99 
100 orch := saga.NewOrchestrator[TestContext](
101 store, logger,
102 saga.StepDefinition[TestContext]{
103 Name: "step1",
104 Execute: func(_ context.Context, tc *TestContext) error {
105 tc.Step1Ran = true
106 return nil
107 },
108 Compensate: func(_ context.Context, tc *TestContext) error {
109 tc.Comp1Ran = true
110 return nil
111 },
112 },
113 saga.StepDefinition[TestContext]{
114 Name: "step2_fails",
115 Execute: func(_ context.Context, _ *TestContext) error {
116 return errors.New("step2 failed")
117 },
118 Compensate: func(_ context.Context, _ *TestContext) error {
119 return nil
120 },
121 },
122 )
123 
124 ctx := &TestContext{Value: "test"}
125 id, err := orch.Execute(context.Background(), "test-saga", ctx)
126 
127 if err == nil {
128 t.Fatal("expected error from failed saga")
129 }
130 if !ctx.Step1Ran {
131 t.Fatal("step1 should have run")
132 }
133 if !ctx.Comp1Ran {
134 t.Fatal("step1 compensation should have run")
135 }
136 
137 state, _ := store.Get(context.Background(), id)
138 if state.Status != saga.FlowFailed {
139 t.Fatalf("expected failed, got %s", state.Status)
140 }
141}
142 

Observability with OpenTelemetry

go
1package saga
2 
3import (
4 "context"
5 "fmt"
6 
7 "go.opentelemetry.io/otel"
8 "go.opentelemetry.io/otel/attribute"
9 "go.opentelemetry.io/otel/codes"
10 "go.opentelemetry.io/otel/trace"
11)
12 
13var tracer = otel.Tracer("saga-orchestrator")
14 
15func (o *Orchestrator[T]) executeStepWithTracing(
16 ctx context.Context,
17 step StepDefinition[T],
18 sagaCtx *T,
19 sagaID string,
20) error {
21 ctx, span := tracer.Start(ctx, fmt.Sprintf("saga.step.%s", step.Name),
22 trace.WithAttributes(
23 attribute.String("saga.id", sagaID),
24 attribute.String("saga.step", step.Name),
25 ),
26 )
27 defer span.End()
28 
29 err := o.executeStepWithRetry(ctx, step, sagaCtx)
30 if err != nil {
31 span.SetStatus(codes.Error, err.Error())
32 span.RecordError(err)
33 } else {
34 span.SetStatus(codes.Ok, "")
35 }
36 
37 return err
38}
39 

Conclusion

Go's type system, explicit error handling, and goroutine model create a natural fit for saga orchestration. The generic orchestrator (Orchestrator[T]) provides type-safe saga contexts without runtime casting, the interface-based store enables clean testing with in-memory implementations, and Go's context propagation carries deadlines and cancellation through the entire step execution chain.

The PostgreSQL store with optimistic locking handles concurrent saga modifications safely without distributed locks. For most workloads under 10,000 sagas per second, a single PostgreSQL instance with proper indexing is sufficient. Beyond that, consider partitioning the saga state table by saga ID hash.

Keep saga contexts lean — store IDs and amounts, not full request/response payloads. A common mistake in Go implementations is passing large structs through the saga context and serializing them to the database at every step. This inflates storage costs and slows state persistence. Let each step fetch the data it needs from its own service.

FAQ

Need expert help?

Building with system design?

I help teams ship production-grade systems. From architecture review to hands-on builds.

Muneer Puthiya Purayil

SaaS Architect & AI Systems Engineer. 10+ years shipping production infrastructure across fintech, automotive, e-commerce, and healthcare.

Engage

Start a
Conversation.

For teams building at scale: SaaS platforms, agentic AI systems, and enterprise mobile infrastructure. Scope and fit are evaluated before any engagement begins.

Limited availability · Q3 / Q4 2026