Back to Journal
DevOps

Complete Guide to Zero-Downtime Deployments with Go

A comprehensive guide to implementing Zero-Downtime Deployments using Go, covering architecture, code examples, and production-ready patterns.

Muneer Puthiya Purayil 18 min read

Go's standard library provides everything you need for zero-downtime deployments — graceful shutdown, health checks, and connection management are built in, not bolted on. This guide covers implementing zero-downtime deployment patterns natively in Go, from graceful server shutdown through blue-green switching and rolling update coordination.

Graceful HTTP Server Shutdown

Go's http.Server.Shutdown() is the foundation. It stops accepting new connections, closes idle ones, and waits for active requests to complete or for its context to expire, whichever comes first:

go
1package main
2 
3import (
4 "context"
5 "log"
6 "net/http"
7 "os"
8 "os/signal"
9 "sync/atomic"
10 "syscall"
11 "time"
12)
13 
14func main() {
15 var healthy int32 = 1
16 
17 mux := http.NewServeMux()
18 
19 // Application routes
20 mux.HandleFunc("/api/", apiHandler)
21 
22 // Health check — returns 503 during shutdown
23 mux.HandleFunc("/health/ready", func(w http.ResponseWriter, r *http.Request) {
24 if atomic.LoadInt32(&healthy) != 1 {
25 w.WriteHeader(http.StatusServiceUnavailable)
26 return
27 }
28 w.WriteHeader(http.StatusOK)
29 })
30 
31 // Liveness — always returns 200 unless the process is truly broken
32 mux.HandleFunc("/health/live", func(w http.ResponseWriter, r *http.Request) {
33 w.WriteHeader(http.StatusOK)
34 })
35 
36 server := &http.Server{
37 Addr: ":8080",
38 Handler: mux,
39 ReadTimeout: 15 * time.Second,
40 WriteTimeout: 30 * time.Second,
41 IdleTimeout: 60 * time.Second,
42 }
43 
44 // Start server
45 go func() {
46 log.Println("Server starting on :8080")
47 if err := server.ListenAndServe(); err != http.ErrServerClosed {
48 log.Fatalf("Server error: %v", err)
49 }
50 }()
51 
52 // Wait for shutdown signal
53 quit := make(chan os.Signal, 1)
54 signal.Notify(quit, syscall.SIGTERM, syscall.SIGINT)
55 sig := <-quit
56 log.Printf("Received signal: %v", sig)
57 
58 // Phase 1: Mark unhealthy for load balancer
59 atomic.StoreInt32(&healthy, 0)
60 log.Println("Health check returning 503")
61 
62 // Phase 2: Wait for LB to deregister
63 time.Sleep(15 * time.Second)
64 log.Println("Starting graceful shutdown")
65 
66 // Phase 3: Drain active connections
67 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
68 defer cancel()
69 
70 if err := server.Shutdown(ctx); err != nil {
71 log.Printf("Shutdown error: %v", err)
72 }
73 
74 log.Println("Server stopped")
75}
76 

Request Tracking Middleware

Track in-flight requests for visibility during shutdown:

go
1package middleware
2 
import (
	"log"
	"net/http"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)
9 
10var (
11 activeRequests = prometheus.NewGauge(prometheus.GaugeOpts{
12 Name: "http_active_requests",
13 Help: "Number of currently active HTTP requests",
14 })
15 
16 requestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
17 Name: "http_requests_total",
18 Help: "Total HTTP requests",
19 }, []string{"method", "path", "status"})
20)
21 
// RequestTracker counts in-flight HTTP requests and carries a draining flag
// that makes its middleware reject new requests. Both fields are accessed
// with sync/atomic by the methods on this type.
type RequestTracker struct {
	// count is the number of requests currently inside the wrapped handler.
	count int64
	// draining is 1 once StartDraining has been called, 0 before that.
	draining int32
}
26 
27func (rt *RequestTracker) Middleware(next http.Handler) http.Handler {
28 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
29 if atomic.LoadInt32(&rt.draining) == 1 {
30 w.Header().Set("Connection", "close")
31 w.Header().Set("Retry-After", "5")
32 http.Error(w, "Service draining", http.StatusServiceUnavailable)
33 return
34 }
35 
36 atomic.AddInt64(&rt.count, 1)
37 activeRequests.Inc()
38 defer func() {
39 atomic.AddInt64(&rt.count, -1)
40 activeRequests.Dec()
41 }()
42 
43 next.ServeHTTP(w, r)
44 })
45}
46 
47func (rt *RequestTracker) StartDraining() {
48 atomic.StoreInt32(&rt.draining, 1)
49}
50 
51func (rt *RequestTracker) ActiveCount() int64 {
52 return atomic.LoadInt64(&rt.count)
53}
54 
55func (rt *RequestTracker) WaitForDrain(timeout time.Duration) {
56 deadline := time.After(timeout)
57 ticker := time.NewTicker(100 * time.Millisecond)
58 defer ticker.Stop()
59 
60 for {
61 select {
62 case <-deadline:
63 log.Printf("Drain timeout with %d active requests",
64 rt.ActiveCount())
65 return
66 case <-ticker.C:
67 if rt.ActiveCount() == 0 {
68 log.Println("All requests drained")
69 return
70 }
71 }
72 }
73}
74 

Health Check with Dependency Verification

A production health check verifies all critical dependencies:

go
1package health
2 
3import (
4 "context"
5 "database/sql"
6 "encoding/json"
7 "net/http"
8 "sync"
9 "time"
10 
11 "github.com/redis/go-redis/v9"
12)
13 
14type Checker struct {
15 db *sql.DB
16 redis *redis.Client
17 ready bool
18 mu sync.RWMutex
19}
20 
// HealthResponse is the JSON document written by the readiness endpoint.
type HealthResponse struct {
	// Status is the overall verdict; the visible handler writes "healthy",
	// "degraded" or "not_ready".
	Status string `json:"status"`
	// Components maps a dependency name to "healthy" or "unhealthy".
	Components map[string]string `json:"components"`
	// Version is the build version reported by the handler.
	Version string `json:"version"`
	// Uptime is part of the payload but is not populated by the visible handler.
	Uptime string `json:"uptime"`
}
27 
28func (c *Checker) ReadinessHandler() http.HandlerFunc {
29 return func(w http.ResponseWriter, r *http.Request) {
30 c.mu.RLock()
31 ready := c.ready
32 c.mu.RUnlock()
33 
34 if !ready {
35 w.WriteHeader(http.StatusServiceUnavailable)
36 json.NewEncoder(w).Encode(HealthResponse{
37 Status: "not_ready",
38 })
39 return
40 }
41 
42 ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
43 defer cancel()
44 
45 components := make(map[string]string)
46 
47 // Check database
48 if err := c.db.PingContext(ctx); err != nil {
49 components["database"] = "unhealthy"
50 } else {
51 components["database"] = "healthy"
52 }
53 
54 // Check Redis
55 if err := c.redis.Ping(ctx).Err(); err != nil {
56 components["redis"] = "unhealthy"
57 } else {
58 components["redis"] = "healthy"
59 }
60 
61 // Overall status
62 allHealthy := true
63 for _, status := range components {
64 if status != "healthy" {
65 allHealthy = false
66 break
67 }
68 }
69 
70 resp := HealthResponse{
71 Status: "healthy",
72 Components: components,
73 Version: version,
74 }
75 
76 if !allHealthy {
77 resp.Status = "degraded"
78 w.WriteHeader(http.StatusServiceUnavailable)
79 }
80 
81 json.NewEncoder(w).Encode(resp)
82 }
83}
84 
85func (c *Checker) SetReady(ready bool) {
86 c.mu.Lock()
87 c.ready = ready
88 c.mu.Unlock()
89}
90 

Blue-Green Deployment Server

Implement a local blue-green deployment manager for single-host deployments:

go
1package bluegreen
2 
3import (
4 "context"
5 "fmt"
6 "log"
7 "net/http"
8 "net/http/httputil"
9 "net/url"
10 "sync"
11 "sync/atomic"
12 "time"
13)
14 
// Slot names one of the two deployment slots traffic can be routed to.
type Slot string

// Blue and Green are the two slots the deployment manager switches between.
const (
	Blue  = Slot("blue")
	Green = Slot("green")
)
21 
// DeploymentManager is an http.Handler that forwards every request to
// whichever of two local backends, blue or green, is currently active.
type DeploymentManager struct {
	// activeSlot holds the Slot currently receiving traffic.
	activeSlot atomic.Value
	// blueTarget and greenTarget are the base URLs of the two backends.
	blueTarget  *url.URL
	greenTarget *url.URL
	// blueProxy and greenProxy forward requests to the matching target.
	blueProxy  *httputil.ReverseProxy
	greenProxy *httputil.ReverseProxy
	// mu is not used by any method in the visible code.
	mu sync.RWMutex
}
30 
31func NewDeploymentManager(bluePort, greenPort int) *DeploymentManager {
32 blueURL, _ := url.Parse(fmt.Sprintf("http://localhost:%d", bluePort))
33 greenURL, _ := url.Parse(fmt.Sprintf("http://localhost:%d", greenPort))
34 
35 dm := &DeploymentManager{
36 blueTarget: blueURL,
37 greenTarget: greenURL,
38 blueProxy: httputil.NewSingleHostReverseProxy(blueURL),
39 greenProxy: httputil.NewSingleHostReverseProxy(greenURL),
40 }
41 dm.activeSlot.Store(Blue)
42 
43 return dm
44}
45 
46func (dm *DeploymentManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
47 slot := dm.activeSlot.Load().(Slot)
48 switch slot {
49 case Blue:
50 dm.blueProxy.ServeHTTP(w, r)
51 case Green:
52 dm.greenProxy.ServeHTTP(w, r)
53 }
54}
55 
56func (dm *DeploymentManager) Switch() error {
57 current := dm.activeSlot.Load().(Slot)
58 var target Slot
59 var targetURL *url.URL
60 
61 if current == Blue {
62 target = Green
63 targetURL = dm.greenTarget
64 } else {
65 target = Blue
66 targetURL = dm.blueTarget
67 }
68 
69 // Verify target is healthy before switching
70 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
71 defer cancel()
72 
73 if err := dm.waitForHealthy(ctx, targetURL); err != nil {
74 return fmt.Errorf("target %s not healthy: %w", target, err)
75 }
76 
77 log.Printf("Switching traffic from %s to %s", current, target)
78 dm.activeSlot.Store(target)
79 log.Printf("Traffic switched to %s", target)
80 
81 return nil
82}
83 
84func (dm *DeploymentManager) waitForHealthy(
85 ctx context.Context,
86 target *url.URL,
87) error {
88 healthURL := fmt.Sprintf("%s/health/ready", target)
89 ticker := time.NewTicker(500 * time.Millisecond)
90 defer ticker.Stop()
91 
92 for {
93 select {
94 case <-ctx.Done():
95 return ctx.Err()
96 case <-ticker.C:
97 resp, err := http.Get(healthURL)
98 if err == nil && resp.StatusCode == 200 {
99 resp.Body.Close()
100 return nil
101 }
102 if resp != nil {
103 resp.Body.Close()
104 }
105 }
106 }
107}
108 

Need a second opinion on your DevOps pipeline architecture?

I run free 30-minute strategy calls for engineering teams tackling this exact problem.

Book a Free Call

Rolling Update Coordination

For multi-instance Go services, coordinate updates across instances:

go
1package rolling
2 
3import (
4 "context"
5 "fmt"
6 "log"
7 "sync"
8 "time"
9)
10 
// Instance describes one running copy of the service that a rolling update
// acts on.
type Instance struct {
	// ID identifies the instance in log lines and error messages.
	ID string
	// Address is the value handed to the updater's health check.
	Address string
	// Version and Healthy are carried on the struct but are not read or
	// written by the visible updater code.
	Version string
	Healthy bool
}
17 
18type RollingUpdater struct {
19 instances []Instance
20 healthCheck func(addr string) bool
21 deploy func(instance Instance, newVersion string) error
22 maxParallel int
23 pauseBetween time.Duration
24}
25 
26func (ru *RollingUpdater) Update(
27 ctx context.Context,
28 newVersion string,
29) error {
30 sem := make(chan struct{}, ru.maxParallel)
31 var mu sync.Mutex
32 var errors []error
33 
34 for i, instance := range ru.instances {
35 select {
36 case <-ctx.Done():
37 return ctx.Err()
38 case sem <- struct{}{}:
39 }
40 
41 go func(idx int, inst Instance) {
42 defer func() { <-sem }()
43 
44 log.Printf("Updating instance %s (%d/%d)",
45 inst.ID, idx+1, len(ru.instances))
46 
47 if err := ru.updateInstance(ctx, inst, newVersion); err != nil {
48 mu.Lock()
49 errors = append(errors, fmt.Errorf(
50 "instance %s: %w", inst.ID, err))
51 mu.Unlock()
52 return
53 }
54 
55 log.Printf("Instance %s updated successfully", inst.ID)
56 
57 // Pause between instances
58 select {
59 case <-ctx.Done():
60 case <-time.After(ru.pauseBetween):
61 }
62 }(i, instance)
63 }
64 
65 // Wait for all goroutines
66 for i := 0; i < cap(sem); i++ {
67 sem <- struct{}{}
68 }
69 
70 if len(errors) > 0 {
71 return fmt.Errorf("rolling update failed: %v", errors)
72 }
73 
74 return nil
75}
76 
77func (ru *RollingUpdater) updateInstance(
78 ctx context.Context,
79 instance Instance,
80 newVersion string,
81) error {
82 // Step 1: Deploy new version
83 if err := ru.deploy(instance, newVersion); err != nil {
84 return fmt.Errorf("deploy failed: %w", err)
85 }
86 
87 // Step 2: Wait for health check
88 deadline := time.After(60 * time.Second)
89 ticker := time.NewTicker(2 * time.Second)
90 defer ticker.Stop()
91 
92 for {
93 select {
94 case <-ctx.Done():
95 return ctx.Err()
96 case <-deadline:
97 return fmt.Errorf("health check timeout")
98 case <-ticker.C:
99 if ru.healthCheck(instance.Address) {
100 return nil
101 }
102 }
103 }
104}
105 

Database Migration Manager

Handle database migrations safely in a Go deployment pipeline:

go
1package migrate
2 
3import (
4 "context"
5 "database/sql"
6 "fmt"
7 "log"
8 "sort"
9 "time"
10)
11 
// Migration is a single versioned schema change.
type Migration struct {
	// Version orders migrations and is the key recorded in schema_migrations.
	Version int
	// Description is logged and stored alongside the version.
	Description string
	// Up applies the change inside the transaction it is given.
	Up func(tx *sql.Tx) error
	// Down is the reverse of Up; the visible Apply code never calls it.
	Down func(tx *sql.Tx) error
	// Safe marks a migration as runnable without downtime; Apply only logs a
	// warning when it is false.
	Safe bool
}
19 
20type Migrator struct {
21 db *sql.DB
22 migrations []Migration
23}
24 
25func (m *Migrator) Apply(ctx context.Context) error {
26 // Ensure migrations table exists
27 _, err := m.db.ExecContext(ctx, `
28 CREATE TABLE IF NOT EXISTS schema_migrations (
29 version INT PRIMARY KEY,
30 applied_at TIMESTAMPTZ DEFAULT NOW(),
31 description TEXT
32 )
33 `)
34 if err != nil {
35 return fmt.Errorf("create migrations table: %w", err)
36 }
37 
38 // Get current version
39 var currentVersion int
40 err = m.db.QueryRowContext(ctx,
41 "SELECT COALESCE(MAX(version), 0) FROM schema_migrations",
42 ).Scan(&currentVersion)
43 if err != nil {
44 return fmt.Errorf("get current version: %w", err)
45 }
46 
47 // Sort and apply pending migrations
48 sort.Slice(m.migrations, func(i, j int) bool {
49 return m.migrations[i].Version < m.migrations[j].Version
50 })
51 
52 for _, migration := range m.migrations {
53 if migration.Version <= currentVersion {
54 continue
55 }
56 
57 if !migration.Safe {
58 log.Printf("WARNING: Migration %d (%s) is not marked as safe for zero-downtime",
59 migration.Version, migration.Description)
60 }
61 
62 log.Printf("Applying migration %d: %s",
63 migration.Version, migration.Description)
64 
65 tx, err := m.db.BeginTx(ctx, nil)
66 if err != nil {
67 return fmt.Errorf("begin tx for migration %d: %w",
68 migration.Version, err)
69 }
70 
71 if err := migration.Up(tx); err != nil {
72 tx.Rollback()
73 return fmt.Errorf("migration %d failed: %w",
74 migration.Version, err)
75 }
76 
77 _, err = tx.ExecContext(ctx,
78 "INSERT INTO schema_migrations (version, description) VALUES ($1, $2)",
79 migration.Version, migration.Description,
80 )
81 if err != nil {
82 tx.Rollback()
83 return fmt.Errorf("record migration %d: %w",
84 migration.Version, err)
85 }
86 
87 if err := tx.Commit(); err != nil {
88 return fmt.Errorf("commit migration %d: %w",
89 migration.Version, err)
90 }
91 
92 log.Printf("Migration %d applied successfully", migration.Version)
93 }
94 
95 return nil
96}
97 

Feature Flag Implementation

go
1package features
2 
3import (
4 "context"
5 "encoding/json"
6 "hash/fnv"
7 "sync"
8 "time"
9 
10 "github.com/redis/go-redis/v9"
11)
12 
// Flag is one feature flag as stored in JSON form.
type Flag struct {
	// Key is the flag's name and the key it is cached under.
	Key string `json:"key"`
	// Enabled is the master switch; a disabled flag is off for every tenant.
	Enabled bool `json:"enabled"`
	// RolloutPercent is the share of tenants (0-100) that get an enabled flag
	// through hashing.
	RolloutPercent int `json:"rollout_percent"`
	// AllowedTenants always get an enabled flag, regardless of RolloutPercent.
	AllowedTenants []string `json:"allowed_tenants"`
}
19 
20type FlagService struct {
21 redis *redis.Client
22 cache map[string]*Flag
23 mu sync.RWMutex
24}
25 
26func NewFlagService(r *redis.Client) *FlagService {
27 fs := &FlagService{
28 redis: r,
29 cache: make(map[string]*Flag),
30 }
31 go fs.refreshLoop()
32 return fs
33}
34 
35func (fs *FlagService) IsEnabled(
36 ctx context.Context,
37 key string,
38 tenantID string,
39) bool {
40 fs.mu.RLock()
41 flag, ok := fs.cache[key]
42 fs.mu.RUnlock()
43 
44 if !ok || !flag.Enabled {
45 return false
46 }
47 
48 // Check tenant allowlist
49 for _, t := range flag.AllowedTenants {
50 if t == tenantID {
51 return true
52 }
53 }
54 
55 // Percentage rollout with consistent hashing
56 h := fnv.New32a()
57 h.Write([]byte(key + ":" + tenantID))
58 return int(h.Sum32()%100) < flag.RolloutPercent
59}
60 
61func (fs *FlagService) refreshLoop() {
62 ticker := time.NewTicker(10 * time.Second)
63 defer ticker.Stop()
64 
65 for range ticker.C {
66 ctx := context.Background()
67 keys, err := fs.redis.Keys(ctx, "flag:*").Result()
68 if err != nil {
69 continue
70 }
71 
72 newCache := make(map[string]*Flag)
73 for _, k := range keys {
74 raw, err := fs.redis.Get(ctx, k).Bytes()
75 if err != nil {
76 continue
77 }
78 var flag Flag
79 if json.Unmarshal(raw, &flag) == nil {
80 newCache[flag.Key] = &flag
81 }
82 }
83 
84 fs.mu.Lock()
85 fs.cache = newCache
86 fs.mu.Unlock()
87 }
88}
89 

Putting It All Together

A complete Go service with zero-downtime deployment support:

go
1package main
2 
3import (
4 "context"
5 "database/sql"
6 "log"
7 "net/http"
8 "os"
9 "os/signal"
10 "syscall"
11 "time"
12 
13 _ "github.com/lib/pq"
14 "github.com/redis/go-redis/v9"
15)
16 
17func main() {
18 // Initialize dependencies
19 db, _ := sql.Open("postgres", os.Getenv("DATABASE_URL"))
20 rdb := redis.NewClient(&redis.Options{
21 Addr: os.Getenv("REDIS_URL"),
22 })
23 
24 // Health checker
25 healthChecker := &health.Checker{DB: db, Redis: rdb}
26 
27 // Feature flags
28 flags := features.NewFlagService(rdb)
29 
30 // Request tracker
31 tracker := &middleware.RequestTracker{}
32 
33 // Routes
34 mux := http.NewServeMux()
35 mux.HandleFunc("/health/ready", healthChecker.ReadinessHandler())
36 mux.HandleFunc("/health/live", func(w http.ResponseWriter, _ *http.Request) {
37 w.WriteHeader(200)
38 })
39 mux.Handle("/api/", tracker.Middleware(apiRouter(db, flags)))
40 
41 server := &http.Server{
42 Addr: ":8080",
43 Handler: mux,
44 }
45 
46 // Startup: warm caches, run migrations, then mark ready
47 warmCaches(db, rdb)
48 healthChecker.SetReady(true)
49 log.Println("Service ready")
50 
51 go server.ListenAndServe()
52 
53 // Shutdown sequence
54 quit := make(chan os.Signal, 1)
55 signal.Notify(quit, syscall.SIGTERM, syscall.SIGINT)
56 <-quit
57 
58 log.Println("Shutdown initiated")
59 healthChecker.SetReady(false)
60 time.Sleep(15 * time.Second) // Wait for LB deregistration
61 tracker.StartDraining()
62 tracker.WaitForDrain(30 * time.Second)
63 
64 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
65 defer cancel()
66 server.Shutdown(ctx)
67 db.Close()
68 rdb.Close()
69 log.Println("Shutdown complete")
70}
71 

FAQ

Need expert help?

Building with CI/CD pipelines?

I help teams ship production-grade systems. From architecture review to hands-on builds.

Muneer Puthiya Purayil

SaaS Architect & AI Systems Engineer. 10+ years shipping production infrastructure across fintech, automotive, e-commerce, and healthcare.

Engage

Start a
Conversation.

For teams building at scale: SaaS platforms, agentic AI systems, and enterprise mobile infrastructure. Scope and fit are evaluated before any engagement begins.

Limited availability · Q3 / Q4 2026