System Design: Multi-Region Architecture
Designing systems that operate across multiple regions for improved latency, availability, and disaster recovery, including data consistency strategies.
System Design: Multi-Region Architecture
Multi-region architecture involves deploying applications and infrastructure across multiple geographic regions to improve performance, availability, and disaster recovery capabilities. This architectural approach is essential for global applications that need to serve users with low latency, ensure high availability, and maintain business continuity in the face of regional failures. Building effective multi-region systems requires careful consideration of data distribution, consistency models, traffic routing, and operational complexity.
Understanding Multi-Region Architecture
Multi-region architecture distributes application components, data, and services across multiple geographic locations to achieve specific business objectives such as reduced latency, improved availability, and regulatory compliance. The architecture must balance the benefits of geographic distribution with the challenges of data consistency and system complexity.
Multi-Region Architecture Patterns
- Active-Passive: One primary region handles all traffic; other regions are on standby for failover
- Active-Active: All regions handle traffic simultaneously
- Active-Selective: Specific regions handle traffic for specific user segments or use cases
Traffic Routing and Load Balancing
Effective traffic routing is crucial in multi-region architectures to direct users to the optimal region based on factors like latency, load, and availability.
// Traffic routing and load balancing implementation
package main
import (
"context"
"fmt"
"math/rand"
"net/http"
"sync"
"time"
)
// Region represents a geographic region
type Region struct {
Name string
Code string
Latency time.Duration // Average latency to this region
Load float64 // Current load percentage (0.0-1.0)
Health string // "healthy", "degraded", "unhealthy"
LatencyMap map[string]time.Duration // Latency from different locations to this region
}
// TrafficRouter manages routing of requests to different regions
type TrafficRouter struct {
regions map[string]*Region
regionWeights map[string]int
mutex sync.RWMutex
healthCheckUrl string
}
func NewTrafficRouter(healthCheckUrl string) *TrafficRouter {
router := &TrafficRouter{
regions: make(map[string]*Region),
regionWeights: make(map[string]int),
healthCheckUrl: healthCheckUrl,
}
// Initialize default regions
router.AddRegion("us-east-1", "US East", 20*time.Millisecond, 0.3, "healthy")
router.AddRegion("eu-west-1", "EU West", 80*time.Millisecond, 0.2, "healthy")
router.AddRegion("ap-southeast-1", "Asia Pacific", 150*time.Millisecond, 0.1, "healthy")
return router
}
// AddRegion adds a new region to the routing configuration
func (tr *TrafficRouter) AddRegion(code, name string, latency time.Duration, load float64, health string) {
tr.mutex.Lock()
defer tr.mutex.Unlock()
tr.regions[code] = &Region{
Name: name,
Code: code,
Latency: latency,
Load: load,
Health: health,
LatencyMap: make(map[string]time.Duration),
}
tr.regionWeights[code] = 100 // Default weight
fmt.Printf("Region %s added to routing configuration\n", code)
}
// GetOptimalRegion returns the best region for a user based on their location
func (tr *TrafficRouter) GetOptimalRegion(userLocation string) string {
tr.mutex.RLock()
defer tr.mutex.RUnlock()
if len(tr.regions) == 0 {
return ""
}
var bestRegion string
var bestScore float64 = -1
for code, region := range tr.regions {
if region.Health != "healthy" {
continue
}
score := tr.calculateRegionScore(region, userLocation)
if score > bestScore {
bestScore = score
bestRegion = code
}
}
return bestRegion
}
// calculateRegionScore calculates a score for a region based on latency, load, and health
func (tr *TrafficRouter) calculateRegionScore(region *Region, userLocation string) float64 {
// Base score starts with weight
score := float64(tr.regionWeights[region.Code])
// Factor in latency (lower is better)
latencyFactor := 1.0 / (float64(region.Latency.Milliseconds()) / 100.0)
score *= latencyFactor
// Factor in load (lower load is better)
loadFactor := 1.0 - region.Load
score *= loadFactor
// Factor in custom latency from user location if available
if customLatency, exists := region.LatencyMap[userLocation]; exists {
customLatencyFactor := 1.0 / (float64(customLatency.Milliseconds()) / 100.0)
score *= customLatencyFactor
}
return score
}
// GetWeightedRandomRegion returns a region based on weighted random selection
func (tr *TrafficRouter) GetWeightedRandomRegion() string {
tr.mutex.RLock()
defer tr.mutex.RUnlock()
totalWeight := 0
for _, weight := range tr.regionWeights {
totalWeight += weight
}
if totalWeight == 0 {
return ""
}
randValue := rand.Intn(totalWeight)
currentWeight := 0
for code, region := range tr.regions {
if region.Health != "healthy" {
continue
}
currentWeight += tr.regionWeights[code]
if randValue < currentWeight {
return code
}
}
// Fallback to first healthy region
for code, region := range tr.regions {
if region.Health == "healthy" {
return code
}
}
return ""
}
// UpdateRegionMetrics updates region metrics
func (tr *TrafficRouter) UpdateRegionMetrics(code string, load float64, health string) {
tr.mutex.Lock()
defer tr.mutex.Unlock()
if region, exists := tr.regions[code]; exists {
region.Load = load
region.Health = health
}
}
// UpdateRegionLatency updates region latency metrics
func (tr *TrafficRouter) UpdateRegionLatency(code, userLocation string, latency time.Duration) {
tr.mutex.Lock()
defer tr.mutex.Unlock()
if region, exists := tr.regions[code]; exists {
region.LatencyMap[userLocation] = latency
}
}
// IPBasedRouter routes based on user IP location
type IPBasedRouter struct {
*TrafficRouter
ipLocationService *IPLocationService
}
func NewIPBasedRouter(healthCheckUrl string) *IPBasedRouter {
return &IPBasedRouter{
TrafficRouter: NewTrafficRouter(healthCheckUrl),
ipLocationService: NewIPLocationService(),
}
}
// RouteByIP determines the optimal region based on user IP
func (ibr *IPBasedRouter) RouteByIP(userIP string) string {
location := ibr.ipLocationService.GetLocation(userIP)
return ibr.GetOptimalRegion(location)
}
// IPLocationService provides IP-based geographic location lookup
type IPLocationService struct {
// In a real system, this would connect to IP geolocation APIs
fallbackLocation string
}
func NewIPLocationService() *IPLocationService {
return &IPLocationService{
fallbackLocation: "unknown",
}
}
func (ils *IPLocationService) GetLocation(ip string) string {
// In a real system, this would call a geolocation service
// For this example, we'll return a simulated location
if ip[:4] == "192." {
return "US"
} else if ip[:4] == "10.0" {
return "EU"
}
return "US" // Default
}
// LoadBalancer implements regional load balancing
type LoadBalancer struct {
regionRouters map[string]*RegionLoadBalancer
globalRouter *IPBasedRouter
}
type RegionLoadBalancer struct {
instances []string // Instance endpoints in the region
current int
mutex sync.RWMutex
}
func NewLoadBalancer() *LoadBalancer {
return &LoadBalancer{
regionRouters: make(map[string]*RegionLoadBalancer),
globalRouter: NewIPBasedRouter("/health"),
}
}
// AddRegionInstances adds instances to a specific region
func (lb *LoadBalancer) AddRegionInstances(regionCode string, instances []string) {
lb.regionRouters[regionCode] = &RegionLoadBalancer{
instances: instances,
current: 0,
}
}
// GetInstance routes a request to an instance in the optimal region
func (lb *LoadBalancer) GetInstance(userIP string) (string, error) {
region := lb.globalRouter.RouteByIP(userIP)
if region == "" {
return "", fmt.Errorf("no healthy region available")
}
regionLB, exists := lb.regionRouters[region]
if !exists {
return "", fmt.Errorf("no instances available in region %s", region)
}
regionLB.mutex.Lock()
if regionLB.current >= len(regionLB.instances) {
regionLB.current = 0
}
instance := regionLB.instances[regionLB.current]
regionLB.current++
regionLB.mutex.Unlock()
return instance, nil
}
// HealthChecker monitors regional health
type HealthChecker struct {
regions map[string]string // region -> health endpoint
routers *LoadBalancer
ticker *time.Ticker
}
func NewHealthChecker(routers *LoadBalancer) *HealthChecker {
return &HealthChecker{
regions: make(map[string]string),
routers: routers,
ticker: time.NewTicker(30 * time.Second),
}
}
// AddRegion adds a region to health monitoring
func (hc *HealthChecker) AddRegion(regionCode, healthEndpoint string) {
hc.regions[regionCode] = healthEndpoint
}
// Start begins health monitoring
func (hc *HealthChecker) Start() {
go func() {
for range hc.ticker.C {
hc.checkHealth()
}
}()
}
// checkHealth performs health checks on all regions
func (hc *HealthChecker) checkHealth() {
client := &http.Client{Timeout: 10 * time.Second}
for region, endpoint := range hc.regions {
health, load := hc.checkRegionHealth(client, endpoint)
hc.routers.globalRouter.UpdateRegionMetrics(region, load, health)
fmt.Printf("Region %s health: %s, load: %.2f\n", region, health, load)
}
}
// checkRegionHealth checks the health of a specific region
func (hc *HealthChecker) checkRegionHealth(client *http.Client, endpoint string) (string, float64) {
start := time.Now()
resp, err := client.Get(endpoint)
duration := time.Since(start)
if err != nil || resp.StatusCode != 200 {
return "unhealthy", 0
}
// Simulate load calculation based on response time
load := min(1.0, float64(duration.Milliseconds())/1000.0)
return "healthy", load
}
// min function for float64
func min(a, b float64) float64 {
if a < b {
return a
}
return b
}
// Example usage
func main() {
fmt.Println("Multi-Region Traffic Routing Example:")
// Create load balancer
lb := NewLoadBalancer()
// Add instances to regions
lb.AddRegionInstances("us-east-1", []string{
"us-east-1.instance-1.com:8080",
"us-east-1.instance-2.com:8080",
})
lb.AddRegionInstances("eu-west-1", []string{
"eu-west-1.instance-1.com:8080",
"eu-west-1.instance-2.com:8080",
})
lb.AddRegionInstances("ap-southeast-1", []string{
"ap-southeast-1.instance-1.com:8080",
"ap-southeast-1.instance-2.com:8080",
})
// Simulate routing requests
userIPs := []string{
"192.168.1.1", // US
"10.0.0.1", // EU
"172.16.0.1", // Default to US
}
for _, ip := range userIPs {
instance, err := lb.GetInstance(ip)
if err != nil {
fmt.Printf("Error routing request for IP %s: %v\n", ip, err)
continue
}
fmt.Printf("Request from IP %s routed to: %s\n", ip, instance)
}
// Start health monitoring
healthChecker := NewHealthChecker(lb)
healthChecker.AddRegion("us-east-1", "http://us-east-1.instance-1.com:8080/health")
healthChecker.AddRegion("eu-west-1", "http://eu-west-1.instance-1.com:8080/health")
healthChecker.AddRegion("ap-southeast-1", "http://ap-southeast-1.instance-1.com:8080/health")
healthChecker.Start()
time.Sleep(2 * time.Second)
fmt.Println("Traffic routing example completed")
}