场景描述

我们要部署一个简单的电商平台,包含以下服务:

  • Web前端 (Nginx + React)
  • API网关 (Node.js)
  • 用户服务 (Python Flask)
  • 商品服务 (Java Spring Boot)
  • 订单服务 (Go)
  • 数据库 (PostgreSQL + Redis)
  • 监控 (Prometheus + Grafana)

1. 项目结构

ecommerce-swarm/
├── docker-compose.yml
├── stack.yml
├── configs/
│   ├── nginx.conf
│   ├── prometheus.yml
│   └── grafana-datasource.yml
├── secrets/
│   ├── postgres-password.txt
│   └── redis-password.txt
└── scripts/
    └── init-db.sql

2. 初始化 Swarm 集群

# Initialise the swarm on the manager node, advertising the manager's address
MANAGER_ADDR=192.168.1.100
docker swarm init --advertise-addr "${MANAGER_ADDR}"

# Print the join command (with token) that worker nodes must run
docker swarm join-token worker
# Output: docker swarm join --token SWMTKN-1... 192.168.1.100:2377

# Run on worker node 1
docker swarm join --token SWMTKN-1... 192.168.1.100:2377

# Run on worker node 2
docker swarm join --token SWMTKN-1... 192.168.1.100:2377

# Back on the manager: list the nodes and their status
docker node ls

3. 创建网络

# Overlay network for the externally reachable services;
# --attachable also allows standalone containers to join it
docker network create -d overlay --attachable ecommerce-net

# Overlay network for service-to-service traffic;
# --internal gives it no external connectivity
docker network create -d overlay --internal ecommerce-internal

4. 创建配置和密钥

# Create the PostgreSQL password secret.
# printf '%s' writes the value without the trailing newline that echo would
# store as part of the secret; single quotes stop an interactive shell from
# treating '!' as history expansion.
printf '%s' 'MySecurePassword123!' | docker secret create postgres_password -

# Create the Redis password secret (same conventions as above)
printf '%s' 'RedisPass456!' | docker secret create redis_password -

# Write the Nginx site configuration and register it as a swarm config.
# The heredoc delimiter is quoted so $uri, $host, ... reach the file verbatim.
mkdir -p configs
cat > configs/nginx.conf << 'EOF'
# Backend pool for the API gateway service (resolved through swarm DNS)
upstream api_gateway {
    server api_gateway:3000;
}

server {
    listen 80;
    server_name localhost;

    # Static single-page app: unknown paths fall back to index.html
    location / {
        root /usr/share/nginx/html;
        index index.html;
        try_files $uri $uri/ /index.html;
    }

    # API requests are proxied to the gateway
    location /api/ {
        proxy_pass http://api_gateway;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }

    # Liveness endpoint. default_type sets the response Content-Type;
    # add_header would append a second Content-Type header instead.
    location /health {
        access_log off;
        default_type text/plain;
        return 200 "healthy\n";
    }
}
EOF

docker config create nginx_config configs/nginx.conf

5. 部署栈文件 (stack.yml)

version: '3.8'

services:
  # Frontend: Nginx serving the static files from the frontend-static volume,
  # configured through the externally created nginx_config
  frontend:
    image: nginx:alpine
    networks:
      - ecommerce-net
    ports:
      - target: 80
        published: 80
        protocol: tcp
        mode: ingress
    configs:
      - source: nginx_config
        target: /etc/nginx/conf.d/default.conf
    volumes:
      - type: volume
        source: frontend-static
        target: /usr/share/nginx/html
    deploy:
      replicas: 2
      placement:
        constraints:
          - node.role == worker
      # Replace one task at a time, pausing 10s between tasks
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: on-failure
        delay: 5s

  # API gateway (Node.js); receives the backend service URLs via environment
  api_gateway:
    image: node:16-alpine
    command:
      - node
      - server.js
    environment:
      NODE_ENV: production
      USER_SERVICE_URL: 'http://user_service:5001'
      PRODUCT_SERVICE_URL: 'http://product_service:8080'
      ORDER_SERVICE_URL: 'http://order_service:8081'
    networks:
      - ecommerce-net
      - ecommerce-internal
    deploy:
      replicas: 3
      restart_policy:
        condition: any
      # Start the new task before stopping the old one, two tasks at a time
      update_config:
        parallelism: 2
        order: start-first

  # User service (Python Flask)
  user_service:
    image: python:3.9-slim
    # Docker secrets are mounted as files under /run/secrets, not exported as
    # environment variables, and ${VAR} in a stack file is substituted from the
    # deploying shell. The connection URLs are therefore assembled inside the
    # container at start-up. "$$" escapes stack-file interpolation so that the
    # container's shell sees "$(...)".
    # NOTE(review): passwords containing URL-reserved characters (@ / : ? #)
    # would need percent-encoding - confirm against the chosen passwords.
    command:
      - sh
      - "-c"
      - |
        export DATABASE_URL="postgresql://postgres:$$(cat /run/secrets/postgres_password)@postgres:5432/ecommerce"
        export REDIS_URL="redis://:$$(cat /run/secrets/redis_password)@redis:6379/0"
        exec python app.py
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '0.5'
          memory: 512M
        reservations:
          cpus: '0.1'
          memory: 128M
    secrets:
      - postgres_password
      - redis_password
    # depends_on is ignored by `docker stack deploy`, so it is omitted here;
    # the application has to retry until postgres and redis are reachable.
    networks:
      - ecommerce-internal

  # Product service (Java Spring Boot)
  product_service:
    image: openjdk:11-jre-slim
    command:
      - java
      - "-jar"
      - product-service.jar
    environment:
      DB_HOST: postgres
      DB_PORT: "5432"
    volumes:
      - type: volume
        source: product-logs
        target: /var/log/product-service
    networks:
      - ecommerce-internal
    deploy:
      replicas: 2
      placement:
        # Tasks are only scheduled on nodes labelled zone=prod; the label must
        # be added to a node (docker node update --label-add) or tasks stay pending
        constraints:
          - node.labels.zone == prod

  # Order service (Go)
  order_service:
    image: golang:1.18-alpine
    command:
      - ./order-service
    environment:
      DB_HOST: postgres
      REDIS_HOST: redis
    networks:
      - ecommerce-internal
    deploy:
      mode: replicated
      replicas: 3
      # Watch each updated task for 30s and roll back if the update fails
      update_config:
        monitor: 30s
        failure_action: rollback

  # PostgreSQL database (single replica, pinned to a manager node)
  postgres:
    image: postgres:14-alpine
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
    environment:
      - POSTGRES_DB=ecommerce
      - POSTGRES_USER=postgres
      # The image only picks up the password from POSTGRES_PASSWORD or from
      # the file named by POSTGRES_PASSWORD_FILE; mounting the secret alone
      # leaves the superuser password unset and initialisation fails.
      - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password
    secrets:
      - postgres_password
    volumes:
      - postgres-data:/var/lib/postgresql/data
      # NOTE(review): a bind mount needs this file on the node that runs the
      # task - assumes the stack is deployed from the manager holding ./scripts.
      - type: bind
        source: ./scripts/init-db.sql
        target: /docker-entrypoint-initdb.d/init.sql
    networks:
      - ecommerce-internal

  # Redis cache
  redis:
    image: redis:7-alpine
    # --requirepass takes the password itself, not a file path, so the secret
    # file is read by a shell at start-up. "$$" escapes stack-file
    # interpolation so that the container's shell sees "$(...)".
    command:
      - sh
      - "-c"
      - 'exec redis-server --requirepass "$$(cat /run/secrets/redis_password)"'
    deploy:
      replicas: 1
    secrets:
      - source: redis_password
        target: redis_password
    volumes:
      - redis-data:/data
    networks:
      - ecommerce-internal

  # Prometheus (single replica on a manager node, UI published on port 9090)
  prometheus:
    image: prom/prometheus
    networks:
      - ecommerce-net
      - ecommerce-internal
    ports:
      - target: 9090
        published: 9090
        protocol: tcp
        mode: ingress
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - type: volume
        source: prometheus-data
        target: /prometheus
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.role == manager

  # Grafana dashboards (UI published on port 3000)
  grafana:
    image: grafana/grafana
    environment:
      # Overridable at deploy time: GRAFANA_ADMIN_PASSWORD=... docker stack deploy ...
      # The fallback keeps the previous value; change it outside of demos.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin123}"
    volumes:
      - grafana-data:/var/lib/grafana
      # Bind mount: the file must exist on the node that runs the task
      - type: bind
        source: ./configs/grafana-datasource.yml
        target: /etc/grafana/provisioning/datasources/datasource.yml
    deploy:
      replicas: 1
      placement:
        # Without a constraint the task may land on a node that lacks the
        # bind-mounted file and fail to start.
        # NOTE(review): assumes the stack is deployed from the manager node
        # that holds ./configs - confirm for multi-manager clusters.
        constraints:
          - node.role == manager
    ports:
      - "3000:3000"
    networks:
      - ecommerce-net
      - ecommerce-internal

  # Health checker: one task per node (global mode) that installs curl and
  # then probes the frontend and API gateway health URLs every 30 seconds
  health_check:
    image: alpine:latest
    command:
      - sh
      - "-c"
      - |
        apk add --no-cache curl &&
        while true; do
          curl -f http://frontend/health || echo 'Frontend down';
          curl -f http://api_gateway:3000/health || echo 'API Gateway down';
          sleep 30;
        done
    networks:
      - ecommerce-net
    deploy:
      mode: global
      restart_policy:
        condition: on-failure

# Named volumes, all on the local driver; frontend-static is additionally
# configured through driver_opts as an NFS mount from 192.168.1.200
volumes:
  frontend-static:
    driver: local
    driver_opts:
      type: nfs
      device: ":/data/frontend"
      o: "addr=192.168.1.200,nolock,soft,rw"
  grafana-data:
    driver: local
  postgres-data:
    driver: local
  product-logs:
    driver: local
  prometheus-data:
    driver: local
  redis-data:
    driver: local

# Configs: nginx_config must already exist in the swarm (external);
# prometheus_config is created from the local file at deploy time
configs:
  nginx_config: {external: true}
  prometheus_config:
    file: ./configs/prometheus.yml

# Secrets that must already exist in the swarm
secrets:
  postgres_password: {external: true}
  redis_password: {external: true}

# Overlay networks that must already exist in the swarm
networks:
  ecommerce-net: {external: true}
  ecommerce-internal: {external: true}

6. 部署应用栈

# Deploy the whole stack from stack.yml under the name "ecommerce"
docker stack deploy --compose-file stack.yml ecommerce

# Show the stack's tasks and their state
docker stack ps ecommerce

# List the stack's services
docker stack services ecommerce

# Follow the frontend service logs
docker service logs --follow ecommerce_frontend

# Scale the API gateway to 5 replicas
docker service scale ecommerce_api_gateway=5

# Show the details of the ecommerce-net network
docker network inspect ecommerce-net

7. 管理命令示例

# Rolling update of the frontend service: two tasks at a time, 10s apart
docker service update \
  --update-parallelism 2 \
  --update-delay 10s \
  --image nginx:latest \
  ecommerce_frontend

# Roll the frontend service back to its previous definition
docker service rollback ecommerce_frontend

# Force the user service's tasks to be recreated
docker service update --force ecommerce_user_service

# Show the postgres service definition in human-readable form
docker service inspect --pretty ecommerce_postgres

# List the tasks of the API gateway service
docker service ps ecommerce_api_gateway

# Show the last 100 log lines of the frontend service
docker service logs --tail 100 ecommerce_frontend

# Add node labels (used by placement constraints in the stack file)
docker node update --label-add zone=prod worker1
docker node update --label-add zone=staging worker2

8. 监控和故障排除

# Print name, current state and error of every task, grouped by service
for svc_id in $(docker service ls --quiet); do
  svc_name=$(docker service inspect --format '{{.Spec.Name}}' "$svc_id")
  echo "=== ${svc_name} ==="
  docker service ps --format "table {{.Name}}\t{{.CurrentState}}\t{{.Error}}" "$svc_id"
done

# Show live resource usage of the containers running on this node
docker stats $(docker ps -q)

# Open a shell in one frontend container on this node.
# The name filter can match several replicas (or none on this node), and
# docker exec accepts exactly one container, so pick the first match and
# skip the exec when there is none.
frontend_cid=$(docker ps -q -f name=ecommerce_frontend | head -n 1)
if [ -n "$frontend_cid" ]; then
  docker exec -it "$frontend_cid" sh
else
  echo "no ecommerce_frontend container on this node" >&2
fi

# Save the postgres service definition as JSON
docker service inspect ecommerce_postgres > postgres-backup.json

# Disaster recovery: remove the stack and deploy it again from stack.yml.
# Give the old tasks time to stop before redeploying.
# NOTE(review): a fixed pause is a heuristic - adjust to the cluster.
docker stack rm ecommerce
sleep 30
docker stack deploy -c stack.yml ecommerce

9. 常用运维脚本

#!/bin/bash
# deploy.sh - automated deployment script for the ecommerce stack.
#
# Verifies that this node can manage the swarm, creates the networks, config
# and secrets that stack.yml declares as external (only when missing), deploys
# the stack and prints the resulting endpoints.
# Run from the project root on a manager node.
set -euo pipefail

# Create overlay network $1 (extra flags in $2...) unless it already exists.
ensure_network() {
    local name="$1"
    shift
    if ! docker network inspect "$name" > /dev/null 2>&1; then
        docker network create --driver overlay "$@" "$name"
    fi
}

# Create swarm config $1 from file $2 unless it already exists.
ensure_config() {
    local name="$1" file="$2"
    if ! docker config inspect "$name" > /dev/null 2>&1; then
        docker config create "$name" "$file"
    fi
}

# Create swarm secret $1 from file $2, generating the file when it is missing.
ensure_secret() {
    local name="$1" file="$2"
    if [ ! -f "$file" ]; then
        mkdir -p "$(dirname "$file")"
        # Hex output stays safe inside connection URLs (base64 may emit / + =);
        # the subshell umask keeps the generated file private.
        (umask 077 && openssl rand -hex 32 > "$file")
    fi
    if ! docker secret inspect "$name" > /dev/null 2>&1; then
        # Drop the trailing newline so it does not become part of the secret
        tr -d '\n' < "$file" | docker secret create "$name" -
    fi
}

echo "1. 检查Docker Swarm状态..."
if ! docker node ls &> /dev/null; then
    echo "错误: 节点不在Swarm模式中"
    exit 1
fi

echo "2. 创建网络..."
ensure_network ecommerce-net --attachable
ensure_network ecommerce-internal --internal

echo "3. 创建配置..."
ensure_config nginx_config ./configs/nginx.conf

echo "4. 创建密钥..."
ensure_secret postgres_password ./secrets/postgres-password.txt
# stack.yml also declares redis_password as external, so it must exist as well
ensure_secret redis_password ./secrets/redis-password.txt

echo "5. 部署服务栈..."
docker stack deploy -c stack.yml ecommerce

echo "6. 等待服务启动..."
sleep 10

echo "7. 检查服务状态..."
docker stack ps ecommerce

echo "8. 验证服务..."
host_ip=$(hostname -I | awk '{print $1}')
echo "前端: http://${host_ip}"
# The gateway publishes no port of its own; it is reached through the
# frontend's /api/ proxy (port 3000 belongs to Grafana).
echo "API网关: http://${host_ip}/api/"
echo "监控: http://${host_ip}:9090"
echo "Grafana: http://${host_ip}:3000"

10. 清理资源

# Remove the whole stack
docker stack rm ecommerce

# Give all tasks time to stop
sleep 30

# Remove the overlay networks
docker network rm ecommerce-net ecommerce-internal

# Remove the config
docker config rm nginx_config

# Remove the secrets
docker secret rm postgres_password redis_password

# Remove only this stack's named volumes on the current node (local volumes
# live per node, so repeat on every node that ran stack tasks).
# WARNING: this deletes the PostgreSQL, Redis, Prometheus and Grafana data.
# NOTE(review): relies on the com.docker.stack.namespace label that
# `docker stack deploy` puts on the volumes it creates - verify with
# `docker volume inspect`.
docker volume ls -q --filter label=com.docker.stack.namespace=ecommerce \
  | xargs -r docker volume rm

# A host-wide `docker system prune -a -f --volumes` is intentionally not run:
# it would also delete every unrelated unused image, network and volume.

这个案例展示了:

  1. 多服务应用:栈中共定义10个服务(5个应用服务、2个数据存储、2个监控组件和1个健康检查器)
  2. 服务发现:通过服务名进行内部通信
  3. 负载均衡:Swarm内置的负载均衡器
  4. 滚动更新:零停机部署
  5. 健康检查:由独立的 health_check 服务每30秒轮询前端和 API 网关的 /health 接口(栈文件中未配置容器级 healthcheck)
  6. 资源配置:CPU和内存限制
  7. 持久化存储:数据库数据持久化
  8. 安全配置:使用Docker Secrets管理敏感信息
  9. 监控集成:Prometheus + Grafana
  10. 高可用性:多副本部署

这个案例可以在实际生产环境中进行调整和使用,根据具体需求修改配置和服务。

Logo

电商企业物流数字化转型必备!快递鸟 API 接口,72 小时快速完成物流系统集成。全流程实战1V1指导,营造开放的API技术生态圈。

更多推荐