Scaling Node.js Apps with Clustering & Load Balancing
Introduction
Node.js applications run on a single thread by default, which means they can't fully utilize multi-core systems. As your application grows and handles more traffic, you'll need to scale beyond a single process. This comprehensive guide covers essential scaling strategies for Node.js applications, including clustering, process management, and load balancing techniques.
Understanding Node.js Single-Threaded Nature
The Event Loop Limitation
Node.js uses an event-driven, non-blocking I/O model that runs on a single thread. While this is efficient for I/O-intensive operations, it has limitations:
// Demo: a single CPU-bound route starves every other route in the process.
const express = require('express');

const app = express();

app.get('/heavy-task', (req, res) => {
  // Purely synchronous work: the event loop cannot service anything else
  // until this loop has finished.
  const iterations = 10000000000;
  let result = 0;
  let i = 0;
  while (i < iterations) {
    result += i;
    i++;
  }
  res.json({ result });
});

app.get('/light-task', (req, res) => {
  // Cheap on its own, but it gets no chance to run while /heavy-task loops.
  const payload = { message: 'Hello World' };
  res.json(payload);
});

app.listen(3000);
Why Scaling is Essential
- CPU Utilization: Single-threaded apps can't use all available CPU cores
- Fault Tolerance: Single process failure brings down the entire application
- Performance: Limited by single process memory and processing power
- Concurrency: Can't handle multiple CPU-intensive tasks simultaneously
Node.js Cluster Module
Understanding the Cluster Module
The built-in `cluster` module allows you to create child processes (workers) that share the same server port:
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;
const express = require('express');

// `cluster.isMaster` is deprecated since Node.js 16. Prefer `isPrimary` and
// fall back to the old flag only on runtimes that do not provide it.
const isPrimary = cluster.isPrimary ?? cluster.isMaster;

if (isPrimary) {
  console.log(`Master ${process.pid} is running`);

  // Fork one worker per CPU core.
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died (${signal || code})`);

    // Replace only workers that died unexpectedly. A worker that exited after
    // disconnect()/kill() was stopped on purpose and must not be re-forked.
    if (!worker.exitedAfterDisconnect) {
      cluster.fork();
    }
  });
} else {
  // Workers can share any TCP port: every worker listens on 3000 and the
  // primary distributes incoming connections among them.
  const app = express();

  app.get('/', (req, res) => {
    res.json({
      message: 'Hello from worker',
      pid: process.pid,
      worker: cluster.worker.id,
    });
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} started`);
  });
}
Advanced Cluster Configuration
Custom Worker Management
const cluster = require('cluster');
const os = require('os');
/**
 * Supervises a pool of cluster workers: forks them, restarts the ones that
 * exit, and tears the pool down on SIGTERM/SIGINT.
 *
 * Options:
 *  - maxWorkers:   number of workers to fork (default: one per CPU core)
 *  - restartDelay: ms to wait before replacing an exited worker (default 1000)
 *  - maxRestarts:  how many times one worker "slot" may be restarted in a row
 *                  (default 10); the count is handed on to each replacement
 */
class ClusterManager {
  /**
   * @param {string} workerFile - module loaded via require() in worker processes
   * @param {object} [options]
   */
  constructor(workerFile, options = {}) {
    this.workerFile = workerFile;
    // A pool of zero workers is never useful, so 0 falls back to the default.
    this.maxWorkers = options.maxWorkers || os.cpus().length;
    // `??` keeps explicit zeros: restartDelay 0 = restart immediately,
    // maxRestarts 0 = never restart.
    this.restartDelay = options.restartDelay ?? 1000;
    this.maxRestarts = options.maxRestarts ?? 10;
    this.workers = new Map();
    this.isShuttingDown = false;
  }

  /**
   * In the primary process: fork the pool and install event/signal handlers.
   * In a worker process: load and return the worker module.
   */
  start() {
    // `isMaster` is deprecated since Node.js 16; use it only as a fallback.
    const isPrimary = cluster.isPrimary ?? cluster.isMaster;
    if (!isPrimary) {
      return require(this.workerFile);
    }

    console.log(`Master ${process.pid} starting ${this.maxWorkers} workers`);

    // Create workers
    for (let i = 0; i < this.maxWorkers; i++) {
      this.createWorker();
    }

    // Handle worker events
    cluster.on('exit', (worker, code, signal) => {
      this.handleWorkerExit(worker, code, signal);
    });
    cluster.on('online', (worker) => {
      console.log(`Worker ${worker.process.pid} is online`);
    });

    // Graceful shutdown
    process.on('SIGTERM', () => this.shutdown());
    process.on('SIGINT', () => this.shutdown());
  }

  /**
   * Fork a worker and start tracking it.
   *
   * @param {number} [restartCount=0] - restarts already spent on this slot;
   *   passed along when a worker replaces one that exited.
   * @returns the forked cluster worker
   */
  createWorker(restartCount = 0) {
    const worker = cluster.fork();
    this.workers.set(worker.id, {
      worker,
      restartCount,
      startTime: Date.now(),
    });
    return worker;
  }

  /**
   * Stop tracking an exited worker and schedule its replacement, unless the
   * cluster is shutting down or the restart budget is used up.
   */
  handleWorkerExit(worker, code, signal) {
    const workerInfo = this.workers.get(worker.id);
    if (!workerInfo) return;

    console.log(`Worker ${worker.process.pid} exited with code ${code}`);

    // Remove from tracking
    this.workers.delete(worker.id);

    // Workers killed by shutdown() must stay dead.
    if (this.isShuttingDown) return;

    if (workerInfo.restartCount < this.maxRestarts) {
      setTimeout(() => {
        // Shutdown may have begun while the restart was pending.
        if (this.isShuttingDown) return;
        console.log(`Restarting worker ${worker.id}`);
        // Hand the counter on; every fork gets a fresh worker id, so a
        // counter stored per id alone would always restart at zero.
        this.createWorker(workerInfo.restartCount + 1);
      }, this.restartDelay);
    } else {
      console.error(`Worker ${worker.id} exceeded max restarts`);
    }
  }

  /**
   * Send SIGTERM to every tracked worker and exit the primary after a 5 s
   * grace period. Repeated calls (e.g. SIGINT followed by SIGTERM) are no-ops.
   */
  shutdown() {
    if (this.isShuttingDown) return;
    this.isShuttingDown = true;

    console.log('Shutting down cluster...');
    for (const { worker } of this.workers.values()) {
      worker.kill('SIGTERM');
    }

    setTimeout(() => {
      process.exit(0);
    }, 5000);
  }
}
// Usage: boot the cluster only when this file is the program's entry point,
// not when it is require()d by another module.
if (require.main === module) {
  const options = {
    maxWorkers: 4,
    restartDelay: 2000,
    maxRestarts: 5,
  };
  new ClusterManager('./app.js', options).start();
}
Worker Communication
// master.js
const cluster = require('cluster');

// `cluster.isMaster` is deprecated since Node.js 16; fall back to it only on
// runtimes that do not expose `isPrimary`.
const isPrimary = cluster.isPrimary ?? cluster.isMaster;

if (isPrimary) {
  const workers = [];

  // Create workers
  for (let i = 0; i < 2; i++) {
    const worker = cluster.fork();
    workers.push(worker);

    // Relay each worker's "task-complete" message to all of its peers.
    worker.on('message', (msg) => {
      console.log(`Master received: ${JSON.stringify(msg)}`);
      if (msg?.type !== 'task-complete') return;

      for (const peer of workers) {
        // Skip the sender, and skip peers whose IPC channel is closed: a
        // worker that has died stays in `workers`, and sending to it fails.
        if (peer.id === worker.id || !peer.isConnected()) continue;
        peer.send({
          type: 'peer-update',
          data: msg.data,
          from: worker.id,
        });
      }
    });
  }
} else {
  // worker.js
  const express = require('express');
  const app = express();
  let taskCount = 0;

  app.get('/task', (req, res) => {
    taskCount++;

    // Report the completed task to the primary, which forwards it to peers.
    process.send({
      type: 'task-complete',
      data: { taskCount, worker: cluster.worker.id },
    });

    res.json({ taskCount, worker: cluster.worker.id });
  });

  // Messages relayed by the primary on behalf of other workers.
  process.on('message', (msg) => {
    if (msg?.type === 'peer-update') {
      console.log(
        `Worker ${cluster.worker.id} received update from ${msg.from}`
      );
    }
  });

  app.listen(3000);
}
Process Management with PM2
Installing and Basic Usage
# Install PM2 globally so the `pm2` command is available on the PATH
npm install -g pm2
# Start the application as a single PM2-managed process
pm2 start app.js
# Start in cluster mode; `max` asks PM2 for one instance per available CPU
pm2 start app.js -i max
# Start in cluster mode with exactly 4 instances
pm2 start app.js -i 4
PM2 Configuration File
Create an `ecosystem.config.js` file:
module.exports = {
apps: [
{
name: 'my-app',
script: './app.js',
instances: 'max', // Use all available CPUs
exec_mode: 'cluster',
// Environment variables
env: {
NODE_ENV: 'development',
PORT: 3000,
},
env_production: {
NODE_ENV: 'production',
PORT: 3000,
},
// Resource limits
max_memory_restart: '1G',
// Logging
log_file: './logs/combined.log',
out_file: './logs/out.log',
error_file: './logs/error.log',
log_date_format: 'YYYY-MM-DD HH:mm Z',
// Auto restart
watch: false,
ignore_watch: ['node_modules', 'logs'],
// Graceful restart/reload
kill_timeout: 3000,
wait_ready: true,
listen_timeout: 10000,
// Health monitoring
min_uptime: '10s',
max_restarts: 10,
// Advanced settings
node_args: '--max-old-space-size=4096',
// Load balancing
instance_var: 'INSTANCE_ID',
},
],
};
Advanced PM2 Features
Graceful Shutdown Handling
// app.js
const express = require('express');

const app = express();
let server;
let isShuttingDown = false;

app.get('/', (req, res) => {
  // Refuse new work once shutdown has started so the load balancer retries
  // the request on another instance.
  if (isShuttingDown) {
    return res.status(503).json({ error: 'Service unavailable' });
  }
  res.json({ message: 'Hello World', pid: process.pid });
});

app.get('/health', (req, res) => {
  res.status(isShuttingDown ? 503 : 200).json({
    status: isShuttingDown ? 'shutting down' : 'healthy',
    pid: process.pid,
    uptime: process.uptime(),
  });
});

/**
 * Stop accepting connections, let in-flight requests finish, then exit.
 * Exits with 0 on a clean close, 1 on a close error or after the 30 s limit.
 *
 * @param {string} signal - label of the trigger, used for logging only
 */
const gracefulShutdown = (signal) => {
  // Several triggers can fire for one shutdown (SIGINT plus a PM2 message);
  // calling server.close() twice would report ERR_SERVER_NOT_RUNNING.
  if (isShuttingDown) return;

  console.log(`Received ${signal}, starting graceful shutdown`);
  isShuttingDown = true;

  server.close((err) => {
    if (err) {
      console.error('Error while closing HTTP server:', err);
      process.exit(1);
    }
    console.log('HTTP server closed');
    process.exit(0);
  });

  // Idle keep-alive sockets would otherwise hold close() open until the force
  // timer fires. The method exists from Node.js 18.2 on.
  if (typeof server.closeIdleConnections === 'function') {
    server.closeIdleConnections();
  }

  // Force close after 30 seconds. unref() so the timer alone never keeps the
  // process alive.
  const forceTimer = setTimeout(() => {
    console.log('Forcing shutdown');
    process.exit(1);
  }, 30000);
  forceTimer.unref();
};

process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));

// PM2 specific signals
process.on('message', (msg) => {
  if (msg === 'shutdown') {
    gracefulShutdown('PM2 shutdown');
  }
});

server = app.listen(process.env.PORT || 3000, () => {
  console.log(`Server running on port ${process.env.PORT || 3000}`);

  // Tell PM2 that app is ready (process.send exists only with an IPC channel)
  if (process.send) {
    process.send('ready');
  }
});
PM2 Monitoring and Metrics
// monitoring.js
const pmx = require('@pm2/io');
// Create custom metrics
const httpRequestCounter = pmx.counter({
name: 'HTTP requests',
});
const activeConnections = pmx.metric({
name: 'Active connections',
});
const responseTime = pmx.histogram({
name: 'HTTP response time',
measurement: 'mean',
});
// Middleware to track metrics
const metricsMiddleware = (req, res, next) => {
const start = Date.now();
httpRequestCounter.inc();
res.on('finish', () => {
const duration = Date.now() - start;
responseTime.update(duration);
});
next();
};
// Custom actions
pmx.action('get-stats', (reply) => {
reply({
requests: httpRequestCounter.val(),
connections: activeConnections.val(),
avgResponseTime: responseTime.val(),
});
});
module.exports = {
metricsMiddleware,
activeConnections,
};
Load Balancing Strategies
NGINX Load Balancer
Basic Configuration
# /etc/nginx/sites-available/my-app
# Reverse proxy that spreads requests over four local Node.js processes.
upstream nodejs_backend {
    # Round-robin (default): no balancing method is named, so requests are
    # distributed evenly across the servers below.
    server 127.0.0.1:3000;
    server 127.0.0.1:3001;
    server 127.0.0.1:3002;
    server 127.0.0.1:3003;
}

server {
    listen 80;
    server_name your-domain.com;

    location / {
        proxy_pass http://nodejs_backend;

        # HTTP/1.1 plus the Upgrade/Connection headers let WebSocket
        # handshakes pass through the proxy.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';

        # Forward the original host, client address and scheme to the app.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Skip the proxy cache when the client asked for a protocol upgrade.
        proxy_cache_bypass $http_upgrade;

        # Timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
    }
}
Advanced NGINX Configuration
# Send "Connection: upgrade" upstream only for real WebSocket handshakes.
# Hard-coding 'upgrade' on every request stops nginx from reusing the
# upstream keepalive connections configured below.
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      '';
}

upstream nodejs_backend {
    # Weighted round-robin: out of every 6 requests, 3/2/1 go to these servers.
    server 127.0.0.1:3000 weight=3;
    server 127.0.0.1:3001 weight=2;
    server 127.0.0.1:3002 weight=1;

    # Backup server: used only when the servers above are unavailable.
    server 127.0.0.1:3003 backup;

    # Idle upstream connections kept open per worker for reuse
    # (requires HTTP/1.1 and an empty Connection header on proxied requests).
    keepalive 32;
}

# Rate limiting: 10 requests/second per client IP, state kept in a 10 MB zone.
limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;

server {
    listen 80;
    server_name your-domain.com;

    # Security headers
    add_header X-Frame-Options SAMEORIGIN;
    add_header X-XSS-Protection "1; mode=block";
    add_header X-Content-Type-Options nosniff;

    # Gzip compression
    gzip on;
    gzip_types text/plain application/json application/javascript text/css;

    location / {
        # Rate limiting: allow bursts of 20 extra requests without delaying them
        limit_req zone=api burst=20 nodelay;

        proxy_pass http://nodejs_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;

        # Passive failover: retry the request on the next upstream server on
        # connection errors, timeouts, invalid headers or these 5xx responses.
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
    }

    # Health check endpoint (not rate limited, not logged)
    location /health {
        access_log off;
        proxy_pass http://nodejs_backend;
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
    }

    # Static files
    location /static/ {
        root /var/www/your-app;
        expires 1y;
        add_header Cache-Control "public, immutable";

        # add_header directives are inherited from the server level only when
        # a location defines none of its own, so the security headers must be
        # repeated next to Cache-Control.
        add_header X-Frame-Options SAMEORIGIN;
        add_header X-XSS-Protection "1; mode=block";
        add_header X-Content-Type-Options nosniff;
    }
}
Application-Level Load Balancing
Custom Load Balancer
const http = require('http');
const httpProxy = require('http-proxy-middleware');
const express = require('express');
/**
 * Minimal application-level HTTP load balancer built on express and
 * http-proxy-middleware.
 *
 * Each entry of `servers` is `{ host, port, weight, healthy, connections }`.
 * `healthy` and `connections` are updated in place by the balancer.
 * All selection methods return `null` when no healthy server is available.
 */
class LoadBalancer {
  /**
   * @param {Array<object>} servers - backend descriptors (see class comment)
   */
  constructor(servers) {
    this.servers = servers;
    this.current = 0;
    // One proxy middleware per backend, created on first use and then reused.
    this.proxies = new Map();
    this.app = express();
    this.setupMiddleware();
    this.setupHealthChecks();
  }

  /** Backends currently marked healthy. */
  getHealthyServers() {
    return this.servers.filter((server) => server.healthy);
  }

  /**
   * Round-robin: the next healthy server after the previously returned one.
   * Unhealthy servers are skipped instead of being handed to the caller.
   */
  getNextServer() {
    const total = this.servers.length;
    // At most one full lap; an empty list never enters the loop, so `current`
    // cannot become NaN through a modulo by zero.
    for (let attempt = 0; attempt < total; attempt++) {
      const candidate = this.servers[this.current];
      this.current = (this.current + 1) % total;
      if (candidate.healthy) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Weighted random choice among healthy servers: a server is picked with
   * probability proportional to its `weight`.
   */
  getWeightedServer() {
    const healthy = this.getHealthyServers();
    if (healthy.length === 0) return null;

    const totalWeight = healthy.reduce((sum, s) => sum + s.weight, 0);
    const random = Math.random() * totalWeight;
    let currentWeight = 0;
    for (const server of healthy) {
      currentWeight += server.weight;
      if (random <= currentWeight) {
        return server;
      }
    }
    return healthy[0];
  }

  /** Healthy server with the fewest in-flight requests. */
  getLeastConnectionsServer() {
    const healthy = this.getHealthyServers();
    if (healthy.length === 0) return null;

    return healthy.reduce((prev, curr) =>
      prev.connections < curr.connections ? prev : curr
    );
  }

  /**
   * Return the proxy middleware for `server`, creating it on first use.
   * Building a new proxy for every request wastes work and resources.
   */
  getProxy(server) {
    let proxy = this.proxies.get(server);
    if (!proxy) {
      proxy = httpProxy.createProxyMiddleware({
        target: `http://${server.host}:${server.port}`,
        changeOrigin: true,
        onError: (err, req, res) => {
          console.error(`Proxy error for ${server.host}:${server.port}:`, err);
          // Take the backend out of rotation until a health check passes.
          server.healthy = false;
          if (!res.headersSent) {
            res.status(502).json({ error: 'Bad Gateway' });
          }
        },
      });
      this.proxies.set(server, proxy);
    }
    return proxy;
  }

  /** Install the middleware that picks a backend and proxies the request. */
  setupMiddleware() {
    this.app.use((req, res, next) => {
      const server = this.getNextServer();
      if (!server) {
        return res.status(503).json({ error: 'Service unavailable' });
      }

      // Track connections. Release exactly once, whether the response
      // finished normally or the connection was torn down early; counting
      // only on a proxied response leaks a slot on every error.
      server.connections++;
      let released = false;
      const release = () => {
        if (released) return;
        released = true;
        server.connections--;
      };
      res.on('finish', release);
      res.on('close', release);

      this.getProxy(server)(req, res, next);
    });
  }

  /** Probe every backend every 30 seconds. */
  setupHealthChecks() {
    this.healthCheckTimer = setInterval(() => {
      this.servers.forEach((server) => {
        this.checkHealth(server);
      });
    }, 30000); // Check every 30 seconds
  }

  /**
   * Set `server.healthy` from a GET to its /health endpoint. Any failure,
   * including a probe that takes longer than 5 s, marks it unhealthy.
   */
  async checkHealth(server) {
    try {
      const response = await fetch(
        `http://${server.host}:${server.port}/health`,
        // Without a timeout a hung backend would never be marked unhealthy.
        { signal: AbortSignal.timeout(5000) }
      );
      server.healthy = response.ok;
    } catch (error) {
      server.healthy = false;
    }
  }

  /**
   * Start accepting traffic.
   * @param {number} [port=8080]
   */
  start(port = 8080) {
    this.app.listen(port, () => {
      console.log(`Load balancer running on port ${port}`);
      console.log(`Balancing across ${this.servers.length} servers`);
    });
  }
}
// Usage: three local backends with descending weights, all initially healthy.
const backends = [
  { host: 'localhost', port: 3000, weight: 3, healthy: true, connections: 0 },
  { host: 'localhost', port: 3001, weight: 2, healthy: true, connections: 0 },
  { host: 'localhost', port: 3002, weight: 1, healthy: true, connections: 0 },
];
const loadBalancer = new LoadBalancer(backends);
loadBalancer.start(8080);
Session Management in Clustered Apps
Shared Sessions with Redis (No Sticky Sessions Required)
const express = require('express');
const session = require('express-session');
const RedisStore = require('connect-redis')(session);
const redis = require('redis');

const app = express();
const isProduction = process.env.NODE_ENV === 'production';

// A guessable default secret lets anyone forge session cookies, so refuse to
// start in production without a real one.
if (isProduction && !process.env.SESSION_SECRET) {
  throw new Error('SESSION_SECRET must be set in production');
}

// In production the app runs behind a TLS-terminating proxy. Unless Express
// trusts the proxy's X-Forwarded-Proto header it sees plain HTTP, and
// express-session then never sets the `secure` cookie.
if (isProduction) {
  app.set('trust proxy', 1);
}

// Redis client
const redisClient = redis.createClient({
  host: process.env.REDIS_HOST || 'localhost',
  port: process.env.REDIS_PORT || 6379,
  // Returns the delay (ms) before the next reconnect attempt, an Error to
  // fail pending commands, or undefined to stop reconnecting.
  retry_strategy: (options) => {
    if (options.error && options.error.code === 'ECONNREFUSED') {
      return new Error('Redis server connection refused');
    }
    if (options.total_retry_time > 1000 * 60 * 60) {
      return new Error('Redis retry time exhausted');
    }
    if (options.attempt > 10) {
      return undefined;
    }
    // Linear backoff, capped at 3 seconds.
    return Math.min(options.attempt * 100, 3000);
  },
});

// Session configuration: sessions live in Redis, so any worker or instance
// can serve any request.
app.use(
  session({
    store: new RedisStore({ client: redisClient }),
    secret: process.env.SESSION_SECRET || 'your-secret-key',
    resave: false,
    saveUninitialized: false,
    rolling: true,
    cookie: {
      secure: isProduction,
      httpOnly: true,
      maxAge: 24 * 60 * 60 * 1000, // 24 hours
    },
  })
);

app.get('/', (req, res) => {
  if (!req.session.views) {
    req.session.views = 0;
  }
  req.session.views++;

  res.json({
    message: 'Session-based counter',
    views: req.session.views,
    worker: process.pid,
    sessionId: req.sessionID,
  });
});

app.listen(process.env.PORT || 3000);
JWT-Based Stateless Authentication
const express = require('express');
const jwt = require('jsonwebtoken');
const bcrypt = require('bcrypt');

const app = express();
app.use(express.json());

// A guessable default secret lets anyone mint valid tokens, so refuse to
// start in production without a real one.
if (process.env.NODE_ENV === 'production' && !process.env.JWT_SECRET) {
  throw new Error('JWT_SECRET must be set in production');
}
const JWT_SECRET = process.env.JWT_SECRET || 'your-jwt-secret';
// Sign and verify with one pinned algorithm so a token cannot choose its own.
const JWT_ALGORITHM = 'HS256';

// Mock user database
const users = [{ id: 1, username: 'user1', password: '$2b$10$hash...' }];

/**
 * Authentication middleware: expects `Authorization: Bearer <token>`.
 * Responds 401 when the token is missing and 403 when it fails verification;
 * otherwise stores the decoded payload on `req.user`.
 */
const authenticateToken = (req, res, next) => {
  const [scheme, token] = (req.headers['authorization'] || '').split(' ');
  if (!/^Bearer$/i.test(scheme) || !token) {
    return res.sendStatus(401);
  }

  jwt.verify(token, JWT_SECRET, { algorithms: [JWT_ALGORITHM] }, (err, user) => {
    if (err) return res.sendStatus(403);
    req.user = user;
    next();
  });
};

// Login endpoint
app.post('/login', async (req, res) => {
  const { username, password } = req.body ?? {};

  // bcrypt.compare() rejects when given a non-string. Inside an async handler
  // that rejection is unhandled and the request never gets a response.
  if (typeof username !== 'string' || typeof password !== 'string') {
    return res
      .status(400)
      .json({ error: 'username and password are required' });
  }

  try {
    const user = users.find((u) => u.username === username);
    if (!user) {
      return res.status(401).json({ error: 'Invalid credentials' });
    }

    const validPassword = await bcrypt.compare(password, user.password);
    if (!validPassword) {
      return res.status(401).json({ error: 'Invalid credentials' });
    }

    const token = jwt.sign(
      { id: user.id, username: user.username },
      JWT_SECRET,
      { algorithm: JWT_ALGORITHM, expiresIn: '24h' }
    );

    res.json({
      token,
      user: { id: user.id, username: user.username },
      worker: process.pid,
    });
  } catch (error) {
    console.error('Login failed:', error);
    res.status(500).json({ error: 'Internal server error' });
  }
});

// Protected route: any worker can verify the token, no shared state needed.
app.get('/profile', authenticateToken, (req, res) => {
  res.json({
    user: req.user,
    worker: process.pid,
    timestamp: new Date().toISOString(),
  });
});

app.listen(process.env.PORT || 3000);
Performance Monitoring and Optimization
Application Performance Monitoring
// monitoring.js
const express = require('express');
const promClient = require('prom-client');
// Create custom metrics registry
const register = new promClient.Registry();
// Default metrics
promClient.collectDefaultMetrics({
register,
prefix: 'nodejs_app_',
});
// Custom metrics
const httpRequestDuration = new promClient.Histogram({
name: 'http_request_duration_seconds',
help: 'Duration of HTTP requests in seconds',
labelNames: ['method', 'route', 'status_code'],
buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10],
});
const httpRequestTotal = new promClient.Counter({
name: 'http_requests_total',
help: 'Total number of HTTP requests',
labelNames: ['method', 'route', 'status_code'],
});
const activeConnections = new promClient.Gauge({
name: 'active_connections',
help: 'Number of active connections',
});
register.registerMetric(httpRequestDuration);
register.registerMetric(httpRequestTotal);
register.registerMetric(activeConnections);
// Middleware for metrics collection
const metricsMiddleware = (req, res, next) => {
const start = Date.now();
activeConnections.inc();
res.on('finish', () => {
const duration = (Date.now() - start) / 1000;
const route = req.route?.path || req.path;
httpRequestDuration
.labels(req.method, route, res.statusCode)
.observe(duration);
httpRequestTotal.labels(req.method, route, res.statusCode).inc();
activeConnections.dec();
});
next();
};
// Metrics endpoint
const setupMetricsEndpoint = (app) => {
app.get('/metrics', async (req, res) => {
try {
res.set('Content-Type', register.contentType);
const metrics = await register.metrics();
res.end(metrics);
} catch (error) {
res.status(500).end(error);
}
});
};
module.exports = {
metricsMiddleware,
setupMetricsEndpoint,
register,
};
Resource Usage Monitoring
// resource-monitor.js
const os = require('os');
const process = require('process');
/**
 * Periodically samples CPU time, memory and event-loop delay of the current
 * process, keeps the last 100 samples of each, and logs a warning when a
 * configured threshold is exceeded.
 *
 * Options:
 *  - interval: sampling period in ms (default 5000)
 *  - alerts:   `{ memory, eventLoopDelay }` thresholds; memory is the
 *              process RSS as a percentage of total system memory,
 *              eventLoopDelay is in ms. An omitted threshold disables that
 *              alert.
 */
class ResourceMonitor {
  constructor(options = {}) {
    this.interval = options.interval || 5000;
    this.alerts = options.alerts || {};
    this.monitoring = false;
    this.stats = {
      cpu: [],
      memory: [],
      eventLoop: [],
    };
  }

  /** Begin periodic sampling; does nothing if already running. */
  start() {
    if (this.monitoring) return;
    this.monitoring = true;
    console.log('Resource monitoring started');

    this.monitoringInterval = setInterval(() => {
      this.collectMetrics();
    }, this.interval);
    // A diagnostics timer must not be the only thing keeping the process up.
    this.monitoringInterval.unref();
  }

  /** Stop periodic sampling; does nothing if not running. */
  stop() {
    if (!this.monitoring) return;
    this.monitoring = false;
    clearInterval(this.monitoringInterval);
    console.log('Resource monitoring stopped');
  }

  /**
   * Take one sample, append it to the history and evaluate the alerts.
   * @returns {object} the sample: `{ timestamp, cpu, memory, eventLoop, process }`
   */
  collectMetrics() {
    const metrics = {
      timestamp: Date.now(),
      cpu: this.getCPUUsage(),
      memory: this.getMemoryUsage(),
      eventLoop: this.getEventLoopDelay(),
      process: this.getProcessInfo(),
    };

    // Store metrics (keep last 100 measurements)
    for (const key of Object.keys(this.stats)) {
      const history = this.stats[key];
      if (history.length >= 100) {
        history.shift();
      }
      if (metrics[key] !== undefined) {
        history.push(metrics[key]);
      }
    }

    // Check alerts
    this.checkAlerts(metrics);
    return metrics;
  }

  /**
   * CPU time consumed by this process so far, in seconds. These are
   * cumulative totals, not a utilisation percentage.
   */
  getCPUUsage() {
    const usage = process.cpuUsage();
    return {
      user: usage.user / 1000000, // Convert microseconds to seconds
      system: usage.system / 1000000,
      total: (usage.user + usage.system) / 1000000,
    };
  }

  /** Process and system memory figures in bytes, plus derived percentages. */
  getMemoryUsage() {
    const memUsage = process.memoryUsage();
    const systemMem = {
      total: os.totalmem(),
      free: os.freemem(),
      used: os.totalmem() - os.freemem(),
    };
    return {
      process: {
        rss: memUsage.rss,
        heapTotal: memUsage.heapTotal,
        heapUsed: memUsage.heapUsed,
        external: memUsage.external,
        arrayBuffers: memUsage.arrayBuffers,
      },
      system: systemMem,
      usage: {
        processPercent: (memUsage.rss / systemMem.total) * 100,
        systemPercent: (systemMem.used / systemMem.total) * 100,
      },
    };
  }

  /**
   * Event-loop delay in ms. The measurement finishes asynchronously, so each
   * call schedules a new one and returns the result of the previous call
   * (0 before any measurement has completed).
   */
  getEventLoopDelay() {
    const start = process.hrtime.bigint();
    setImmediate(() => {
      const delay = Number(process.hrtime.bigint() - start) / 1000000; // ns -> ms
      this.lastEventLoopDelay = delay;
    });
    return this.lastEventLoopDelay ?? 0;
  }

  /** Static facts about the running process. */
  getProcessInfo() {
    return {
      pid: process.pid,
      uptime: process.uptime(),
      version: process.version,
      platform: process.platform,
      arch: process.arch,
    };
  }

  /**
   * Warn for every configured threshold the sample exceeds. Thresholds are
   * compared against null/undefined rather than tested for truthiness, so a
   * threshold of 0 ("always alert") is honored.
   */
  checkAlerts(metrics) {
    const { memory, eventLoopDelay } = this.alerts;

    // Memory alert
    if (memory != null && metrics.memory.usage.processPercent > memory) {
      console.warn(
        `High memory usage: ${metrics.memory.usage.processPercent.toFixed(2)}%`
      );
    }

    // Event loop delay alert
    if (eventLoopDelay != null && metrics.eventLoop > eventLoopDelay) {
      console.warn(`High event loop delay: ${metrics.eventLoop.toFixed(2)}ms`);
    }
  }

  /**
   * Current sample, full history and averages over the history.
   * Calling this takes a new sample, which is also appended to the history.
   */
  getStats() {
    const average = (values) =>
      values.length > 0
        ? values.reduce((sum, value) => sum + value, 0) / values.length
        : 0;

    return {
      current: this.collectMetrics(),
      history: this.stats,
      averages: {
        memory: average(this.stats.memory.map((m) => m.usage.processPercent)),
        eventLoop: average(this.stats.eventLoop),
      },
    };
  }
}
// Usage: sample every 5 seconds and warn on high memory or event-loop delay.
const alertThresholds = {
  memory: 80, // Alert at 80% memory usage
  eventLoopDelay: 100, // Alert at 100ms event loop delay
};
const monitor = new ResourceMonitor({ interval: 5000, alerts: alertThresholds });
monitor.start();

module.exports = ResourceMonitor;
Docker and Containerization
Dockerfile for Node.js Clustering
# Multi-stage build: the builder stage installs dependencies and gathers the
# sources; the production stage receives only the resulting /app directory.
FROM node:18-alpine AS builder
WORKDIR /app

# Copy the package manifests first so the dependency layer stays cached until
# they change.
COPY package*.json ./

# Install production dependencies only (`--omit=dev` replaces the deprecated
# `--only=production` flag).
RUN npm ci --omit=dev && npm cache clean --force

# Copy source code
COPY . .

# Production stage
FROM node:18-alpine AS production

# Create an unprivileged user and group to run the app.
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001 -G nodejs

WORKDIR /app

# Copy the application from the builder stage. Without --from=builder, COPY
# would look for /app in the build context and the build stage would be unused.
COPY --from=builder --chown=nodejs:nodejs /app .

# Install PM2 globally
RUN npm install -g pm2

# Switch to non-root user
USER nodejs

# Expose port
EXPOSE 3000

# Health check. Uses busybox wget, which ships with Alpine; curl is not
# installed in node:18-alpine.
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
  CMD wget -q --spider http://localhost:3000/health || exit 1

# Start with PM2 (pm2-runtime stays in the foreground, as containers require)
CMD ["pm2-runtime", "start", "ecosystem.config.js"]
Docker Compose for Load Balanced Setup
# docker-compose.yml
# nginx load-balances three identical app containers; Redis holds shared
# session state; Prometheus and Grafana provide monitoring.
version: '3.8'

services:
  nginx:
    image: nginx:alpine
    ports:
      - '80:80'
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
    depends_on:
      - app1
      - app2
      - app3
    restart: unless-stopped

  # The app containers publish no host ports; nginx reaches them over the
  # compose network by service name (app1:3000, app2:3000, app3:3000).
  app1:
    build: .
    environment:
      - NODE_ENV=production
      - PORT=3000
      - INSTANCE_ID=1
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped

  app2:
    build: .
    environment:
      - NODE_ENV=production
      - PORT=3000
      - INSTANCE_ID=2
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped

  app3:
    build: .
    environment:
      - NODE_ENV=production
      - PORT=3000
      - INSTANCE_ID=3
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped

  redis:
    image: redis:alpine
    ports:
      - '6379:6379'
    volumes:
      - redis_data:/data
    restart: unless-stopped

  prometheus:
    image: prom/prometheus
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    restart: unless-stopped

  grafana:
    image: grafana/grafana
    ports:
      # Host port 3001, because the apps already use 3000 inside the network.
      - '3001:3000'
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the environment or an .env file; the
      # `admin` fallback is for local experiments only.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    volumes:
      - grafana_data:/var/lib/grafana
    restart: unless-stopped

volumes:
  redis_data:
  grafana_data:
Production Deployment Best Practices
Environment Configuration
// config/index.js
/**
 * Read an integer from an environment variable.
 *
 * Environment values are always strings, so `process.env.PORT || 3000` gives
 * a string when the variable is set and a number when it is not. This helper
 * makes the type consistent.
 *
 * @param {string|undefined} value - raw environment value
 * @param {*} fallback - returned when `value` is unset or not an integer
 * @returns {number|*} the parsed integer, or `fallback`
 */
const envInt = (value, fallback) => {
  const parsed = Number.parseInt(value, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};

// Local development: fixed values, no environment variables required.
const development = {
  port: 3000,
  database: {
    host: 'localhost',
    port: 5432,
    name: 'myapp_dev',
  },
  redis: {
    host: 'localhost',
    port: 6379,
  },
  logging: {
    level: 'debug',
  },
};

// Production: everything deployment-specific comes from the environment.
const production = {
  port: envInt(process.env.PORT, 3000),
  database: {
    host: process.env.DB_HOST,
    port: envInt(process.env.DB_PORT, 5432),
    name: process.env.DB_NAME,
    username: process.env.DB_USER,
    password: process.env.DB_PASS,
    ssl: true,
    pool: {
      min: 5,
      max: 20,
      acquire: 60000,
      idle: 10000,
    },
  },
  redis: {
    host: process.env.REDIS_HOST,
    port: envInt(process.env.REDIS_PORT, 6379),
    password: process.env.REDIS_PASSWORD,
    tls: process.env.REDIS_TLS === 'true',
  },
  logging: {
    level: 'info',
  },
  clustering: {
    // A number of instances, or 'max' when WEB_CONCURRENCY is not set.
    instances: envInt(process.env.WEB_CONCURRENCY, 'max'),
    maxMemoryRestart: '1G',
  },
};

const config = {
  development,
  production,
};
// Export the configuration for the active environment. An unknown NODE_ENV
// (e.g. "staging") would otherwise export `undefined` and surface later as an
// unrelated TypeError, so fail here with a clear message instead.
const activeEnv = process.env.NODE_ENV || 'development';
if (!Object.hasOwn(config, activeEnv)) {
  throw new Error(`No configuration defined for NODE_ENV="${activeEnv}"`);
}
module.exports = config[activeEnv];
Deployment Script
#!/bin/bash
# deploy.sh
# Install, build, test, migrate, reload the PM2 processes, then verify health.
# Any failing step aborts the deployment.
set -euo pipefail

echo "Starting deployment..."

# Install dependencies first. Build and tests need them, dev dependencies
# included, so this must happen before either step runs.
echo "Installing dependencies..."
npm ci

# Build application
echo "Building application..."
npm run build

# Run tests
echo "Running tests..."
npm test

# Drop dev dependencies now that build and tests are done.
echo "Pruning development dependencies..."
npm prune --omit=dev

# Database migrations (if applicable)
echo "Running database migrations..."
npm run migrate

# Start the application, or reload it if it is already running. A reload
# replaces the instances one by one, unlike a stop followed by a start.
echo "Starting application..."
pm2 startOrReload ecosystem.config.js

# Save PM2 configuration
pm2 save

# Health check: report success only after the app actually answers.
echo "Waiting for application to become healthy..."
sleep 10
curl -f http://localhost/health || { echo "Health check failed" >&2; exit 1; }

echo "Application is healthy and running!"
echo "Deployment completed successfully!"
Health Check Implementation
// health.js
const express = require('express');
const router = express.Router();
/**
 * Database connection check: run a trivial query and time it.
 *
 * @returns {Promise<{status: string, latency?: number, error?: string}>}
 *   `healthy` with the round-trip latency in ms, or `unhealthy` with the
 *   error message. Never rejects.
 */
const checkDatabase = async () => {
  // Must be captured before the query. Without it the latency expression
  // throws a ReferenceError that the catch below reports as "unhealthy".
  const start = Date.now();
  try {
    // Replace with your database client
    await db.raw('SELECT 1');
    return { status: 'healthy', latency: Date.now() - start };
  } catch (error) {
    return { status: 'unhealthy', error: error.message };
  }
};
// Redis connection check: PING the server and report the round-trip time in
// ms, or the error message when the ping fails. Never rejects.
const checkRedis = async () => {
  const startedAt = Date.now();
  let outcome;
  try {
    await redisClient.ping();
    outcome = { status: 'healthy', latency: Date.now() - startedAt };
  } catch (err) {
    outcome = { status: 'unhealthy', error: err.message };
  }
  return outcome;
};
/**
 * External service check: probe the /health endpoint of every dependency.
 *
 * The services are probed in parallel, each with a 5 s time limit, so one
 * slow dependency cannot stall the whole health check.
 *
 * @returns {Promise<Object<string, object>>} one entry per service name:
 *   `{ status, latency, statusCode }` when a response arrived, or
 *   `{ status: 'unhealthy', error, latency }` when the request failed.
 *   Never rejects.
 */
const checkExternalServices = async () => {
  const services = [
    { name: 'payment-service', url: process.env.PAYMENT_SERVICE_URL },
    { name: 'auth-service', url: process.env.AUTH_SERVICE_URL },
  ];

  const probe = async ({ name, url }) => {
    const start = Date.now();
    try {
      // fetch() ignores a `timeout` option; an abort signal is the supported
      // way to bound the request.
      const response = await fetch(`${url}/health`, {
        signal: AbortSignal.timeout(5000),
      });
      return [
        name,
        {
          status: response.ok ? 'healthy' : 'unhealthy',
          latency: Date.now() - start,
          statusCode: response.status,
        },
      ];
    } catch (error) {
      return [
        name,
        {
          status: 'unhealthy',
          error: error.message,
          latency: Date.now() - start,
        },
      ];
    }
  };

  // probe() never rejects, so Promise.all cannot fail fast here. The result
  // keeps the order of `services`.
  return Object.fromEntries(await Promise.all(services.map(probe)));
};
// Health check endpoint: runs all dependency checks concurrently and answers
// 200 only when every one of them reports healthy, 503 otherwise.
router.get('/health', async (req, res) => {
  const startedAt = Date.now();
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    pid: process.pid,
    memory: process.memoryUsage(),
    version: process.env.npm_package_version,
    environment: process.env.NODE_ENV,
  };
  const isUp = (check) => check.status === 'healthy';

  try {
    // Perform health checks
    const results = await Promise.all([
      checkDatabase(),
      checkRedis(),
      checkExternalServices(),
    ]);
    const [database, redis, externalServices] = results;
    report.checks = { database, redis, externalServices };

    // Overall health requires every individual check to pass.
    const allUp =
      isUp(database) &&
      isUp(redis) &&
      Object.values(externalServices).every(isUp);

    report.status = allUp ? 'healthy' : 'unhealthy';
    report.responseTime = Date.now() - startedAt;

    res.status(allUp ? 200 : 503).json(report);
  } catch (error) {
    report.status = 'unhealthy';
    report.error = error.message;
    report.responseTime = Date.now() - startedAt;
    res.status(503).json(report);
  }
});
// Builds a handler that always answers 200 with the given status label and
// the current timestamp.
const respondWith = (status) => (req, res) => {
  res.status(200).json({
    status,
    timestamp: new Date().toISOString(),
  });
};

// Readiness probe (Kubernetes): is the application ready to serve requests?
router.get('/ready', respondWith('ready'));

// Liveness probe (Kubernetes): simple check that the process is alive.
router.get('/live', respondWith('alive'));

module.exports = router;
Conclusion
Scaling Node.js applications requires a multi-faceted approach combining clustering, load balancing, and proper monitoring. Key takeaways:
Essential Strategies
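- Use the Cluster Module to run one worker per CPU core (it spreads requests across cores, but each worker's event loop can still be blocked by CPU-intensive work)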
- Implement PM2 for production process management
- Deploy Load Balancers (NGINX/HAProxy) for high availability
- Monitor Performance with metrics and health checks
- Handle Sessions Properly in distributed environments
Production Checklist
- ✅ Cluster configuration optimized for server resources
- ✅ Graceful shutdown and restart mechanisms
- ✅ Session management strategy implemented
- ✅ Load balancer configured with health checks
- ✅ Monitoring and alerting systems in place
- ✅ Automated deployment processes
- ✅ Container orchestration (Docker/Kubernetes)
- ✅ Database connection pooling
- ✅ Caching strategies implemented
- ✅ Security hardening completed
Performance Tips
- Monitor event loop delay and memory usage
- Implement proper error handling and circuit breakers
- Use connection pooling for databases
- Implement caching at multiple levels
- Regular performance profiling and optimization
By following these patterns and best practices, you can build Node.js applications that scale efficiently and maintain high availability under increasing load.
Remember to continuously monitor your application's performance and adjust scaling strategies based on actual usage patterns and requirements.