Node.js performance optimization requires understanding the single-threaded event loop, V8's garbage collector, and how to leverage system resources effectively. This guide covers practical techniques to improve throughput, reduce latency, and handle more concurrent connections on your VPS.
Understanding the Event Loop
The event loop is the heart of Node.js. Performance problems almost always trace back to blocking it:
// BAD: Blocking the event loop with synchronous computation.
// While this handler runs, the process can serve no other request.
app.get('/report', (req, res) => {
  const csv = fs.readFileSync('/large/file.csv'); // Blocks!
  const report = processData(csv); // CPU-intensive, blocks!
  res.json(report);
});
// GOOD: Non-blocking approach — async file read plus CPU work offloaded
// to a worker, so the event loop stays free between awaits.
app.get('/report', async (req, res) => {
  try {
    const data = await fs.promises.readFile('/large/file.csv');
    const result = await processInWorker(data);
    res.json(result);
  } catch (err) {
    // Express 4 does not forward a rejected async handler to error
    // middleware — without this catch the request would hang forever.
    res.status(500).json({ error: 'Failed to generate report' });
  }
});
Detecting Event Loop Lag
// Monitor event loop delay
const { monitorEventLoopDelay } = require('node:perf_hooks');
const h = monitorEventLoopDelay({ resolution: 20 });
h.enable();
setInterval(() => {
console.log(`Event loop p99: ${(h.percentile(99) / 1e6).toFixed(1)}ms`);
h.reset();
}, 5000);
Clustering for Multi-Core Utilization
Node.js executes your JavaScript on a single thread, so one process effectively uses one CPU core by default. Use the cluster module to utilize all CPU cores:
const cluster = require('node:cluster');
const os = require('node:os');

if (cluster.isPrimary) {
  // Primary process: fork one worker per CPU core and supervise them.
  const numWorkers = os.cpus().length;
  console.log(`Primary ${process.pid} starting ${numWorkers} workers`);
  for (let i = 0; i < numWorkers; i++) {
    cluster.fork();
  }
  cluster.on('exit', (worker, code, signal) => {
    // Don't respawn workers that were shut down deliberately
    // (e.g. via worker.disconnect() during a graceful reload).
    if (worker.exitedAfterDisconnect) return;
    console.log(`Worker ${worker.process.pid} died (code=${code}, signal=${signal}), restarting...`);
    cluster.fork();
  });
} else {
  // Worker process: run the actual HTTP server. The cluster module
  // shares the listening socket among all workers.
  const app = require('./app');
  app.listen(3000);
}
For production, use PM2 which handles clustering, monitoring, and restarts:
# Install PM2
npm install -g pm2
# Start with cluster mode (all cores)
pm2 start app.js -i max --name "myapp"
# Or specify exact worker count
pm2 start app.js -i 4 --name "myapp"
# ecosystem.config.js for full configuration
// PM2 ecosystem file: one entry per managed application.
module.exports = {
apps: [{
name: 'myapp',
script: './app.js',
instances: 'max', // one worker per CPU core
exec_mode: 'cluster', // share the listening port across all workers
max_memory_restart: '1G', // restart a worker whose memory use exceeds 1 GB
node_args: '--max-old-space-size=2048', // cap V8 old-generation heap at 2048 MB
env: {
NODE_ENV: 'production',
UV_THREADPOOL_SIZE: 16 // larger libuv pool for I/O-heavy workloads
}
}]
};
Worker Threads for CPU-Intensive Tasks
// worker-pool.js
const { Worker } = require('node:worker_threads');
const os = require('node:os');
/**
 * Fixed-size pool of worker threads with a FIFO overflow queue.
 * Tasks are dispatched to an idle worker immediately, or queued until
 * one frees up.
 */
class WorkerPool {
  /**
   * @param {string} workerFile - Path to the worker script.
   * @param {number} [poolSize] - Number of workers (defaults to CPU count).
   */
  constructor(workerFile, poolSize = os.cpus().length) {
    this.workers = [];
    this.queue = [];
    for (let i = 0; i < poolSize; i++) {
      this.addWorker(workerFile);
    }
  }

  addWorker(workerFile) {
    const worker = new Worker(workerFile);
    worker.busy = false;
    worker.task = null; // { data, resolve, reject } of the in-flight task
    worker.on('message', (result) => {
      const task = worker.task;
      worker.busy = false;
      worker.task = null;
      if (task) task.resolve(result); // guard against spurious messages
      this.processQueue();
    });
    // Without an 'error' handler a crashed worker left its promise pending
    // forever and silently shrank the pool.
    worker.on('error', (err) => {
      const task = worker.task;
      // Replace the dead worker so the pool keeps its size.
      this.workers.splice(this.workers.indexOf(worker), 1);
      this.addWorker(workerFile);
      if (task) task.reject(err);
      this.processQueue();
    });
    this.workers.push(worker);
  }

  /**
   * Run one task on the pool.
   * @param {*} data - Posted to the worker as-is.
   * @returns {Promise<*>} resolves with the worker's reply, rejects if the
   *   worker errors while processing this task.
   */
  runTask(data) {
    return new Promise((resolve, reject) => {
      const available = this.workers.find((w) => !w.busy);
      const task = { data, resolve, reject };
      if (available) {
        this._dispatch(available, task);
      } else {
        this.queue.push(task);
      }
    });
  }

  processQueue() {
    if (this.queue.length === 0) return;
    const available = this.workers.find((w) => !w.busy);
    if (!available) return;
    // BUG FIX: the original destructured only { resolve } here, dropping the
    // queued task's reject handler.
    this._dispatch(available, this.queue.shift());
  }

  // Mark the worker busy and hand it the task.
  _dispatch(worker, task) {
    worker.busy = true;
    worker.task = task;
    worker.postMessage(task.data);
  }
}
module.exports = WorkerPool;
Memory Optimization
V8 Heap Configuration
# Set max old-space heap size in MB (the default depends on Node version and available system memory; historically ~1.5GB on 64-bit)
node --max-old-space-size=4096 app.js
# Monitor memory usage
node --trace-gc app.js 2>&1 | grep "Mark-Sweep"
Avoiding Memory Leaks
// Common leak: unbounded caches
// BAD: one entry per distinct id, never evicted — memory grows forever
const cache = {};
app.get('/data/:id', (req, res) => {
if (!cache[req.params.id]) {
// NOTE(review): fetchFromDB is presumably async, so this caches a
// Promise rather than the resolved row — verify against its definition.
cache[req.params.id] = fetchFromDB(req.params.id);
}
res.json(cache[req.params.id]);
});
// GOOD: Use LRU cache with size limit
const { LRUCache } = require('lru-cache'); // third-party package
// At most 1000 entries, each expiring after 5 minutes
const cache = new LRUCache({ max: 1000, ttl: 1000 * 60 * 5 });
// Common leak: event listener accumulation
// BAD
function handleRequest(req, res) {
db.on('error', (err) => res.status(500).send(err)); // Leaks!
}
// GOOD
function handleRequest(req, res) {
db.once('error', (err) => res.status(500).send(err));
}
Stream Processing
Streams prevent loading entire files/responses into memory:
const { pipeline } = require('node:stream/promises');
const { createReadStream, createWriteStream } = require('node:fs');
const { createGzip } = require('node:zlib');
// Stream a large file with compression — constant memory regardless of size
app.get('/download', async (req, res) => {
  res.setHeader('Content-Encoding', 'gzip');
  res.setHeader('Content-Type', 'application/octet-stream');
  try {
    await pipeline(
      createReadStream('/data/large-file.csv'),
      createGzip(),
      res
    );
  } catch (err) {
    // pipeline() rejects if the client disconnects mid-stream; without this
    // catch that becomes an unhandled rejection. Just tear the response down.
    res.destroy();
  }
});
// Stream JSON for large datasets — emit one array element at a time
app.get('/users', async (req, res) => {
  res.setHeader('Content-Type', 'application/json');
  res.write('[');
  let first = true;
  for await (const user of db.streamUsers()) {
    const chunk = (first ? '' : ',') + JSON.stringify(user);
    first = false;
    // Respect backpressure: write() returning false means the socket buffer
    // is full — pause until 'drain' instead of buffering the whole result
    // set in process memory (which would defeat the point of streaming).
    if (!res.write(chunk)) {
      await new Promise((resolve) => res.once('drain', resolve));
    }
  }
  res.end(']');
});
HTTP Performance
// Enable keep-alive for outgoing requests
const http = require('node:http');
const https = require('node:https');
// Reuse TCP connections instead of opening one per request; cap each pool at 100 sockets
const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 100 });
const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 100 });
// Use undici for significantly faster HTTP client
const { request } = require('undici'); // third-party package
// NOTE: top-level await as written here requires an ESM or async context
const { body } = await request('https://api.example.com/data');
const data = await body.json();
Profiling in Production
# CPU profile with clinic.js
npm install -g clinic
clinic doctor -- node app.js
# Then run load test, Ctrl+C, and it opens a report
# Or use built-in inspector
node --inspect app.js
# Connect Chrome DevTools to chrome://inspect
# Heap snapshot
kill -USR2 $(pgrep -f "node app.js")
# Requires the process to have been started with --heapsnapshot-signal=SIGUSR2
UV Thread Pool
Node.js uses libuv's thread pool for file I/O, DNS lookups, and crypto. The default size of 4 is too small for I/O-heavy apps:
# Increase thread pool size (must be in the environment at process start — changing process.env after the pool is first used has no effect)
UV_THREADPOOL_SIZE=16 node app.js
# In ecosystem.config.js
env: { UV_THREADPOOL_SIZE: 16 }
Summary
Node.js optimization follows a hierarchy: first, never block the event loop. Second, use clustering or PM2 to utilize all CPU cores. Third, offload CPU work to worker threads. Fourth, manage memory carefully with bounded caches and streams. Finally, profile regularly with clinic.js or the built-in inspector to catch regressions early. These techniques combined can improve throughput by 5-10x on a multi-core VPS.