MCP Server Graceful Shutdown and Connection Pooling
Implementing reliable graceful shutdown and connection cleanup patterns in MCP servers to ensure stability during deployments and scaling operations.
Graceful shutdown and connection pooling are critical yet often overlooked aspects of production MCP server deployments. When you're running distributed systems handling real client requests, the difference between a hard stop and a graceful shutdown can mean dropped connections, incomplete operations, and angry engineers at 2 AM. Let's build a reliable foundation.
The Problem
Picture this: your MCP server is handling active connections when Kubernetes decides it's time to redeploy. A SIGTERM arrives. If you shut down immediately, you might:
- Abandon in-flight operations that clients are waiting on
- Leave database connections in inconsistent states
- Force clients to reconnect and retry, cascading errors upstream
- Lose billing or audit data mid-transaction
Even within a single server process, if your connection pool doesn't properly manage connection lifecycles, you can leak connections, exhaust database connection limits, or time out under load.
Connection Pooling: The Foundation
Before handling shutdown, you need a solid pool. Here's a practical pattern using a simple queue:
/**
 * Bounded pool of backend connections with idle eviction and a drain step
 * for graceful shutdown.
 *
 * A connection object is expected to expose `close()` and, optionally,
 * `ping()`. The pool stores its idle timer on the connection as `idleTimer`.
 *
 * Public state kept for monitoring:
 *  - `available`       idle connections (reused most-recently-released first)
 *  - `inUse`           connections currently checked out
 *  - `pendingRequests` callers waiting for a connection to be released
 *  - `stats`           counters: acquired, released, reused, created
 */
class ManagedConnectionPool {
  /**
   * @param {object} [config]
   * @param {number} [config.maxConnections=10]    Upper bound on live connections.
   * @param {number} [config.acquireTimeoutMs=5000] How long a queued acquire() waits.
   * @param {number} [config.idleTimeoutMs=30000]   Idle time before a connection is closed.
   */
  constructor(config = {}) {
    this.maxConnections = config.maxConnections || 10;
    this.acquireTimeoutMs = config.acquireTimeoutMs || 5000;
    this.idleTimeoutMs = config.idleTimeoutMs || 30000;
    this.available = [];
    this.inUse = new Set();
    this.pendingRequests = [];
    // Connections that are in neither `available` nor `inUse` but still
    // occupy a slot: being created, health-checked, or closed as stale.
    this.reserved = 0;
    this.stats = {
      acquired: 0,
      released: 0,
      reused: 0,
      created: 0,
    };
  }

  /**
   * Check out a connection: reuse a healthy idle one, create a new one if
   * the pool is under its limit, otherwise wait for a release.
   *
   * @returns {Promise<object>} The checked-out connection.
   * @throws {Error} If no connection is released within `acquireTimeoutMs`,
   *   or if `_createConnection()` rejects.
   */
  async acquire() {
    // Try to reuse an idle connection.
    while (this.available.length > 0) {
      const conn = this.available.pop();
      // Hold the slot across the awaits below so a concurrent acquire()
      // cannot create a connection in the gap and exceed the limit.
      this.reserved++;
      try {
        if (await this._isHealthy(conn)) {
          this.stats.reused++;
          return this._checkOut(conn);
        }
        // Stale connection: discard it. A failing close() must not fail
        // the caller, who can still be served by another connection.
        await this._closeQuietly(conn);
      } finally {
        this.reserved--;
      }
    }

    // Create a new connection if under the limit.
    if (this.inUse.size + this.available.length + this.reserved < this.maxConnections) {
      this.reserved++;
      let conn;
      try {
        conn = await this._createConnection();
      } finally {
        this.reserved--;
      }
      this.stats.created++;
      return this._checkOut(conn);
    }

    // At capacity: queue until release() hands a connection over.
    return new Promise((resolve, reject) => {
      const handler = (conn) => {
        clearTimeout(timer);
        resolve(this._checkOut(conn));
      };
      const timer = setTimeout(() => {
        const idx = this.pendingRequests.indexOf(handler);
        if (idx !== -1) this.pendingRequests.splice(idx, 1);
        reject(new Error(`Connection acquire timeout after ${this.acquireTimeoutMs}ms`));
      }, this.acquireTimeoutMs);
      this.pendingRequests.push(handler);
    });
  }

  /**
   * Return a connection to the pool. It is handed straight to the oldest
   * waiting acquire() if there is one; otherwise it becomes idle.
   *
   * Releasing a connection that is not currently checked out (a double
   * release, or a connection from elsewhere) is ignored, so the same
   * connection can never be handed to two callers.
   *
   * @param {object} conn Connection previously returned by acquire().
   */
  release(conn) {
    if (!this.inUse.delete(conn)) return;
    this.stats.released++;
    const waiter = this.pendingRequests.shift();
    if (waiter) {
      waiter(conn);
    } else {
      this.available.push(conn);
      this._scheduleIdleTimeout(conn);
    }
  }

  /** Mark `conn` as checked out, count the acquisition, and return it. */
  _checkOut(conn) {
    this.inUse.add(conn);
    this.stats.acquired++;
    return conn;
  }

  /** Close `conn`, logging instead of throwing if close() fails. */
  async _closeQuietly(conn) {
    try {
      await conn.close();
    } catch (e) {
      console.error(`Error closing connection: ${e.message}`);
    }
  }

  /** Arrange for `conn` to be closed if it is still idle after `idleTimeoutMs`. */
  _scheduleIdleTimeout(conn) {
    if (conn.idleTimer) clearTimeout(conn.idleTimer);
    conn.idleTimer = setTimeout(() => {
      conn.idleTimer = null;
      const idx = this.available.indexOf(conn);
      if (idx === -1) return;
      this.available.splice(idx, 1);
      // _closeQuietly never rejects, so nothing escapes the timer callback.
      this._closeQuietly(conn);
    }, this.idleTimeoutMs);
    // An idle-eviction timer alone should not keep the process running.
    conn.idleTimer.unref?.();
  }

  /**
   * Cancel the connection's idle timer and ping it (when it has a `ping`).
   * @returns {Promise<boolean>} false if the ping throws, true otherwise.
   */
  async _isHealthy(conn) {
    try {
      if (conn.idleTimer) {
        clearTimeout(conn.idleTimer);
        conn.idleTimer = null;
      }
      // Quick ping test
      await conn.ping?.();
      return true;
    } catch {
      return false;
    }
  }

  /** Create a backend connection. Placeholder — implement for your backend. */
  async _createConnection() {
    return { ping: async () => {}, close: async () => {} };
  }

  /**
   * For graceful shutdown: wait (polling every 100 ms) until no connection
   * is checked out and no acquire() is queued, or until the timeout passes,
   * then close every idle connection.
   *
   * Connections still checked out after the timeout are reported with a
   * warning and left to their holders.
   *
   * @param {number} [drainTimeoutMs=30000] Maximum time to wait.
   * @returns {Promise<object>} The pool's `stats` counters.
   */
  async drain(drainTimeoutMs = 30000) {
    const start = Date.now();
    while ((this.inUse.size > 0 || this.pendingRequests.length > 0) &&
           Date.now() - start < drainTimeoutMs) {
      await new Promise((r) => setTimeout(r, 100));
    }
    if (this.inUse.size > 0) {
      console.warn(`drain: ${this.inUse.size} connections still in use after timeout`);
    }
    // Detach the idle list first so a concurrent acquire() cannot pop a
    // connection that is in the middle of being closed.
    const idle = this.available;
    this.available = [];
    for (const conn of idle) {
      if (conn.idleTimer) {
        clearTimeout(conn.idleTimer);
        conn.idleTimer = null;
      }
      await this._closeQuietly(conn);
    }
    return this.stats;
  }
}
Graceful Shutdown Handler
Now integrate with your server lifecycle:
/**
 * HTTP server that serves each request with a connection borrowed from a
 * pool and shuts down gracefully on SIGTERM/SIGINT: stop accepting
 * connections, wait for in-flight requests, drain the pool, exit.
 */
class MCPServer {
  /**
   * @param {object} pool Pool exposing `acquire()`, `release(conn)` and `drain()`.
   */
  constructor(pool) {
    this.pool = pool;
    this.server = null;
    this.isShuttingDown = false;
    this.activeRequests = new Set();
    // Monotonic counter: unlike random ids, two requests can never share an
    // id, so `activeRequests.size` is always the true in-flight count.
    this.nextRequestId = 0;
    // Memoized shutdown so repeated signals run the sequence only once.
    this.shutdownPromise = null;
    this.signalHandlersInstalled = false;
  }

  /**
   * Create the HTTP server, start listening, and install signal handlers.
   * @param {number} [port=3000] Port to listen on.
   */
  start(port = 3000) {
    this.server = require('http').createServer((req, res) => {
      this._onRequest(req, res).catch((err) => {
        console.error(`Unexpected request handler failure: ${err.message}`);
      });
    });
    this.server.listen(port, () => {
      console.log(`MCP server listening on port ${port}`);
    });
    this._setupSignalHandlers();
  }

  /**
   * Serve one request: reply 503 while shutting down, otherwise run
   * `_handleRequest` with a pooled connection and reply 200 with its JSON
   * result, or 500 with the error message on failure.
   *
   * @param {object} req Incoming request.
   * @param {object} res Server response.
   * @returns {Promise<void>}
   */
  async _onRequest(req, res) {
    if (this.isShuttingDown) {
      // `Connection: close` asks keep-alive clients to drop this socket
      // instead of sending further requests to a server that is going away.
      res.writeHead(503, { 'Retry-After': '10', Connection: 'close' });
      res.end('Server is shutting down');
      return;
    }
    const requestId = ++this.nextRequestId;
    this.activeRequests.add(requestId);
    try {
      const conn = await this.pool.acquire();
      try {
        const result = await this._handleRequest(req, conn);
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify(result));
      } finally {
        this.pool.release(conn);
      }
    } catch (err) {
      if (res.headersSent) {
        // The response already started (for example release() failed after
        // the 200 was written). Calling writeHead again would throw, so
        // log the failure and just make sure the response is finished.
        console.error(`Request failed after response started: ${err.message}`);
        if (!res.writableEnded) res.end();
      } else {
        res.writeHead(500, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ error: err.message }));
      }
    } finally {
      this.activeRequests.delete(requestId);
    }
  }

  /**
   * Run the shutdown sequence once; later calls return the same promise.
   *
   * @param {string} signal Name of the signal that triggered shutdown (for logging).
   * @param {number} [shutdownTimeoutMs=30000] Maximum wait for in-flight requests.
   * @returns {Promise<object>} Whatever `pool.drain()` resolves to.
   */
  _shutdown(signal, shutdownTimeoutMs = 30000) {
    if (!this.shutdownPromise) {
      this.shutdownPromise = this._runShutdown(signal, shutdownTimeoutMs);
    }
    return this.shutdownPromise;
  }

  /** The actual shutdown sequence; call through `_shutdown`. */
  async _runShutdown(signal, shutdownTimeoutMs) {
    console.log(`\nReceived ${signal}, starting graceful shutdown...`);
    this.isShuttingDown = true;

    // Stop accepting new connections
    if (this.server) {
      this.server.close((err) => {
        if (err) {
          console.error(`HTTP server close error: ${err.message}`);
        } else {
          console.log('HTTP server closed');
        }
      });
      // Where the runtime provides it, also drop idle keep-alive sockets so
      // clients reconnect elsewhere right away.
      this.server.closeIdleConnections?.();
    }

    // Wait for in-flight requests
    const start = Date.now();
    while (this.activeRequests.size > 0 && Date.now() - start < shutdownTimeoutMs) {
      console.log(`Waiting for ${this.activeRequests.size} in-flight requests...`);
      await new Promise((r) => setTimeout(r, 1000));
    }
    if (this.activeRequests.size > 0) {
      console.warn(`Force-closing ${this.activeRequests.size} requests after timeout`);
    }

    // Drain the pool
    const poolStats = await this.pool.drain();
    console.log(`Pool drained:`, poolStats);
    console.log('Graceful shutdown complete');
    return poolStats;
  }

  /**
   * Install SIGTERM/SIGINT handlers (once per instance) that run the
   * shutdown sequence and then exit: code 0 on success, 1 if it failed.
   */
  _setupSignalHandlers() {
    if (this.signalHandlersInstalled) return;
    this.signalHandlersInstalled = true;

    const gracefulShutdown = (signal) => {
      if (this.shutdownPromise) {
        console.log(`Received ${signal}, shutdown already in progress`);
        return;
      }
      this._shutdown(signal).then(
        () => process.exit(0),
        (err) => {
          console.error(`Graceful shutdown failed: ${err.message}`);
          process.exit(1);
        },
      );
    };
    process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
    process.on('SIGINT', () => gracefulShutdown('SIGINT'));
  }

  /**
   * Business logic hook.
   * @param {object} req Incoming request.
   * @param {object} conn Pooled connection for this request.
   * @returns {Promise<object>} JSON-serializable response payload.
   */
  async _handleRequest(req, conn) {
    // Your business logic here
    return { status: 'ok', requestedAt: new Date().toISOString() };
  }
}
// Usage: one pool capped at 20 connections, shared by a single server that
// listens on start()'s default port.
const poolConfig = { maxConnections: 20 };
const pool = new ManagedConnectionPool(poolConfig);
const server = new MCPServer(pool);
server.start();
Production Checklist
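Deployment signals: Ensure your container orchestrator sends SIGTERM and gives your process enough time to drain before it follows up with SIGKILL. The termination grace period (terminationGracePeriodSeconds in Kubernetes, 30 seconds by default) must be longer than the server's total shutdown budget — in the code above, up to 30 seconds waiting for in-flight requests plus up to 30 seconds draining the pool.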
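Load balancer coordination: Remove MCP servers from the load balancer's backend list before sending SIGTERM. Some orchestrators can do this for you — for example, a Kubernetes preStop hook runs before SIGTERM is delivered and can delay it until the endpoint has been removed from rotation.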
Database constraints: If your backend database has connection limits, set maxConnections well below the per-process limit, accounting for multiple replicas.
Monitoring: Track pool stats — connection reuse ratios, acquire timeouts, drain stats. A spike in timeouts often signals upcoming bottlenecks.
Testing: Chaos-test graceful shutdown by killing your process during active requests. Verify no connections leak and clients receive proper status codes.
The Business Value
Solid connection pooling and graceful shutdown reduce incident severity:
- Fewer orphaned operations → lower retry storms
- Faster deployments → zero-downtime rolling updates
- Stable latency → connection reuse beats creating fresh connections every time
In systems handling sensitive operations (authentication, payments, audit logs), the discipline of graceful shutdown also creates an audit trail — no mysterious gaps in the ledger when a pod restarts.
The patterns above are proven in production systems handling millions of requests. The investment in clean lifecycle management pays dividends in reliability and operational peace.