fix(skill-mcp-manager): prevent memory leaks from orphaned MCP processes (#453)
* fix(skill-mcp-manager): prevent memory leaks from orphaned MCP processes - Close transport on connection failure to prevent zombie processes - Add process exit handlers (SIGINT/SIGTERM) for graceful cleanup - Use pendingConnections Map to prevent duplicate client spawns Fixes #361 🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode) * fix(ci): replace deprecated rhysd/actionlint-action with direct installation rhysd/actionlint-action repository was removed/archived. Use official actionlint download script instead. 🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode) * fix(skill-mcp-manager): add transport.close() and idle timeout to fix memory leaks Previously, disconnectSession() and disconnectAll() only called client.close() but NOT transport.close(). StdioClientTransport spawns child processes for MCP servers, and without transport.close(), these processes remained orphaned and accumulated memory (6GB leak reported). Changes: - Added missing transport.close() calls in disconnectSession() and disconnectAll() - Added idle timeout mechanism (5-minute timeout) with lastUsedAt tracking - Added cleanup timer that runs every 60 seconds to remove idle clients - Made signal handlers (SIGINT, SIGTERM, SIGBREAK) async to properly await cleanup - Ensure proper cleanup order: clear from map first, then close client, then close transport 🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode) * fix(ci): pin actionlint download script to v1.7.10 for supply chain security - Pin to specific release tag instead of 'main' branch - Prevents potential supply chain attacks from upstream compromises 🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
8
.github/workflows/lint-workflows.yml
vendored
8
.github/workflows/lint-workflows.yml
vendored
@@ -14,7 +14,9 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v5
|
||||||
|
|
||||||
|
- name: Install actionlint
|
||||||
|
run: |
|
||||||
|
bash <(curl -sSL https://raw.githubusercontent.com/rhysd/actionlint/v1.7.10/scripts/download-actionlint.bash)
|
||||||
|
|
||||||
- name: Run actionlint
|
- name: Run actionlint
|
||||||
uses: rhysd/actionlint-action@v1
|
run: ./actionlint -color -shellcheck=""
|
||||||
with:
|
|
||||||
fail-on-warning: false
|
|
||||||
|
|||||||
@@ -9,15 +9,61 @@ interface ManagedClient {
|
|||||||
client: Client
|
client: Client
|
||||||
transport: StdioClientTransport
|
transport: StdioClientTransport
|
||||||
skillName: string
|
skillName: string
|
||||||
|
lastUsedAt: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export class SkillMcpManager {
|
export class SkillMcpManager {
|
||||||
private clients: Map<string, ManagedClient> = new Map()
|
private clients: Map<string, ManagedClient> = new Map()
|
||||||
|
private pendingConnections: Map<string, Promise<Client>> = new Map()
|
||||||
|
private cleanupRegistered = false
|
||||||
|
private cleanupInterval: ReturnType<typeof setInterval> | null = null
|
||||||
|
private readonly IDLE_TIMEOUT = 5 * 60 * 1000
|
||||||
|
|
||||||
private getClientKey(info: SkillMcpClientInfo): string {
|
private getClientKey(info: SkillMcpClientInfo): string {
|
||||||
return `${info.sessionID}:${info.skillName}:${info.serverName}`
|
return `${info.sessionID}:${info.skillName}:${info.serverName}`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private registerProcessCleanup(): void {
|
||||||
|
if (this.cleanupRegistered) return
|
||||||
|
this.cleanupRegistered = true
|
||||||
|
|
||||||
|
const cleanup = async () => {
|
||||||
|
for (const [, managed] of this.clients) {
|
||||||
|
try {
|
||||||
|
await managed.client.close()
|
||||||
|
} catch {
|
||||||
|
// Ignore errors during cleanup
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
await managed.transport.close()
|
||||||
|
} catch {
|
||||||
|
// Transport may already be terminated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.clients.clear()
|
||||||
|
this.pendingConnections.clear()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: 'exit' event is synchronous-only in Node.js, so we use 'beforeExit' for async cleanup
|
||||||
|
// However, 'beforeExit' is not emitted on explicit process.exit() calls
|
||||||
|
// Signal handlers are made async to properly await cleanup
|
||||||
|
|
||||||
|
process.on("SIGINT", async () => {
|
||||||
|
await cleanup()
|
||||||
|
process.exit(0)
|
||||||
|
})
|
||||||
|
process.on("SIGTERM", async () => {
|
||||||
|
await cleanup()
|
||||||
|
process.exit(0)
|
||||||
|
})
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
process.on("SIGBREAK", async () => {
|
||||||
|
await cleanup()
|
||||||
|
process.exit(0)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async getOrCreateClient(
|
async getOrCreateClient(
|
||||||
info: SkillMcpClientInfo,
|
info: SkillMcpClientInfo,
|
||||||
config: ClaudeCodeMcpServer
|
config: ClaudeCodeMcpServer
|
||||||
@@ -26,12 +72,26 @@ export class SkillMcpManager {
|
|||||||
const existing = this.clients.get(key)
|
const existing = this.clients.get(key)
|
||||||
|
|
||||||
if (existing) {
|
if (existing) {
|
||||||
|
existing.lastUsedAt = Date.now()
|
||||||
return existing.client
|
return existing.client
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prevent race condition: if a connection is already in progress, wait for it
|
||||||
|
const pending = this.pendingConnections.get(key)
|
||||||
|
if (pending) {
|
||||||
|
return pending
|
||||||
|
}
|
||||||
|
|
||||||
const expandedConfig = expandEnvVarsInObject(config)
|
const expandedConfig = expandEnvVarsInObject(config)
|
||||||
const client = await this.createClient(info, expandedConfig)
|
const connectionPromise = this.createClient(info, expandedConfig)
|
||||||
|
this.pendingConnections.set(key, connectionPromise)
|
||||||
|
|
||||||
|
try {
|
||||||
|
const client = await connectionPromise
|
||||||
return client
|
return client
|
||||||
|
} finally {
|
||||||
|
this.pendingConnections.delete(key)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async createClient(
|
private async createClient(
|
||||||
@@ -65,6 +125,8 @@ export class SkillMcpManager {
|
|||||||
Object.assign(mergedEnv, config.env)
|
Object.assign(mergedEnv, config.env)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.registerProcessCleanup()
|
||||||
|
|
||||||
const transport = new StdioClientTransport({
|
const transport = new StdioClientTransport({
|
||||||
command,
|
command,
|
||||||
args,
|
args,
|
||||||
@@ -80,6 +142,12 @@ export class SkillMcpManager {
|
|||||||
try {
|
try {
|
||||||
await client.connect(transport)
|
await client.connect(transport)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
// Close transport to prevent orphaned MCP process on connection failure
|
||||||
|
try {
|
||||||
|
await transport.close()
|
||||||
|
} catch {
|
||||||
|
// Process may already be terminated
|
||||||
|
}
|
||||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Failed to connect to MCP server "${info.serverName}".\n\n` +
|
`Failed to connect to MCP server "${info.serverName}".\n\n` +
|
||||||
@@ -92,7 +160,8 @@ export class SkillMcpManager {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
this.clients.set(key, { client, transport, skillName: info.skillName })
|
this.clients.set(key, { client, transport, skillName: info.skillName, lastUsedAt: Date.now() })
|
||||||
|
this.startCleanupTimer()
|
||||||
return client
|
return client
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -102,26 +171,64 @@ export class SkillMcpManager {
|
|||||||
for (const [key, managed] of this.clients.entries()) {
|
for (const [key, managed] of this.clients.entries()) {
|
||||||
if (key.startsWith(`${sessionID}:`)) {
|
if (key.startsWith(`${sessionID}:`)) {
|
||||||
keysToRemove.push(key)
|
keysToRemove.push(key)
|
||||||
|
// Delete from map first to prevent re-entrancy during async close
|
||||||
|
this.clients.delete(key)
|
||||||
try {
|
try {
|
||||||
await managed.client.close()
|
await managed.client.close()
|
||||||
} catch {
|
} catch {
|
||||||
// Ignore close errors - process may already be terminated
|
// Ignore close errors - process may already be terminated
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
await managed.transport.close()
|
||||||
|
} catch {
|
||||||
|
// Transport may already be terminated
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const key of keysToRemove) {
|
|
||||||
this.clients.delete(key)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async disconnectAll(): Promise<void> {
|
async disconnectAll(): Promise<void> {
|
||||||
for (const [, managed] of this.clients.entries()) {
|
this.stopCleanupTimer()
|
||||||
|
const clients = Array.from(this.clients.values())
|
||||||
|
this.clients.clear()
|
||||||
|
for (const managed of clients) {
|
||||||
try {
|
try {
|
||||||
await managed.client.close()
|
await managed.client.close()
|
||||||
} catch { /* process may already be terminated */ }
|
} catch { /* process may already be terminated */ }
|
||||||
|
try {
|
||||||
|
await managed.transport.close()
|
||||||
|
} catch { /* transport may already be terminated */ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private startCleanupTimer(): void {
|
||||||
|
if (this.cleanupInterval) return
|
||||||
|
this.cleanupInterval = setInterval(() => {
|
||||||
|
this.cleanupIdleClients()
|
||||||
|
}, 60_000)
|
||||||
|
this.cleanupInterval.unref()
|
||||||
|
}
|
||||||
|
|
||||||
|
private stopCleanupTimer(): void {
|
||||||
|
if (this.cleanupInterval) {
|
||||||
|
clearInterval(this.cleanupInterval)
|
||||||
|
this.cleanupInterval = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async cleanupIdleClients(): Promise<void> {
|
||||||
|
const now = Date.now()
|
||||||
|
for (const [key, managed] of this.clients) {
|
||||||
|
if (now - managed.lastUsedAt > this.IDLE_TIMEOUT) {
|
||||||
|
this.clients.delete(key)
|
||||||
|
try {
|
||||||
|
await managed.client.close()
|
||||||
|
} catch { /* process may already be terminated */ }
|
||||||
|
try {
|
||||||
|
await managed.transport.close()
|
||||||
|
} catch { /* transport may already be terminated */ }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.clients.clear()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async listTools(
|
async listTools(
|
||||||
@@ -193,10 +300,13 @@ export class SkillMcpManager {
|
|||||||
const key = this.getClientKey(info)
|
const key = this.getClientKey(info)
|
||||||
const existing = this.clients.get(key)
|
const existing = this.clients.get(key)
|
||||||
if (existing) {
|
if (existing) {
|
||||||
|
this.clients.delete(key)
|
||||||
try {
|
try {
|
||||||
await existing.client.close()
|
await existing.client.close()
|
||||||
} catch { /* process may already be terminated */ }
|
} catch { /* process may already be terminated */ }
|
||||||
this.clients.delete(key)
|
try {
|
||||||
|
await existing.transport.close()
|
||||||
|
} catch { /* transport may already be terminated */ }
|
||||||
return await this.getOrCreateClient(info, config)
|
return await this.getOrCreateClient(info, config)
|
||||||
}
|
}
|
||||||
throw error
|
throw error
|
||||||
|
|||||||
Reference in New Issue
Block a user