perf(file-tree): parallelize directory traversal and widen default ignore list

The project file-tree endpoint walked children sequentially with
`await fsPromises.stat()` inside a for-loop plus a separate
`fsPromises.access()` probe before recursing. On high-latency
filesystems (NFS/SMB) every one of those round-trips was serialized,
so a 120k-file SMB-mounted project took ~2 minutes to load.

This change:
* Runs stat() and recursive getFileTree() calls in parallel via
  `Promise.all` — pipelines round-trips and lets subtree traversals
  overlap.
* Drops the redundant access() probe; a permission error (EACCES /
  EPERM) on a subdirectory is now caught by the try/catch around
  readdir in the recursive call, which returns an empty list. This
  saves one round-trip per directory.
* Extracts the hardcoded skip list into an IGNORED_DIRS Set and
  extends it to cover common JS / Python / Rust / JVM / IDE build
  artefacts (.next, __pycache__, .pytest_cache, .tox, .venv,
  target, .gradle, .idea, coverage, etc.).

No API shape change; existing consumers get the same node structure
(entries whose names were newly added to the ignore list are simply
omitted), only much faster on large or remote-mounted projects.
This commit is contained in:
leonkong via Claude
2026-04-18 16:57:07 +08:00
parent 25b00b58de
commit 153f1e54b4

View File

@@ -2221,25 +2221,42 @@ function permToRwx(perm) {
return r + w + x; return r + w + x;
} }
// Names of directories left out of the project file tree. They rarely hold
// anything worth showing but may contain tens of thousands of files, so
// dropping them ahead of recursion keeps traversal time bounded on large
// monorepos and on high-latency filesystems (NFS / SMB).
// NOTE: entries are matched by name only, so a regular file carrying one of
// these names is filtered out as well.
const IGNORED_DIRS = new Set([
  // JS / TS toolchains
  'node_modules',
  'dist',
  'build',
  '.next',
  '.nuxt',
  '.cache',
  '.parcel-cache',

  // Version control metadata
  '.git',
  '.svn',
  '.hg',

  // Python
  '__pycache__',
  '.pytest_cache',
  '.mypy_cache',
  '.tox',
  'venv',
  '.venv',

  // Rust / Go / Java / Ruby
  'target',
  'vendor',

  // Build output / IDE
  '.gradle',
  '.idea',
  'coverage',
  '.nyc_output',
]);
async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden = true) { async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden = true) {
// Using fsPromises from import // Using fsPromises from import
const items = []; let entries;
try { try {
const entries = await fsPromises.readdir(dirPath, { withFileTypes: true }); entries = await fsPromises.readdir(dirPath, { withFileTypes: true });
} catch (error) {
// Only log non-permission errors to avoid spam
if (error.code !== 'EACCES' && error.code !== 'EPERM') {
console.error('Error reading directory:', error);
}
return [];
}
for (const entry of entries) { const filteredEntries = entries.filter((entry) => !IGNORED_DIRS.has(entry.name));
// Debug: log all entries including hidden files
// Skip heavy build directories and VCS directories
if (entry.name === 'node_modules' ||
entry.name === 'dist' ||
entry.name === 'build' ||
entry.name === '.git' ||
entry.name === '.svn' ||
entry.name === '.hg') continue;
// Process every entry in parallel. On high-latency filesystems (NFS/SMB)
// serial stat() was the real bottleneck — issuing them concurrently lets
// the kernel pipeline the round-trips and the recursive calls overlap too.
const items = await Promise.all(filteredEntries.map(async (entry) => {
const itemPath = path.join(dirPath, entry.name); const itemPath = path.join(dirPath, entry.name);
const item = { const item = {
name: entry.name, name: entry.name,
@@ -2269,25 +2286,14 @@ async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden =
} }
if (entry.isDirectory() && currentDepth < maxDepth) { if (entry.isDirectory() && currentDepth < maxDepth) {
// Recursively get subdirectories but limit depth // Recurse. Let readdir's own EACCES bubble up through the catch in
try { // the recursive call rather than doing a separate access() probe
// Check if we can access the directory before trying to read it // (which doubled the round-trip count on SMB without adding info).
await fsPromises.access(item.path, fs.constants.R_OK); item.children = await getFileTree(itemPath, maxDepth, currentDepth + 1, showHidden);
item.children = await getFileTree(item.path, maxDepth, currentDepth + 1, showHidden);
} catch (e) {
// Silently skip directories we can't access (permission denied, etc.)
item.children = [];
}
} }
items.push(item); return item;
} }));
} catch (error) {
// Only log non-permission errors to avoid spam
if (error.code !== 'EACCES' && error.code !== 'EPERM') {
console.error('Error reading directory:', error);
}
}
return items.sort((a, b) => { return items.sort((a, b) => {
if (a.type !== b.type) { if (a.type !== b.type) {