Compare commits

...

7 Commits

Author SHA1 Message Date
Simos Mikelatos
762ecb0c3b Merge branch 'main' into fix/file-tree-concurrency 2026-06-05 14:21:26 +02:00
Haile
c7938e4f2b Merge branch 'main' into fix/file-tree-concurrency 2026-06-04 22:26:24 +03:00
Haileyesus
a9fa6eb6b6 fix(file-tree): inspect entries with lstat
Use lstat for file-tree metadata so symlink entries are identified without following targets.
2026-06-04 17:33:58 +03:00
Haileyesus
37d363c1aa fix(file-tree): bound filesystem traversal concurrency
Prevent large file-tree scans from launching unbounded stat and readdir work.

Keep the parallel traversal benefit on high-latency mounts with a bounded queue.

Ignore skipped names only for directories so same-named files stay visible.
2026-06-04 17:07:41 +03:00
Haile
4658a97952 Merge branch 'main' into perf/parallel-file-tree 2026-06-04 13:51:22 +03:00
Haile
137c7c4f3c Merge branch 'main' into perf/parallel-file-tree 2026-05-04 13:04:07 +03:00
leonkong via Claude
153f1e54b4 perf(file-tree): parallelize directory traversal and widen default ignore list
The project file-tree endpoint walked children sequentially with
`await fsPromises.stat()` inside a for-loop plus a separate
`fsPromises.access()` probe before recursing. On high-latency
filesystems (NFS/SMB) every one of those round-trips was serialized,
so a 120k-file SMB-mounted project took ~2 minutes to load.

This change:
* Runs stat() and recursive getFileTree() calls in parallel via
  `Promise.all` — pipelines round-trips and lets subtree traversals
  overlap.
* Drops the redundant access() probe; any EACCES now surfaces from
  readdir's own try/catch in the recursive call, saving one RTT per
  directory.
* Extracts the hardcoded skip list into an IGNORED_DIRS Set and
  extends it to cover common Python / Rust / JVM / IDE build
  artefacts (.next, __pycache__, .pytest_cache, .tox, .venv,
  target, .gradle, .idea, coverage, etc).

No API shape change; existing consumers get the same tree structure,
only much faster on large or remote-mounted projects.
2026-04-18 16:57:07 +08:00

View File

@@ -1483,74 +1483,133 @@ function permToRwx(perm) {
return r + w + x; return r + w + x;
} }
// Directories that are almost never interesting for a project tree but can
// contain tens of thousands of files. Skipping them before recursion keeps
// traversal time bounded on large monorepos and high-latency filesystems
// (NFS / SMB).
const IGNORED_DIRS = new Set([
// JS / TS toolchains
'node_modules', 'dist', 'build', '.next', '.nuxt', '.cache', '.parcel-cache',
// VCS
'.git', '.svn', '.hg',
// Python
'__pycache__', '.pytest_cache', '.mypy_cache', '.tox', 'venv', '.venv',
// Rust / Go / Java / Ruby
'target', 'vendor',
// Build output / IDE
'.gradle', '.idea', 'coverage', '.nyc_output'
]);
const DEFAULT_FS_CONCURRENCY = 64;
const parsedFsConcurrency = Number.parseInt(process.env.FS_CONCURRENCY || '', 10);
const FS_CONCURRENCY = Number.isFinite(parsedFsConcurrency) && parsedFsConcurrency > 0
? parsedFsConcurrency
: DEFAULT_FS_CONCURRENCY;
let activeFsOperations = 0;
const pendingFsOperations = [];
async function acquire() {
if (activeFsOperations < FS_CONCURRENCY) {
activeFsOperations += 1;
return;
}
await new Promise((resolve) => {
pendingFsOperations.push(resolve);
});
}
function release() {
const next = pendingFsOperations.shift();
if (next) {
next();
return;
}
activeFsOperations = Math.max(0, activeFsOperations - 1);
}
async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden = true) { async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden = true) {
// Using fsPromises from import // Using fsPromises from import
const items = []; let entries;
try { try {
const entries = await fsPromises.readdir(dirPath, { withFileTypes: true }); await acquire();
try {
for (const entry of entries) { entries = await fsPromises.readdir(dirPath, { withFileTypes: true });
// Debug: log all entries including hidden files } finally {
release();
// Skip heavy build directories and VCS directories
if (entry.name === 'node_modules' ||
entry.name === 'dist' ||
entry.name === 'build' ||
entry.name === '.git' ||
entry.name === '.svn' ||
entry.name === '.hg') continue;
const itemPath = path.join(dirPath, entry.name);
const item = {
name: entry.name,
path: itemPath,
type: entry.isDirectory() ? 'directory' : 'file'
};
// Get file stats for additional metadata
try {
const stats = await fsPromises.stat(itemPath);
item.size = stats.size;
item.modified = stats.mtime.toISOString();
// Convert permissions to rwx format
const mode = stats.mode;
const ownerPerm = (mode >> 6) & 7;
const groupPerm = (mode >> 3) & 7;
const otherPerm = mode & 7;
item.permissions = ((mode >> 6) & 7).toString() + ((mode >> 3) & 7).toString() + (mode & 7).toString();
item.permissionsRwx = permToRwx(ownerPerm) + permToRwx(groupPerm) + permToRwx(otherPerm);
} catch (statError) {
// If stat fails, provide default values
item.size = 0;
item.modified = null;
item.permissions = '000';
item.permissionsRwx = '---------';
}
if (entry.isDirectory() && currentDepth < maxDepth) {
// Recursively get subdirectories but limit depth
try {
// Check if we can access the directory before trying to read it
await fsPromises.access(item.path, fs.constants.R_OK);
item.children = await getFileTree(item.path, maxDepth, currentDepth + 1, showHidden);
} catch (e) {
// Silently skip directories we can't access (permission denied, etc.)
item.children = [];
}
}
items.push(item);
} }
} catch (error) { } catch (error) {
// Only log non-permission errors to avoid spam // Only log non-permission errors to avoid spam
if (error.code !== 'EACCES' && error.code !== 'EPERM') { if (error.code !== 'EACCES' && error.code !== 'EPERM') {
console.error('Error reading directory:', error); console.error('Error reading directory:', error);
} }
return [];
} }
const filteredEntries = entries.filter((entry) => !(entry.isDirectory() && IGNORED_DIRS.has(entry.name)));
// Process every entry in parallel. On high-latency filesystems (NFS/SMB)
// serial stat() was the real bottleneck — issuing them concurrently lets
// the kernel pipeline the round-trips and the recursive calls overlap too.
const items = await Promise.all(filteredEntries.map(async (entry) => {
const itemPath = path.join(dirPath, entry.name);
const item = {
name: entry.name,
path: itemPath,
type: entry.isDirectory() ? 'directory' : 'file'
};
// Get file stats for additional metadata
try {
await acquire();
try {
const stats = await fsPromises.lstat(itemPath);
item.size = stats.size;
item.modified = stats.mtime.toISOString();
// Mark symlinks so UI can distinguish them
if (stats.isSymbolicLink()) {
item.isSymlink = true;
}
// Convert permissions to rwx format
const mode = stats.mode;
const ownerPerm = (mode >> 6) & 7;
const groupPerm = (mode >> 3) & 7;
const otherPerm = mode & 7;
item.permissions =
((mode >> 6) & 7).toString() +
((mode >> 3) & 7).toString() +
(mode & 7).toString();
item.permissionsRwx =
permToRwx(ownerPerm) +
permToRwx(groupPerm) +
permToRwx(otherPerm);
} finally {
release();
}
} catch (statError) {
// If stat fails, provide default values
item.size = 0;
item.modified = null;
item.permissions = '000';
item.permissionsRwx = '---------';
}
if (entry.isDirectory() && currentDepth < maxDepth) {
// Recurse. Let readdir's own EACCES bubble up through the catch in
// the recursive call rather than doing a separate access() probe
// (which doubled the round-trip count on SMB without adding info).
// The recursive call starts with a bounded readdir; holding a permit
// for the whole subtree can deadlock when sibling directories are
// waiting on their own children.
item.children = await getFileTree(itemPath, maxDepth, currentDepth + 1, showHidden);
}
return item;
}));
return items.sort((a, b) => { return items.sort((a, b) => {
if (a.type !== b.type) { if (a.type !== b.type) {
return a.type === 'directory' ? -1 : 1; return a.type === 'directory' ? -1 : 1;