From ebb0e59e8023c0a8040d168a5adffb7102e80561 Mon Sep 17 00:00:00 2001 From: Haile <118998054+blackmammoth@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:21:30 +0300 Subject: [PATCH 1/3] fix: file tree concurrency (#828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * perf(file-tree): parallelize directory traversal and widen default ignore list The project file-tree endpoint walked children sequentially with `await fsPromises.stat()` inside a for-loop plus a separate `fsPromises.access()` probe before recursing. On high-latency filesystems (NFS/SMB) every one of those round-trips was serialized, so a 120k-file SMB-mounted project took ~2 minutes to load. This change: * Runs stat() and recursive getFileTree() calls in parallel via `Promise.all` — pipelines round-trips and lets subtree traversals overlap. * Drops the redundant access() probe; any EACCES now surfaces from readdir's own try/catch in the recursive call, saving one RTT per directory. * Extracts the hardcoded skip list into an IGNORED_DIRS Set and extends it to cover common Python / Rust / JVM / IDE build artefacts (.next, __pycache__, .pytest_cache, .tox, .venv, target, .gradle, .idea, coverage, etc). No API shape change; existing consumers get the same tree structure, only much faster on large or remote-mounted projects. * fix(file-tree): bound filesystem traversal concurrency Prevent large file-tree scans from launching unbounded stat and readdir work. Keep the parallel traversal benefit on high-latency mounts with a bounded queue. Ignore skipped names only for directories so same-named files stay visible. * fix(file-tree): inspect entries with lstat Use lstat for file-tree metadata so symlink entries are identified without following targets. 
--------- Co-authored-by: leonkong via Claude --- server/index.js | 173 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 116 insertions(+), 57 deletions(-) diff --git a/server/index.js b/server/index.js index a986148e..d857ad4b 100755 --- a/server/index.js +++ b/server/index.js @@ -1483,74 +1483,133 @@ function permToRwx(perm) { return r + w + x; } +// Directories that are almost never interesting for a project tree but can +// contain tens of thousands of files. Skipping them before recursion keeps +// traversal time bounded on large monorepos and high-latency filesystems +// (NFS / SMB). +const IGNORED_DIRS = new Set([ + // JS / TS toolchains + 'node_modules', 'dist', 'build', '.next', '.nuxt', '.cache', '.parcel-cache', + // VCS + '.git', '.svn', '.hg', + // Python + '__pycache__', '.pytest_cache', '.mypy_cache', '.tox', 'venv', '.venv', + // Rust / Go / Java / Ruby + 'target', 'vendor', + // Build output / IDE + '.gradle', '.idea', 'coverage', '.nyc_output' +]); + +const DEFAULT_FS_CONCURRENCY = 64; +const parsedFsConcurrency = Number.parseInt(process.env.FS_CONCURRENCY || '', 10); +const FS_CONCURRENCY = Number.isFinite(parsedFsConcurrency) && parsedFsConcurrency > 0 + ? 
parsedFsConcurrency + : DEFAULT_FS_CONCURRENCY; +let activeFsOperations = 0; +const pendingFsOperations = []; + +async function acquire() { + if (activeFsOperations < FS_CONCURRENCY) { + activeFsOperations += 1; + return; + } + + await new Promise((resolve) => { + pendingFsOperations.push(resolve); + }); +} + +function release() { + const next = pendingFsOperations.shift(); + if (next) { + next(); + return; + } + + activeFsOperations = Math.max(0, activeFsOperations - 1); +} + async function getFileTree(dirPath, maxDepth = 3, currentDepth = 0, showHidden = true) { // Using fsPromises from import - const items = []; - + let entries; try { - const entries = await fsPromises.readdir(dirPath, { withFileTypes: true }); - - for (const entry of entries) { - // Debug: log all entries including hidden files - - - // Skip heavy build directories and VCS directories - if (entry.name === 'node_modules' || - entry.name === 'dist' || - entry.name === 'build' || - entry.name === '.git' || - entry.name === '.svn' || - entry.name === '.hg') continue; - - const itemPath = path.join(dirPath, entry.name); - const item = { - name: entry.name, - path: itemPath, - type: entry.isDirectory() ? 
'directory' : 'file' - }; - - // Get file stats for additional metadata - try { - const stats = await fsPromises.stat(itemPath); - item.size = stats.size; - item.modified = stats.mtime.toISOString(); - - // Convert permissions to rwx format - const mode = stats.mode; - const ownerPerm = (mode >> 6) & 7; - const groupPerm = (mode >> 3) & 7; - const otherPerm = mode & 7; - item.permissions = ((mode >> 6) & 7).toString() + ((mode >> 3) & 7).toString() + (mode & 7).toString(); - item.permissionsRwx = permToRwx(ownerPerm) + permToRwx(groupPerm) + permToRwx(otherPerm); - } catch (statError) { - // If stat fails, provide default values - item.size = 0; - item.modified = null; - item.permissions = '000'; - item.permissionsRwx = '---------'; - } - - if (entry.isDirectory() && currentDepth < maxDepth) { - // Recursively get subdirectories but limit depth - try { - // Check if we can access the directory before trying to read it - await fsPromises.access(item.path, fs.constants.R_OK); - item.children = await getFileTree(item.path, maxDepth, currentDepth + 1, showHidden); - } catch (e) { - // Silently skip directories we can't access (permission denied, etc.) - item.children = []; - } - } - - items.push(item); + await acquire(); + try { + entries = await fsPromises.readdir(dirPath, { withFileTypes: true }); + } finally { + release(); } } catch (error) { // Only log non-permission errors to avoid spam if (error.code !== 'EACCES' && error.code !== 'EPERM') { console.error('Error reading directory:', error); } + return []; } + const filteredEntries = entries.filter((entry) => !(entry.isDirectory() && IGNORED_DIRS.has(entry.name))); + + // Process every entry in parallel. On high-latency filesystems (NFS/SMB) + // serial stat() was the real bottleneck — issuing them concurrently lets + // the kernel pipeline the round-trips and the recursive calls overlap too. 
+ const items = await Promise.all(filteredEntries.map(async (entry) => { + const itemPath = path.join(dirPath, entry.name); + const item = { + name: entry.name, + path: itemPath, + type: entry.isDirectory() ? 'directory' : 'file' + }; + + // Get file stats for additional metadata + try { + await acquire(); + try { + const stats = await fsPromises.lstat(itemPath); + item.size = stats.size; + item.modified = stats.mtime.toISOString(); + + // Mark symlinks so UI can distinguish them + if (stats.isSymbolicLink()) { + item.isSymlink = true; + } + + // Convert permissions to rwx format + const mode = stats.mode; + const ownerPerm = (mode >> 6) & 7; + const groupPerm = (mode >> 3) & 7; + const otherPerm = mode & 7; + item.permissions = + ((mode >> 6) & 7).toString() + + ((mode >> 3) & 7).toString() + + (mode & 7).toString(); + item.permissionsRwx = + permToRwx(ownerPerm) + + permToRwx(groupPerm) + + permToRwx(otherPerm); + } finally { + release(); + } + } catch (statError) { + // If stat fails, provide default values + item.size = 0; + item.modified = null; + item.permissions = '000'; + item.permissionsRwx = '---------'; + } + + if (entry.isDirectory() && currentDepth < maxDepth) { + // Recurse. Let readdir's own EACCES bubble up through the catch in + // the recursive call rather than doing a separate access() probe + // (which doubled the round-trip count on SMB without adding info). + // The recursive call starts with a bounded readdir; holding a permit + // for the whole subtree can deadlock when sibling directories are + // waiting on their own children. + item.children = await getFileTree(itemPath, maxDepth, currentDepth + 1, showHidden); + } + + return item; + })); + return items.sort((a, b) => { if (a.type !== b.type) { return a.type === 'directory' ? 
-1 : 1; From 14ddbc7c57a01da9fb65fd87d8588532b11833fa Mon Sep 17 00:00:00 2001 From: Haile <118998054+blackmammoth@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:23:27 +0300 Subject: [PATCH 2/3] fix: redact websocket auth token in logs (#827) --- .../modules/websocket/services/websocket-auth.service.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/modules/websocket/services/websocket-auth.service.ts b/server/modules/websocket/services/websocket-auth.service.ts index bd689d5d..bd76c65f 100644 --- a/server/modules/websocket/services/websocket-auth.service.ts +++ b/server/modules/websocket/services/websocket-auth.service.ts @@ -20,7 +20,13 @@ export function verifyWebSocketClient( dependencies: WebSocketAuthDependencies ): boolean { const request = info.req as AuthenticatedWebSocketRequest; - console.log('WebSocket connection attempt to:', request.url); + const upgradeUrl = new URL(request.url ?? '/', 'http://localhost'); + const loggedUrl = new URL(upgradeUrl); + if (loggedUrl.searchParams.has('token')) { + loggedUrl.searchParams.set('token', 'REDACTED'); + } + + console.log('WebSocket connection attempt to:', `${loggedUrl.pathname}${loggedUrl.search}`); // Platform mode: use the first DB user and skip token checks. if (dependencies.isPlatform) { @@ -36,7 +42,6 @@ export function verifyWebSocketClient( } // OSS mode: read JWT from query string first, then Authorization header. - const upgradeUrl = new URL(request.url ?? '/', 'http://localhost'); const token = upgradeUrl.searchParams.get('token') ?? request.headers.authorization?.split(' ')[1] ?? From 3ec76b5bb15a13cec41056f4c9b9c425195022fa Mon Sep 17 00:00:00 2001 From: Haile <118998054+blackmammoth@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:24:26 +0300 Subject: [PATCH 3/3] docs: add nginx subpath deployment template (#820) Users deploying behind a reverse proxy need a config they can adapt. 
The template documents each proxy block and centralizes upstream/subpath values. It also notes that Nginx location matchers still require literal subpath edits. --- docs/nginx-subpath-template.conf | 218 +++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 docs/nginx-subpath-template.conf diff --git a/docs/nginx-subpath-template.conf b/docs/nginx-subpath-template.conf new file mode 100644 index 00000000..15f4f067 --- /dev/null +++ b/docs/nginx-subpath-template.conf @@ -0,0 +1,218 @@ +# CloudCLI UI Nginx subpath deployment template. +# +# Purpose: +# Serve CloudCLI UI from a path prefix such as: +# http://localhost/ai/ +# https://example.com/ai/ +# +# CloudCLI itself still runs at the root of its own HTTP server, for example: +# http://127.0.0.1:3001/ +# +# Nginx receives public requests under /ai, strips that prefix, and forwards the +# remaining path to CloudCLI. For example: +# /ai/ -> / +# /ai/session/abc -> /session/abc +# /ai/assets/index.js -> /assets/index.js +# +# Important Nginx limitation: +# Nginx does not allow variables in `location` matchers or `rewrite` regexes. +# The configurable variables below are still useful for proxy/filter values, +# but if you change /ai to a different subpath, also update every line marked: +# [SUBPATH LITERAL] +# +# To use a different subpath, replace these literal matchers: +# location = /ai +# location ^~ /ai/ +# rewrite ^/ai(?<cloudcli_path>/.*)$ ... +# +# Recommended deployment shape: +# CloudCLI is the only app using /ai, while root paths /api, /ws, and /shell +# are also proxied because the current frontend still calls those endpoints +# with root-relative URLs. + +worker_processes 1; + +events { + # Maximum simultaneous connections handled by each worker process. + # The default is enough for local testing and small self-hosted deployments. + worker_connections 1024; +} + +http { + # WebSocket requests include an Upgrade header. Normal HTTP requests do not. 
+ # This map gives us the right Connection header for both cases: + # Upgrade present -> "upgrade" + # Upgrade absent -> "close" + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + # For HTTPS deployments, replace this with `listen 443 ssl http2;` and + # add ssl_certificate / ssl_certificate_key lines. + listen 80 default_server; + + # Use your real hostname in production, for example: + # server_name cloudcli.example.com; + server_name localhost 127.0.0.1; + + # ---- User settings ------------------------------------------------- + # + # Public path prefix where users access CloudCLI. + # Do not add a trailing slash. + # + # This variable can be used in redirects and response rewrites. It + # cannot be used in `location` matchers, so update the [SUBPATH LITERAL] + # lines too if you change it. + set $cloudcli_subpath /ai; + + # Private upstream URL where the CloudCLI server is listening. + # For a default local server this is usually http://127.0.0.1:3001. + set $cloudcli_upstream http://127.0.0.1:3001; + + # Allow larger file uploads through the code editor/project file APIs. + client_max_body_size 100m; + + # Redirect /ai to /ai/ so relative browser URL resolution is stable. + # [SUBPATH LITERAL] Change `/ai` if you change $cloudcli_subpath. + location = /ai { + return 301 $cloudcli_subpath/; + } + + # Main prefixed CloudCLI UI route. + # + # [SUBPATH LITERAL] Change `/ai/` and the `^/ai` rewrite if you change + # $cloudcli_subpath. + location ^~ /ai/ { + # Strip the public subpath before proxying. CloudCLI expects to see + # root paths such as /, /session/:id, /assets/..., /manifest.json. + rewrite ^/ai(?<cloudcli_path>/.*)$ $cloudcli_path break; + + # Forward the rewritten request to the private CloudCLI server. + proxy_pass $cloudcli_upstream; + + # Use HTTP/1.1 so WebSocket upgrade requests can pass through if a + # browser reaches a socket endpoint under the subpath. 
+ proxy_http_version 1.1; + + # Preserve useful request metadata for logs and future app support. + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix $cloudcli_subpath; + + # WebSocket upgrade headers. Harmless for normal HTTP requests. + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Long-running agent and terminal sessions can stay open for a long + # time, so avoid closing idle proxied connections too aggressively. + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + + # Disable gzip from the upstream response so sub_filter can inspect + # and rewrite HTML/JSON/JS response bodies. + proxy_set_header Accept-Encoding ""; + + # Rewrite browser-visible root-relative URLs so the runtime can + # discover that the app is mounted under the subpath. + # + # Examples: + # href="/manifest.json" -> href="/ai/manifest.json" + # src="/assets/app.js" -> src="/ai/assets/app.js" + # + # These rewrites are important for React Router basename detection. + sub_filter_once off; + sub_filter_types + application/json + application/manifest+json + application/javascript + text/javascript; + + sub_filter 'href="/' 'href="$cloudcli_subpath/'; + sub_filter 'src="/' 'src="$cloudcli_subpath/'; + + # The production HTML and JS register the service worker at /sw.js. + # Rewrite that registration so the worker is served from /ai/sw.js. + sub_filter "register('/sw.js')" "register('$cloudcli_subpath/sw.js')"; + sub_filter 'register("/sw.js")' 'register("$cloudcli_subpath/sw.js")'; + + # The manifest and service worker contain root-relative paths too. + # Rewriting them keeps PWA metadata and cached manifest requests + # under the same public subpath. 
+ sub_filter '"start_url": "/"' '"start_url": "$cloudcli_subpath/"'; + sub_filter '"scope": "/"' '"scope": "$cloudcli_subpath/"'; + sub_filter '"src": "/' '"src": "$cloudcli_subpath/'; + sub_filter "'/manifest.json'" "'$cloudcli_subpath/manifest.json'"; + sub_filter '"/manifest.json"' '"$cloudcli_subpath/manifest.json"'; + } + + # Root API proxy. + # + # The current CloudCLI frontend calls APIs with root-relative URLs such + # as /api/auth/login. Keep this location unless the frontend becomes + # fully prefix-aware for API requests. + location ^~ /api/ { + proxy_pass $cloudcli_upstream; + + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix $cloudcli_subpath; + + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + } + + # Main app WebSocket proxy. + # + # The frontend opens /ws for realtime chat/session/task updates. + location /ws { + proxy_pass $cloudcli_upstream; + + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix $cloudcli_subpath; + + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + } + + # Shell WebSocket proxy. + # + # The browser terminal uses /shell. It requires the same WebSocket + # upgrade handling as /ws. 
+ location /shell { + proxy_pass $cloudcli_upstream; + + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix $cloudcli_subpath; + + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + } + + # Optional health endpoint proxy used by the frontend version checker. + location = /health { + proxy_pass $cloudcli_upstream; + + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix $cloudcli_subpath; + } + } +}