refactor: add cross-platform utility functions

This commit is contained in:
Haileyesus
2026-03-17 10:19:50 +03:00
parent 23c39a42b1
commit 7df21556dd
16 changed files with 872 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
name: Cross Platform Server Verification

on:
  pull_request:
  push:
  workflow_dispatch:

jobs:
  verify:
    name: Verify on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Keep both OS jobs running even if one fails, so a Windows-only
      # regression stays visible next to the Linux result.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        node:
          - 22
    steps:
      # Check out the repository so the matrix job can build and test it.
      - name: Checkout repository
        uses: actions/checkout@v4
      # Install the Node.js version the README already declares as the project baseline.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node }}
          cache: npm
      # Install dependencies exactly as locked so native and shell behavior stays reproducible.
      - name: Install dependencies
        run: npm ci
      # Verify the TypeScript server code before any runtime checks.
      - name: Typecheck server
        run: npm run typecheck:server
      # Run the built-in Node tests that exercise the OS adapter layer directly.
      - name: Test server adapters
        run: npm run test:server
      # Ensure the in-progress TypeScript backend still compiles in each OS environment.
      - name: Build server
        run: npm run server:build

View File

@@ -22,6 +22,12 @@ server/
error-handler.ts
not-found-handler.ts
request-context.ts
platform/
runtime-platform.ts
text.ts
stream.ts
shell.ts
path.ts
types/
app.ts
http.ts
@@ -80,6 +86,11 @@ server/
Example: `async-handler.ts` removes repeated `try/catch(next)` wrappers in controllers.
Example: `not-found-handler.ts` is the generic fallback for unknown API routes.
- `src/shared/platform/`
Shared OS-adapter helpers for shell spawning, line ending normalization, streaming stdout/stderr parsing, and path normalization.
Example: `platform/stream.ts` is where process output gets split into complete lines without leaking CRLF edge cases into feature code.
Example: `platform/shell.ts` is where PowerShell-vs-bash command construction lives so provider modules do not branch on `process.platform`.
- `src/shared/types/`
Global type aliases that are safe to share across modules. This layer uses `type`, not `interface`.
Example: `types/http.ts` defines `ApiMeta`, `ApiErrorShape`, `RequestContext`, `AuthenticatedRequest`, and `EndpointInventoryRecord`.

View File

@@ -0,0 +1,314 @@
# Cross-Platform Input Parsing Notes
## Why This Matters In This Repo
CloudCLI is not only an HTTP API plus React UI. From the README and current backend layout, it also launches CLIs, keeps interactive terminal sessions alive, reads and writes local files, parses process output, and forwards terminal input from the browser into local shells. That puts the backend on the boundary between browser input, terminal behavior, child process behavior, and filesystem behavior. Linux and Windows differ at each of those boundaries.
For the TypeScript migration, the OS adapter layer now lives in:
- [server/src/shared/platform/index.ts](../server/src/shared/platform/index.ts)
Use those helpers in new `server/src` code so feature modules do not branch on the operating system.
## Assumptions
- The legacy runtime in `server/index.js` stays untouched for now.
- New backend code will be added under `server/src`.
- Node.js 22 is the baseline because the README already requires Node 22+.
- The main instability is text handling around shells, streams, and files, not business logic.
## Where Parsing Happens In This Repo
- `server/index.js`: PTY shell input/output and session reuse
- `server/cursor-cli.js`: streaming line-delimited JSON from `cursor-agent`
- `server/gemini-response-handler.js`: incremental parsing of Gemini JSON lines
- `server/routes/mcp.js` and `server/routes/codex.js`: parsing human-readable CLI output
- `server/cli.js` and `server/load-env.js`: parsing command-line args and `.env` text
- `server/routes/git.js` and related routes: parsing Git stdout line by line
Those are not all the same problem. In this repo, "input parsing" means terminal input parsing, stream parsing, file parsing, shell command construction, and path normalization.
## Core Terms
### Process
A process is a running program such as `node server/start.js`, `git`, `codex`, or `cursor-agent`. When your backend launches one of these, the backend is the parent process and the launched program is the child process.
### Child Process
A child process is a process started by another process. Examples:
- CloudCLI launches `git status`
- CloudCLI launches `codex mcp list`
- CloudCLI launches `cursor-agent --output-format stream-json`
Important point: a child process usually does not hand you one final string. It emits output over time.
### stdin, stdout, stderr
These are the three standard streams:
- `stdin`: data going into the process
- `stdout`: normal output coming out
- `stderr`: diagnostics, warnings, and errors
Node example:
```ts
const child = spawn('git', ['status']);
child.stdout.on('data', (chunk) => {
// normal output from git
});
child.stderr.on('data', (chunk) => {
// warnings or errors
});
child.stdin.write('yes\n');
child.stdin.end();
```
Repo examples:
- terminal keystrokes go to `stdin`
- `cursor-agent` JSON events arrive on `stdout`
- many CLI failures appear on `stderr`
### TTY and PTY
- `TTY`: a terminal device
- `PTY`: a pseudo-terminal, meaning software that behaves like a terminal
Why it matters:
- `spawn()` is best for non-interactive commands like `git status`
- `node-pty` is best for interactive shells like PowerShell or bash sessions
Repo example: `server/index.js` uses `node-pty` for the integrated shell because agents and shells expect terminal behavior, not just plain pipes.
### argv
`argv` means argument vector: the list of command-line arguments passed to a program.
Example:
```ts
spawn('git', ['log', '--oneline', '-5']);
```
Here the executable is `git` and the argv is `['log', '--oneline', '-5']`. This is safer than building one big shell string because Node passes arguments directly instead of asking a shell to reinterpret them.
### cwd
`cwd` means current working directory. Examples:
- run `git status` in the project root
- run `claude mcp add --scope local` inside the current project
- run a terminal session inside a selected workspace
If `cwd` is wrong, parsing may look broken even when the parser is correct, because the command itself is operating in the wrong place.
### Buffer, String, and Decoding
A `Buffer` is raw bytes. A string is decoded text. Processes emit bytes first, then you decode them, and only after that should you parse lines, JSON, or tokens.
Example:
```ts
child.stdout.on('data', (chunk: Buffer) => {
const text = chunk.toString('utf8');
});
```
### Line Ending
A line ending marks the end of a text line:
- Linux/macOS usually use LF: `\n`
- Windows often uses CRLF: `\r\n`
- older tools sometimes emit CR alone: `\r`
Classic bug:
```ts
'a\r\nb\r\n'.split('\n');
// ['a\r', 'b\r', '']
```
That hidden trailing `\r` is one of the most common Windows parsing bugs.
### BOM
BOM means byte order mark. In UTF-8 text it appears as `\uFEFF` at the start. Typical failures:
- first key becomes `\uFEFFNAME` instead of `NAME`
- JSON parsing fails because the first character is not what the parser expected
- `.env` parsing silently produces the wrong first variable name
The adapter layer strips BOM explicitly for that reason.
### Chunk
A chunk is one partial piece of stream data. Chunks are transport boundaries, not logical message boundaries. Important rules:
- one line can arrive in multiple chunks
- one chunk can contain many lines
- one JSON object can be split across chunk boundaries
Example:
```txt
Chunk 1: {"type":"message","text":"hel
Chunk 2: lo"}\r\n{"type":"message","text":"next"}\r\n
```
If you parse each chunk independently, you corrupt the first JSON object.
## The Backend Parsing Lifecycle
Most backend parsing problems in this repo can be viewed as a four-step pipeline:
1. Receive raw bytes or raw text.
2. Normalize transport details.
3. Parse business structure.
4. Return normalized data to the rest of the app.
Examples:
- file bytes -> UTF-8 string -> normalize line endings -> split lines -> parse fields
- stdout chunks -> accumulate partial lines -> parse JSON per line -> emit events
- browser terminal input -> normalize Enter/newlines -> write to PTY
The operating system mainly affects step 2. That is why the new adapter layer exists.
## Linux vs Windows Differences That Usually Matter
### 1. Newlines In Files And Process Output
Linux usually gives LF. Windows often gives CRLF. Some tools mix them.
Bad pattern:
```ts
const lines = output.split('\n');
```
Safer pattern:
```ts
import { splitLines } from '@/shared/platform/index.js';
const lines = splitLines(output, {
preserveEmptyLines: false,
trimTrailingEmptyLine: true,
});
```
Use `splitLines()` when you already have the whole string in memory.
### 2. Chunked Streams
A process stream is not line-oriented by default.
Bad pattern:
```ts
child.stdout.on('data', (chunk) => {
const event = JSON.parse(chunk.toString());
});
```
This fails when one JSON object is split across chunks.
Safer pattern:
```ts
import { createStreamLineAccumulator } from '@/shared/platform/index.js';
const lines = createStreamLineAccumulator({ preserveEmptyLines: false });
child.stdout.on('data', (chunk) => {
for (const line of lines.push(chunk)) {
const event = JSON.parse(line);
}
});
child.on('close', () => {
for (const line of lines.flush()) {
const event = JSON.parse(line);
}
});
```
Use this for Cursor, Gemini, JSONL, NDJSON, or any line-based CLI protocol.
### 3. Shell Syntax And Fallback Logic
POSIX shells and PowerShell do not use the same syntax.
- POSIX fallback: `cmd1 || cmd2`
- PowerShell fallback: `cmd1; if ($LASTEXITCODE -ne 0) { cmd2 }`
Use:
```ts
import { buildFallbackCommand, createShellSpawnPlan } from '@/shared/platform/index.js';
const shellCommand = buildFallbackCommand('codex resume 123', 'codex', 'windows');
const spawnPlan = createShellSpawnPlan(shellCommand, 'windows');
```
This keeps feature code from hardcoding bash rules into Windows paths or PowerShell rules into Linux code.
### 4. Quoting Rules
Even when two shells both support quotes, they do not escape them the same way.
- POSIX single quote escape is awkward: `'it'"'"'s'`
- PowerShell single quote escape doubles the quote: `'it''s'`
Use:
```ts
import { quoteShellArgument } from '@/shared/platform/index.js';
const safe = quoteShellArgument("it's", 'windows');
```
### 5. Path Separators And Case
- Linux paths use `/`
- Windows paths typically use `\`
- Linux is usually case-sensitive
- Windows is usually case-insensitive
Examples:
- `/repo/File.ts` and `/repo/file.ts` are different on Linux
- `C:\Repo\File.ts` and `c:\repo\file.ts` usually refer to the same file on Windows
Use:
```ts
import { arePathsEquivalent, normalizePathForPlatform, toPortablePath } from '@/shared/platform/index.js';
```
Guideline:
- use platform-specific paths when calling the OS
- use portable slash paths for logs, keys, and serialized payloads
### 6. Terminal Input
Terminal input is not the same as a normal HTML form submission.
- pressing Enter may arrive as `\r`
- pasted text may contain `\n` or `\r\n`
- terminal apps often expect carriage return behavior
Use:
```ts
import { normalizeTerminalInput } from '@/shared/platform/index.js';
```
This matters for PTY writes because terminal software often treats `\r` as the real Enter key behavior.
## The New Adapter Functions
- `normalizeTextForParsing()`: use when your goal is parsing text consistently, not preserving original file style; good for `.env`, JSONL, human-readable CLI output, and buffered command output.
- `splitLines()`: use when the full text is already in memory and you want clean logical lines; good for config files, buffered Git output, and fully collected CLI output.
- `createStreamLineAccumulator()`: use when text arrives incrementally over time; good for `stdout`, `stderr`, line-based streaming JSON, and long-lived child processes.
- `createShellSpawnPlan()`: use when the command must go through a shell because shell syntax is required; good for fallback commands, resume-or-start command chains, and interactive shell launch plans.
- `quoteShellArgument()`: use before interpolating dynamic values into shell command strings; good for session IDs, file paths, branch names, and user-provided subcommands.
- `buildFallbackCommand()`: use when the same logic must work in bash and PowerShell; a repo-shaped example is "resume Codex session if it exists, otherwise start a fresh one."
- `preserveExistingLineEndings()`: use when writing text files back to disk and you want to avoid noisy diffs; good for markdown files, config files, and user-managed text artifacts.
## Practical Backend Rules For This Repo
1. If you already have the full text, normalize once and then parse.
2. If the source is a stream, use an accumulator and never parse per chunk.
3. Prefer `spawn(executable, argv, { shell: false })` whenever possible.
4. Only use a shell when shell syntax is actually needed.
5. When you must use a shell, push all shell-specific behavior into the adapter layer.
6. Preserve existing line endings on user files unless you intentionally want normalization.
7. Separate transport normalization from business parsing.
## Common Mistakes To Avoid
- Parsing stdout chunk-by-chunk. Symptom: random JSON parse failures or truncated events. Fix: accumulate complete lines first.
- Using `split('\n')` on Windows text. Symptom: values end with `\r` and equality checks fail. Fix: normalize line endings or use `splitLines()`.
- Building one huge shell string for everything. Symptom: quoting bugs, OS-specific failures, and injection risk. Fix: prefer `spawn()` with argv; if shell is required, use `quoteShellArgument()` and `createShellSpawnPlan()`.
- Rewriting files with a different line-ending style. Symptom: huge git diffs and noisy file changes. Fix: use `preserveExistingLineEndings()`.
## Testing Strategy Implemented Here
This strategy intentionally does not add Jest, Vitest, or another test framework.
It uses:
- Node's built-in `node:test`
- `tsx` only to execute TypeScript tests
- a GitHub Actions matrix on Ubuntu and Windows
Local verification:
```bash
npm run test:server
npm run verify:server
```
CI verification:
- `npm run typecheck:server`
- `npm run test:server`
- `npm run server:build`
This gives you two kinds of confidence:
- contract confidence: the adapter functions behave as designed
- environment confidence: the same checks pass on real Linux and Windows runners
## Final Mental Model
Think in three layers:
1. Raw transport layer. Examples: chunks, bytes, terminal keystrokes, raw file text.
2. Normalization layer. Examples: strip BOM, normalize line endings, normalize terminal input, normalize shell behavior.
3. Business parsing layer. Examples: parse JSON, parse CLI output, parse `.env`, parse Git status, parse session files.
If you keep layer 2 in shared adapters, layer 3 stops caring about Linux vs Windows.

View File

@@ -34,6 +34,8 @@
"preview": "vite preview",
"typecheck:client": "tsc --noEmit -p tsconfig.json",
"typecheck:server": "tsc --noEmit -p server/tsconfig.json",
"test:server": "node --import tsx --test server/src/shared/platform/runtime-platform.test.ts server/src/shared/platform/text.test.ts server/src/shared/platform/stream.test.ts server/src/shared/platform/path.test.ts server/src/shared/platform/shell.test.ts",
"verify:server": "npm run typecheck:server && npm run test:server && npm run server:build",
"typecheck": "npm run typecheck:client && npm run typecheck:server",
"lint": "eslint src/",
"lint:fix": "eslint src/ --fix",

View File

@@ -0,0 +1,7 @@
// This barrel keeps future imports short while the backend migrates into TypeScript.
// Feature code should import from this index (e.g. '@/shared/platform/index.js') instead of
// reaching into individual modules, so the internal file layout can change without churn.
// Exports are kept alphabetical; add new platform modules here when they land.
export * from './path.js';
export * from './runtime-platform.js';
export * from './shell.js';
export * from './stream.js';
export * from './text.js';
export * from './types.js';

View File

@@ -0,0 +1,29 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { arePathsEquivalent, normalizePathForPlatform, toPortablePath } from './path.js';

// Confirms path strings can be normalized for logs and for platform-specific execution.
test('path helpers normalize separators in both directions', () => {
  assert.strictEqual(toPortablePath('folder\\child\\file.txt'), 'folder/child/file.txt');
  assert.strictEqual(
    normalizePathForPlatform('folder\\child/file.txt', 'windows'),
    'folder\\child\\file.txt',
  );
  assert.strictEqual(
    normalizePathForPlatform('folder\\child/file.txt', 'linux'),
    'folder/child/file.txt',
  );
});

// Confirms comparison honors Windows case-insensitivity and POSIX case-sensitivity.
test('arePathsEquivalent follows the case rules of the target platform', () => {
  assert.ok(arePathsEquivalent('C:\\Repo\\File.txt', 'c:/repo/file.txt', 'windows'));
  assert.ok(!arePathsEquivalent('/repo/File.txt', '/repo/file.txt', 'linux'));
});

View File

@@ -0,0 +1,34 @@
import path from 'path';
import { getPlatformPathSeparator, isWindowsPlatform, resolveRuntimePlatform } from './runtime-platform.js';
import type { RuntimePlatform } from './types.js';
// Converts any path into the slash-separated portable form used for logs, cache keys,
// and serialized payloads, where a stable representation matters more than OS style.
export function toPortablePath(value: string): string {
  return value.split('\\').join('/');
}
// This helper rewrites any mixture of separators into the preferred style for the target platform.
// Runs of repeated separators are collapsed into one, but a leading double separator — a Windows
// UNC prefix such as \\server\share (or its portable form //server/share) — is preserved, because
// collapsing it would turn a network share path into an unresolvable local path.
export function normalizePathForPlatform(
  value: string,
  platform: RuntimePlatform = resolveRuntimePlatform(),
): string {
  const separator = getPlatformPathSeparator(platform);
  // A UNC prefix is exactly two separators followed by a non-separator host name.
  const hasUncPrefix = /^[\\/]{2}(?![\\/])/.test(value);
  const collapsed = value.replace(/[\\/]+/g, separator);
  return hasUncPrefix ? separator + collapsed : collapsed;
}
// This helper compares paths using the case-sensitivity rules of the target platform.
export function arePathsEquivalent(
  left: string,
  right: string,
  platform: RuntimePlatform = resolveRuntimePlatform(),
): boolean {
  // Use the target platform's path semantics rather than the host machine's.
  const targetPath = isWindowsPlatform(platform) ? path.win32 : path.posix;
  // Bring one path into canonical comparable form: platform separators, normalized
  // segments, and case-folded only where the platform is case-insensitive.
  const canonicalize = (value: string): string => {
    const normalized = targetPath.normalize(normalizePathForPlatform(value, platform));
    return isWindowsPlatform(platform) ? normalized.toLowerCase() : normalized;
  };
  return canonicalize(left) === canonicalize(right);
}

View File

@@ -0,0 +1,27 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
  getPlatformLineEnding,
  getPlatformPathSeparator,
  isWindowsPlatform,
  resolveRuntimePlatform,
} from './runtime-platform.js';

// Covers the platform vocabulary used by the adapter layer.
test('resolveRuntimePlatform maps Node platforms into adapter platforms', () => {
  const expectations: Array<[NodeJS.Platform, string]> = [
    ['win32', 'windows'],
    ['darwin', 'macos'],
    ['linux', 'linux'],
    ['freebsd', 'linux'],
  ];
  for (const [nodePlatform, adapterPlatform] of expectations) {
    assert.strictEqual(resolveRuntimePlatform(nodePlatform), adapterPlatform);
  }
});

// Confirms the shared helpers expose the expected OS defaults.
test('platform helpers expose the expected line endings and separators', () => {
  assert.strictEqual(isWindowsPlatform('windows'), true);
  assert.strictEqual(isWindowsPlatform('linux'), false);
  assert.strictEqual(getPlatformLineEnding('windows'), 'crlf');
  assert.strictEqual(getPlatformLineEnding('linux'), 'lf');
  assert.strictEqual(getPlatformPathSeparator('windows'), '\\');
  assert.strictEqual(getPlatformPathSeparator('macos'), '/');
});

View File

@@ -0,0 +1,29 @@
import type { LineEnding, RuntimePlatform } from './types.js';

// Maps Node's raw platform identifiers onto the smaller vocabulary used by the adapter layer.
export function resolveRuntimePlatform(nodePlatform: NodeJS.Platform = process.platform): RuntimePlatform {
  if (nodePlatform === 'win32') {
    return 'windows';
  }
  if (nodePlatform === 'darwin') {
    return 'macos';
  }
  // Every other platform this project targets behaves like a POSIX shell environment.
  return 'linux';
}

// Keeps Windows checks readable at call sites.
export function isWindowsPlatform(platform: RuntimePlatform = resolveRuntimePlatform()): boolean {
  return platform === 'windows';
}

// Centralizes the preferred newline style for each platform.
export function getPlatformLineEnding(platform: RuntimePlatform = resolveRuntimePlatform()): LineEnding {
  return platform === 'windows' ? 'crlf' : 'lf';
}

// Centralizes the preferred path separator for each platform.
export function getPlatformPathSeparator(platform: RuntimePlatform = resolveRuntimePlatform()): '\\' | '/' {
  return platform === 'windows' ? '\\' : '/';
}

View File

@@ -0,0 +1,43 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { buildFallbackCommand, createShellSpawnPlan, quoteShellArgument } from './shell.js';

// Confirms the backend can request a shell launch without branching on the OS at every call site.
test('createShellSpawnPlan returns the expected executable and argv per platform', () => {
  const windowsPlan = createShellSpawnPlan('echo hello', 'windows');
  assert.deepStrictEqual(windowsPlan, {
    platform: 'windows',
    executable: 'powershell.exe',
    args: ['-Command', 'echo hello'],
    commandFlag: '-Command',
    preferredLineEnding: 'crlf',
    pathSeparator: '\\',
  });
  const linuxPlan = createShellSpawnPlan('echo hello', 'linux');
  assert.deepStrictEqual(linuxPlan, {
    platform: 'linux',
    executable: 'bash',
    args: ['-c', 'echo hello'],
    commandFlag: '-c',
    preferredLineEnding: 'lf',
    pathSeparator: '/',
  });
});

// Confirms shell quoting rules stay isolated inside the adapter layer.
test('quoteShellArgument escapes embedded single quotes correctly', () => {
  assert.strictEqual(quoteShellArgument("it's", 'windows'), "'it''s'");
  assert.strictEqual(quoteShellArgument("it's", 'linux'), `'it'"'"'s'`);
});

// Confirms resume-or-fallback command composition stays platform-specific in one helper.
test('buildFallbackCommand emits PowerShell or POSIX fallback syntax', () => {
  assert.strictEqual(
    buildFallbackCommand("codex resume '123'", 'codex', 'windows'),
    "codex resume '123'; if ($LASTEXITCODE -ne 0) { codex }",
  );
  assert.strictEqual(
    buildFallbackCommand("codex resume '123'", 'codex', 'linux'),
    "codex resume '123' || codex",
  );
});

View File

@@ -0,0 +1,55 @@
import { getPlatformLineEnding, getPlatformPathSeparator, resolveRuntimePlatform } from './runtime-platform.js';
import type { RuntimePlatform, ShellSpawnPlan } from './types.js';

// Builds the shell executable plus argv shape needed to launch a command on the target platform.
export function createShellSpawnPlan(
  command: string,
  platform: RuntimePlatform = resolveRuntimePlatform(),
): ShellSpawnPlan {
  const isWindows = platform === 'windows';
  const commandFlag = isWindows ? ('-Command' as const) : ('-c' as const);
  return {
    platform,
    executable: isWindows ? 'powershell.exe' : 'bash',
    args: [commandFlag, command],
    commandFlag,
    preferredLineEnding: getPlatformLineEnding(platform),
    pathSeparator: getPlatformPathSeparator(platform),
  };
}

// Quotes one argument so callers never need to remember shell-specific escaping rules.
export function quoteShellArgument(
  value: string,
  platform: RuntimePlatform = resolveRuntimePlatform(),
): string {
  const escaped =
    platform === 'windows'
      ? // PowerShell escapes a single quote inside a single-quoted string by doubling it.
        value.replace(/'/g, "''")
      : // POSIX shells close the string, inject an escaped quote, then reopen it.
        value.replace(/'/g, `'"'"'`);
  return `'${escaped}'`;
}

// Builds the platform-specific "try primary, then fallback" shell expression.
export function buildFallbackCommand(
  primaryCommand: string,
  fallbackCommand: string,
  platform: RuntimePlatform = resolveRuntimePlatform(),
): string {
  return platform === 'windows'
    ? `${primaryCommand}; if ($LASTEXITCODE -ne 0) { ${fallbackCommand} }`
    : `${primaryCommand} || ${fallbackCommand}`;
}

View File

@@ -0,0 +1,41 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { createStreamLineAccumulator } from './stream.js';

// Confirms a CRLF split across chunk boundaries never produces a phantom empty line.
test('createStreamLineAccumulator handles CRLF across chunk boundaries', () => {
  const parser = createStreamLineAccumulator();
  assert.deepStrictEqual(parser.push('first\r'), []);
  assert.deepStrictEqual(parser.push('\nsecond\r\nthird'), ['first', 'second']);
  assert.deepStrictEqual(parser.flush(), ['third']);
});

// Confirms the first chunk can safely carry a UTF-8 BOM.
test('createStreamLineAccumulator strips a BOM from the first chunk only', () => {
  const parser = createStreamLineAccumulator();
  assert.deepStrictEqual(parser.push(Buffer.from('\uFEFFalpha\nbeta')), ['alpha']);
  assert.deepStrictEqual(parser.flush(), ['beta']);
});

// Confirms callers can intentionally drop empty lines when parsing command output.
test('createStreamLineAccumulator can discard empty lines', () => {
  const parser = createStreamLineAccumulator({ preserveEmptyLines: false });
  assert.deepStrictEqual(parser.push('one\n\n'), ['one']);
  assert.deepStrictEqual(parser.push('two\r\n\r\nthree'), ['two']);
  assert.deepStrictEqual(parser.flush(), ['three']);
});

// Confirms the parser can be reused for a second stream after reset.
test('createStreamLineAccumulator reset clears the internal buffer', () => {
  const parser = createStreamLineAccumulator();
  assert.deepStrictEqual(parser.push('partial'), []);
  assert.strictEqual(parser.peek(), 'partial');
  parser.reset();
  assert.strictEqual(parser.peek(), '');
  assert.deepStrictEqual(parser.push('done\n'), ['done']);
});

View File

@@ -0,0 +1,98 @@
import { StringDecoder } from 'node:string_decoder';
import { stripUtf8Bom } from './text.js';
import type { StreamLineAccumulator, StreamLineAccumulatorOptions } from './types.js';
// This helper lets callers reuse the same cross-platform line parser for stdout, stderr, or file streams.
export function createStreamLineAccumulator(
  options: StreamLineAccumulatorOptions = {},
): StreamLineAccumulator {
  const { preserveEmptyLines = true } = options;
  // StringDecoder holds back incomplete multi-byte UTF-8 sequences that land on a chunk
  // boundary; a plain chunk.toString('utf8') would turn such a split sequence into U+FFFD
  // replacement characters before the next chunk could complete it.
  let decoder = new StringDecoder('utf8');
  let buffer = '';
  let isFirstChunk = true;
  // This helper keeps the push logic focused on line extraction rather than Buffer/string branching.
  const chunkToString = (chunk: Buffer | string): string =>
    Buffer.isBuffer(chunk) ? decoder.write(chunk) : chunk;
  // This helper applies BOM stripping only once because a stream can only start once.
  // The "first chunk" is only considered consumed once decoded text actually arrives, so a
  // BOM whose bytes are split across the first Buffer chunks is still stripped correctly.
  const normalizeIncomingChunk = (chunk: Buffer | string): string => {
    const text = chunkToString(chunk);
    if (!isFirstChunk || text.length === 0) {
      return text;
    }
    isFirstChunk = false;
    return stripUtf8Bom(text);
  };
  // This helper enforces the caller's empty-line policy in one place.
  const maybeAppendLine = (lines: string[], line: string): void => {
    if (preserveEmptyLines || line.length > 0) {
      lines.push(line);
    }
  };
  return {
    // This method extracts only complete lines and keeps an incomplete trailing fragment in memory.
    push: (chunk: Buffer | string): string[] => {
      buffer += normalizeIncomingChunk(chunk);
      const lines: string[] = [];
      let lineStartIndex = 0;
      let cursor = 0;
      while (cursor < buffer.length) {
        const currentCharacter = buffer[cursor];
        if (currentCharacter === '\n') {
          maybeAppendLine(lines, buffer.slice(lineStartIndex, cursor));
          cursor += 1;
          lineStartIndex = cursor;
          continue;
        }
        if (currentCharacter === '\r') {
          // A trailing carriage return may be the first half of a CRLF sequence from the next chunk.
          if (cursor === buffer.length - 1) {
            break;
          }
          maybeAppendLine(lines, buffer.slice(lineStartIndex, cursor));
          cursor += buffer[cursor + 1] === '\n' ? 2 : 1;
          lineStartIndex = cursor;
          continue;
        }
        cursor += 1;
      }
      buffer = buffer.slice(lineStartIndex);
      return lines;
    },
    // This method flushes the final unterminated fragment when the stream closes.
    flush: (): string[] => {
      // Release any bytes the decoder is still holding from an unfinished multi-byte sequence.
      buffer += decoder.end();
      if (buffer === '') {
        return [];
      }
      const trailingLine = buffer.endsWith('\r') ? buffer.slice(0, -1) : buffer;
      buffer = '';
      if (!preserveEmptyLines && trailingLine.length === 0) {
        return [];
      }
      return [trailingLine];
    },
    // This method exposes the buffered partial fragment (which may end in a pending '\r')
    // for diagnostics or advanced callers.
    peek: (): string => buffer,
    // This method resets the parser so a caller can reuse the same object for a new stream.
    reset: (): void => {
      decoder = new StringDecoder('utf8');
      buffer = '';
      isFirstChunk = true;
    },
  };
}

View File

@@ -0,0 +1,46 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
  detectLineEnding,
  normalizeLineEndings,
  normalizeTerminalInput,
  normalizeTextForParsing,
  preserveExistingLineEndings,
  splitLines,
  stripUtf8Bom,
} from './text.js';

// Confirms the parser can consume mixed OS line endings as one stable LF format.
test('normalizeTextForParsing converts CRLF and CR into LF', () => {
  assert.strictEqual(normalizeTextForParsing('a\r\nb\rc\n'), 'a\nb\nc\n');
});

// Confirms BOM stripping and explicit output line-ending control.
test('normalizeLineEndings strips a UTF-8 BOM and can emit CRLF', () => {
  assert.strictEqual(stripUtf8Bom('\uFEFFhello'), 'hello');
  assert.strictEqual(normalizeLineEndings('\uFEFFa\nb', 'crlf'), 'a\r\nb');
});

// Confirms callers can opt into preserving or trimming empty lines explicitly.
test('splitLines supports empty-line preservation and trailing-line trimming', () => {
  assert.deepStrictEqual(splitLines('a\r\n\r\nb\r\n'), ['a', '', 'b', '']);
  const trimmed = splitLines('a\r\n\r\nb\r\n', {
    preserveEmptyLines: false,
    trimTrailingEmptyLine: true,
  });
  assert.deepStrictEqual(trimmed, ['a', 'b']);
});

// Confirms file rewrites can preserve the line-ending style already present on disk.
test('preserveExistingLineEndings reuses the current file style', () => {
  assert.strictEqual(detectLineEnding('a\r\nb\r\n'), 'crlf');
  assert.strictEqual(preserveExistingLineEndings('x\ny', 'a\r\nb\r\n'), 'x\r\ny');
});

// Confirms pasted terminal input is normalized into carriage returns for PTY writes.
test('normalizeTerminalInput converts mixed newlines into carriage returns', () => {
  assert.strictEqual(normalizeTerminalInput('one\r\ntwo\nthree\rfour'), 'one\rtwo\rthree\rfour');
});

View File

@@ -0,0 +1,55 @@
import type { LineEnding, SplitLinesOptions } from './types.js';

// The UTF-8 byte order mark as it appears once a file has been decoded into a JavaScript string.
const UTF8_BOM = '\uFEFF';

// Removes a leading UTF-8 BOM, which breaks parsers that expect plain text or JSON at byte zero.
export function stripUtf8Bom(value: string): string {
  if (!value.startsWith(UTF8_BOM)) {
    return value;
  }
  return value.slice(UTF8_BOM.length);
}

// Turns any mixture of CRLF, LF, or legacy CR endings into one explicit target format.
export function normalizeLineEndings(value: string, target: LineEnding = 'lf'): string {
  // Collapse CRLF and bare CR into LF in a single pass, then expand if the caller wants CRLF.
  const asLf = stripUtf8Bom(value).replace(/\r\n?/g, '\n');
  if (target === 'crlf') {
    return asLf.split('\n').join('\r\n');
  }
  return asLf;
}

// Infers the file's line-ending style so later writes can preserve the existing convention.
export function detectLineEnding(value: string): LineEnding {
  return /\r\n/.test(value) ? 'crlf' : 'lf';
}

// Splits text into logical lines after normalizing line endings first.
export function splitLines(value: string, options: SplitLinesOptions = {}): string[] {
  const { preserveEmptyLines = true, trimTrailingEmptyLine = false } = options;
  let lines = normalizeLineEndings(value, 'lf').split('\n');
  // A trailing newline yields one final empty entry; drop it only when the caller asks.
  if (trimTrailingEmptyLine && lines[lines.length - 1] === '') {
    lines = lines.slice(0, -1);
  }
  if (!preserveEmptyLines) {
    lines = lines.filter((line) => line !== '');
  }
  return lines;
}

// Gives parsers one stable newline format regardless of the source operating system.
export function normalizeTextForParsing(value: string): string {
  return normalizeLineEndings(value);
}

// Prepares text for file output when the caller wants to force a specific line-ending style.
export function normalizeTextForFileWrite(value: string, lineEnding: LineEnding): string {
  return normalizeLineEndings(value, lineEnding);
}

// Keeps file rewrites stable by reusing the line-ending style already present on disk.
export function preserveExistingLineEndings(nextText: string, currentText: string): string {
  const currentStyle = detectLineEnding(currentText);
  return normalizeTextForFileWrite(nextText, currentStyle);
}

// Converts pasted or synthetic input into the carriage-return form PTYs expect for Enter.
export function normalizeTerminalInput(value: string): string {
  // Normalizing to LF first also strips a BOM; every newline then maps onto a single CR.
  return normalizeLineEndings(value, 'lf').replace(/\n/g, '\r');
}

View File

@@ -0,0 +1,34 @@
// This type keeps the rest of the backend independent from Node's raw platform names.
// All non-Windows, non-macOS Node platforms are folded into 'linux' by resolveRuntimePlatform().
export type RuntimePlatform = 'windows' | 'linux' | 'macos';
// This type makes line-ending intent explicit in parser and file-write code.
export type LineEnding = 'lf' | 'crlf';
// This type describes how to launch a shell without leaking OS-specific details.
export type ShellSpawnPlan = {
  platform: RuntimePlatform;
  // Shell binary to execute, e.g. 'powershell.exe' on Windows or 'bash' elsewhere.
  executable: string;
  // Full argv to pass to the shell, including the command flag and the command string.
  args: string[];
  // The flag that tells the shell to run a command string ('-Command' for PowerShell, '-c' for bash).
  commandFlag: '-Command' | '-c';
  preferredLineEnding: LineEnding;
  pathSeparator: '\\' | '/';
};
// This type configures how static text should be split into lines.
export type SplitLinesOptions = {
  // When false, empty lines are dropped from the result. Defaults to true.
  preserveEmptyLines?: boolean;
  // When true, the single empty entry produced by a trailing newline is removed. Defaults to false.
  trimTrailingEmptyLine?: boolean;
};
// This type configures how streaming stdout and stderr chunks should be accumulated.
export type StreamLineAccumulatorOptions = {
  // When false, empty lines are dropped as chunks are parsed. Defaults to true.
  preserveEmptyLines?: boolean;
};
// This type is the public contract for incremental line parsing from process streams.
export type StreamLineAccumulator = {
  // Feed one chunk; returns only the complete lines extracted so far.
  push: (chunk: Buffer | string) => string[];
  // Emit the final unterminated fragment when the stream closes.
  flush: () => string[];
  // Inspect the buffered partial fragment without consuming it.
  peek: () => string;
  // Clear internal state so the accumulator can be reused for a new stream.
  reset: () => void;
};
};