fix: narrow unicode cleanup scope

This commit is contained in:
Affaan Mustafa
2026-03-29 09:06:44 -04:00
parent 432a45274e
commit 7483d646e4
29 changed files with 249 additions and 180 deletions

View File

@@ -48,6 +48,7 @@ function resetAliases() {
}
function runTests() {
const rocketEmoji = String.fromCodePoint(0x1F680);
console.log('\n=== Testing session-aliases.js ===\n');
let passed = 0;
@@ -1441,7 +1442,7 @@ function runTests() {
'CJK characters should be rejected');
// Emoji
const emojiResult = aliases.resolveAlias('rocket-');
const emojiResult = aliases.resolveAlias(`rocket-${rocketEmoji}`);
assert.strictEqual(emojiResult, null,
'Emoji should be rejected by the ASCII-only regex');

View File

@@ -27,6 +27,8 @@ function test(name, fn) {
// Test suite
function runTests() {
const rocketParty = String.fromCodePoint(0x1F680, 0x1F389);
const partyEmoji = String.fromCodePoint(0x1F389);
console.log('\n=== Testing utils.js ===\n');
let passed = 0;
@@ -166,9 +168,12 @@ function runTests() {
// Non-ASCII session IDs (CJK, Cyrillic, emoji) must each sanitize to an
// 8-character lowercase hex string, differ between distinct inputs, and be
// identical for repeated calls with the same input.
if (test('sanitizeSessionId returns stable hashes for non-ASCII values', () => {
const chinese = utils.sanitizeSessionId('我的项目');
const cyrillic = utils.sanitizeSessionId('проект');
// rocketParty is the two-emoji string built from code points U+1F680 and U+1F389.
const emoji = utils.sanitizeSessionId(rocketParty);
assert.ok(/^[a-f0-9]{8}$/.test(chinese), `Expected 8-char hash, got: ${chinese}`);
assert.ok(/^[a-f0-9]{8}$/.test(cyrillic), `Expected 8-char hash, got: ${cyrillic}`);
assert.ok(/^[a-f0-9]{8}$/.test(emoji), `Expected 8-char hash, got: ${emoji}`);
// Different inputs must not produce the same result.
assert.notStrictEqual(chinese, cyrillic);
assert.notStrictEqual(chinese, emoji);
// Stability: the same input yields the same output on every call.
assert.strictEqual(utils.sanitizeSessionId('日本語プロジェクト'), utils.sanitizeSessionId('日本語プロジェクト'));
})) passed++; else failed++;
@@ -704,7 +709,7 @@ function runTests() {
if (test('writeFile handles unicode content', () => {
const testFile = path.join(utils.getTempDir(), `utils-test-${Date.now()}.txt`);
try {
const unicode = '日本語テスト 中文 émojis';
const unicode = `日本語テスト ${String.fromCodePoint(0x1F680)} émojis`;
utils.writeFile(testFile, unicode);
const content = utils.readFile(testFile);
assert.strictEqual(content, unicode);
@@ -1868,18 +1873,18 @@ function runTests() {
}
})) passed++; else failed++;
// ── Round 108: grepFile with Unicode content — UTF-16 string matching on split lines ──
console.log('\nRound 108: grepFile (Unicode — regex matching on UTF-16 split lines):');
if (test('grepFile finds Unicode patterns across lines', () => {
// ── Round 108: grepFile with Unicode/emoji content — UTF-16 string matching on split lines ──
console.log('\nRound 108: grepFile (Unicode/emoji — regex matching on UTF-16 split lines):');
if (test('grepFile finds Unicode emoji patterns across lines', () => {
const tmpDir = fs.mkdtempSync(path.join(utils.getTempDir(), 'r108-grep-unicode-'));
const testFile = path.join(tmpDir, 'test.txt');
try {
fs.writeFileSync(testFile, '猫 celebration\nnormal line\n猫 party\n日本語テスト');
const unicodeResults = utils.grepFile(testFile, /猫/);
assert.strictEqual(unicodeResults.length, 2,
'Should find Unicode matches on 2 lines (lines 1 and 3)');
assert.strictEqual(unicodeResults[0].lineNumber, 1);
assert.strictEqual(unicodeResults[1].lineNumber, 3);
fs.writeFileSync(testFile, `${partyEmoji} celebration\nnormal line\n${partyEmoji} party\n日本語テスト`);
const emojiResults = utils.grepFile(testFile, new RegExp(partyEmoji, 'u'));
assert.strictEqual(emojiResults.length, 2,
'Should find emoji on 2 lines (lines 1 and 3)');
assert.strictEqual(emojiResults[0].lineNumber, 1);
assert.strictEqual(emojiResults[1].lineNumber, 3);
const cjkResults = utils.grepFile(testFile, /日本語/);
assert.strictEqual(cjkResults.length, 1,
'Should find CJK characters on line 4');

View File

@@ -67,7 +67,7 @@ for (const testFile of testFiles) {
const displayPath = testFile.split(path.sep).join('/');
if (!fs.existsSync(testPath)) {
console.log(`WARNING: Skipping ${displayPath} (file not found)`);
console.log(`WARNING Skipping ${displayPath} (file not found)`);
continue;
}

View File

@@ -35,6 +35,7 @@ function makeTempRoot(prefix) {
// Test fixture characters, constructed from numeric code points instead of
// being typed literally.
// NOTE(review): presumably this keeps the test file itself free of the
// characters the checker flags — confirm against the checker's scan scope.
// U+26A0 WARNING SIGN followed by U+FE0F (variation selector-16).
const warningEmoji = String.fromCodePoint(0x26A0, 0xFE0F);
// U+1F6E0 HAMMER AND WRENCH followed by U+FE0F (variation selector-16).
const toolsEmoji = String.fromCodePoint(0x1F6E0, 0xFE0F);
// U+200B ZERO WIDTH SPACE.
const zeroWidthSpace = String.fromCodePoint(0x200B);
// U+1F680 ROCKET.
const rocketEmoji = String.fromCodePoint(0x1F680);
// Running tallies, incremented after each test() call below and reported at exit.
let passed = 0;
let failed = 0;
@@ -78,6 +79,36 @@ if (
passed++;
else failed++;
// With --write, a JavaScript file under scripts/ that contains an emoji must
// be reported on stderr with a non-zero exit status, and its contents must be
// left exactly as written.
if (
test('write mode does not rewrite executable files', () => {
const root = makeTempRoot('ecc-unicode-code-');
fs.mkdirSync(path.join(root, 'scripts'), { recursive: true });
const scriptFile = path.join(root, 'scripts', 'sample.js');
// The rocket emoji is the 23rd character on line 1 of this content.
const original = `const label = "Launch ${rocketEmoji}";\n`;
fs.writeFileSync(scriptFile, original);
const result = runCheck(root, ['--write']);
// The check must fail; stdout + stderr are used as the assertion message for diagnosis.
assert.notStrictEqual(result.status, 0, result.stdout + result.stderr);
// NOTE(review): "1:23" looks like line:column of the offending character — confirm against the checker's output format.
assert.match(result.stderr, /scripts\/sample\.js:1:23 emoji U\+1F680/);
// The file on disk must be unchanged after the run.
assert.strictEqual(fs.readFileSync(scriptFile, 'utf8'), original);
})
)
passed++;
else failed++;
// A docs file containing the copyright (©) and registered (®) signs must pass
// the check: running without extra arguments has to exit with status 0.
if (
test('plain symbols like copyright remain allowed', () => {
const root = makeTempRoot('ecc-unicode-symbols-');
fs.mkdirSync(path.join(root, 'docs'), { recursive: true });
fs.writeFileSync(path.join(root, 'docs', 'legal.md'), 'Copyright © ECC\nTrademark ® ECC\n');
const result = runCheck(root);
// stdout + stderr are used as the assertion message for diagnosis on failure.
assert.strictEqual(result.status, 0, result.stdout + result.stderr);
})
)
passed++;
else failed++;
// Print the pass/fail totals, then exit with status 1 if any test failed and 0 otherwise.
console.log(`\nPassed: ${passed}`);
console.log(`Failed: ${failed}`);
process.exit(failed > 0 ? 1 : 0);