From e9577e34f1f9b1c2cbd2d93156739ba6d5d3d9a5 Mon Sep 17 00:00:00 2001 From: Frank <97429702+tsubasakong@users.noreply.github.com> Date: Sat, 7 Mar 2026 14:47:35 -0800 Subject: [PATCH] fix: force UTF-8 for instinct CLI file IO (#353) --- .../scripts/instinct-cli.py | 14 +++++----- .../scripts/test_parse_instinct.py | 28 +++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/skills/continuous-learning-v2/scripts/instinct-cli.py b/skills/continuous-learning-v2/scripts/instinct-cli.py index 0d0192b9..65a5a002 100755 --- a/skills/continuous-learning-v2/scripts/instinct-cli.py +++ b/skills/continuous-learning-v2/scripts/instinct-cli.py @@ -186,7 +186,7 @@ def detect_project() -> dict: def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None: """Update the projects.json registry.""" try: - with open(REGISTRY_FILE) as f: + with open(REGISTRY_FILE, encoding="utf-8") as f: registry = json.load(f) except (FileNotFoundError, json.JSONDecodeError): registry = {} @@ -200,7 +200,7 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None: REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True) tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}" - with open(tmp_file, "w") as f: + with open(tmp_file, "w", encoding="utf-8") as f: json.dump(registry, f, indent=2) f.flush() os.fsync(f.fileno()) @@ -210,7 +210,7 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None: def load_registry() -> dict: """Load the projects registry.""" try: - with open(REGISTRY_FILE) as f: + with open(REGISTRY_FILE, encoding="utf-8") as f: return json.load(f) except (FileNotFoundError, json.JSONDecodeError): return {} @@ -272,7 +272,7 @@ def _load_instincts_from_dir(directory: Path, source_type: str, scope_label: str ] for file in files: try: - content = file.read_text() + content = file.read_text(encoding="utf-8") parsed = parse_instinct_file(content) for inst in parsed: inst['_source_file'] = str(file) @@ -379,7 +379,7 @@ def cmd_status(args) -> int: # Observations stats obs_file = project.get("observations_file") if obs_file and Path(obs_file).exists(): - with open(obs_file) as f: + with open(obs_file, encoding="utf-8") as f: obs_count = sum(1 for _ in f) print(f"-" * 60) print(f" Observations: {obs_count} events logged") @@ -450,7 +450,7 @@ def cmd_import(args) -> int: except ValueError as e: print(f"Invalid path: {e}", file=sys.stderr) return 1 - content = path.read_text() + content = path.read_text(encoding="utf-8") # Parse instincts new_instincts = parse_instinct_file(content) @@ -981,7 +981,7 @@ def cmd_projects(args) -> int: inherited_count = len(_load_instincts_from_dir(inherited_dir, "inherited", "project")) obs_file = project_dir / "observations.jsonl" if obs_file.exists(): - with open(obs_file) as f: + with open(obs_file, encoding="utf-8") as f: obs_count = sum(1 for _ in f) else: obs_count = 0 diff --git a/skills/continuous-learning-v2/scripts/test_parse_instinct.py b/skills/continuous-learning-v2/scripts/test_parse_instinct.py index 41360ebc..71734a9a 100644 --- a/skills/continuous-learning-v2/scripts/test_parse_instinct.py +++ b/skills/continuous-learning-v2/scripts/test_parse_instinct.py @@ -13,6 +13,7 @@ Covers: """ import importlib.util +import io import json import os import sys @@ -483,6 +484,21 @@ def test_load_supports_md_extension(tmp_path): assert "test-instinct" in ids +def test_load_instincts_from_dir_uses_utf8_encoding(tmp_path, monkeypatch): + yaml_file = tmp_path / "test.yaml" + yaml_file.write_text("placeholder") + calls = [] + + def fake_read_text(self, *args, **kwargs): + calls.append(kwargs.get("encoding")) + return SAMPLE_INSTINCT_YAML + + monkeypatch.setattr(Path, "read_text", fake_read_text) + result = _load_instincts_from_dir(tmp_path, "personal", "project") + assert result[0]["id"] == "test-instinct" + assert calls == ["utf-8"] + + # ───────────────────────────────────────────── # load_all_instincts tests # ───────────────────────────────────────────── @@ -940,6 +956,18 @@ def test_load_registry_valid(patch_globals): assert result == data +def test_load_registry_uses_utf8_encoding(monkeypatch): + calls = [] + + def fake_open(path, mode="r", *args, **kwargs): + calls.append(kwargs.get("encoding")) + return io.StringIO("{}") + + monkeypatch.setattr(_mod, "open", fake_open, raising=False) + assert load_registry() == {} + assert calls == ["utf-8"] + + def test_validate_instinct_id(): assert _validate_instinct_id("good-id_1.0") assert not _validate_instinct_id("../bad")