fix(clv2): harden registry writes and project deletion (#2294, #2297)

Two security-priority fixes in continuous-learning-v2/scripts/instinct-cli.py:

- #2294: _write_registry wrote projects.json without the advisory lock that
  _update_registry holds, so concurrent 'projects delete/gc/merge' could race an
  observe-time update and corrupt the registry. Extract the lock into a shared
  _registry_lock() context manager and use it in both writers.

- #2297: _remove_project_storage called shutil.rmtree on PROJECTS_DIR/project_id
  with no containment check. Add defense-in-depth: resolve the path and refuse to
  delete anything that is not strictly inside PROJECTS_DIR (the root itself is
  refused as well), so that neither a relaxed validator nor a future caller can
  ever cause an arbitrary-directory delete.

Adds 5 pytest regression tests (atomic write under lock, contained delete,
missing-dir no-op, traversal refused, root refused). Node integration suite
(tests/scripts/instinct-cli-projects.test.js) green 9/9.
This commit is contained in:
Affaan Mustafa
2026-06-21 21:49:33 -04:00
parent 71d22d0a77
commit ff4a565613
2 changed files with 86 additions and 24 deletions
@@ -46,6 +46,8 @@ load_registry = _mod.load_registry
# Bind the helpers under test to module-level names so the tests in this file
# can call them directly. NOTE(review): _mod is defined earlier in the file —
# presumably the loaded instinct-cli module; confirm against the file header.
_validate_instinct_id = _mod._validate_instinct_id
_validate_import_url = _mod._validate_import_url
_update_registry = _mod._update_registry
_write_registry = _mod._write_registry
_remove_project_storage = _mod._remove_project_storage
_confidence_bar = _mod._confidence_bar
@@ -1043,3 +1045,41 @@ def test_update_registry_atomic_replaces_file(patch_globals):
assert "abc123" in data
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
assert leftovers == []
def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
    """Check that _write_registry persists the registry and leaves no tmp files.

    Issue #2294: _write_registry now holds the registry lock like
    _update_registry. It must still write atomically with no stray tmp files.
    """
    registry_path = patch_globals["registry_file"]
    entry = {"name": "demo", "root": "/repo", "remote": ""}

    # Hand over a copy so the expectation below cannot be affected by anything
    # the writer does to its argument.
    _write_registry({"keep": dict(entry)})

    on_disk = json.loads(registry_path.read_text())
    assert on_disk == {"keep": entry}

    stray_tmp_files = list(registry_path.parent.glob(".projects.json.tmp.*"))
    assert stray_tmp_files == []
def test_remove_project_storage_deletes_contained_dir(patch_globals):
    """A project directory inside the projects root is removed with its contents."""
    project_dir = patch_globals["projects_dir"] / "proj-1"
    instincts_dir = project_dir / "instincts"

    # Populate the directory so the removal has nested content to delete.
    instincts_dir.mkdir(parents=True)
    (instincts_dir / "x.md").write_text("hi", encoding="utf-8")

    _remove_project_storage("proj-1")

    assert not project_dir.exists()
def test_remove_project_storage_missing_dir_is_noop(patch_globals):
    """Removing storage for an id whose directory does not exist must not raise."""
    absent_project_id = "never-created"
    _remove_project_storage(absent_project_id)
def test_remove_project_storage_blocks_traversal(patch_globals):
    """A path-traversal project id is rejected with ValueError.

    Issue #2297: defense-in-depth — a traversal id must be refused even when a
    caller skips _validate_project_id, so this can never delete outside
    PROJECTS_DIR.
    """
    traversal_id = "../../etc"
    with pytest.raises(ValueError):
        _remove_project_storage(traversal_id)
def test_remove_project_storage_blocks_root_itself(patch_globals):
    """The id "." is rejected with ValueError."""
    root_alias_id = "."
    with pytest.raises(ValueError):
        _remove_project_storage(root_alias_id)