Compare commits

..

1 Commits

Author SHA1 Message Date
Affaan Mustafa ff4a565613 fix(clv2): harden registry writes and project deletion (#2294, #2297)
Two security-priority fixes in continuous-learning-v2/scripts/instinct-cli.py:

- #2294: _write_registry wrote projects.json without the advisory lock that
  _update_registry holds, so concurrent 'projects delete/gc/merge' could race an
  observe-time update and corrupt the registry. Extract the lock into a shared
  _registry_lock() context manager and use it in both writers.

- #2297: _remove_project_storage called shutil.rmtree on PROJECTS_DIR/project_id
  with no containment check. Add defense-in-depth: resolve the path and refuse to
  delete anything that is not strictly inside PROJECTS_DIR (or is the root
  itself), so a relaxed validator or future caller can never cause an
  arbitrary-directory delete.

Adds 5 pytest regression tests (atomic write under lock, contained delete,
missing-dir no-op, traversal refused, root refused). Node integration suite
(tests/scripts/instinct-cli-projects.test.js) green 9/9.
2026-06-21 21:49:33 -04:00
3 changed files with 87 additions and 25 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ dependencies = [
[project.optional-dependencies]
dev = [
"pytest>=9.1.1",
"pytest>=8.0",
"pytest-asyncio>=0.23",
"pytest-cov>=4.1",
"pytest-mock>=3.12",
@@ -27,6 +27,7 @@ import ipaddress
import socket
import urllib.parse
import urllib.request
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime, timedelta, timezone
from collections import defaultdict
@@ -394,22 +395,36 @@ def detect_project() -> dict:
}
@contextmanager
def _registry_lock():
"""Serialize registry read-modify-write across concurrent sessions.
Acquires the same advisory lock for every registry writer (``_update_registry``
and ``_write_registry``) so ``projects delete/gc/merge`` cannot interleave with
a concurrent observe-time update and corrupt ``projects.json``. No-op on
platforms without ``fcntl`` (Windows).
"""
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
lock_fd = None
try:
if _HAS_FCNTL:
lock_fd = open(lock_path, "w")
fcntl.flock(lock_fd, fcntl.LOCK_EX)
yield
finally:
if lock_fd is not None:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
"""Update the projects.json registry.
Uses file locking (where available) to prevent concurrent sessions from
overwriting each other's updates.
"""
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
lock_fd = None
try:
# Acquire advisory lock to serialize read-modify-write
if _HAS_FCNTL:
lock_fd = open(lock_path, "w")
fcntl.flock(lock_fd, fcntl.LOCK_EX)
with _registry_lock():
try:
with open(REGISTRY_FILE, encoding="utf-8") as f:
registry = json.load(f)
@@ -429,10 +444,6 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
f.flush()
os.fsync(f.fileno())
os.replace(tmp_file, REGISTRY_FILE)
finally:
if lock_fd is not None:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
def load_registry() -> dict:
@@ -445,15 +456,19 @@ def load_registry() -> dict:
def _write_registry(registry: dict) -> None:
"""Write the project registry atomically."""
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
with open(tmp_file, "w", encoding="utf-8") as f:
json.dump(registry, f, indent=2)
f.write("\n")
f.flush()
os.fsync(f.fileno())
os.replace(tmp_file, REGISTRY_FILE)
"""Write the project registry atomically.
Holds the same advisory lock as ``_update_registry`` so concurrent
``projects delete/gc/merge`` and observe-time updates cannot corrupt the file.
"""
with _registry_lock():
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
with open(tmp_file, "w", encoding="utf-8") as f:
json.dump(registry, f, indent=2)
f.write("\n")
f.flush()
os.fsync(f.fileno())
os.replace(tmp_file, REGISTRY_FILE)
def _validate_project_id(project_id: str) -> bool:
@@ -573,7 +588,14 @@ def _project_counts(project_id: str) -> dict:
def _remove_project_storage(project_id: str) -> None:
project_dir = PROJECTS_DIR / project_id
# Defense-in-depth: resolve and confirm the target is contained within
# PROJECTS_DIR before recursively deleting, even though callers validate the
# project id. A relaxed validator or a future caller must never be able to
# turn this into an arbitrary-directory delete.
projects_root = PROJECTS_DIR.resolve()
project_dir = (PROJECTS_DIR / project_id).resolve()
if project_dir == projects_root or projects_root not in project_dir.parents:
raise ValueError(f"refusing to remove {project_dir}: escapes {projects_root}")
if project_dir.exists():
shutil.rmtree(project_dir)
@@ -46,6 +46,8 @@ load_registry = _mod.load_registry
_validate_instinct_id = _mod._validate_instinct_id
_validate_import_url = _mod._validate_import_url
_update_registry = _mod._update_registry
_write_registry = _mod._write_registry
_remove_project_storage = _mod._remove_project_storage
_confidence_bar = _mod._confidence_bar
@@ -1043,3 +1045,41 @@ def test_update_registry_atomic_replaces_file(patch_globals):
assert "abc123" in data
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
assert leftovers == []
def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
# Issue #2294: _write_registry now holds the registry lock like
# _update_registry. It must still write atomically with no stray tmp files.
tree = patch_globals
_write_registry({"keep": {"name": "demo", "root": "/repo", "remote": ""}})
data = json.loads(tree["registry_file"].read_text())
assert data == {"keep": {"name": "demo", "root": "/repo", "remote": ""}}
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
assert leftovers == []
def test_remove_project_storage_deletes_contained_dir(patch_globals):
tree = patch_globals
target = tree["projects_dir"] / "proj-1"
(target / "instincts").mkdir(parents=True)
(target / "instincts" / "x.md").write_text("hi", encoding="utf-8")
_remove_project_storage("proj-1")
assert not target.exists()
def test_remove_project_storage_missing_dir_is_noop(patch_globals):
# No raise when the contained dir simply does not exist.
_remove_project_storage("never-created")
def test_remove_project_storage_blocks_traversal(patch_globals):
# Issue #2297: defense-in-depth — a traversal id must be refused even when a
# caller skips _validate_project_id, so this can never delete outside
# PROJECTS_DIR.
with pytest.raises(ValueError):
_remove_project_storage("../../etc")
def test_remove_project_storage_blocks_root_itself(patch_globals):
with pytest.raises(ValueError):
_remove_project_storage(".")