From acbc152375c215b4fe2a20abb29dfb733727c4cb Mon Sep 17 00:00:00 2001
From: wp_duality <chenchen0829@me.com>
Date: Fri, 15 May 2026 20:11:30 +0800
Subject: [PATCH] feat(skills): enrich windows-desktop-e2e with
 trace/dpi/diagnostics (#1925)

* feat(skills): enrich windows-desktop-e2e with trace/dpi/diagnostics

- opt-in E2E_TRACE for step-level screenshots + JSONL action log;
  text content redacted by default (E2E_TRACE_INCLUDE_TEXT to opt in)
- DPI/scaling rules + debug_match() helper for screenshot fallback
- flaky table covers Qt5 set_edit_text fallback and off-screen controls

* docs: fix windows e2e debug helper

---------

Co-authored-by: Affaan Mustafa <affaan@dcube.ai>
---
 skills/windows-desktop-e2e/SKILL.md | 99 +++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
diff --git a/skills/windows-desktop-e2e/SKILL.md b/skills/windows-desktop-e2e/SKILL.md
index e9f0d3c7..72e4d487 100644
--- a/skills/windows-desktop-e2e/SKILL.md
+++ b/skills/windows-desktop-e2e/SKILL.md
@@ -366,6 +366,65 @@ def stop_recording(proc):
     proc.stdin.write(b"q"); proc.stdin.flush(); proc.wait(timeout=10)
 ```
 
+## Per-Step Trace (opt-in)
+
+The default failure screenshot is often too thin for diagnosing flaky tests. The step-level trace below is **off by default** — enable it only when reproducing a flaky case.
+
+### Enable
+
+```bash
+E2E_TRACE=1 pytest tests/test_login.py -v
+# Include typed text in the JSONL log (DO NOT use on tests that type credentials/PII):
+E2E_TRACE=1 E2E_TRACE_INCLUDE_TEXT=1 pytest ...
+```
+
+### Patch into BasePage
+
+```python
+import os, json, time
+TRACE_ENABLED      = os.environ.get("E2E_TRACE") == "1"
+TRACE_INCLUDE_TEXT = os.environ.get("E2E_TRACE_INCLUDE_TEXT") == "1"
+
+class BasePage:
+    _step = 0
+
+    def _trace(self, action, spec=None, text=None):
+        if not TRACE_ENABLED:
+            return
+        BasePage._step += 1
+        idx = f"{BasePage._step:03d}"
+        os.makedirs(ARTIFACT_DIR, exist_ok=True)
+        try:
+            self.window.capture_as_image().save(
+                os.path.join(ARTIFACT_DIR, f"step_{idx}_{action}.png"))
+        except Exception:
+            pass  # capture failure must not break the test
+        rec = {
+            "ts": time.time(), "step": BasePage._step, "action": action,
+            "locator": getattr(spec, "criteria", None),
+            "text": text if TRACE_INCLUDE_TEXT else ("<redacted>" if text else None),
+        }
+        with open(os.path.join(ARTIFACT_DIR, "trace.jsonl"), "a") as f:
+            f.write(json.dumps(rec) + "\n")
+
+    def click(self, spec):
+        self.wait_visible(spec); self._trace("click_before", spec)
+        spec.click_input();      self._trace("click_after",  spec)
+
+    def type_text(self, spec, text):
+        self.wait_visible(spec); self._trace("type_before", spec, text)
+        # ... existing set_edit_text / keyboard fallback ...
+        self._trace("type_after", spec)
+```
+
+### Caveats
+
+- **PII / credentials**: `type_text` content is `<redacted>` by default. Never set `E2E_TRACE_INCLUDE_TEXT=1` on login or payment flows.
+- **Overhead**: ~50–200ms per action + one PNG per step on disk. Don't enable on the default CI matrix — only on a dedicated flake-repro job.
+- **Artifact bloat**: a long flow produces tens of MB; tune `retention-days` accordingly.
+- **Parallel/rerun hygiene**: this simple example appends to `trace.jsonl` and uses a class-level counter. Clear the artifact directory before reruns, and use per-worker artifact dirs for parallel tests.
+- **Coverage gap**: actions performed outside `BasePage` (raw `pywinauto` calls in test code) are not traced.
+
 ## Flaky Test Handling
 
 ```python
@@ -387,6 +446,8 @@ Common causes and fixes:
 | Animation in progress | `wait_until(lambda: not loading_indicator.exists())` |
 | Dialog timing | `wait_window(title, timeout=15)` |
 | CI display not ready | Set `DISPLAY` or use virtual desktop in CI |
+| `set_edit_text` raises NotImplementedError | UIA ValuePattern missing (common on Qt 5.x) — `BasePage.type_text` already falls back to `keyboard.send_keys` |
+| Control exists but `wait_visible` times out | Window minimised or off-screen — call `win.restore()` + `win.set_focus()` before waiting |
 
 ## Test Isolation & Sandbox
 
@@ -719,6 +780,44 @@ def click_image(template_path, confidence=0.85):
     pyautogui.click(*pos)
 ```
 
+### DPI / Scaling Rules (screenshot mode only)
+
+Screenshot matching is brutally sensitive to Windows display scaling (100% / 125% / 150%). Three hard rules:
+
+1. **Capture templates at the same scale as the target machine.** Don't try to rescue a mismatch with `PIL.Image.resize` — `cv2.matchTemplate` is very fragile against resampling artefacts.
+2. **Pin the CI display scaling.** On `windows-latest` add a step like `Set-DisplayResolution 1920 1080 -Force` and disable per-monitor DPI scaling, so screenshot dimensions are reproducible.
+3. **Record the scale alongside each artefact.** On capture, write `GetDpiForWindow(hwnd) / 96` to `artifacts/<test>/metadata.json` — postmortems become obvious instead of guess-work.
+
+> Process-level DPI awareness (`SetProcessDpiAwarenessContext`) **can conflict with Qt's own DPI handling** when the app under test is Qt-based. Prefer "same-scale templates + CI pin" over flipping process-wide DPI mode in fixtures.
+
+### Debugging Match Confidence
+
+When tuning the `confidence` threshold, the only sane workflow is to **see** where the match landed. The helper below is diagnosis-only — do not call it from test code.
+
+```python
+def debug_match(template_path, out="artifacts/match_debug.png", confidence=0.85):
+    """Diagnosis-only. Draw the best-match rectangle + score back on the current screen.
+
+    NOT for production tests — use when calibrating confidence or chasing false matches.
+    """
+    import os, cv2, pyautogui, numpy as np
+    screen = np.array(pyautogui.screenshot())[:, :, ::-1]
+    tpl    = cv2.imread(template_path)
+    if tpl is None:
+        raise RuntimeError(f"Template unreadable: {template_path}")
+    res    = cv2.matchTemplate(screen, tpl, cv2.TM_CCOEFF_NORMED)
+    _, mv, _, ml = cv2.minMaxLoc(res)
+    h, w   = tpl.shape[:2]
+    colour = (0, 255, 0) if mv >= confidence else (0, 0, 255)  # green pass / red fail
+    cv2.rectangle(screen, ml, (ml[0]+w, ml[1]+h), colour, 2)
+    cv2.putText(screen, f"score={mv:.3f} thr={confidence}",
+                (ml[0], max(20, ml[1]-6)),
+                cv2.FONT_HERSHEY_SIMPLEX, 0.7, colour, 2)
+    os.makedirs(os.path.dirname(out) or ".", exist_ok=True)
+    cv2.imwrite(out, screen)
+    return mv
+```
+
 **Use sparingly** — image matching breaks on DPI changes, theme switches, and partial occlusion.
 Always try UIA first; fall back to screenshots only for genuinely unreachable controls.