Query-Doctor · veksen · Apr 18, 2026
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -0,0 +1,126 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/refs/heads/master/src/schemas/json/github-workflow.json
+name: Benchmark PR
+# Triggers: PR opened / new commits pushed / reopened, plus manual runs (which carry no PR context).
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+jobs:
+  bench:
+    name: Run benchmark & compare against base
+    runs-on: ubuntu-24.04
+    if: ${{ github.repository_owner == 'query-doctor' }}  # only run in repositories owned by query-doctor
+    timeout-minutes: 45
+    permissions:
+      contents: read
+      pull-requests: write  # needed by the step that creates/updates the PR comment
+    steps:
+      - name: Checkout PR (full history)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history, so the later merge-base lookup has the commits it needs
+          ref: ${{ github.event.pull_request.head.sha }}  # the PR's head commit
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+
+      - name: Resolve base commit
+        id: base  # exposes steps.base.outputs.sha = merge-base of the base branch and the checked-out PR head
+        run: |
+          git fetch origin "${{ github.base_ref }}"  # no --depth: a depth-limited fetch can shallow the full clone and break merge-base
+          echo "sha=$(git merge-base "origin/${{ github.base_ref }}" HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Install dependencies (PR)
+        run: npm ci
+
+      - name: Run benchmark on PR
+        run: npx vitest bench --run --outputJson=/tmp/bench-pr.json  # report later fed to scripts/compare-bench.mjs
+
+      - name: Preserve PR bench source
+        # Base may predate this file or its API; we run base's source against
+        # the PR's bench definition so we're measuring code changes, not bench changes.
+        run: cp src/remote/optimizer.bench.ts /tmp/optimizer.bench.ts
+
+      - name: Switch to base commit
+        run: git checkout -f ${{ steps.base.outputs.sha }}  # sha comes from the "Resolve base commit" step
+
+      - name: Restore PR bench file onto base source  # mkdir first, in case src/remote is absent on the base commit
+        run: |
+          mkdir -p src/remote
+          cp /tmp/optimizer.bench.ts src/remote/optimizer.bench.ts
+
+      - name: Install dependencies (base)
+        id: base-install
+        continue-on-error: true  # a broken base must not fail the job; the PR-only report is used instead
+        run: npm ci
+
+      - name: Run benchmark on base
+        id: base-bench
+        if: steps.base-install.outcome == 'success'
+        continue-on-error: true  # its outcome is checked by the "Build PR comment" step
+        run: npx vitest bench --run --outputJson=/tmp/bench-base.json
+
+      - name: Switch back to PR
+        if: always()
+        run: git checkout -f ${{ github.event.pull_request.head.sha }}  # NOTE(review): head.sha is presumably empty on workflow_dispatch — confirm
+
+      - name: Build PR comment
+        id: build
+        run: |
+          set +e  # compare-bench.mjs exits 1 on regression; record that status instead of aborting the step
+          if [ "${{ steps.base-bench.outcome }}" = "success" ] && [ -f /tmp/bench-base.json ]; then
+            node scripts/compare-bench.mjs /tmp/bench-base.json /tmp/bench-pr.json > bench-comparison.md
+            echo "regressed=$?" >> "$GITHUB_OUTPUT"
+            # Leading blank line keeps this footer a separate markdown paragraph, as in the else branch.
+            printf '\nBase commit: `%s`\n' "${{ steps.base.outputs.sha }}" >> bench-comparison.md
+          else
+            node scripts/compare-bench.mjs /tmp/bench-pr.json > bench-comparison.md
+            echo "regressed=0" >> "$GITHUB_OUTPUT"
+            printf '\n_Base commit `%s` benchmark did not run (missing bench, incompatible API, or failure)._\n' "${{ steps.base.outputs.sha }}" >> bench-comparison.md
+          fi
+
+      - name: Upload raw bench results
+        uses: actions/upload-artifact@v4
+        if: always()  # keep the raw JSON reports even when an earlier step failed
+        with:
+          name: bench-results
+          path: |
+            /tmp/bench-pr.json
+            /tmp/bench-base.json
+          if-no-files-found: ignore  # the base report is absent whenever the base benchmark did not run
+          retention-days: 30
+
+      - name: Post / update PR comment  # one comment per PR: update the comment that starts with the marker, else create it
+        uses: actions/github-script@v7  # the script below relies on the `github` and `context` objects it is given
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('bench-comparison.md', 'utf8');
+            const marker = '<!-- benchmark-pr-comment -->';
+            const final = marker + '\n' + body;
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            if (!issue_number) return; // workflow_dispatch has no PR
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner, repo, issue_number,
+            });
+            const existing = comments.find(c => c.body && c.body.startsWith(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner, repo, comment_id: existing.id, body: final,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner, repo, issue_number, body: final,
+              });
+            }
+
+      - name: Fail on regression  # placed after the comment step, so the report is posted before the job fails
+        if: steps.build.outputs.regressed == '1'  # compare-bench.mjs exit status recorded by "Build PR comment"
+        run: |
+          echo "::error::Benchmark regression detected (>20% on at least one bench). See PR comment for details."
+          exit 1
diff --git a/package.json b/package.json
@@ -8,6 +8,7 @@
     "start:dev": "node --import tsx --watch src/main.ts",
     "dev": "node --env-file=.env --import tsx --watch src/main.ts",
     "test": "vitest",
+    "bench": "vitest bench",
     "typecheck": "tsc --noEmit",
     "build": "esbuild src/main.ts --bundle --platform=node --format=esm --outfile=dist/main.mjs --packages=external && cp src/reporters/github/success.md.j2 src/sync/schema_dump.sql dist/"
   },

diff --git a/scripts/compare-bench.mjs b/scripts/compare-bench.mjs
@@ -0,0 +1,142 @@
+#!/usr/bin/env node
+// Compare vitest --outputJson benchmark reports and emit a markdown summary.
+// Usage:
+//   node scripts/compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>]   (diff mode)
+//   node scripts/compare-bench.mjs <pr.json>                                   (current-only mode)
+// Exit codes: 0 = no regression, 1 = some benchmark's mean regressed by >= --threshold percent (default 20), 2 = usage error.
+
+import { readFileSync } from "node:fs";
+
+const args = process.argv.slice(2);
+const positional = args.filter((a) => !a.startsWith("--"));
+const flags = Object.fromEntries(
+  args
+    .filter((a) => a.startsWith("--"))
+    .map((a) => {
+      const [k, v] = a.slice(2).split("=");
+      return [k, v ?? "true"];
+    }),
+);
+
+if (positional.length < 1) {
+  console.error(
+    "usage: compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>]\n" +
+      "       compare-bench.mjs <pr.json>",
+  );
+  process.exit(2);
+}
+
+const threshold = Number(flags.threshold ?? 20);
+const currentOnly = positional.length === 1;
+const [basePath, prPath] = currentOnly ? [null, positional[0]] : positional;
+
+const base = currentOnly ? { files: [] } : JSON.parse(readFileSync(basePath, "utf8"));
+const pr = JSON.parse(readFileSync(prPath, "utf8"));
+
+function flatten(report) {
+  const out = new Map();
+  for (const file of report.files ?? []) {
+    for (const group of file.groups ?? []) {
+      for (const b of group.benchmarks ?? []) {
+        out.set(`${group.fullName} > ${b.name}`, b);
+      }
+    }
+  }
+  return out;
+}
+
+const baseMap = flatten(base); // empty in current-only mode, where base is { files: [] }
+const prMap = flatten(pr); // benchmarks of the PR report, keyed the same way
+
+function fmtMs(n) {
+  if (n == null || Number.isNaN(n)) return "—";
+  if (n < 1) return `${n.toFixed(3)}ms`;
+  if (n < 100) return `${n.toFixed(2)}ms`;
+  return `${n.toFixed(0)}ms`;
+}
+
+function fmtPct(n) {
+  if (n == null || Number.isNaN(n)) return "—";
+  const sign = n > 0 ? "+" : "";
+  return `${sign}${n.toFixed(1)}%`;
+}
+
+function verdict(deltaPct, threshold) {
+  if (deltaPct == null || Number.isNaN(deltaPct)) return "🆕";
+  if (deltaPct >= threshold) return "🔴";
+  if (deltaPct <= -threshold) return "🟢";
+  return "⚪";
+}
+
+const rows = [];
+let regressed = false;
+
+const allKeys = new Set([...baseMap.keys(), ...prMap.keys()]);
+for (const key of allKeys) {
+  const b = baseMap.get(key);
+  const p = prMap.get(key);
+  const baseMean = b?.mean;
+  const prMean = p?.mean;
+  const deltaPct =
+    baseMean != null && prMean != null && baseMean > 0
+      ? ((prMean - baseMean) / baseMean) * 100
+      : null;
+  if (deltaPct != null && deltaPct >= threshold) regressed = true;
+
+  rows.push({
+    key,
+    baseMean,
+    prMean,
+    baseRme: b?.rme,
+    prRme: p?.rme,
+    deltaPct,
+    verdict: verdict(deltaPct, threshold),
+    onlyBase: !p,
+    onlyPr: !b,
+  });
+}
+
+rows.sort((a, b) => a.key.localeCompare(b.key));
+
+const lines = [];
+lines.push("### Benchmark comparison");
+lines.push("");
+
+if (currentOnly) {
+  lines.push("_No baseline available — showing PR results only._");
+  lines.push("");
+  lines.push("| Benchmark | Mean | RME | Samples |");
+  lines.push("|---|---:|---:|---:|");
+  for (const r of rows) {
+    const samples = prMap.get(r.key)?.sampleCount ?? "—";
+    const rme = r.prRme != null ? `±${r.prRme.toFixed(1)}%` : "—";
+    lines.push(`| \`${r.key}\` | ${fmtMs(r.prMean)} | ${rme} | ${samples} |`);
+  }
+} else {
+  lines.push(
+    `Threshold: ±${threshold}% on mean. 🔴 regression · 🟢 improvement · ⚪ within noise · 🆕 new/removed.`,
+  );
+  lines.push("");
+  lines.push("| | Benchmark | Base mean | PR mean | Δ | RME (base → PR) |");
+  lines.push("|---|---|---:|---:|---:|---|");
+  for (const r of rows) {
+    const rme =
+      r.baseRme != null && r.prRme != null
+        ? `±${r.baseRme.toFixed(1)}% → ±${r.prRme.toFixed(1)}%`
+        : "—";
+    lines.push(
+      `| ${r.verdict} | \`${r.key}\` | ${fmtMs(r.baseMean)} | ${fmtMs(r.prMean)} | ${fmtPct(r.deltaPct)} | ${rme} |`,
+    );
+  }
+}
+
+lines.push("");
+lines.push(
+  "_Benchmarks use testcontainers + wall-time; some noise is expected. Treat single-digit deltas as not-significant._",
+);
+
+process.stdout.write(lines.join("\n") + "\n");
+
+if (regressed) {
+  process.exitCode = 1;
+}