Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/refs/heads/master/src/schemas/json/github-workflow.json
#
# Benchmarks a pull request with vitest, re-runs the same bench definition
# against the merge base, and posts (or updates) a comparison comment on the PR.
# The job fails when scripts/compare-bench.mjs exits with status 1.
name: Benchmark PR

on:
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch:

jobs:
  bench:
    name: Run benchmark & compare against base
    runs-on: ubuntu-24.04
    if: ${{ github.repository_owner == 'query-doctor' }}
    timeout-minutes: 45
    permissions:
      contents: read
      pull-requests: write
    env:
      # Context values are handed to the shell through the environment instead
      # of being interpolated into script text. The fallbacks cover
      # workflow_dispatch runs, which carry no pull_request context.
      BASE_REF: ${{ github.base_ref || github.event.repository.default_branch }}
      HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
    steps:
      - name: Checkout PR (full history)
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 24
          cache: npm

      - name: Resolve base commit
        id: base
        run: |
          # No --depth here: a depth-limited fetch can turn the full clone from
          # the checkout step into a shallow one and hide the merge base.
          git fetch origin "$BASE_REF"
          # Assign first so a failing merge-base aborts the step instead of
          # silently writing an empty sha to the outputs.
          base_sha="$(git merge-base "origin/$BASE_REF" HEAD)"
          echo "sha=$base_sha" >> "$GITHUB_OUTPUT"

      - name: Install dependencies (PR)
        run: npm ci

      - name: Run benchmark on PR
        run: npx vitest bench --run --outputJson=/tmp/bench-pr.json

      - name: Preserve PR bench source
        # Base may predate this file or its API; we run base's source against
        # the PR's bench definition so we're measuring code changes, not bench changes.
        run: cp src/remote/optimizer.bench.ts /tmp/optimizer.bench.ts

      - name: Switch to base commit
        env:
          BASE_SHA: ${{ steps.base.outputs.sha }}
        run: git checkout -f "$BASE_SHA"

      - name: Restore PR bench file onto base source
        run: |
          mkdir -p src/remote
          cp /tmp/optimizer.bench.ts src/remote/optimizer.bench.ts

      - name: Install dependencies (base)
        id: base-install
        # Best effort: a base that cannot install only downgrades the report
        # to PR-only results.
        continue-on-error: true
        run: npm ci

      - name: Run benchmark on base
        id: base-bench
        if: steps.base-install.outcome == 'success'
        continue-on-error: true
        run: npx vitest bench --run --outputJson=/tmp/bench-base.json

      - name: Switch back to PR
        if: always()
        run: git checkout -f "$HEAD_SHA"

      - name: Build PR comment
        id: build
        env:
          BASE_SHA: ${{ steps.base.outputs.sha }}
          BASE_BENCH_OUTCOME: ${{ steps.base-bench.outcome }}
        run: |
          # The compare script signals a regression through its exit status,
          # so errexit must be off while that status is captured.
          set +e
          if [ "$BASE_BENCH_OUTCOME" = "success" ] && [ -f /tmp/bench-base.json ]; then
            node scripts/compare-bench.mjs /tmp/bench-base.json /tmp/bench-pr.json > bench-comparison.md
            echo "regressed=$?" >> "$GITHUB_OUTPUT"
            echo "" >> bench-comparison.md
            echo "Base commit: \`$BASE_SHA\`" >> bench-comparison.md
          else
            node scripts/compare-bench.mjs /tmp/bench-pr.json > bench-comparison.md
            echo "regressed=0" >> "$GITHUB_OUTPUT"
            echo "" >> bench-comparison.md
            echo "_Base commit \`$BASE_SHA\` benchmark did not run (missing bench, incompatible API, or failure)._" >> bench-comparison.md
          fi

      - name: Upload raw bench results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: bench-results
          path: |
            /tmp/bench-pr.json
            /tmp/bench-base.json
          if-no-files-found: ignore
          retention-days: 30

      - name: Post / update PR comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('bench-comparison.md', 'utf8');
            // The hidden marker identifies this workflow's comment so later
            // runs update it instead of posting a new one.
            const marker = '<!-- benchmark-pr-comment -->';
            const final = marker + '\n' + body;
            const { owner, repo } = context.repo;
            const issue_number = context.issue.number;
            if (!issue_number) return; // workflow_dispatch has no PR
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner, repo, issue_number,
            });
            const existing = comments.find(c => c.body && c.body.startsWith(marker));
            if (existing) {
              await github.rest.issues.updateComment({
                owner, repo, comment_id: existing.id, body: final,
              });
            } else {
              await github.rest.issues.createComment({
                owner, repo, issue_number, body: final,
              });
            }

      - name: Fail on regression
        if: steps.build.outputs.regressed == '1'
        run: |
          echo "::error::Benchmark regression detected (>20% on at least one bench). See PR comment for details."
          exit 1
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"start:dev": "node --import tsx --watch src/main.ts",
"dev": "node --env-file=.env --import tsx --watch src/main.ts",
"test": "vitest",
"bench": "vitest bench",
"typecheck": "tsc --noEmit",
"build": "esbuild src/main.ts --bundle --platform=node --format=esm --outfile=dist/main.mjs --packages=external && cp src/reporters/github/success.md.j2 src/sync/schema_dump.sql dist/"
},
Expand Down
142 changes: 142 additions & 0 deletions scripts/compare-bench.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env node
// Compare vitest --outputJson benchmark reports and emit a markdown summary.
// Usage:
// node scripts/compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>] (diff mode)
// node scripts/compare-bench.mjs <pr.json> (current-only mode)
// Exit code 0 unless a benchmark regressed beyond --threshold (default 20%).

import { readFileSync } from "node:fs";

// Split argv into positional report paths and --key[=value] flags.
const args = process.argv.slice(2);
const positional = args.filter((a) => !a.startsWith("--"));
const flags = Object.fromEntries(
  args
    .filter((a) => a.startsWith("--"))
    .map((a) => {
      // Split on the first "=" only, so a value may itself contain "=".
      // A flag without "=" is recorded as the string "true".
      const body = a.slice(2);
      const eq = body.indexOf("=");
      return eq === -1 ? [body, "true"] : [body.slice(0, eq), body.slice(eq + 1)];
    }),
);

if (positional.length < 1) {
  console.error(
    "usage: compare-bench.mjs <base.json> <pr.json> [--threshold=<pct>]\n" +
      "       compare-bench.mjs <pr.json>",
  );
  process.exit(2);
}

// Regression threshold in percent of the base mean. A value that does not
// parse to a positive number would make every ">= threshold" comparison false
// (NaN) or meaningless, silently disabling the regression gate, so reject it.
const threshold = Number(flags.threshold ?? 20);
if (!Number.isFinite(threshold) || threshold <= 0) {
  console.error(
    `invalid --threshold value: ${flags.threshold} (expected a positive number of percent)`,
  );
  process.exit(2);
}

// One positional argument means "current-only" mode: no baseline to diff against.
const currentOnly = positional.length === 1;
const [basePath, prPath] = currentOnly ? [null, positional[0]] : positional;

// In current-only mode the base report is an empty stand-in with no files.
const base = currentOnly ? { files: [] } : JSON.parse(readFileSync(basePath, "utf8"));
const pr = JSON.parse(readFileSync(prPath, "utf8"));

/**
 * Index every benchmark of a report by "<group fullName> > <benchmark name>".
 *
 * Missing `files`, `groups` or `benchmarks` arrays are treated as empty.
 * When two benchmarks produce the same key, the later one wins.
 *
 * @param {object} report - Parsed benchmark report.
 * @returns {Map<string, object>} Benchmark entries keyed by qualified name.
 */
function flatten(report) {
  const entries = (report.files ?? []).flatMap((file) =>
    (file.groups ?? []).flatMap((group) =>
      (group.benchmarks ?? []).map((bench) => [`${group.fullName} > ${bench.name}`, bench]),
    ),
  );
  return new Map(entries);
}

// Keyed lookups for both reports; the base map is empty in current-only mode.
const [baseMap, prMap] = [base, pr].map((report) => flatten(report));

/**
 * Format a duration in milliseconds, with more decimals for smaller values.
 *
 * @param {number|null|undefined} n - Duration in milliseconds.
 * @returns {string} "—" for null/undefined/NaN; otherwise the value with 3
 *   decimals below 1, 2 decimals below 100, and none from 100 up, plus "ms".
 */
function fmtMs(n) {
  const missing = n == null || Number.isNaN(n);
  if (missing) return "—";
  const digits = n < 1 ? 3 : n < 100 ? 2 : 0;
  return `${n.toFixed(digits)}ms`;
}

/**
 * Format a percentage delta with one decimal and an explicit "+" for gains.
 *
 * @param {number|null|undefined} n - Delta in percent.
 * @returns {string} "—" for null/undefined/NaN, otherwise e.g. "+12.3%" or "-5.0%".
 */
function fmtPct(n) {
  if (n == null || Number.isNaN(n)) return "—";
  const text = `${n.toFixed(1)}%`;
  // Negative numbers already carry their sign; only positives need a prefix.
  return n > 0 ? `+${text}` : text;
}

/**
 * Pick the status icon for a benchmark delta.
 *
 * @param {number|null|undefined} deltaPct - Change of the mean in percent.
 * @param {number} threshold - Magnitude (percent) at which a change counts.
 * @returns {string} "🆕" when there is no delta, "🔴" at or above +threshold,
 *   "🟢" at or below -threshold, "⚪" otherwise.
 */
function verdict(deltaPct, threshold) {
  const missing = deltaPct == null || Number.isNaN(deltaPct);
  if (missing) return "🆕";
  const slower = deltaPct >= threshold;
  const faster = deltaPct <= -threshold;
  return slower ? "🔴" : faster ? "🟢" : "⚪";
}

// Build one row per benchmark that appears in either report.
const allKeys = new Set([...baseMap.keys(), ...prMap.keys()]);
const rows = Array.from(allKeys, (key) => {
  const baseEntry = baseMap.get(key);
  const prEntry = prMap.get(key);
  const baseMean = baseEntry?.mean;
  const prMean = prEntry?.mean;
  // A delta only exists when both sides were measured and the base mean is
  // positive (it is the divisor).
  const comparable = baseMean != null && prMean != null && baseMean > 0;
  const deltaPct = comparable ? ((prMean - baseMean) / baseMean) * 100 : null;

  return {
    key,
    baseMean,
    prMean,
    baseRme: baseEntry?.rme,
    prRme: prEntry?.rme,
    deltaPct,
    verdict: verdict(deltaPct, threshold),
    onlyBase: !prEntry,
    onlyPr: !baseEntry,
  };
});

// A single benchmark at or beyond the threshold marks the whole run as regressed.
const regressed = rows.some((row) => row.deltaPct != null && row.deltaPct >= threshold);

// Sort by name so the table order does not depend on report order.
rows.sort((left, right) => left.key.localeCompare(right.key));

/**
 * Render a benchmark name as an inline-code table cell.
 *
 * A literal "|" ends a GFM table cell even inside a code span, so it is
 * backslash-escaped to keep names containing pipes from breaking the row.
 *
 * @param {string} key - Qualified benchmark name.
 * @returns {string} The name wrapped in backticks with pipes escaped.
 */
const nameCell = (key) => `\`${String(key).replace(/\|/g, "\\|")}\``;

// Assemble the markdown report line by line.
const lines = [];
lines.push("### Benchmark comparison");
lines.push("");

if (currentOnly) {
  // No baseline: list the PR measurements on their own.
  lines.push("_No baseline available — showing PR results only._");
  lines.push("");
  lines.push("| Benchmark | Mean | RME | Samples |");
  lines.push("|---|---:|---:|---:|");
  for (const r of rows) {
    const samples = prMap.get(r.key)?.sampleCount ?? "—";
    const rme = r.prRme != null ? `±${r.prRme.toFixed(1)}%` : "—";
    lines.push(`| ${nameCell(r.key)} | ${fmtMs(r.prMean)} | ${rme} | ${samples} |`);
  }
} else {
  // Diff mode: base and PR side by side with the relative change.
  lines.push(
    `Threshold: ±${threshold}% on mean. 🔴 regression · 🟢 improvement · ⚪ within noise · 🆕 new/removed.`,
  );
  lines.push("");
  lines.push("| | Benchmark | Base mean | PR mean | Δ | RME (base → PR) |");
  lines.push("|---|---|---:|---:|---:|---|");
  for (const r of rows) {
    // RME is shown only when both sides reported one.
    const rme =
      r.baseRme != null && r.prRme != null
        ? `±${r.baseRme.toFixed(1)}% → ±${r.prRme.toFixed(1)}%`
        : "—";
    lines.push(
      `| ${r.verdict} | ${nameCell(r.key)} | ${fmtMs(r.baseMean)} | ${fmtMs(r.prMean)} | ${fmtPct(r.deltaPct)} | ${rme} |`,
    );
  }
}

lines.push("");
lines.push(
  "_Benchmarks use testcontainers + wall-time; some noise is expected. Treat single-digit deltas as not-significant._",
);

process.stdout.write(lines.join("\n") + "\n");

// Set the exit code instead of calling process.exit() so the report written
// to stdout above is flushed before the process ends.
if (regressed) {
  process.exitCode = 1;
}
Loading
Loading