Skip to content

[Hackathon] Benchmark GHA #4

[Hackathon] Benchmark GHA

[Hackathon] Benchmark GHA #4

name: Benchmark Comparison
on:
  # Trigger on `pull_request` only.
  # - Listing `pull_request_target` as well made every PR event start the benchmark twice.
  # - This workflow checks out and executes code from the PR head (install, build, tests).
  #   Doing that under `pull_request_target` runs untrusted code with a write-capable
  #   token and access to repository secrets.
  # NOTE(review): with `pull_request`, runs for PRs from forks get a read-only token, so
  # the comment step cannot write to the PR for those runs.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
permissions:
  # Declaring `permissions` sets every scope that is not listed to `none`, so the read
  # access used by actions/checkout is stated explicitly.
  contents: read
  # Required to create or update the benchmark comment on the pull request.
  pull-requests: write
env:
  # Largest tolerated slowdown of a single benchmark, in percent of its baseline
  # duration. The comparison step fails the job when a benchmark regresses by more.
  BENCHMARK_FAIL_THRESHOLD: "8"
jobs:
  # Builds the charts packages from the PR head, runs the browser benchmarks, compares
  # each benchmark's total trace duration with the latest stored baseline, reports the
  # result on the PR and fails when a benchmark regresses beyond the threshold.
  comparison:
    name: Check Changes
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          # The following steps execute code that comes from the PR, and nothing in this
          # job pushes with git, so do not leave the job token configured for git.
          persist-credentials: false
      - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
        with:
          version: 10.27.0
          run_install: false
      - name: Use Node.js 22.x
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
        with:
          node-version: 22.18.0
          cache: "pnpm"
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Build packages
        run: pnpm --filter @mui/x-charts-premium... build
      - name: Install Playwright browsers
        run: pnpm --filter benchmark-tool exec playwright install chromium --with-deps
      - name: Run benchmarks
        run: pnpm --filter benchmark-tool test
      - name: Extract metrics and compare
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        env:
          FAIL_THRESHOLD: ${{ env.BENCHMARK_FAIL_THRESHOLD }}
        with:
          script: |
            const fs = require('fs');
            const path = require('path');

            // Regression threshold in percent. An explicit 0 is honoured (strict mode);
            // only a missing or unparsable value falls back to the default, which mirrors
            // BENCHMARK_FAIL_THRESHOLD at the top of this workflow.
            const parsedThreshold = Number.parseFloat(process.env.FAIL_THRESHOLD);
            const failThreshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 8;

            /**
             * Sums the `dur` of every event whose phase (`ph`) is 'X' in a trace.
             * Accepts either an object with a `traceEvents` array or a bare array of
             * events; anything else counts as an empty trace. Events without a finite
             * numeric `dur` are skipped so that the total can never become NaN.
             */
            function extractTotalDuration(trace) {
              const events = Array.isArray(trace)
                ? trace
                : Array.isArray(trace?.traceEvents)
                  ? trace.traceEvents
                  : [];
              let totalDuration = 0;
              for (const event of events) {
                if (event?.ph === 'X' && Number.isFinite(event.dur)) {
                  totalDuration += event.dur;
                }
              }
              return totalDuration;
            }

            // --- PR results: one total duration per trace file written by the benchmark run.
            const benchmarksDir = './test/benchmark-tool/benchmarks';
            const prMetricsByFile = {};
            if (fs.existsSync(benchmarksDir)) {
              for (const file of fs.readdirSync(benchmarksDir)) {
                if (!file.endsWith('.json')) {
                  continue;
                }
                const trace = JSON.parse(fs.readFileSync(path.join(benchmarksDir, file), 'utf-8'));
                prMetricsByFile[file] = extractTotalDuration(trace);
              }
            }
            if (Object.keys(prMetricsByFile).length === 0) {
              core.warning(`No benchmark results found in ${benchmarksDir}`);
            }

            // --- Baseline results from the performance-benchmark-data repository.
            // Folder names are timestamps (YYYY-MM-DD-HH-MM-SS), so the lexicographically
            // greatest directory name is the most recent one.
            // NOTE(review): these requests are unauthenticated — confirm the anonymous API
            // rate limit is acceptable for this workflow.
            const baselineRepo = 'mnajdova/performance-benchmark-data';
            const baselineRawBase = `https://raw.githubusercontent.com/${baselineRepo}/main/mui-x`;
            const masterMetricsByFile = {};
            try {
              const contentsResponse = await fetch(`https://api.github.com/repos/${baselineRepo}/contents/mui-x`, {
                headers: { Accept: 'application/vnd.github+json' },
              });
              if (!contentsResponse.ok) {
                throw new Error(`listing baseline folders returned HTTP ${contentsResponse.status}`);
              }
              const contents = await contentsResponse.json();
              const folders = (Array.isArray(contents) ? contents : [])
                .filter((item) => item.type === 'dir')
                .map((item) => item.name)
                .sort()
                .reverse();
              if (folders.length === 0) {
                throw new Error('no baseline folders found');
              }
              const latestFolder = folders[0];
              console.log(`Using latest benchmark folder: ${latestFolder}`);

              // metadata.json lists the benchmark files stored in that folder.
              const metadataResponse = await fetch(`${baselineRawBase}/${latestFolder}/metadata.json`);
              if (!metadataResponse.ok) {
                throw new Error(`metadata.json returned HTTP ${metadataResponse.status}`);
              }
              const metadata = await metadataResponse.json();
              const baselineFiles = Array.isArray(metadata.files) ? metadata.files : [];
              for (const filePath of baselineFiles) {
                const fileName = filePath.split('/').pop();
                const fileResponse = await fetch(`${baselineRawBase}/${latestFolder}/${fileName}`);
                if (fileResponse.ok) {
                  masterMetricsByFile[fileName] = extractTotalDuration(await fileResponse.json());
                } else {
                  console.log(`Skipping baseline file ${fileName}: HTTP ${fileResponse.status}`);
                }
              }
            } catch (e) {
              // Best effort: without a baseline the report falls back to PR-only numbers.
              core.warning(`Could not fetch baseline metrics: ${e.message}`);
            }

            // --- Build the Markdown report.
            const allFiles = [...new Set([...Object.keys(prMetricsByFile), ...Object.keys(masterMetricsByFile)])].sort();
            const hasMasterMetrics = Object.keys(masterMetricsByFile).length > 0;
            const benchmarkNameOf = (file) => file.replace(/\.json$/, '');
            // Benchmarks whose regression exceeds the threshold: { name, diff } with diff
            // being the percentage formatted with two decimals.
            const failedBenchmarks = [];
            // The report is assembled line by line so that its Markdown does not depend on
            // how this script is indented inside the YAML block scalar.
            const reportLines = ['## Performance Comparison', ''];

            if (hasMasterMetrics) {
              const fileRows = allFiles.map((file) => {
                const benchmarkName = benchmarkNameOf(file);
                const prDuration = prMetricsByFile[file];
                const masterDuration = masterMetricsByFile[file];

                // A benchmark that exists on only one side cannot be compared. Report it
                // as such instead of as a +∞ regression (which failed the job for every
                // newly added benchmark) or as a -100% "improvement".
                if (prDuration === undefined) {
                  return `| ${benchmarkName} | ${masterDuration.toLocaleString()} | – | ⚠️ no PR result |`;
                }
                if (!(masterDuration > 0)) {
                  return `| ${benchmarkName} | – | ${prDuration.toLocaleString()} | 🆕 no baseline |`;
                }

                const diff = prDuration - masterDuration;
                const diffPercentNum = (diff / masterDuration) * 100;
                const diffPercent = diffPercentNum.toFixed(2);
                const emoji = diff > 0 ? '🔺' : diff < 0 ? '🔽' : '➡️';
                if (diffPercentNum > failThreshold) {
                  failedBenchmarks.push({ name: benchmarkName, diff: diffPercent });
                }
                return `| ${benchmarkName} | ${masterDuration.toLocaleString()} | ${prDuration.toLocaleString()} | ${emoji} ${diff > 0 ? '+' : ''}${diff.toLocaleString()} (${diffPercent}%) |`;
              });

              if (failedBenchmarks.length > 0) {
                reportLines.push(
                  `> **⚠️ Performance regression detected!** The following benchmarks exceed the ${failThreshold}% threshold:`,
                  `> ${failedBenchmarks.map((b) => `\`${b.name}\` (+${b.diff}%)`).join(', ')}`,
                  '',
                );
              }
              reportLines.push(
                '| Benchmark | Master (μs) | PR (μs) | Diff |',
                '|-----------|-------------|---------|------|',
                ...fileRows,
              );
            } else {
              const fileRows = allFiles.map(
                (file) => `| ${benchmarkNameOf(file)} | ${prMetricsByFile[file].toLocaleString()} |`,
              );
              reportLines.push(
                '> **Note:** Baseline metrics not found. Showing PR metrics only.',
                '',
                '| Benchmark | PR (μs) |',
                '|-----------|---------|',
                ...fileRows,
              );
            }
            const body = `${reportLines.join('\n')}\n`;

            // --- Create the report comment, or update the one posted by an earlier run.
            const issueParams = {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.payload.pull_request.number,
            };
            try {
              // Paginate: a single listComments call returns only the first page, so on a
              // long PR the earlier report was not found and a duplicate was posted.
              const comments = await github.paginate(github.rest.issues.listComments, {
                ...issueParams,
                per_page: 100,
              });
              const botComment = comments.find(
                (comment) => comment.user?.type === 'Bot' && comment.body?.includes('## Performance Comparison'),
              );
              if (botComment) {
                await github.rest.issues.updateComment({
                  owner: issueParams.owner,
                  repo: issueParams.repo,
                  comment_id: botComment.id,
                  body,
                });
              } else {
                await github.rest.issues.createComment({ ...issueParams, body });
              }
            } catch (e) {
              // A read-only token (e.g. a PR from a fork) cannot comment. Keep the report
              // in the job log and continue, so the regression check below still decides
              // the outcome. Any other error is unexpected and is rethrown.
              if (e.status !== 403) {
                throw e;
              }
              core.warning(`Could not post the benchmark comment: ${e.message}`);
              core.info(body);
            }

            // Fail the pipeline if any benchmark exceeds the threshold
            if (failedBenchmarks.length > 0) {
              core.setFailed(`Performance regression detected: ${failedBenchmarks.map((b) => b.name).join(', ')} exceeded the ${failThreshold}% threshold`);
            }