[Hackathon] Benchmark GHA #4

Workflow file for this run

.github/workflows/benchmark-comparison.yml at 87f554b

	# Runs the benchmark suite for a pull request and reports the results in a PR comment.
	name: Benchmark Comparison

	# NOTE(review): `pull_request` and `pull_request_target` are both listed with the same
	# activity types, so one PR event presumably starts two runs of this workflow — confirm
	# that both triggers are intended.
	# NOTE(review): under `pull_request_target` the job below still checks out and executes
	# the PR head commit (install, build, benchmarks) while holding a write-capable token.
	# GitHub's security guidance warns about this combination for untrusted PRs — confirm.
	on:
	pull_request:
	types:
	- opened
	- synchronize
	- reopened
	pull_request_target:
	types:
	- opened
	- synchronize
	- reopened

	# Only pull-request write access is requested; presumably needed by the script step
	# that creates or updates the results comment.
	permissions:
	pull-requests: write

	env:
	# Maximum allowed performance regression percentage before failing the pipeline
	BENCHMARK_FAIL_THRESHOLD: 8

	jobs:
	# Single job: build the PR, run its benchmarks, then compare them with the stored baseline.
	comparison:
	name: Check Changes
	runs-on: ubuntu-latest
	steps:
	# Check out the commit at the head of the pull request.
	- name: Checkout PR branch
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	ref: ${{ github.event.pull_request.head.sha }}

	# Set up pnpm only (run_install: false); dependencies are installed in a later step.
	- uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
	with:
	version: 10.27.0
	run_install: false

	# The Node.js version is pinned exactly and the setup action's pnpm cache is enabled.
	- name: Use Node.js 22.x
	uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
	with:
	node-version: 22.18.0
	cache: "pnpm"

	# Install from the committed lockfile without modifying it.
	- name: Install dependencies
	run: pnpm install --frozen-lockfile

	# NOTE(review): the trailing `...` in the filter presumably selects the package together
	# with its workspace dependencies — confirm against the pnpm filtering docs.
	- name: Build packages
	run: pnpm --filter @mui/x-charts-premium... build

	# Install Chromium (with system dependencies) for Playwright in the benchmark-tool package.
	- name: Install Playwright browsers
	run: pnpm --filter benchmark-tool exec playwright install chromium --with-deps

	# Run the benchmark-tool test script; the script step below reads the resulting
	# JSON traces from ./test/benchmark-tool/benchmarks.
	- name: Run benchmarks
	run: pnpm --filter benchmark-tool test

	# Compare the PR traces with the baseline, create or update the PR comment,
	# and fail the step when a regression exceeds the threshold.
	- name: Extract metrics and compare
	uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
	env:
	# Pass the workflow-level threshold to the script as FAIL_THRESHOLD.
	FAIL_THRESHOLD: ${{ env.BENCHMARK_FAIL_THRESHOLD }}
	with:
	script: \|
	const fs = require('fs');
	const path = require('path');

	/**
	 * Parse the allowed regression percentage from its environment-variable form.
	 *
	 * Only an unparsable value (missing, empty, non-numeric) falls back; an explicit `0`
	 * is kept, so "fail on any regression" can be configured.
	 *
	 * @param {string | undefined} rawValue - Raw value, e.g. `process.env.FAIL_THRESHOLD`.
	 * @param {number} [fallback=5] - Threshold used when `rawValue` cannot be parsed.
	 * @returns {number} The parsed threshold, or `fallback`.
	 */
	function parseFailThreshold(rawValue, fallback = 5) {
	  const parsed = Number.parseFloat(rawValue);
	  return Number.isNaN(parsed) ? fallback : parsed;
	}

	// Regression percentage above which a benchmark fails the step.
	const failThreshold = parseFailThreshold(process.env.FAIL_THRESHOLD);

	/**
	 * Sum the durations of all complete (`ph === 'X'`) events in a trace.
	 *
	 * @param {{ traceEvents: object[] } | object[]} trace - Parsed trace: either an object
	 *   carrying a `traceEvents` array or a bare array of events.
	 * @returns {number} Sum of `dur` over the 'X' events; 0 when there are none.
	 * @throws {TypeError} When no event array can be found in `trace`.
	 */
	function extractTotalDuration(trace) {
	  const events = Array.isArray(trace) ? trace : trace?.traceEvents;
	  if (!Array.isArray(events)) {
	    throw new TypeError('Trace does not contain a traceEvents array');
	  }

	  let totalDuration = 0;
	  for (const event of events) {
	    // Ignore events without a finite numeric duration: a single missing `dur`
	    // would otherwise turn the whole total into NaN.
	    if (event?.ph === 'X' && Number.isFinite(event.dur)) {
	      totalDuration += event.dur;
	    }
	  }
	  return totalDuration;
	}

	// Read the PR benchmark results: one total duration per JSON trace file,
	// keyed by file name.
	const benchmarksDir = './test/benchmark-tool/benchmarks';
	const prMetricsByFile = {};

	if (fs.existsSync(benchmarksDir)) {
	  const traceFiles = fs.readdirSync(benchmarksDir).filter((fileName) => fileName.endsWith('.json'));
	  if (traceFiles.length === 0) {
	    // Surface the empty result instead of silently reporting nothing for the PR.
	    core.warning(`No benchmark trace files (*.json) found in ${benchmarksDir}`);
	  }

	  for (const fileName of traceFiles) {
	    const tracePath = path.join(benchmarksDir, fileName);
	    try {
	      const trace = JSON.parse(fs.readFileSync(tracePath, 'utf-8'));
	      prMetricsByFile[fileName] = extractTotalDuration(trace);
	    } catch (error) {
	      // Re-throw with the file name so a malformed trace is easy to locate in the job log.
	      throw new Error(`Could not read benchmark trace ${tracePath}: ${error.message}`, { cause: error });
	    }
	  }
	} else {
	  core.warning(`Benchmark results directory not found: ${benchmarksDir}`);
	}

	// Fetch baseline metrics from the performance-benchmark-data repository.
	// Baseline runs are stored in folders named YYYY-MM-DD-HH-MM-SS, so the name that
	// sorts last is the most recent run.
	const BASELINE_CONTENTS_URL = 'https://api.github.com/repos/mnajdova/performance-benchmark-data/contents/mui-x';
	const BASELINE_RAW_URL = 'https://raw.githubusercontent.com/mnajdova/performance-benchmark-data/main/mui-x';

	/**
	 * GET a URL and parse the response as JSON.
	 *
	 * @param {string} url - URL to request.
	 * @returns {Promise<any | undefined>} The parsed body, or `undefined` (after logging
	 *   the HTTP status) when the response is not OK.
	 */
	async function fetchBaselineJson(url) {
	  const response = await fetch(url);
	  if (!response.ok) {
	    // Log the status so e.g. a rate-limited request is distinguishable from "no baseline yet".
	    console.log(`Baseline request failed (${response.status} ${response.statusText}): ${url}`);
	    return undefined;
	  }
	  return response.json();
	}

	/**
	 * Download the latest baseline run and compute the total duration of each trace.
	 *
	 * @returns {Promise<Record<string, number>>} Total duration keyed by trace file name;
	 *   empty when no baseline folder or metadata could be fetched.
	 */
	async function fetchBaselineMetrics() {
	  const contents = await fetchBaselineJson(BASELINE_CONTENTS_URL);
	  if (contents === undefined) {
	    return {};
	  }

	  const folders = contents
	    .filter((item) => item.type === 'dir')
	    .map((item) => item.name)
	    .sort()
	    .reverse();
	  if (folders.length === 0) {
	    console.log('No baseline benchmark folders found');
	    return {};
	  }

	  const latestFolder = folders[0];
	  console.log(`Using latest benchmark folder: ${latestFolder}`);

	  // metadata.json lists the benchmark files that belong to this run.
	  const metadata = await fetchBaselineJson(`${BASELINE_RAW_URL}/${latestFolder}/metadata.json`);
	  if (metadata === undefined) {
	    return {};
	  }

	  // The files are independent of each other, so download them in parallel.
	  const entries = await Promise.all(
	    metadata.files.map(async (filePath) => {
	      const fileName = filePath.split('/').pop();
	      const trace = await fetchBaselineJson(`${BASELINE_RAW_URL}/${latestFolder}/${fileName}`);
	      return trace === undefined ? undefined : [fileName, extractTotalDuration(trace)];
	    }),
	  );
	  return Object.fromEntries(entries.filter((entry) => entry !== undefined));
	}

	const masterMetricsByFile = {};
	try {
	  Object.assign(masterMetricsByFile, await fetchBaselineMetrics());
	} catch (e) {
	  // Best effort: without a baseline the report falls back to PR-only metrics.
	  console.log('Could not fetch baseline metrics:', e.message);
	}

	// Union of the benchmark files seen in the PR run and in the baseline,
	// sorted so the table order is stable between runs.
	const allFiles = [...new Set([...Object.keys(prMetricsByFile), ...Object.keys(masterMetricsByFile)])].sort();
	const hasMasterMetrics = Object.keys(masterMetricsByFile).length > 0;

	// Benchmarks whose regression exceeds the threshold, as { name, diff }
	// where `diff` is the signed percentage string shown in the report.
	const failedBenchmarks = [];

	const formatNumber = (value) => value.toLocaleString('en-US');
	// Strip only a trailing ".json", not the first occurrence anywhere in the name.
	const toBenchmarkName = (file) => file.replace(/\.json$/, '');

	// The report is assembled from an array of lines so the Markdown does not depend on
	// how this script happens to be indented inside the workflow file.
	const tableLines = [];
	if (hasMasterMetrics) {
	  tableLines.push(
	    '| Benchmark | Master (μs) | PR (μs) | Diff |',
	    '|-----------|-------------|---------|------|',
	  );

	  for (const file of allFiles) {
	    const benchmarkName = toBenchmarkName(file);

	    // A benchmark present on only one side cannot be compared. Report it as such
	    // instead of treating the missing side as a duration of 0, which would flag a
	    // newly added benchmark as a +∞ regression or a missing one as a -100% win.
	    if (!Object.hasOwn(masterMetricsByFile, file)) {
	      tableLines.push(`| ${benchmarkName} | — | ${formatNumber(prMetricsByFile[file])} | new (no baseline) |`);
	      continue;
	    }
	    if (!Object.hasOwn(prMetricsByFile, file)) {
	      tableLines.push(`| ${benchmarkName} | ${formatNumber(masterMetricsByFile[file])} | — | not run in PR |`);
	      continue;
	    }

	    const prDuration = prMetricsByFile[file];
	    const masterDuration = masterMetricsByFile[file];
	    const diff = prDuration - masterDuration;

	    // A zero baseline has no meaningful percentage: any increase counts as infinite.
	    let diffPercentNum;
	    if (masterDuration > 0) {
	      diffPercentNum = (diff / masterDuration) * 100;
	    } else {
	      diffPercentNum = prDuration > 0 ? Infinity : 0;
	    }

	    const sign = diff > 0 ? '+' : '';
	    const diffPercent = Number.isFinite(diffPercentNum) ? `${sign}${diffPercentNum.toFixed(2)}` : '+∞';
	    const emoji = diff > 0 ? '🔺' : diff < 0 ? '🔽' : '➡️';

	    if (diffPercentNum > failThreshold) {
	      failedBenchmarks.push({ name: benchmarkName, diff: diffPercent });
	    }

	    tableLines.push(
	      `| ${benchmarkName} | ${formatNumber(masterDuration)} | ${formatNumber(prDuration)} | ${emoji} ${sign}${formatNumber(diff)} (${diffPercent}%) |`,
	    );
	  }
	} else {
	  tableLines.push('| Benchmark | PR (μs) |', '|-----------|---------|');
	  for (const file of allFiles) {
	    tableLines.push(`| ${toBenchmarkName(file)} | ${formatNumber(prMetricsByFile[file])} |`);
	  }
	}

	// `diff` already carries its sign, so it is not prefixed again here.
	const statusLines =
	  failedBenchmarks.length > 0
	    ? [
	        `> ⚠️ Performance regression detected! The following benchmarks exceed the ${failThreshold}% threshold:`,
	        `> ${failedBenchmarks.map((b) => `\`${b.name}\` (${b.diff}%)`).join(', ')}`,
	        '',
	      ]
	    : [];
	const noteLines = hasMasterMetrics
	  ? []
	  : ['> Note: Baseline metrics not found. Showing PR metrics only.', ''];

	// Markdown body of the PR comment. The heading doubles as the marker used to find
	// a previously posted comment, so it must stay unchanged.
	const body = ['## Performance Comparison', '', ...statusLines, ...noteLines, ...tableLines, ''].join('\n');

	// Find the comment previously posted by this workflow, if any.
	// All pages are fetched: a single listComments call returns only the first page, so
	// on a long PR thread the existing comment could be missed and a duplicate posted.
	const comments = await github.paginate(github.rest.issues.listComments, {
	  owner: context.repo.owner,
	  repo: context.repo.repo,
	  issue_number: context.payload.pull_request.number,
	  per_page: 100,
	});

	// `user` and `body` may be absent on a comment, hence the optional chaining.
	const botComment = comments.find(
	  (comment) => comment.user?.type === 'Bot' && comment.body?.includes('## Performance Comparison'),
	);

	// Update the existing comment in place, or create it on the first run.
	if (botComment) {
	  await github.rest.issues.updateComment({
	    owner: context.repo.owner,
	    repo: context.repo.repo,
	    comment_id: botComment.id,
	    body,
	  });
	} else {
	  await github.rest.issues.createComment({
	    owner: context.repo.owner,
	    repo: context.repo.repo,
	    issue_number: context.payload.pull_request.number,
	    body,
	  });
	}

	// Mark the step as failed when at least one benchmark went over the threshold.
	const regressedNames = failedBenchmarks.map(({ name }) => name);
	if (regressedNames.length !== 0) {
	  const failureMessage = `Performance regression detected: ${regressedNames.join(', ')} exceeded the ${failThreshold}% threshold`;
	  core.setFailed(failureMessage);
	}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Hackathon] Benchmark GHA #4

Workflow file

[Hackathon] Benchmark GHA #4

Uh oh!

Workflow file for this run