[Hackathon] Benchmark GHA #4
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that builds the PR, runs the benchmark tool, and posts a comparison comment on the pull request.
| name: Benchmark Comparison | |
| on: | |
| pull_request: | |
| types: | |
| - opened | |
| - synchronize | |
| - reopened | |
# NOTE(review): pull_request and pull_request_target are both listed with the same activity types,
# so a single PR event may start this workflow twice - confirm that this is intended.
# NOTE(review): under pull_request_target the job checks out the PR head SHA (see the checkout step)
# and then installs, builds and runs it. GitHub's security guidance warns against executing
# untrusted PR code in that context - confirm against the repository's threat model.
| pull_request_target: | |
| types: | |
| - opened | |
| - synchronize | |
| - reopened | |
# Write access to pull requests; the script at the end creates or updates a PR comment.
| permissions: | |
| pull-requests: write | |
| env: | |
| # Maximum allowed performance regression percentage before failing the pipeline | |
# NOTE(review): the script falls back to 5 when FAIL_THRESHOLD cannot be parsed, which differs from this value.
| BENCHMARK_FAIL_THRESHOLD: 8 | |
| jobs: | |
| comparison: | |
| name: Check Changes | |
| runs-on: ubuntu-latest | |
| steps: | |
# Check out the exact head commit of the pull request.
| - name: Checkout PR branch | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| ref: ${{ github.event.pull_request.head.sha }} | |
# Set up pnpm 10.27.0 without running an install at this point.
| - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 | |
| with: | |
| version: 10.27.0 | |
| run_install: false | |
| - name: Use Node.js 22.x | |
| uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 | |
| with: | |
| node-version: 22.18.0 | |
| cache: "pnpm" | |
| - name: Install dependencies | |
| run: pnpm install --frozen-lockfile | |
# Build the packages selected by the @mui/x-charts-premium... filter.
| - name: Build packages | |
| run: pnpm --filter @mui/x-charts-premium... build | |
| - name: Install Playwright browsers | |
| run: pnpm --filter benchmark-tool exec playwright install chromium --with-deps | |
| - name: Run benchmarks | |
| run: pnpm --filter benchmark-tool test | |
# Inline script: reads the benchmark JSON files, fetches baseline data, comments on the PR,
# and fails the step when a benchmark exceeds FAIL_THRESHOLD (percent).
| - name: Extract metrics and compare | |
| uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 | |
| env: | |
| FAIL_THRESHOLD: ${{ env.BENCHMARK_FAIL_THRESHOLD }} | |
| with: | |
| script: | | |
| const fs = require('fs'); | |
| const path = require('path'); | |
// Parse the regression threshold (in percent) passed through FAIL_THRESHOLD.
// The fallback is used only when the value is missing or not a number, so an
// explicit threshold of 0 ("fail on any slowdown") is respected instead of being
// replaced by the fallback.
function parseFailThreshold(rawValue, fallback = 5) {
  const parsed = Number.parseFloat(rawValue);
  return Number.isNaN(parsed) ? fallback : parsed;
}
const failThreshold = parseFailThreshold(process.env.FAIL_THRESHOLD);
// Extract total duration from a trace file.
//
// `trace` is the parsed JSON of a trace: either an object carrying a
// `traceEvents` array or a bare array of events. Returns the sum of `dur` over
// every event whose phase (`ph`) is 'X'; returns 0 when there are no such
// events or when no event list can be found.
function extractTotalDuration(trace) {
  const events = Array.isArray(trace) ? trace : trace?.traceEvents;
  if (!Array.isArray(events)) {
    return 0;
  }
  let totalDuration = 0;
  for (const event of events) {
    // Skip events without a numeric duration: adding `undefined` would turn
    // the whole total into NaN.
    if (event?.ph === 'X' && Number.isFinite(event.dur)) {
      totalDuration += event.dur;
    }
  }
  return totalDuration;
}
// Collect the PR's total trace duration for every benchmark JSON file on disk,
// keyed by file name. A missing benchmarks directory leaves the map empty.
const benchmarksDir = './test/benchmark-tool/benchmarks';
const prMetricsByFile = {};
if (fs.existsSync(benchmarksDir)) {
  fs.readdirSync(benchmarksDir)
    .filter((entry) => entry.endsWith('.json'))
    .forEach((entry) => {
      const rawTrace = fs.readFileSync(path.join(benchmarksDir, entry), 'utf-8');
      prMetricsByFile[entry] = extractTotalDuration(JSON.parse(rawTrace));
    });
}
// Fetch baseline metrics from the performance-benchmark-data repository.
// Baseline runs are stored in folders named YYYY-MM-DD-HH-MM-SS under `mui-x`;
// the most recent folder is used. This is best-effort: every failure is logged
// and leaves `masterMetricsByFile` empty (or partially filled), it never throws.
const masterMetricsByFile = {};
try {
  const baselineRawUrl = 'https://raw.githubusercontent.com/mnajdova/performance-benchmark-data/main/mui-x';

  // List directories in mui-x folder using GitHub API
  const contentsResponse = await fetch('https://api.github.com/repos/mnajdova/performance-benchmark-data/contents/mui-x');
  if (!contentsResponse.ok) {
    // Surface the status (e.g. a rate limit) instead of silently reporting "no baseline".
    throw new Error(`listing baseline folders returned HTTP ${contentsResponse.status}`);
  }
  const contents = await contentsResponse.json();
  if (!Array.isArray(contents)) {
    throw new Error('unexpected response while listing baseline folders');
  }

  // Timestamp-formatted names sort chronologically, so the last one is the latest.
  const folders = contents
    .filter((item) => item.type === 'dir')
    .map((item) => item.name)
    .sort();
  const latestFolder = folders.at(-1);
  if (latestFolder === undefined) {
    throw new Error('no baseline folders found');
  }
  console.log(`Using latest benchmark folder: ${latestFolder}`);

  // Fetch metadata.json to get the list of benchmark files
  const metadataResponse = await fetch(`${baselineRawUrl}/${latestFolder}/metadata.json`);
  if (!metadataResponse.ok) {
    throw new Error(`fetching metadata.json returned HTTP ${metadataResponse.status}`);
  }
  const metadata = await metadataResponse.json();
  if (!Array.isArray(metadata?.files)) {
    throw new Error('metadata.json does not contain a `files` list');
  }

  // Fetch metrics from each benchmark file listed in metadata
  for (const filePath of metadata.files) {
    const fileName = filePath.split('/').pop();
    const fileResponse = await fetch(`${baselineRawUrl}/${latestFolder}/${fileName}`);
    if (!fileResponse.ok) {
      // One unavailable file should not discard the rest of the baseline.
      console.log(`Skipping baseline file ${fileName}: HTTP ${fileResponse.status}`);
      continue;
    }
    const trace = await fileResponse.json();
    masterMetricsByFile[fileName] = extractTotalDuration(trace);
  }
} catch (e) {
  console.log('Could not fetch baseline metrics:', e.message);
}
// Build the Markdown body of the PR comment and collect the benchmarks that
// regress beyond the configured threshold.
//
// Reads (defined earlier in this script):
//   prMetricsByFile     - total duration per benchmark file measured on the PR
//   masterMetricsByFile - total duration per benchmark file from the baseline
//   failThreshold       - maximum tolerated slowdown, in percent
// Produces (used later in this script):
//   body             - Markdown text of the comment
//   failedBenchmarks - [{ name, diff }] for every benchmark over the threshold

// Strip only a trailing ".json" so a name that merely contains ".json" stays intact.
const stripJsonSuffix = (file) => file.replace(/\.json$/, '');

// Get all unique file names from both PR and master
const allFiles = [...new Set([...Object.keys(prMetricsByFile), ...Object.keys(masterMetricsByFile)])].sort();
const hasMasterMetrics = Object.keys(masterMetricsByFile).length > 0;

// Track benchmarks that exceed the threshold
const failedBenchmarks = [];
let body;

if (hasMasterMetrics) {
  // Build per-file comparison rows
  const rows = [];
  for (const file of allFiles) {
    // `||` rather than `??` on purpose: a missing entry and a NaN total both count as 0.
    const prDuration = prMetricsByFile[file] || 0;
    const masterDuration = masterMetricsByFile[file] || 0;
    const diff = prDuration - masterDuration;
    const benchmarkName = stripJsonSuffix(file);

    let diffPercent;
    if (masterDuration > 0) {
      const diffPercentNum = (diff / masterDuration) * 100;
      diffPercent = diffPercentNum.toFixed(2);
      // Check if regression exceeds threshold
      if (diffPercentNum > failThreshold) {
        failedBenchmarks.push({ name: benchmarkName, diff: diffPercent });
      }
    } else {
      // No baseline for this benchmark (e.g. it is new in the PR): show the
      // unbounded change in the table, but do not report it as a regression,
      // because there is nothing to regress against.
      diffPercent = prDuration > 0 ? '+∞' : '0.00';
    }

    const emoji = diff > 0 ? '🔺' : diff < 0 ? '🔽' : '➡️';
    rows.push(`| ${benchmarkName} | ${masterDuration.toLocaleString()} | ${prDuration.toLocaleString()} | ${emoji} ${diff > 0 ? '+' : ''}${diff.toLocaleString()} (${diffPercent}%) |`);
  }
  const fileRows = rows.join('\n');

  let statusSection = '';
  if (failedBenchmarks.length > 0) {
    const offenders = failedBenchmarks.map((b) => `\`${b.name}\` (+${b.diff}%)`).join(', ');
    // Leading/trailing empty entries keep the blockquote separated from the heading and the table.
    statusSection = [
      '',
      `> **⚠️ Performance regression detected!** The following benchmarks exceed the ${failThreshold}% threshold:`,
      `> ${offenders}`,
      '',
    ].join('\n');
  }

  body = [
    '## Performance Comparison',
    statusSection,
    '| Benchmark | Master (μs) | PR (μs) | Diff |',
    '|-----------|-------------|---------|------|',
    fileRows,
    '',
  ].join('\n');
} else {
  const fileRows = allFiles
    .map((file) => {
      const prDuration = prMetricsByFile[file] || 0;
      return `| ${stripJsonSuffix(file)} | ${prDuration.toLocaleString()} |`;
    })
    .join('\n');

  // Blank lines around the note keep the table from being absorbed into the blockquote.
  body = [
    '## Performance Comparison',
    '',
    '> **Note:** Baseline metrics not found. Showing PR metrics only.',
    '',
    '| Benchmark | PR (μs) |',
    '|-----------|---------|',
    fileRows,
    '',
  ].join('\n');
}
// Create the benchmark comment on the pull request, or update it when a
// previous run already posted one.
//
// `github.paginate` walks every page of comments; a single `listComments` call
// only returns the first page, so on a busy PR the existing comment could be
// missed and a duplicate posted.
const comments = await github.paginate(github.rest.issues.listComments, {
  owner: context.repo.owner,
  repo: context.repo.repo,
  issue_number: context.payload.pull_request.number,
  per_page: 100,
});

// Find existing comment: written by a bot and carrying the report heading.
// `user` and `body` are read defensively so an entry without them is skipped.
const botComment = comments.find((comment) =>
  comment.user?.type === 'Bot' && comment.body?.includes('## Performance Comparison')
);

if (botComment) {
  await github.rest.issues.updateComment({
    owner: context.repo.owner,
    repo: context.repo.repo,
    comment_id: botComment.id,
    body,
  });
} else {
  await github.rest.issues.createComment({
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: context.payload.pull_request.number,
    body,
  });
}
// Mark the step as failed when at least one benchmark went past the threshold
const regressedNames = failedBenchmarks.map(({ name }) => name);
if (regressedNames.length > 0) {
  core.setFailed(`Performance regression detected: ${regressedNames.join(', ')} exceeded the ${failThreshold}% threshold`);
}