Simple Git Stats
9 min read
Git Stats
I wanted a quick graph of a larger project I was working on for a retrospective with the team, so I wrote a quick script to produce it. Hopefully it’s useful to someone else.
Getting The Statistics
There is a great tool for getting the lines of code from a git repository called Tokei; however, it only reports on the current state of the repository. So let’s loop over the history of the repository and get the stats for each commit.
# Walk every commit reachable from $branch, check each one out, and save a
# tokei JSON snapshot for it under ./working next to this script.
branch=develop
# Resolve the script's own directory once; quoted so paths with spaces work.
script_dir=$(dirname "$0")
# make directory at location of script
mkdir -p "$script_dir/working"
echo "WARNING: This will delete any working files in the working directory"
read -r -p "Press enter to continue"
# Loop over commits
for commit in $(git rev-list "$branch")
do
  # check out the commit - force checkout.
  # If the checkout fails the tree still holds another commit, so running
  # tokei would store the wrong numbers under this commit's name: stop instead.
  if ! git checkout -f "$commit"; then
    echo "ERROR: could not check out $commit, stopping" >&2
    break
  fi
  # Commit timestamp as YYYY-MM-DDTHH:MM:SS (the timezone offset is dropped)
  date=$(git show -s --format=%ci "$commit" | cut -d' ' -f1,2 | sed 's/ /T/')
  # Output progress
  echo "$commit" "$date"
  # run tokei and save output to file in working directory
  tokei --output json > "$script_dir/working/$date-$commit.json"
done
# put repo back to head of branch selected
git checkout "$branch"
Parsing The Data
Next we need to loop over the generated stats and put them into a format that is useful for Chart.js. I’m using TypeScript for this, but it could easily be done in JavaScript.
import fs from 'fs';
import path from 'path';
/** The per-language line counts this script reads from a snapshot. */
type LineCounts = {
  comments: number;
  blanks: number;
  code: number;
};

/** One parsed snapshot file: language name -> its line counts. */
type TTokeiFile = Record<string, LineCounts>;

/** The line counts of every language on a single date. */
type timeseriesOutput = {
  date: string;
  languages: Record<string, LineCounts>;
};

/**
 * Shape of the generated results file.
 *
 * `dates` and each array inside `dataCode` / `dataComments` / `dataBlanks`
 * are filled in the same order as `timeseries`.
 */
type output = {
  languages: string[];
  timeseries: timeseriesOutput[];
  dates: string[];
  dataCode: Record<string, number[]>;
  dataComments: Record<string, number[]>;
  dataBlanks: Record<string, number[]>;
};
// Turn the per-commit snapshots in ./working into dist/results.json (the file
// the chart page fetches) and copy the static page assets next to it.

// output dist directory; `recursive` makes this a no-op when it already exists
const outDir = path.join(__dirname, 'dist');
fs.mkdirSync(outDir, { recursive: true });

// Read out the files in the working directory.
// Only `.json` entries are kept so that a stray file (editor backup,
// .DS_Store, ...) never reaches JSON.parse below. Sorted descending so the
// latest snapshot of each day is seen first.
const workingDir = path.join(__dirname, 'working');
const files = fs
  .readdirSync(workingDir)
  .filter((name) => name.endsWith('.json'))
  .sort()
  .reverse();

// loop through files and get last file for each date
// file format is: 2023-06-04T12:04:27-7e42aebc55d6689fb9f5b1cd09804cc82fad41dd.json
const filesByDate = new Map<string, string>();
for (const file of files) {
  const date = file.split('T')[0];
  // First hit wins: `files` is in descending order, so that is the day's last snapshot.
  if (date && !filesByDate.has(date)) {
    filesByDate.set(date, file);
  }
}

// One snapshot per day, back in ascending file-name order.
const fileListReduced = Array.from(filesByDate, ([date, file]) => ({
  date,
  file,
}));
fileListReduced.sort((a, b) => (a.file < b.file ? -1 : a.file > b.file ? 1 : 0));

const out: output = {
  languages: [],
  timeseries: [],
  dates: [],
  dataCode: {},
  dataComments: {},
  dataBlanks: {},
};
const outLanguages = new Set<string>();

// Loop through and read each file
for (const { date, file } of fileListReduced) {
  const fileContent = fs.readFileSync(path.join(workingDir, file), 'utf-8');
  // NOTE(review): the parsed JSON is trusted to match TTokeiFile; nothing here validates it.
  const parsed = JSON.parse(fileContent) as TTokeiFile;
  const record: timeseriesOutput = { date, languages: {} };
  for (const [language, stats] of Object.entries(parsed)) {
    outLanguages.add(language);
    if (!stats) continue;
    // Copy only the three counters so nothing else from the snapshot leaks into the output.
    record.languages[language] = {
      comments: stats.comments,
      blanks: stats.blanks,
      code: stats.code,
    };
  }
  out.timeseries.push(record);
}
out.languages = Array.from(outLanguages);

// Set the final time series data
// we did not do this on the original loop due to some languages possibly not being present in any given file
out.dates = out.timeseries.map((t) => t.date);
for (const lang of out.languages) {
  // A language missing from a snapshot contributes 0 for that date, which
  // keeps every series the same length as `dates`.
  out.dataCode[lang] = out.timeseries.map(
    (rec) => rec.languages[lang]?.code ?? 0
  );
  out.dataComments[lang] = out.timeseries.map(
    (rec) => rec.languages[lang]?.comments ?? 0
  );
  out.dataBlanks[lang] = out.timeseries.map(
    (rec) => rec.languages[lang]?.blanks ?? 0
  );
}

// output the output file
const outFile = path.join(outDir, 'results.json');
fs.writeFileSync(outFile, JSON.stringify(out, null, 2));

//force copy stats.js and index.html into folder
for (const asset of ['stats.js', 'index.html']) {
  fs.copyFileSync(path.join(__dirname, asset), path.join(outDir, asset));
}
console.log('Results written to dist folder');
Display
Let’s now display this data in a chart. I’m using Chart.js for this, but you could use any charting library you like.
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Results</title>
    <!-- Chart.js, dayjs (used by stats.js for date labels) and the Chart.js date adapter -->
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dayjs@1/dayjs.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns/dist/chartjs-adapter-date-fns.bundle.min.js"></script>
    <style>
      /*
       * The chart is drawn with maintainAspectRatio: false, so it takes its
       * height from this container. A percentage height has nothing to resolve
       * against in standards mode (body height is auto), hence the
       * viewport-relative height capped by max-height.
       */
      #chart-container {
        position: relative;
        width: 100%;
        height: 100vh;
        max-height: 700px;
      }
      #chart {
        width: 100%;
      }
    </style>
  </head>
  <body>
    <h2>Git Stats</h2>
    <div id="chart-container">
      <canvas id="chart"></canvas>
    </div>
    <!-- Each checkbox value is the name of a series group in results.json -->
    <div id="chart-options">
      <label><input type="checkbox" value="dataCode" checked /> Code</label>
      <label><input type="checkbox" value="dataComments" /> Comments</label>
      <label><input type="checkbox" value="dataBlanks" /> Blank Lines</label>
    </div>
    <!-- Loaded last so the canvas and checkboxes already exist when it runs -->
    <script src="stats.js"></script>
  </body>
</html>
And the JavaScript:
// 2D drawing context of the <canvas id="chart"> element the chart is drawn on.
var ctx = document.querySelector('#chart').getContext('2d');
// Parsed results payload; null until setData() has stored it.
let data = null;
// The Chart instance; null until renderChart() has created it.
let chart = null;
/**
 * Store the JSON body of a fetch response in the module-level `data`.
 *
 * @param incoming response-like object exposing `json()` (and usually `ok`,
 *   `status`, `statusText`).
 * @returns a promise that resolves once `data` has been assigned.
 * @throws Error when the response reports a failure (`ok === false`), so a
 *   404/500 error page is reported as such instead of as a JSON syntax error.
 */
async function setData(incoming) {
  // Compared against `false` explicitly so callers passing a bare
  // `{ json() {...} }` object keep working.
  if (incoming.ok === false) {
    throw new Error(
      `Failed to load chart data: ${incoming.status} ${incoming.statusText}`
    );
  }
  data = await incoming.json();
}
/**
 * Build the series to plot from the ticked checkboxes in #chart-options.
 *
 * Each checkbox value names a group in `data` (label -> array of numbers).
 * The ticked groups are added together element by element, per label.
 * With nothing ticked, `data.dataCode` itself is returned.
 *
 * @returns an object mapping each label to its array of values.
 */
function dataSetParse() {
  const ticked = Array.from(
    document.querySelectorAll('#chart-options input:checked')
  );
  if (ticked.length === 0) {
    return data.dataCode;
  }

  const combined = {};
  ticked.forEach((box) => {
    const group = data[box.value];
    for (const label in group) {
      const values = group[label];
      // First group seen for a label is used as-is; later ones are summed
      // into a fresh array, so the arrays held by `data` are never modified.
      combined[label] = combined[label]
        ? combined[label].map((sum, idx) => sum + values[idx])
        : values;
    }
  });
  return combined;
}
/**
 * Re-plot the chart from the currently ticked options.
 *
 * Existing datasets are matched by label and get their data replaced; labels
 * not on the chart yet are appended. The 'Total' entry is never plotted.
 *
 * @returns a promise that resolves once the chart has been updated, or
 *   immediately when there is no chart yet.
 */
async function updateChart() {
  // A checkbox can be toggled before results.json has loaded and the chart
  // has been created; there is nothing to update in that case.
  if (!chart) return;

  const dataSet = dataSetParse();
  // update chart data
  for (const line in dataSet) {
    if (line === 'Total') continue;
    const found = chart.data.datasets.find((ds) => ds.label === line);
    if (found) {
      found.data = dataSet[line];
      continue;
    }
    chart.data.datasets.push({
      label: line,
      data: dataSet[line],
      fill: true,
    });
  }
  chart.update();
}
/**
 * Create the chart on `ctx` and keep it in the module-level `chart`.
 *
 * Labels come from `data.dates`; one filled line dataset is added for every
 * entry returned by dataSetParse() except 'Total'. The y axis is stacked and
 * the x axis is a day-resolution time scale.
 */
async function renderChart() {
  // Line data
  const plotData = { labels: data.dates, datasets: [] };
  const series = dataSetParse();
  for (const label in series) {
    if (label != 'Total') {
      plotData.datasets.push({ label: label, data: series[label], fill: true });
    }
  }

  // Shared date formatting for axis ticks and the tooltip heading
  const formatDay = (value) => dayjs(value).format('DD MMM, YYYY');

  const titlePlugin = {
    display: true,
    text: () => 'Chart.js Stacked Chart',
  };
  const tooltipPlugin = {
    mode: 'index',
    callbacks: {
      // Heading of the tooltip: the hovered date followed by a fixed suffix
      title: (items) => `${formatDay(items[0].parsed.x)} - Lines of Code`,
    },
  };

  const xAxis = {
    type: 'time',
    time: { unit: 'day' },
    ticks: { callback: (val) => formatDay(val) },
    title: { display: true, text: 'Date' },
  };
  const yAxis = {
    stacked: true,
    title: { display: true, text: 'Value' },
  };

  // Chartjs stacked area chart
  chart = new Chart(ctx, {
    type: 'line',
    data: plotData,
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { title: titlePlugin, tooltip: tooltipPlugin },
      interaction: { mode: 'nearest', axis: 'x', intersect: false },
      scales: { x: xAxis, y: yAxis },
    },
  });
}
// wait for page to finish render
document.addEventListener('DOMContentLoaded', () => {
  // fetch results.json (it sits next to this script in dist), store it, then draw.
  // Any failure along the chain is reported instead of being left as an
  // unhandled promise rejection.
  fetch('results.json')
    .then(setData)
    .then(renderChart)
    .catch((err) => {
      console.error('Could not load or render results.json', err);
    });

  // Re-plot whenever one of the series checkboxes is toggled
  const options = document.querySelectorAll('#chart-options input');
  options.forEach((option) => {
    option.addEventListener('change', () => {
      updateChart().catch((err) => {
        console.error('Could not update the chart', err);
      });
    });
  });
});
The Code
If you’d like to see the code, it’s available on GitHub. I am adding some additional stats to it as I need them. If needed, there is a tag blog-post
that will show the code as it was when I wrote this post.