forgejo-stats/stats.py
Ari Archer 28604e768b
improve readability and scaling
Signed-off-by: Ari Archer <ari@ari.lt>
2024-06-21 21:45:37 +03:00

287 lines
8.4 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Forgejo statistics
Copyright (C) 2024 Ari Archer <ari@ari.lt>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>."""
import json
import typing as t
from collections import defaultdict
from datetime import datetime, timedelta
from warnings import filterwarnings as filter_warnings
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import requests
# Configuration

# Root of the Forgejo REST API and the account whose repositories are analysed
BASE_URL: str = "https://git.ari.lt/api/v1"
USERNAME: str = "ari"

# How many columns for the languages (legend columns; the figure size and the
# number of languages shown are scaled by this value as well)
LANG_COLS: t.Final[int] = 3

# Identities treated as "the author"; is_authoring() looks for each of them as
# a case-insensitive substring
AUTHORS: t.Tuple[str, ...] = (
    USERNAME,
    "TruncatedDinoSour",
    "Ari Archer",
    "ari.web.xyz@gmail.com",
    "ari@ari.lt",
    "B00bleaTeA",
    "4FAD63E936B305906A6C4894A50D5B4B599AF8A2",
)

# Repository names that are skipped entirely
IGNORE_REPOS: t.Tuple[str, ...] = ("dino-kernel", "sysvinit")

# Accent colour of the plots and resolution of the saved images
COLOUR: str = "#ffa647"
DPI: t.Final[int] = 200

# Maximum commit pages (similar to UI)
COMMIT_PAGE_LIMIT: t.Final[int] = 5

# How many years of your commit history the commit plot should cover
COMMIT_YEARS: t.Final[int] = 2
# Code
def is_authoring(author: t.Any) -> bool:
    """Tell whether `author` mentions one of the configured identities

    The argument is converted with str() and compared case-insensitively:
    the result is True as soon as any entry of AUTHORS occurs in it as a
    substring, False when none does."""
    haystack: str = str(author).lower()
    return any(identity.lower() in haystack for identity in AUTHORS)
def get_repositories(*, timeout: float = 30.0) -> t.List[t.Dict[str, t.Any]]:
    """Fetch all repositories of a user

    Requests the paginated `/users/{USERNAME}/repos` endpoint page by page
    until an empty page comes back. A repository is kept when its name is not
    listed in IGNORE_REPOS and its `owner` object passes is_authoring().

    Args:
        timeout: seconds passed to `requests.get` for every request, so a
            stalled connection raises instead of hanging the script.

    Returns:
        The kept repository objects, exactly as decoded from the API.

    Raises:
        requests.HTTPError: a page was answered with an error status.
        requests.Timeout: the server did not answer within `timeout`."""
    all_repos: t.List[t.Dict[str, t.Any]] = []
    page: int = 1

    while True:
        response: requests.Response = requests.get(
            f"{BASE_URL}/users/{USERNAME}/repos",
            params={"page": page, "limit": 100},
            timeout=timeout,
        )
        response.raise_for_status()

        repos: t.List[t.Any] = response.json()
        print(f"Got {len(repos)} repositories from page {page}")

        # An empty page marks the end of the listing
        if not repos:
            break

        for repo in repos:
            if repo["name"] not in IGNORE_REPOS and is_authoring(repo["owner"]):
                print(f"Author of {repo['name']}")
                all_repos.append(repo)

        page += 1

    return all_repos
def get_repo_languages(
    repo: t.Dict[str, t.Any], *, timeout: float = 30.0
) -> t.Dict[str, int]:
    """Fetch languages used in a specific repository

    Args:
        repo: repository object; only its `name` key is read.
        timeout: seconds passed to `requests.get`, so a stalled connection
            raises instead of hanging the script.

    Returns:
        The decoded response of `/repos/{USERNAME}/{name}/languages`: a
        mapping from language name to the figure the server reports for it.

    Raises:
        requests.HTTPError: the request was answered with an error status.
        requests.Timeout: the server did not answer within `timeout`."""
    response: requests.Response = requests.get(
        f"{BASE_URL}/repos/{USERNAME}/{repo['name']}/languages",
        timeout=timeout,
    )
    response.raise_for_status()

    languages: t.Dict[str, int] = response.json()
    print(f"Languages for {repo['name']}: {languages}")

    return languages
def get_repo_commits(
    repo: t.Dict[str, t.Any], *, timeout: float = 30.0
) -> t.List[t.Dict[str, t.Any]]:
    """Get the authored commit objects in a specific repository

    Reads at most COMMIT_PAGE_LIMIT pages of `/repos/{USERNAME}/{name}/commits`
    and keeps the commits accepted by is_authoring(). Paging ends early when
    the server returns an empty page or answers with status 409.

    Args:
        repo: repository object; only its `name` key is read.
        timeout: seconds passed to `requests.get` for every request, so a
            stalled connection raises instead of hanging the script.

    Returns:
        The authored commit objects, in the order the API returned them.

    Raises:
        requests.HTTPError: a page was answered with an error status other
            than 409.
        requests.Timeout: the server did not answer within `timeout`."""
    all_commits: t.List[t.Dict[str, t.Any]] = []

    for page in range(1, COMMIT_PAGE_LIMIT + 1):
        response: requests.Response = requests.get(
            f"{BASE_URL}/repos/{USERNAME}/{repo['name']}/commits",
            params={"author": USERNAME, "page": page, "limit": 100},
            timeout=timeout,
        )

        # NOTE(review): 409 presumably means the repository has no commits
        # yet - confirm against the server's API documentation
        if response.status_code == 409:
            break

        response.raise_for_status()

        page_commits: t.List[t.Dict[str, t.Any]] = response.json()
        commits: t.List[t.Dict[str, t.Any]] = [
            commit for commit in page_commits if is_authoring(commit)
        ]
        print(
            f"Got authored {len(commits)} commits from repository's {repo['name']} page {page}"
        )

        # Stop on an empty *raw* page only: a page that merely holds other
        # people's commits must not hide authored commits on later pages
        if not page_commits:
            break

        all_commits.extend(commits)

    return all_commits
def analyze_data(
    repos: t.List[t.Dict[str, t.Any]]
) -> t.Tuple[t.Dict[str, int], t.Dict[str, int]]:
    """Aggregate language usage and per-day commit counts over `repos`

    For every repository the figures from get_repo_languages() are summed per
    language, and every commit from get_repo_commits() is counted under the
    first ten characters of its author date (the part plot_data() later
    parses as `%Y-%m-%d`).

    Returns a `(language totals, commits per day)` pair of defaultdicts."""
    lang_totals: t.Dict[str, int] = defaultdict(int)
    commits_per_day: t.Dict[str, int] = defaultdict(int)
    repo_count: int = len(repos)

    for position, repo in enumerate(repos):
        # Progress is the share of repositories finished before this one
        print(f"[{position / repo_count:.2%}] Analyzing {repo['name']} ...")

        for name, amount in get_repo_languages(repo).items():
            lang_totals[name] += amount

        for entry in get_repo_commits(repo):
            day = entry["commit"]["author"]["date"][:10]
            commits_per_day[day] += 1

    return lang_totals, commits_per_day
def plot_data(
    language_stats: t.Dict[str, int],
    commit_activity: t.Dict[str, int],
    language_colours: t.Dict[str, str],
) -> None:
    """Plot language statistics and commit activity

    Args:
        language_stats: language name -> accumulated amount.
        commit_activity: `YYYY-MM-DD` date string -> number of commits.
        language_colours: lower-cased language name -> colour; languages
            without an entry are drawn in `#CCCCCC`.

    Writes `languages.png` and `commits.png` into the working directory. The
    language plot is skipped (with a message) when there is nothing to draw."""
    plt.style.use("dark_background")

    _plot_languages(language_stats, language_colours)
    _plot_commits(commit_activity)


def _plot_languages(
    language_stats: t.Dict[str, int],
    language_colours: t.Dict[str, str],
) -> None:
    """Draw the most used languages as one stacked bar into languages.png"""
    print("Plotting languages...")

    # Only the biggest 7 * LANG_COLS languages are shown
    ranked: t.List[t.Tuple[str, int]] = sorted(
        language_stats.items(), key=lambda x: x[1], reverse=True
    )[: 7 * LANG_COLS]
    total: int = sum(count for _, count in ranked)

    # Without data the zip() unpacking and the percentage maths below would
    # raise, so bail out early instead of crashing the whole run
    if total <= 0:
        print("No language data, skipping languages.png")
        return

    languages, counts = zip(*ranked)
    colours: t.List[str] = [
        language_colours.get(lang.lower(), "#CCCCCC") for lang in languages
    ]

    fig, ax = plt.subplots(figsize=((10 / 3) * LANG_COLS, (2 / 3) * LANG_COLS))
    fig.patch.set_facecolor("#000000")
    fig.patch.set_alpha(0.3)

    # Each segment starts where the previous ones end
    lefts: t.Any = np.cumsum([0] + list(counts[:-1]))
    for count, left, colour in zip(counts, lefts, colours):
        ax.barh(0, count, color=colour, left=left, edgecolor="white")

    ax.set_xlim(0, total)  # Ensure the x-axis matches the total width of all bars
    ax.axis("off")
    ax.set_title("Top Programming Languages by Usage", fontsize=14, color=COLOUR)
    ax.legend(
        handles=[plt.Rectangle((0, 0), 1, 1, color=colour) for colour in colours],
        labels=[
            f"{lang} - {count/total:.2%}" for lang, count in zip(languages, counts)
        ],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.05),
        ncol=LANG_COLS,
        fontsize=8,
        frameon=False,
    )

    fig.tight_layout()
    fig.savefig("languages.png", dpi=DPI)
    plt.close(fig)  # Release the figure once it is on disk
    print("Wrote languages.png")


def _plot_commits(commit_activity: t.Dict[str, int]) -> None:
    """Draw the commits per day of the last COMMIT_YEARS years into commits.png"""
    print("Plotting commits...")

    cutoff: datetime = datetime.now() - timedelta(days=(365.25 * COMMIT_YEARS))
    recent: t.Dict[str, int] = {
        date: count
        for date, count in commit_activity.items()
        if datetime.strptime(date, "%Y-%m-%d") >= cutoff
    }

    # ISO date strings sort chronologically
    days: t.List[str] = sorted(recent)
    counts: t.List[int] = [recent[day] for day in days]
    dates: t.List[np.datetime64] = [np.datetime64(day) for day in days]

    fig, ax = plt.subplots(figsize=(16, 9))
    fig.patch.set_facecolor("#000000")
    fig.patch.set_alpha(0.3)
    ax.patch.set_alpha(0.2)

    ax.fill_between(dates, counts, color=COLOUR, alpha=0.4)
    ax.plot(dates, counts, color=COLOUR, alpha=0.6)

    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))

    ax.set_xlabel("Date", fontsize=12, color="white")
    ax.set_ylabel("Number of Commits", fontsize=12, color="white")
    ax.set_title("Commit Activity", fontsize=14, color=COLOUR)
    ax.grid(True, which="both", linestyle="--", linewidth=0.5, color="gray")

    fig.autofmt_xdate()
    fig.tight_layout()
    fig.savefig("commits.png", dpi=DPI)
    plt.close(fig)  # Release the figure once it is on disk
    print("Wrote commits.png")
def main() -> int:
    """entry / main function

    Loads the language colours from `lang.json`, collects the repositories,
    aggregates their statistics and writes the plots.

    Returns:
        0, used as the process exit code.

    Raises:
        OSError: `lang.json` cannot be opened.
        json.JSONDecodeError: `lang.json` is not valid JSON."""
    # JSON is UTF-8; do not depend on the locale's default encoding
    with open("lang.json", "r", encoding="utf-8") as fp:
        language_colours: t.Dict[str, str] = json.load(fp)

    repos: t.List[t.Dict[str, t.Any]] = get_repositories()
    print(f"Found {len(repos)} repositories")

    print("Analyzing repositories...")
    language_stats, commit_activity = analyze_data(repos)

    print("Plotting data...")
    plot_data(language_stats, commit_activity, language_colours)

    return 0
if __name__ == "__main__":
    # Development sanity check: main() has to be annotated as returning int,
    # because its result is handed to SystemExit below
    assert main.__annotations__.get("return") is int, "main() should return an integer"

    # Turn every warning into an exception so none goes unnoticed
    filter_warnings("error", category=Warning)

    raise SystemExit(main())