Prometheus exporter for github repo stats (#38607)
* Prometheus exporter for GitHub repo stats. I want more Grafana charts. Already deployed on moon; this is just so other codebases can use it if they want.
* Add docs and issue/PR priorities
* Sure, add merge conflicts too
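For anyone who wants to reuse this: the exporter reads a GitHub token from the SS14_REPO_STATS_GITHUB_TOKEN environment variable, takes the listen port as a positional argument (plus an optional --host, defaulting to 0.0.0.0), and publishes everything as gauges under the repo_stats_ prefix. Below is a minimal smoke-test sketch against a locally running instance; port 9100 is an arbitrary example value, not anything the script mandates:

# Start the exporter first, e.g.:
#   SS14_REPO_STATS_GITHUB_TOKEN=<token> python3 Tools/github_repo_stats.py 9100
import requests

body = requests.get("http://localhost:9100/metrics", timeout=10).text
for line in body.splitlines():
    if line.startswith("repo_stats_"):
        print(line)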
Tools/github_repo_stats.py (new file, 241 lines)
@@ -0,0 +1,241 @@
#!/usr/bin/env python3

import argparse
import requests
import itertools
import signal
import sdnotify
import os
from io import StringIO
from abc import ABC, abstractmethod
from enum import Enum
from dataclasses import dataclass
from typing import Iterable, TypeVar, Callable, Any, Optional
from prometheus_client import start_http_server, CollectorRegistry
from prometheus_client.core import Metric, GaugeMetricFamily
import time
# from prometheus_client.registry import Collector

GITHUB_API_URL = "https://api.github.com/graphql"
GITHUB_TOKEN = os.environ["SS14_REPO_STATS_GITHUB_TOKEN"]
USER_AGENT = "github_repo_stats.py/1.0.0"

# We have a GraphQL client at home

class AbstractQuery(ABC):
    @abstractmethod
    def get_selector(self, alias: str):
        pass

class IssueState(Enum):
    CLOSED = 1,
    OPEN = 2,

class IssueQuery(AbstractQuery):
    def __init__(self, states: Optional[list[IssueState]] = None, labels: Optional[list[str]] = None) -> None:
        super().__init__()

        self.states = states
        self.labels = labels

    def get_selector(self, alias: str):
        query_args = []
        if self.states and len(self.states):
            query_args.append(f"states:{enum_join(self.states)}")
        if self.labels and len(self.labels):
            query_args.append(f"labels:{labels_join(self.labels)}")

        args = f"({','.join(query_args)})" if len(query_args) else ""

        return f"{alias}: issues{args}"

class PullRequestState(Enum):
    CLOSED = 1,
    MERGED = 2
    OPEN = 3,

class PullRequestQuery(AbstractQuery):
    def __init__(self, states: Optional[list[PullRequestState]] = None, labels: Optional[list[str]] = None) -> None:
        super().__init__()

        self.states = states
        self.labels = labels

    def get_selector(self, alias: str):
        query_args = []
        if self.states and len(self.states):
            query_args.append(f"states:{enum_join(self.states)}")

        if self.labels and len(self.labels):
            query_args.append(f"labels:{labels_join(self.labels)}")

        args = f"({','.join(query_args)})" if len(query_args) else ""

        return f"{alias}: pullRequests{args}"

@dataclass
class Repo:
    owner: str
    name: str

    queries: dict[str, AbstractQuery]

LABEL_UNTRIAGED = "S: Untriaged"
LABEL_NEEDS_REVIEW = "S: Needs Review"
LABEL_AWAITING_CHANGES = "S: Awaiting Changes"
LABEL_APPROVED = "S: Approved"
LABEL_P0 = "P0: Critical"
LABEL_P1 = "P1: High"
LABEL_CONFLICT = "S: Merge Conflict"

REPO_CONFIG = [
    Repo("space-wizards", "space-station-14", queries={
        # Issue queries
        "issue_total_count": IssueQuery(),
        "issue_open_count": IssueQuery(states=[IssueState.OPEN]),
        "issue_closed_count": IssueQuery(states=[IssueState.CLOSED]),
        "issue_untriaged_count": IssueQuery(states=[IssueState.OPEN], labels=[LABEL_UNTRIAGED]),

        # PR queries
        "pr_total_count": PullRequestQuery(),
        "pr_open_count": PullRequestQuery(states=[PullRequestState.OPEN]),
        "pr_closed_count": PullRequestQuery(states=[PullRequestState.CLOSED]),
        "pr_merged_count": PullRequestQuery(states=[PullRequestState.MERGED]),
        "pr_untriaged_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_UNTRIAGED]),
        "pr_needs_review_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_NEEDS_REVIEW]),
        "pr_awaiting_changes_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_AWAITING_CHANGES]),
        "pr_approved_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_APPROVED]),
        "pr_p0_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_P0]),
        "pr_p1_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_P1]),
        "pr_conflict_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_CONFLICT]),
    }),
    Repo("space-wizards", "RobustToolbox", queries={
        # Issue queries
        "issue_total_count": IssueQuery(),
        "issue_open_count": IssueQuery(states=[IssueState.OPEN]),
        "issue_closed_count": IssueQuery(states=[IssueState.CLOSED]),
        "issue_untriaged_count": IssueQuery(states=[IssueState.OPEN], labels=[LABEL_UNTRIAGED]),

        # PR queries
        "pr_total_count": PullRequestQuery(),
        "pr_open_count": PullRequestQuery(states=[PullRequestState.OPEN]),
        "pr_closed_count": PullRequestQuery(states=[PullRequestState.CLOSED]),
        "pr_merged_count": PullRequestQuery(states=[PullRequestState.MERGED]),
        "pr_untriaged_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_UNTRIAGED]),
        "pr_needs_review_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_NEEDS_REVIEW]),
        "pr_awaiting_changes_count": PullRequestQuery(states=[PullRequestState.OPEN], labels=[LABEL_AWAITING_CHANGES]),
    }),
    Repo("space-wizards", "docs", queries={
        # Issue queries
        "issue_total_count": IssueQuery(),
        "issue_open_count": IssueQuery(states=[IssueState.OPEN]),
        "issue_closed_count": IssueQuery(states=[IssueState.CLOSED]),

        # PR queries
        "pr_total_count": PullRequestQuery(),
        "pr_open_count": PullRequestQuery(states=[PullRequestState.OPEN]),
        "pr_closed_count": PullRequestQuery(states=[PullRequestState.CLOSED]),
        "pr_merged_count": PullRequestQuery(states=[PullRequestState.MERGED]),
    }),
]

E = TypeVar("E", bound=Enum)
def enum_join(iterable: Iterable[E]) -> str:
    return f"[{','.join((e.name for e in iterable))}]"

def labels_join(iterable: Iterable[str]) -> str:
    contents = ','.join(map(graphql_string, iterable))
    return f"[{contents}]"

T = TypeVar("T")
def first_or_default(iterable: Iterable[T], func: Callable[[T], bool]) -> Optional[T]:
    for val in iterable:
        if func(val):
            return val

    return None

REQUESTS_SESSION = requests.Session()
REQUESTS_SESSION.headers["User-Agent"] = USER_AGENT

def graphql_string(val: str) -> str:
    # This is probably good enough.
    # Note that this script doesn't accept (potentially malicious) user input.
    # If it did, I wouldn't be doing it like this.
    val_replaced = val.replace('"', '\\"')
    return f"\"{val_replaced}\""

def generate_graphql_query_for_repo(repo: Repo) -> str:
    strio = StringIO()

    strio.write("query {\n")
    strio.write(f"\trepository(owner: {graphql_string(repo.owner)}, name: {graphql_string(repo.name)})" + " {\n")

    for key, query in repo.queries.items():
        strio.write(f"\t\t{query.get_selector(key)}" + " {\n")
        strio.write("\t\t\ttotalCount\n")
        strio.write("\t\t}\n")

    strio.write("\t}\n")
    strio.write("}\n")

    return strio.getvalue()

def repo_key(repo: Repo) -> str:
    return f"{repo.owner}/{repo.name}"

def do_graphql_query(query: str) -> Any:
    with REQUESTS_SESSION.post(GITHUB_API_URL, headers={"Authorization": f"Bearer {GITHUB_TOKEN}"}, json={"query": query}) as resp:
        resp.raise_for_status()
        return resp.json()

CACHED_QUERIES = {repo_key(repo): generate_graphql_query_for_repo(repo) for repo in REPO_CONFIG}

# RHEL 9 ships with an old version of prometheus_client that doesn't have the "Collector" type...
class StatsCollector: #(Collector):
    def collect(self) -> Iterable[Metric]:
        query_time_metric = GaugeMetricFamily("repo_stats_query_time", "Time the GitHub query API call took", labels=["repo"])
        metrics: dict[str, GaugeMetricFamily] = {}
        # Make all the metric families.
        for repo in REPO_CONFIG:
            r_key = repo_key(repo)
            for key in repo.queries.keys():
                if key not in metrics:
                    metrics[key] = GaugeMetricFamily(f"repo_stats_{key}", key, labels=["repo"])

            # Do the requests and fill out the stats.
            query = CACHED_QUERIES[r_key]
            start_time = time.monotonic_ns()
            query_data = do_graphql_query(query)
            end_time = time.monotonic_ns()
            repo_data = query_data["data"]["repository"]

            for key in repo.queries.keys():
                value = repo_data[key]["totalCount"]
                metrics[key].add_metric([r_key], float(value))

            query_time_metric.add_metric([r_key], (end_time - start_time) / 1_000_000_000)

        yield from metrics.values()
        yield query_time_metric

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("port", type=int)
    parser.add_argument("--host", default="0.0.0.0")

    args = parser.parse_args()

    port = args.port
    host = args.host

    registry = CollectorRegistry(auto_describe=True)
    registry.register(StatsCollector())

    start_http_server(port, host, registry)

    sdnotify.SystemdNotifier().notify("READY=1")

    signal.pause()

main()
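Worth noting for reuse elsewhere: the script relies on prometheus_client's custom-collector hook rather than pre-registered Gauge objects, so StatsCollector.collect() (and therefore the GitHub GraphQL request) runs on every scrape rather than on a timer. A stripped-down sketch of that pattern follows; DemoCollector and its values are made up for illustration and are not part of the exporter.

from typing import Iterable

from prometheus_client import CollectorRegistry, generate_latest
from prometheus_client.core import GaugeMetricFamily, Metric


class DemoCollector:
    # prometheus_client duck-types collectors: anything with a collect()
    # method that yields metric families can be registered.
    def collect(self) -> Iterable[Metric]:
        g = GaugeMetricFamily("demo_count", "Example gauge", labels=["repo"])
        g.add_metric(["example/repo"], 42.0)
        yield g


registry = CollectorRegistry(auto_describe=True)
registry.register(DemoCollector())
print(generate_latest(registry).decode())

In the real collector, each configured repo contributes one sample per query key, labelled with "owner/name", plus a repo_stats_query_time gauge recording how long the GraphQL call took.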