Utilities

warn_once

warn_once(logger, message, cache: Optional[set] = None, *args, **kwargs)

Source code in aboba/utils/logging.py

def warn_once(logger, message, cache: Optional[set] = None, *args, **kwargs):
    """
    Log ``message`` as a warning the first time it is seen and skip repeats.

    Parameters
    ----------
    logger
        Object whose ``warning`` method receives ``message`` together with
        any extra ``*args`` and ``**kwargs``.
    message
        The warning text. It doubles as the de-duplication key, so it is
        looked up in and added to ``cache`` as-is.
    cache : set, optional
        Collection of messages that were already emitted. When None, the
        ``_cache`` name defined outside this function is used instead.
    *args, **kwargs
        Forwarded unchanged to ``logger.warning``. Because ``cache`` is the
        third positional parameter, extra positional arguments can only be
        supplied after an explicit ``cache`` value.

    Returns
    -------
    None
    """
    seen = _cache if cache is None else cache
    if message not in seen:
        # Mark the message as seen before logging it.
        seen.add(message)
        logger.warning(message, *args, **kwargs)

draw_interval

draw_interval(real_alpha: float, left_alpha: float, right_alpha: float, axes: Axes, alpha: float = 0.05, name: Optional[str] = None, lang: Language = 'en')

Visualize the confidence interval for the estimated Type I error rate (alpha).

This function draws a horizontal baseline, marks the theoretical alpha level with a vertical dashed line, fills the confidence interval around the empirical alpha, and marks the observed empirical alpha with a star. It is typically used in A/A testing to assess whether the statistical test maintains the nominal error rate.

Parameters

- real_alpha : float — The empirically observed Type I error rate (proportion of rejections when the null hypothesis is true).
- left_alpha : float — Left boundary of the confidence interval for real_alpha.
- right_alpha : float — Right boundary of the confidence interval for real_alpha.
- axes : plt.Axes — Matplotlib Axes object to draw the plot on.
- alpha : float, optional — The theoretical (nominal) significance level, by default 0.05.
- name : str, optional — Name of the experiment or group, used as part of the plot title. If None, only the rejection rate is shown in the title.
- lang : Language, optional — Language code for labels ('en' or 'ru'), by default 'en'.

Returns

None

Source code in aboba/utils/draw.py

def draw_interval(
    real_alpha: float,
    left_alpha: float,
    right_alpha: float,
    axes: plt.Axes,
    alpha: float = 0.05,
    name: Optional[str] = None,
    lang: Language = "en",
):
    """
    Draw the empirical Type I error rate together with its confidence interval.

    On a horizontal baseline the function marks the theoretical alpha level
    with a vertical dashed red line, shades the band between ``left_alpha``
    and ``right_alpha``, and places a star at the observed ``real_alpha``.
    It is typically used in A/A testing to check whether a statistical test
    keeps its nominal error rate.

    Parameters
    ----------
    real_alpha : float
        The empirically observed Type I error rate (proportion of rejections
        when the null hypothesis is true).
    left_alpha : float
        Left boundary of the confidence interval for `real_alpha`.
    right_alpha : float
        Right boundary of the confidence interval for `real_alpha`.
    axes : plt.Axes
        Matplotlib Axes object to draw the plot on.
    alpha : float, optional
        The theoretical (nominal) significance level, by default 0.05.
    name : str, optional
        Name of the experiment or group, prepended to the plot title.
        If None, the title starts with the capitalized rejection label.
    lang : Language, optional
        Language code passed to ``t`` for the title label, by default 'en'.

    Returns
    -------
    None
    """
    with sns.axes_style("whitegrid"):
        axes.grid(True)

        # Baseline along y = 0 that the other elements sit on.
        axes.hlines(0, 0, 1, color="black", lw=2, alpha=0.6)

        # Nominal significance level: vertical dashed red line.
        axes.vlines(alpha, -0.2, 0.2, color="red", lw=3, linestyle="--", alpha=0.9)

        # Shaded band covering the confidence interval.
        interval = [left_alpha, right_alpha]
        axes.fill_between(interval, [-0.1], [0.1], color="#20b2aa", alpha=0.4)

        # Star at the observed rejection rate.
        axes.scatter(real_alpha, 0, s=300, marker="*", color="red")

        # The x-range must show both the interval and the nominal alpha; pad it
        # by 5% of the interval width, but never by less than 0.002.
        pad = max(0.002, 0.05 * (right_alpha - left_alpha))
        x_low = min(alpha, left_alpha) - pad
        x_high = max(alpha, right_alpha) + pad
        axes.set_xlim((x_low, x_high))

        # Title: "<heading> = <rate>%, (<left>%, <right>%)".
        label = t('rejections', lang)
        heading = label.capitalize() if name is None else f"{name} | {label}"
        title = f"{heading} = {100 * real_alpha:.2f}%, ({100 * left_alpha:.2f}%, {100 * right_alpha:.2f}%)"
        axes.set_title(title, fontsize=12, pad=4, fontweight='bold')

        # The y-axis carries no information here: fix its range, hide ticks/labels.
        axes.set_ylim((-0.25, 0.25))
        axes.set_yticks([])
        axes.set_xlabel("")
        axes.set_ylabel("")

draw_pvalue_distribution

draw_pvalue_distribution(pvals: List[float], axes: Axes, name: Optional[str] = None, lang: Language = 'en')

Plot a histogram of p-values to assess their distribution.

Under the null hypothesis (e.g., in an A/A test), p-values should be uniformly distributed between 0 and 1. This plot helps diagnose issues like p-hacking, selection bias, or violations of test assumptions. A flat histogram around the density of 1.0 indicates a well-calibrated test.

Parameters

- pvals : List[float] — A list of p-values obtained from repeated statistical tests.
- axes : plt.Axes — Matplotlib Axes object to draw the plot on.
- name : str, optional — Name of the experiment or group, used as part of the plot title. If None, a generic title is used.
- lang : Language, optional — Language code for labels ('en' or 'ru'), by default 'en'.

Returns

None

Source code in aboba/utils/draw.py

def draw_pvalue_distribution(
    pvals: List[float],
    axes: plt.Axes,
    name: Optional[str] = None,
    lang: Language = "en",
):
    """
    Draw a density histogram of p-values on the given axes.

    Under the null hypothesis (e.g., in an A/A test), p-values should be
    uniformly distributed between 0 and 1, so a well-calibrated test gives
    a roughly flat histogram near the dashed reference line at density 1.0.
    Departures from that shape help diagnose issues such as p-hacking,
    selection bias, or violated test assumptions.

    Parameters
    ----------
    pvals : List[float]
        A list of p-values obtained from repeated statistical tests.
    axes : plt.Axes
        Matplotlib Axes object to draw the plot on.
    name : str, optional
        Name of the experiment or group, prepended to the plot title.
        If None or empty, only the generic title is used.
    lang : Language, optional
        Language code passed to ``t`` for the axis labels and title,
        by default 'en'.

    Returns
    -------
    None
    """
    hist_style = {
        "bins": 50,
        "density": True,
        "alpha": 0.4,
        "color": "#ec7c26",
        "edgecolor": "navy",
        "linewidth": 0.3,
    }

    with sns.axes_style("whitegrid"):
        axes.grid(True)

        # Only the drawn bars matter; the values returned by hist() are unused.
        axes.hist(pvals, **hist_style)

        # Reference level at density 1.0.
        axes.axhline(1.0, color="red", linestyle="--", linewidth=2, alpha=0.8)

        # Axis labels
        axes.set_xlabel(t('pvalue', lang), fontsize=10)
        axes.set_ylabel(t('density', lang), fontsize=10)

        # Title, optionally prefixed with the experiment name.
        base_title = t('pvalue_distribution', lang)
        title = f"{name} | {base_title}" if name else base_title
        axes.set_title(title, fontsize=12, pad=4)

        # X-axis ticks every 0.2
        axes.set_xticks(np.arange(0, 1.1, 0.2))

calculate_real_alpha

calculate_real_alpha(n_errors: int, n_iter: int, method: str = 'wilson')

Estimate the real alpha level and its confidence interval.

PARAMETER	DESCRIPTION
`n_errors`	number of errors in an experiment TYPE: `int`
`n_iter`	number of experiments TYPE: `int`
`method`	`proportion_confint` method TYPE: `str` DEFAULT: `'wilson'`

RETURNS	DESCRIPTION
	Tuple[float, float, float]: real_alpha, left_alpha, right_alpha

Source code in aboba/utils/alpha_interval.py

def calculate_real_alpha(n_errors: int, n_iter: int, method: str = "wilson"):
    """
    Estimate the real alpha level and its confidence interval.

    The point estimate is ``n_errors / n_iter``; the interval bounds come
    from ``proportion_confint(n_errors, n_iter, method=method)``.

    Args:
        n_errors (int): number of errors in an experiment
        n_iter (int): number of experiments; must be at least 1
        method (str): `proportion_confint` method

    Returns:
        Tuple[float, float, float]: real_alpha, left_alpha, right_alpha

    Raises:
        AssertionError: if ``n_iter`` is less than 1.
    """
    # Raise explicitly rather than use `assert`, so the check is not stripped
    # under `python -O` (where n_iter == 0 would otherwise surface as a bare
    # ZeroDivisionError). AssertionError and its message are kept so callers
    # observe the same exception as before.
    if not (n_iter >= 1):
        raise AssertionError(f"n_iter={n_iter}, but should be larger than 0.")

    real_alpha = n_errors / n_iter
    left_alpha, right_alpha = proportion_confint(n_errors, n_iter, method=method)

    return real_alpha, left_alpha, right_alpha

Logging

warn_once

warn_once

Visualization

draw_interval

draw_interval

Parameters

Returns

draw_pvalue_distribution

draw_pvalue_distribution

Parameters

Returns

Statistical Calculations

calculate_real_alpha

calculate_real_alpha