Skip to content

Multiple Group Tests

Tests for comparing more than two groups simultaneously.

F-Tests

FIndependentTest

FIndependentTest

Bases: BaseTest

Performs a custom F-test (ANOVA-style) for comparing multiple independent groups.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/f_test.py
class FIndependentTest(BaseTest):
    """
    Performs a custom F-test (one-way ANOVA) for comparing the means of
    multiple independent groups.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """

    def __init__(
        self,
        value_column="target",
    ):
        """
        Independent F-test (one-way ANOVA) for comparing group means.

        This test compares the means of two or more independent groups to
        determine if there are statistically significant differences between
        them, using the ratio of between-group to within-group variability.

        Args:
            value_column (str): Name of the column containing the values to test.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.f_test import FIndependentTest

            # Create sample data
            np.random.seed(42)
            group1 = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
            group2 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
            group3 = pd.DataFrame({'target': np.random.normal(10, 1.5, 50)})

            # Perform the test
            test = FIndependentTest(value_column='target')
            result = test.test([group1, group2, group3], {})
            print(f"P-value: {result.pvalue:.4f}")
            ```
        """
        super().__init__()
        self.value_column = value_column

    def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
        """
        Perform the independent F-test on the provided groups.

        Args:
            groups (List[pd.DataFrame]): List of DataFrames representing the groups to compare.
            artefacts (dict): Dictionary to store additional results (unused here).

        Returns:
            TestResult: Object containing the p-value.

        Raises:
            ValueError: If fewer than two groups are given, or the total number
                of observations does not exceed the number of groups (the
                F distribution needs positive degrees of freedom).
        """

        k = len(groups)
        if k < 2:
            raise ValueError("F-test requires at least two groups.")

        ns = list(map(len, groups))
        N = sum(ns)
        if N <= k:
            # dfd = N - k must be positive; otherwise the division below fails.
            raise ValueError("Groups must contain more observations in total than there are groups.")

        # Per-group means.
        X_dot_j = [group[self.value_column].sum() / len(group) for group in groups]

        # Grand mean over all observations.
        X_sum = sum(group[self.value_column].sum() for group in groups) / N

        # Within-group sum of squares.
        V_in = sum(
            ((group[self.value_column] - X_dot) ** 2).sum()
            for group, X_dot in zip(groups, X_dot_j)
        )

        # Between-group sum of squares.
        V_out = sum(n_j * ((X_dot - X_sum) ** 2) for n_j, X_dot in zip(ns, X_dot_j))

        # F statistic: between-group mean square over within-group mean square.
        F_X = (V_out / (k - 1)) / (V_in / (N - k))
        distr = sps.f(dfn=k - 1, dfd=N - k)
        pvalue = distr.sf(F_X)

        return TestResult(pvalue=pvalue)

__init__

__init__(value_column='target')

Independent F-test (one-way ANOVA) for comparing group means.

This test compares the means of two or more independent groups to determine if there are statistically significant differences between them.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.f_test import FIndependentTest

# Create sample data
np.random.seed(42)
group1 = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
group2 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
group3 = pd.DataFrame({'target': np.random.normal(10, 1.5, 50)})

# Perform the test
test = FIndependentTest(value_column='target')
result = test.test([group1, group2, group3], {})
print(f"P-value: {result.pvalue:.4f}")
Source code in aboba/tests/multiple/f_test.py
def __init__(
    self,
    value_column="target",
):
    """
    Independent F-test (one-way ANOVA) for comparing group means.

    This test compares the means of two or more independent groups to determine
    if there are statistically significant differences between them.

    Args:
        value_column (str): Name of the column containing the values to test.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.f_test import FIndependentTest

        # Create sample data
        np.random.seed(42)
        group1 = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
        group2 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
        group3 = pd.DataFrame({'target': np.random.normal(10, 1.5, 50)})

        # Perform the test
        test = FIndependentTest(value_column='target')
        result = test.test([group1, group2, group3], {})
        print(f"P-value: {result.pvalue:.4f}")
        ```
    """
    super().__init__()
    self.value_column = value_column

test

test(groups: List[DataFrame], artefacts: Dict = {}) -> TestResult

Perform the independent F-test on the provided groups.

PARAMETER DESCRIPTION
groups

List of DataFrames representing the groups to compare.

TYPE: List[DataFrame]

artefacts

Dictionary to store additional results.

TYPE: dict DEFAULT: {}

RETURNS DESCRIPTION
TestResult

Object containing the p-value.

TYPE: TestResult

Source code in aboba/tests/multiple/f_test.py
def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
    """
    Run the independent F-test (one-way ANOVA) over the supplied groups.

    Args:
        groups (List[pd.DataFrame]): One DataFrame per group to compare.
        artefacts (dict): Store for additional results (unused here).

    Returns:
        TestResult: Object carrying the resulting p-value.
    """

    sizes = [len(frame) for frame in groups]
    total = sum(sizes)
    n_groups = len(groups)

    # Mean of each individual group.
    group_means = []
    for frame in groups:
        group_means.append(frame[self.value_column].sum() / len(frame))

    # Grand mean over every observation.
    grand_mean = sum(frame[self.value_column].sum() for frame in groups) / total

    # Within-group sum of squares.
    within_ss = sum(
        ((frame[self.value_column] - mean) ** 2).sum()
        for frame, mean in zip(groups, group_means)
    )

    # Between-group sum of squares.
    between_ss = sum(
        size * ((mean - grand_mean) ** 2)
        for size, mean in zip(sizes, group_means)
    )

    # F statistic = between-group mean square / within-group mean square.
    f_stat = (between_ss / (n_groups - 1)) / (within_ss / (total - n_groups))
    pvalue = sps.f(dfn=n_groups - 1, dfd=total - n_groups).sf(f_stat)

    return TestResult(pvalue=pvalue)

FRelatedTest

FRelatedTest

Bases: BaseTest

Performs a custom F-test for comparing multiple related (paired) groups, akin to repeated-measures ANOVA.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/f_test.py
class FRelatedTest(BaseTest):
    """
    Performs a custom F-test for comparing multiple related (paired) groups,
    akin to repeated-measures ANOVA.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """

    def __init__(
        self,
        value_column="target",
    ):
        """
        Related (paired) F-test for comparing group means (repeated measures).

        This test compares the means of two or more related groups (repeated
        measures on the same subjects) to determine if there are statistically
        significant differences between them.

        Args:
            value_column (str): Name of the column containing the values to test.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.f_test import FRelatedTest

            # Create sample paired data
            np.random.seed(42)
            subjects = 30
            treatments = 3
            data = []
            for i in range(subjects):
                base = np.random.normal(10, 2)
                for j in range(treatments):
                    data.append({
                        'subject': i,
                        'treatment': j,
                        'target': base + np.random.normal(0, 0.5) + j * 0.5
                    })
            df = pd.DataFrame(data)

            # Split into groups
            groups = [df[df['treatment'] == i][['target']] for i in range(treatments)]

            # Perform the test
            test = FRelatedTest(value_column='target')
            result = test.test(groups, {})
            print(f"P-value: {result.pvalue:.4f}")
            ```
        """
        super().__init__()
        self.value_column = value_column

    def test(self, groups: List[pd.DataFrame], artefacts) -> TestResult:
        """
        Executes an F-test for multiple related groups by computing an overall
        between-group and within-subject variability measure.

        Args:
            groups (List[pd.DataFrame]): A list of DataFrames, each representing a group/sample.
            artefacts (dict): A dictionary for storing or retrieving additional test information.

        Returns:
            TestResult: A `TestResult` object containing the p-value from the computed F-test.

        Raises:
            ValueError: If the groups do not all have the same number of rows
                (paired design requires equal sizes, row i matched across groups).
        """

        ns = list(map(len, groups))
        n = ns[0]

        k = len(groups)

        # Explicit raise instead of `assert`: asserts are stripped under -O,
        # which would silently allow mismatched group sizes through.
        if any(size != n for size in ns):
            raise ValueError("Groups must have the same size")

        # Per-group (treatment) means.
        X_dot_j = [group[self.value_column].sum() / len(group) for group in groups]
        # Per-subject means across groups (positional pairing via to_numpy()).
        X_i_dot = sum(group[self.value_column].to_numpy() for group in groups) / k
        # Grand mean over all observations.
        X_sum = sum(group[self.value_column].sum() for group in groups) / (k * n)

        # Between-treatment sum of squares.
        V_beta = sum(((X_dot - X_sum) ** 2).sum() for X_dot in X_dot_j) * n

        # Residual (within-subject) sum of squares.
        V_in = sum(
            ((group[self.value_column] - X_j - X_i_dot + X_sum) ** 2).sum()
            for X_j, group in zip(X_dot_j, groups)
        )

        if np.isclose(V_in, 0):
            # Degenerate data: no residual variability. Treat identical groups
            # as F=0 (p=1) and any between-group difference as effectively
            # infinite evidence (huge F, p~0).
            if np.isclose(V_beta, 0):
                F_X = 0
            else:
                F_X = 1e10
        else:
            F_X = (V_beta / (k - 1)) / (V_in / ((n - 1) * (k - 1)))
        distr = sps.f(dfn=k - 1, dfd=(n - 1) * (k - 1))
        pvalue = distr.sf(F_X)

        return TestResult(pvalue=pvalue)

__init__

__init__(value_column='target')

Related (paired) F-test for comparing group means (repeated measures).

This test compares the means of two or more related groups (repeated measures) to determine if there are statistically significant differences between them.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.f_test import FRelatedTest

# Create sample paired data
np.random.seed(42)
subjects = 30
treatments = 3
data = []
for i in range(subjects):
    base = np.random.normal(10, 2)
    for j in range(treatments):
        data.append({
            'subject': i,
            'treatment': j,
            'target': base + np.random.normal(0, 0.5) + j * 0.5
        })
df = pd.DataFrame(data)

# Split into groups
groups = [df[df['treatment'] == i][['target']] for i in range(treatments)]

# Perform the test
test = FRelatedTest(value_column='target')
result = test.test(groups, {})
print(f"P-value: {result.pvalue:.4f}")
Source code in aboba/tests/multiple/f_test.py
def __init__(
    self,
    value_column="target",
):
    """
    Related (paired) F-test for comparing group means (repeated measures).

    This test compares the means of two or more related groups (repeated measures)
    to determine if there are statistically significant differences between them.

    Args:
        value_column (str): Name of the column containing the values to test.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.f_test import FRelatedTest

        # Create sample paired data
        np.random.seed(42)
        subjects = 30
        treatments = 3
        data = []
        for i in range(subjects):
            base = np.random.normal(10, 2)
            for j in range(treatments):
                data.append({
                    'subject': i,
                    'treatment': j,
                    'target': base + np.random.normal(0, 0.5) + j * 0.5
                })
        df = pd.DataFrame(data)

        # Split into groups
        groups = [df[df['treatment'] == i][['target']] for i in range(treatments)]

        # Perform the test
        test = FRelatedTest(value_column='target')
        result = test.test(groups, {})
        print(f"P-value: {result.pvalue:.4f}")
        ```
    """
    super().__init__()
    self.value_column = value_column

test

test(groups: List[DataFrame], artefacts) -> TestResult

Executes an F-test for multiple related groups by computing an overall between-group and within-subject variability measure.

PARAMETER DESCRIPTION
groups

A list of DataFrames, each representing a group/sample.

TYPE: List[DataFrame]

artefacts

A dictionary for storing or retrieving additional test information.

TYPE: dict

RETURNS DESCRIPTION
TestResult

A TestResult object containing the p-value from the computed F-test.

TYPE: TestResult

Source code in aboba/tests/multiple/f_test.py
def test(self, groups: List[pd.DataFrame], artefacts) -> TestResult:
    """
    Repeated-measures style F-test across multiple related groups.

    Decomposes total variability into a between-treatment part and a
    within-subject residual, then compares their mean squares.

    Args:
        groups (List[pd.DataFrame]): One DataFrame per group/sample; rows are
            paired positionally across groups.
        artefacts (dict): Store for additional test information (unused here).

    Returns:
        TestResult: Wraps the p-value of the computed F statistic.
    """

    sizes = list(map(len, groups))
    n_subjects = sizes[0]

    n_groups = len(groups)

    assert all(size == sizes[0] for size in sizes), "Groups must have the same size"

    # Treatment means, per-subject means, and the grand mean.
    treat_means = [frame[self.value_column].sum() / len(frame) for frame in groups]
    subj_means = sum(frame[self.value_column].to_numpy() for frame in groups) / n_groups
    grand_mean = sum(frame[self.value_column].sum() for frame in groups) / (n_groups * n_subjects)

    # Between-treatment sum of squares.
    between_ss = sum(((mean - grand_mean) ** 2).sum() for mean in treat_means) * n_subjects

    # Residual (within-subject) sum of squares.
    residual_ss = sum(
        ((frame[self.value_column] - mean - subj_means + grand_mean) ** 2).sum()
        for mean, frame in zip(treat_means, groups)
    )

    if np.isclose(residual_ss, 0):
        # Degenerate data: no residual variance left to normalize by.
        if np.isclose(between_ss, 0):
            f_stat = 0
        else:
            f_stat = 1e10
    else:
        f_stat = (between_ss / (n_groups - 1)) / (residual_ss / ((n_subjects - 1) * (n_groups - 1)))
    pvalue = sps.f(dfn=n_groups - 1, dfd=(n_subjects - 1) * (n_groups - 1)).sf(f_stat)

    return TestResult(pvalue=pvalue)

FOneWayIndependentTest

FOneWayIndependentTest

Bases: BaseTest

Performs a one-way ANOVA test using SciPy's built-in f_oneway function for multiple independent groups.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/f_test.py
class FOneWayIndependentTest(BaseTest):
    """
    One-way ANOVA over multiple independent groups, delegating the actual
    computation to SciPy's `f_oneway`.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """

    def __init__(
        self,
        value_column="target",
    ):
        """
        Args:
            value_column (str): Name of the column containing the values to test.
        """
        super().__init__()
        self.value_column = value_column

    def test(self, groups: List[pd.DataFrame], artefacts) -> TestResult:
        """
        Run `scipy.stats.f_oneway` on the provided groups.

        Args:
            groups (List[pd.DataFrame]): One DataFrame per group/sample.
            artefacts (dict): Store for additional test information (unused here).

        Returns:
            TestResult: Wraps the ANOVA p-value.
        """
        samples = [frame[self.value_column] for frame in groups]
        outcome = sps.f_oneway(*samples)
        return TestResult(pvalue=outcome.pvalue)

test

test(groups: List[DataFrame], artefacts) -> TestResult

Executes a one-way ANOVA on the provided groups using scipy.stats.f_oneway.

PARAMETER DESCRIPTION
groups

A list of DataFrames, each representing a group/sample.

TYPE: List[DataFrame]

artefacts

A dictionary for storing or retrieving additional test information.

TYPE: dict

RETURNS DESCRIPTION
TestResult

A TestResult object containing the p-value from the ANOVA.

TYPE: TestResult

Source code in aboba/tests/multiple/f_test.py
def test(self, groups: List[pd.DataFrame], artefacts) -> TestResult:
    """
    Run `scipy.stats.f_oneway` (one-way ANOVA) on the provided groups.

    Args:
        groups (List[pd.DataFrame]): One DataFrame per group/sample.
        artefacts (dict): Store for additional test information (unused here).

    Returns:
        TestResult: Wraps the ANOVA p-value.
    """
    samples = [frame[self.value_column] for frame in groups]
    outcome = sps.f_oneway(*samples)
    return TestResult(pvalue=outcome.pvalue)

Variance Tests

BartletIndependentTest

BartletIndependentTest

Bases: BaseTest

Performs Bartlett's test to check if groups have equal variance.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/bartlet.py
class BartletIndependentTest(BaseTest):
    """
    Performs Bartlett's test to check if groups have equal variance.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """
    def __init__(
        self,
        value_column="target",
    ):
        """
        Bartlett's test for equal variances across multiple groups.

        This test checks the null hypothesis that all input samples are from populations
        with equal variances. It is commonly used before performing ANOVA to verify
        the assumption of homoscedasticity.

        Args:
            value_column (str): Name of the column containing the values to test.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.bartlet import BartletIndependentTest

            # Create sample data with equal variances
            np.random.seed(42)
            group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
            group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
            group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

            # Perform the test
            test = BartletIndependentTest(value_column='target')
            result = test.test([group1, group2, group3], {})
            print(f"P-value: {result.pvalue:.4f}")

            # Create data with unequal variances
            group1_unequal = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
            group2_unequal = pd.DataFrame({'target': np.random.normal(12, 3, 50)})
            group3_unequal = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

            result_unequal = test.test([group1_unequal, group2_unequal, group3_unequal], {})
            print(f"P-value (unequal variances): {result_unequal.pvalue:.4f}")
            ```
        """
        super().__init__()
        self.value_column = value_column

    def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
        """
        Perform Bartlett's test for equal variances.

        Args:
            groups (List[pd.DataFrame]): List of DataFrames representing the groups to compare.
            artefacts (dict): Dictionary to store additional results (unused here).

        Returns:
            TestResult: Object containing the p-value.

        Raises:
            ValueError: If fewer than two groups are provided, any group has
                fewer than two observations, or any group has zero variance —
                the statistic is undefined in those cases (previously they
                silently produced inf/nan via division by zero or log(0)).
        """

        xs = [g[self.value_column].to_numpy() for g in groups]

        k = len(xs)
        if k < 2:
            raise ValueError("Bartlett's test requires at least two groups.")
        if any(len(x) < 2 for x in xs):
            raise ValueError("Each group must contain at least two observations.")

        ns = np.array([len(x) for x in xs], dtype=float)
        vars_ = np.array([x.var(ddof=1) for x in xs], dtype=float)
        if np.any(vars_ == 0):
            raise ValueError("Bartlett's statistic is undefined for zero-variance groups.")

        n_total = ns.sum()

        # Pooled (weighted) variance estimate.
        pooled_var = np.sum((ns - 1) * vars_) / (n_total - k)

        # Uncorrected Bartlett statistic:
        # (N - k) ln(s_p^2) - sum_j (n_j - 1) ln(s_j^2)
        numerator = (
            (n_total - k) * np.log(pooled_var)
            - np.sum((ns - 1) * np.log(vars_))
        )

        # Small-sample correction factor.
        correction = 1.0 + (1.0 / (3 * (k - 1))) * (
            np.sum(1.0 / (ns - 1)) - 1.0 / (n_total - k)
        )

        chi2_stat = numerator / correction
        df = k - 1

        pvalue = chi2.sf(chi2_stat, df)

        return TestResult(
            pvalue=pvalue
        )

__init__

__init__(value_column='target')

Bartlett's test for equal variances across multiple groups.

This test checks the null hypothesis that all input samples are from populations with equal variances. It is commonly used before performing ANOVA to verify the assumption of homoscedasticity.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.bartlet import BartletIndependentTest

# Create sample data with equal variances
np.random.seed(42)
group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

# Perform the test
test = BartletIndependentTest(value_column='target')
result = test.test([group1, group2, group3], {})
print(f"P-value: {result.pvalue:.4f}")

# Create data with unequal variances
group1_unequal = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
group2_unequal = pd.DataFrame({'target': np.random.normal(12, 3, 50)})
group3_unequal = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

result_unequal = test.test([group1_unequal, group2_unequal, group3_unequal], {})
print(f"P-value (unequal variances): {result_unequal.pvalue:.4f}")
Source code in aboba/tests/multiple/bartlet.py
def __init__(
    self,
    value_column: str = "target",
):
    """
    Bartlett's test for equal variances across multiple groups.

    This test checks the null hypothesis that all input samples are from populations
    with equal variances. It is commonly used before performing ANOVA to verify
    the assumption of homoscedasticity.

    Args:
        value_column (str): Name of the column containing the values to test.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.bartlet import BartletIndependentTest

        # Create sample data with equal variances
        np.random.seed(42)
        group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
        group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
        group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

        # Perform the test
        test = BartletIndependentTest(value_column='target')
        result = test.test([group1, group2, group3], {})
        print(f"P-value: {result.pvalue:.4f}")

        # Create data with unequal variances
        group1_unequal = pd.DataFrame({'target': np.random.normal(10, 1, 50)})
        group2_unequal = pd.DataFrame({'target': np.random.normal(12, 3, 50)})
        group3_unequal = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

        result_unequal = test.test([group1_unequal, group2_unequal, group3_unequal], {})
        print(f"P-value (unequal variances): {result_unequal.pvalue:.4f}")
        ```
    """
    super().__init__()
    # Column holding the metric values tested for homoscedasticity.
    self.value_column = value_column

test

test(groups: List[DataFrame], artefacts: Dict = {}) -> TestResult

Perform Bartlett's test for equal variances.

PARAMETER DESCRIPTION
groups

List of DataFrames representing the groups to compare.

TYPE: List[DataFrame]

RETURNS DESCRIPTION
TestResult

Object containing the p-value.

TYPE: TestResult

Source code in aboba/tests/multiple/bartlet.py
def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
    """
    Bartlett's test of the null hypothesis that all groups share one variance.

    Args:
        groups (List[pd.DataFrame]): Groups/samples to compare.
        artefacts (dict): Store for additional results (unused here).

    Returns:
        TestResult: Carries the chi-square p-value.
    """

    samples = [frame[self.value_column].to_numpy() for frame in groups]

    n_groups = len(samples)
    counts = np.array([len(s) for s in samples], dtype=float)
    sample_vars = np.array([s.var(ddof=1) for s in samples], dtype=float)

    total = counts.sum()

    # Pooled (weighted) variance estimate.
    pooled = np.sum((counts - 1) * sample_vars) / (total - n_groups)

    # Uncorrected Bartlett statistic.
    raw_stat = (
        (total - n_groups) * np.log(pooled)
        - np.sum((counts - 1) * np.log(sample_vars))
    )

    # Small-sample correction factor.
    scale = 1.0 + (1.0 / (3 * (n_groups - 1))) * (
        np.sum(1.0 / (counts - 1)) - 1.0 / (total - n_groups)
    )

    stat = raw_stat / scale
    dof = n_groups - 1

    pvalue = chi2.sf(stat, dof)

    return TestResult(pvalue=pvalue)

Post-Hoc Tests

HSDTukeyTest

HSDTukeyTest

Bases: BaseTest

Performs Tukey's HSD (honestly significant difference) test for multiple comparison of group means.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/hsd.py
class HSDTukeyTest(BaseTest):
    """
    Performs Tukey's HSD (honestly significant difference) test for multiple
    comparison of group means.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """
    def __init__(
        self,
        value_column="target",
    ):
        """
        Tukey's Honestly Significant Difference (HSD) test for multiple comparisons.

        This post-hoc test is used to find means that are significantly different
        from each other after an ANOVA test indicates significant differences exist.
        It controls the family-wise error rate.

        Args:
            value_column (str): Name of the column containing the values to test.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.hsd import HSDTukeyTest

            # Create sample data with three groups
            np.random.seed(42)
            group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
            group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
            group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

            # Perform the test
            test = HSDTukeyTest(value_column='target')
            result = test.test([group1, group2, group3], {})
            print(f"Minimum p-value: {result.pvalue:.4f}")
            ```
        """
        super().__init__()
        self.value_column = value_column
        # NOTE(review): a bare `raise NotImplementedError("")` previously ended
        # this constructor even though `test` below is fully implemented,
        # making the class impossible to instantiate. Removed as a leftover
        # guard — confirm results against scipy.stats.tukey_hsd before relying
        # on them.

    def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
        """
        Perform Tukey's HSD test for multiple comparisons.

        Args:
            groups (List[pd.DataFrame]): List of DataFrames representing the groups to compare.
            artefacts (dict): Dictionary to store additional results (unused here).

        Returns:
            TestResult: Object containing the minimum p-value from all pairwise comparisons.

        Raises:
            ValueError: If fewer than two groups are provided.
        """

        k = len(groups)
        if k < 2:
            raise ValueError("Tukey HSD requires at least two groups.")

        # Coerce to numeric, dropping values that cannot be parsed.
        samples = [
            pd.to_numeric(g[self.value_column], errors="coerce")
              .dropna()
              .to_numpy()
            for g in groups
        ]

        ns = np.array([x.size for x in samples])
        means = np.array([x.mean() for x in samples])

        N = ns.sum()
        df = N - k

        # Residual (within-group) sum of squares and mean square error.
        sse = sum(((x - m) ** 2).sum() for x, m in zip(samples, means))
        mse = sse / df

        min_p = 1.0
        for i in range(k):
            for j in range(i + 1, k):
                # Tukey-Kramer standard error (handles unequal group sizes).
                se = np.sqrt((mse / 2.0) * (1.0 / ns[i] + 1.0 / ns[j]))
                if se == 0:
                    # Degenerate pair (no residual variance); skip it.
                    continue
                q = abs(means[i] - means[j]) / se
                p = studentized_range.sf(q, k, df)
                if p < min_p:
                    min_p = p

        return TestResult(pvalue=float(min_p))

__init__

__init__(value_column='target')

Tukey's Honestly Significant Difference (HSD) test for multiple comparisons.

This post-hoc test is used to find means that are significantly different from each other after an ANOVA test indicates significant differences exist. It controls the family-wise error rate.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.hsd import HSDTukeyTest

# Create sample data with three groups
np.random.seed(42)
group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

# Perform the test
test = HSDTukeyTest(value_column='target')
result = test.test([group1, group2, group3], {})
print(f"Minimum p-value: {result.pvalue:.4f}")
Source code in aboba/tests/multiple/hsd.py
def __init__(
    self,
    value_column="target",
):
    """
    Tukey's Honestly Significant Difference (HSD) test for multiple comparisons.

    This post-hoc test is used to find means that are significantly different
    from each other after an ANOVA test indicates significant differences exist.
    It controls the family-wise error rate.

    Args:
        value_column (str): Name of the column containing the values to test.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.hsd import HSDTukeyTest

        # Create sample data with three groups
        np.random.seed(42)
        group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
        group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
        group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

        # Perform the test
        test = HSDTukeyTest(value_column='target')
        result = test.test([group1, group2, group3], {})
        print(f"Minimum p-value: {result.pvalue:.4f}")
        ```
    """
    super().__init__()
    self.value_column = value_column
    # NOTE(review): a bare `raise NotImplementedError("")` previously ended
    # this constructor even though the `test` method is fully implemented,
    # making the class impossible to instantiate. Removed as a leftover guard.

test

test(groups: List[DataFrame], artefacts: Dict = {}) -> TestResult

Perform Tukey's HSD test for multiple comparisons.

PARAMETER DESCRIPTION
groups

List of DataFrames representing the groups to compare.

TYPE: List[DataFrame]

RETURNS DESCRIPTION
TestResult

Object containing the minimum p-value from all pairwise comparisons.

TYPE: TestResult

Source code in aboba/tests/multiple/hsd.py
def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
    """
    Tukey's HSD over every pair of groups; reports the smallest pairwise p-value.

    Args:
        groups (List[pd.DataFrame]): Groups/samples to compare.
        artefacts (dict): Store for additional results (unused here).

    Returns:
        TestResult: Holds the minimum p-value across all pairwise comparisons.

    Raises:
        ValueError: If fewer than two groups are supplied.
    """

    n_groups = len(groups)
    if n_groups < 2:
        raise ValueError("Tukey HSD requires at least two groups.")

    # Coerce each group's column to numeric, dropping unparseable values.
    samples = []
    for frame in groups:
        cleaned = pd.to_numeric(frame[self.value_column], errors="coerce").dropna()
        samples.append(cleaned.to_numpy())

    counts = np.array([s.size for s in samples])
    centers = np.array([s.mean() for s in samples])

    total = counts.sum()
    dof = total - n_groups

    # Residual sum of squares and mean square error.
    residual = sum(((s, c) and ((s - c) ** 2).sum()) for s, c in zip(samples, centers))
    mean_sq_err = residual / dof

    smallest = 1.0
    for a in range(n_groups):
        for b in range(a + 1, n_groups):
            # Tukey-Kramer standard error (handles unequal group sizes).
            stderr = np.sqrt((mean_sq_err / 2.0) * (1.0 / counts[a] + 1.0 / counts[b]))
            if stderr == 0:
                continue
            q_stat = abs(centers[a] - centers[b]) / stderr
            pair_p = studentized_range.sf(q_stat, n_groups, dof)
            smallest = min(smallest, pair_p)

    return TestResult(pvalue=float(smallest))

PostHocDunnTest

PostHocDunnTest

Bases: BaseTest

Performs a post-hoc Dunn test, typically used following a Kruskal-Wallis test, for multiple comparisons between groups.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

p_adjust

Method used for p-value adjustment (e.g., 'bonferroni', 'holm', etc.).

TYPE: str

Source code in aboba/tests/multiple/dunn.py
class PostHocDunnTest(BaseTest):
    """
    Performs a post-hoc Dunn test, typically used following a Kruskal-Wallis test,
    for multiple comparisons between groups.

    Attributes:
        value_column (str): Name of the column containing the values to test.
        p_adjust (str): Method used for p-value adjustment (e.g., 'bonferroni', 'holm', etc.).
    """

    def __init__(
        self,
        value_column="target",
        p_adjust="bonferroni",
    ):
        """
        Post-hoc Dunn's test for multiple comparisons.

        This test performs pairwise comparisons between groups after an omnibus test
        (like Kruskal-Wallis) indicates significant differences exist. It is a
        non-parametric alternative to Tukey's HSD test.

        Args:
            value_column (str): Name of the column containing the values to test.
            p_adjust (str): Method for adjusting p-values for multiple comparisons.
                Default is 'bonferroni'. Other options include 'holm', 'holm-sidak',
                'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by'.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.dunn import PostHocDunnTest

            # Create sample data with three groups
            np.random.seed(42)
            group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
            group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
            group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

            # Perform the test
            test = PostHocDunnTest(value_column='target', p_adjust='bonferroni')
            artefacts = {}
            result = test.test([group1, group2, group3], artefacts)
            print(f"Minimum p-value: {result.pvalue:.4f}")
            print("Pairwise comparison results:")
            print(artefacts['post_hoc_dunn_result'])
            ```
        """
        super().__init__()
        self.value_column = value_column
        self.p_adjust = p_adjust

    def test(self, groups: List[pd.DataFrame], artefacts: Dict = None) -> TestResult:
        """
        Perform Dunn's post-hoc test for multiple comparisons.

        Args:
            groups (List[pd.DataFrame]): List of DataFrames representing the groups to compare.
            artefacts (dict): Dictionary to store additional results, including the full
                pairwise comparison matrix under the key 'post_hoc_dunn_result'.
                A fresh dict is created when None is passed (the default); the
                previous mutable `{}` default accumulated results across calls.

        Returns:
            TestResult: Object containing the minimum p-value from all pairwise comparisons.

        Raises:
            ValueError: If fewer than two groups are given.
        """
        if artefacts is None:
            artefacts = {}
        if len(groups) < 2:
            raise ValueError("Dunn's post-hoc test requires at least two groups.")
        result = sp.posthoc_dunn(
            [group[self.value_column] for group in groups], p_adjust=self.p_adjust
        )
        artefacts["post_hoc_dunn_result"] = result
        # `result` is a k x k DataFrame of pairwise p-values. `result.min()`
        # alone returns a per-column Series, not a scalar, and the diagonal
        # holds a self-comparison marker rather than a p-value (1 or -1
        # depending on scikit-posthocs version — excluded to be safe), so
        # reduce over the off-diagonal entries explicitly.
        size = len(result)
        min_p = min(
            result.iat[i, j]
            for i in range(size)
            for j in range(size)
            if i != j
        )
        return TestResult(pvalue=float(min_p))

__init__

__init__(value_column='target', p_adjust='bonferroni')

Post-hoc Dunn's test for multiple comparisons.

This test performs pairwise comparisons between groups after an omnibus test (like Kruskal-Wallis) indicates significant differences exist. It is a non-parametric alternative to Tukey's HSD test.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

p_adjust

Method for adjusting p-values for multiple comparisons. Default is 'bonferroni'. Other options include 'holm', 'holm-sidak', 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by'.

TYPE: str DEFAULT: 'bonferroni'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.dunn import PostHocDunnTest

# Create sample data with three groups
np.random.seed(42)
group1 = pd.DataFrame({'target': np.random.normal(10, 2, 50)})
group2 = pd.DataFrame({'target': np.random.normal(12, 2, 50)})
group3 = pd.DataFrame({'target': np.random.normal(11, 2, 50)})

# Perform the test
test = PostHocDunnTest(value_column='target', p_adjust='bonferroni')
artefacts = {}
result = test.test([group1, group2, group3], artefacts)
print(f"Minimum p-value: {result.pvalue:.4f}")
print("Pairwise comparison results:")
print(artefacts['post_hoc_dunn_result'])
Source code in aboba/tests/multiple/dunn.py
def __init__(
    self,
    value_column="target",
    p_adjust="bonferroni",
):
    """
    Configure a post-hoc Dunn's test for pairwise group comparisons.

    Dunn's test is the non-parametric counterpart of Tukey's HSD: it is run
    after an omnibus test such as Kruskal-Wallis has flagged a significant
    difference, to locate which specific pairs of groups actually differ.

    Args:
        value_column (str): Column holding the values under test.
        p_adjust (str): Multiple-comparison correction applied to the
            pairwise p-values. Defaults to 'bonferroni'; 'holm',
            'holm-sidak', 'simes-hochberg', 'hommel', 'fdr_bh' and
            'fdr_by' are also accepted.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.dunn import PostHocDunnTest

        np.random.seed(42)
        frames = [
            pd.DataFrame({'target': np.random.normal(mu, 2, 50)})
            for mu in (10, 12, 11)
        ]

        test = PostHocDunnTest(value_column='target', p_adjust='bonferroni')
        artefacts = {}
        result = test.test(frames, artefacts)
        print(f"Minimum p-value: {result.pvalue:.4f}")
        print(artefacts['post_hoc_dunn_result'])
        ```
    """
    super().__init__()
    self.p_adjust = p_adjust
    self.value_column = value_column

test

test(groups: List[DataFrame], artefacts: Dict = {}) -> TestResult

Perform Dunn's post-hoc test for multiple comparisons.

PARAMETER DESCRIPTION
groups

List of DataFrames representing the groups to compare.

TYPE: List[DataFrame]

artefacts

Dictionary to store additional results, including the full pairwise comparison matrix under the key 'post_hoc_dunn_result'.

TYPE: dict DEFAULT: {}

RETURNS DESCRIPTION
TestResult

Object containing the minimum p-value from all pairwise comparisons.

TYPE: TestResult

Source code in aboba/tests/multiple/dunn.py
def test(self, groups: List[pd.DataFrame], artefacts: Dict = None) -> TestResult:
    """
    Perform Dunn's post-hoc test for multiple comparisons.

    Args:
        groups (List[pd.DataFrame]): List of DataFrames representing the groups to compare.
        artefacts (dict): Dictionary to store additional results, including the full
            pairwise comparison matrix under the key 'post_hoc_dunn_result'.
            A fresh dict is created when None is passed (the default); the
            previous mutable `{}` default accumulated results across calls.

    Returns:
        TestResult: Object containing the minimum p-value from all pairwise comparisons.

    Raises:
        ValueError: If fewer than two groups are given.
    """
    if artefacts is None:
        artefacts = {}
    if len(groups) < 2:
        raise ValueError("Dunn's post-hoc test requires at least two groups.")
    result = sp.posthoc_dunn(
        [group[self.value_column] for group in groups], p_adjust=self.p_adjust
    )
    artefacts["post_hoc_dunn_result"] = result
    # `result` is a k x k DataFrame of pairwise p-values. `result.min()`
    # alone returns a per-column Series, not a scalar, and the diagonal
    # holds a self-comparison marker rather than a p-value (1 or -1
    # depending on scikit-posthocs version — excluded to be safe), so
    # reduce over the off-diagonal entries explicitly.
    size = len(result)
    min_p = min(
        result.iat[i, j]
        for i in range(size)
        for j in range(size)
        if i != j
    )
    return TestResult(pvalue=float(min_p))

Non-Parametric Tests

KruskalIndependentTest

KruskalIndependentTest

Bases: BaseTest

Performs a Kruskal-Wallis H-test for multiple independent samples, a non-parametric alternative to one-way ANOVA.

ATTRIBUTE DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str

Source code in aboba/tests/multiple/kruskal.py
class KruskalIndependentTest(BaseTest):
    """
    Performs a Kruskal-Wallis H-test for multiple independent samples, a non-parametric
    alternative to one-way ANOVA.

    Attributes:
        value_column (str): Name of the column containing the values to test.
    """

    def __init__(
        self,
        value_column="target",
    ):
        """
        Kruskal-Wallis H-test for comparing distributions between independent groups.

        This non-parametric test compares the distributions of two or more independent
        groups to determine if they come from the same distribution. It's an alternative
        to one-way ANOVA when the assumptions of normality are not met.

        Args:
            value_column (str): Name of the column containing the values to test.

        Examples:
            ```python
            import pandas as pd
            import numpy as np
            from aboba.tests.multiple.kruskal import KruskalIndependentTest

            # Create sample non-normal data
            np.random.seed(42)
            group1 = pd.DataFrame({'target': np.random.exponential(2, 50)})
            group2 = pd.DataFrame({'target': np.random.exponential(3, 50)})
            group3 = pd.DataFrame({'target': np.random.exponential(2.5, 50)})

            # Perform the test
            test = KruskalIndependentTest(value_column='target')
            result = test.test([group1, group2, group3], {})
            print(f"P-value: {result.pvalue:.4f}")
            ```
        """
        super().__init__()
        self.value_column = value_column

    @staticmethod
    def average_ranks(x: np.ndarray):
        """
        Average (mid-)ranks of ``x`` with tie-group sizes (for tie correction).
        """
        order = np.argsort(x, kind="mergesort")  # stable sort keeps ties adjacent
        xs = x[order]
        N = xs.size

        ranks_sorted = np.empty(N, dtype=float)
        tie_counts = []

        i = 0
        while i < N:
            j = i + 1
            while j < N and xs[j] == xs[i]:
                j += 1

            # Observations at sorted positions i..j-1 are tied; they share
            # the mean of the 1-based ranks i+1 .. j.
            midrank = 0.5 * ((i + 1) + j)
            ranks_sorted[i:j] = midrank

            cnt = j - i
            if cnt >= 2:
                tie_counts.append(cnt)

            i = j

        # Scatter the ranks back to the original order of x.
        ranks = np.empty(N, dtype=float)
        ranks[order] = ranks_sorted
        return ranks, np.asarray(tie_counts, dtype=float)

    def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
        """
        Executes the Kruskal-Wallis H-test on the provided groups.

        Args:
            groups (List[pd.DataFrame]): A list of DataFrames, each representing a group/sample.
            artefacts (Dict): Unused here; kept for interface compatibility.

        Returns:
            TestResult: A `TestResult` object containing the p-value from the Kruskal-Wallis test.

        Raises:
            ValueError: If fewer than two groups are given, if any group is empty
                after numeric coercion, or if every observation across all groups
                is identical (the statistic is undefined in that case).
        """

        # Coerce to numeric and drop NaNs so non-numeric entries cannot
        # silently distort the ranking.
        samples = [
            pd.to_numeric(g[self.value_column], errors="coerce")
              .dropna()
              .to_numpy()
            for g in groups
        ]

        k = len(samples)
        if k < 2:
            raise ValueError("Kruskal–Wallis test requires at least two groups.")

        ns = np.array([s.size for s in samples], dtype=int)
        if np.any(ns < 1):
            raise ValueError("All groups must contain at least one observation.")

        x = np.concatenate(samples)
        N = x.size

        ranks, tie_counts = self.average_ranks(x)

        # Tie-correction factor; 1.0 when there are no ties.
        if N > 1 and tie_counts.size > 0:
            gamma = 1.0 - np.sum(tie_counts**3 - tie_counts) / (N**3 - N)
        else:
            gamma = 1.0

        if gamma == 0.0:
            # gamma == 0 means every observation is identical, making the
            # statistic 0/0; previously this silently produced a NaN
            # p-value. Fail loudly instead (as scipy.stats.kruskal does).
            raise ValueError("All observations are identical; the Kruskal–Wallis statistic is undefined.")

        R_bar = (N + 1) / 2.0  # grand mean rank

        # Between-group rank dispersion: sum of n_j * (mean rank_j - grand mean)^2.
        idx = 0
        sum_term = 0.0
        for n in ns:
            Rj = ranks[idx:idx + n].mean()
            sum_term += n * (Rj - R_bar) ** 2
            idx += n

        W = (12.0 / (N * (N + 1))) * sum_term
        W /= gamma  # apply tie correction

        pvalue = chi2.sf(W, df=k - 1)
        return TestResult(pvalue=float(pvalue))

__init__

__init__(value_column='target')

Kruskal-Wallis H-test for comparing distributions between independent groups.

This non-parametric test compares the distributions of two or more independent groups to determine if they come from the same distribution. It's an alternative to one-way ANOVA when the assumptions of normality are not met.

PARAMETER DESCRIPTION
value_column

Name of the column containing the values to test.

TYPE: str DEFAULT: 'target'

Examples:

import pandas as pd
import numpy as np
from aboba.tests.multiple.kruskal import KruskalIndependentTest

# Create sample non-normal data
np.random.seed(42)
group1 = pd.DataFrame({'target': np.random.exponential(2, 50)})
group2 = pd.DataFrame({'target': np.random.exponential(3, 50)})
group3 = pd.DataFrame({'target': np.random.exponential(2.5, 50)})

# Perform the test
test = KruskalIndependentTest(value_column='target')
result = test.test([group1, group2, group3], {})
print(f"P-value: {result.pvalue:.4f}")
Source code in aboba/tests/multiple/kruskal.py
def __init__(
    self,
    value_column="target",
):
    """
    Configure a Kruskal-Wallis H-test over independent groups.

    The Kruskal-Wallis test is a rank-based, non-parametric analogue of
    one-way ANOVA: it checks whether two or more independent samples could
    plausibly come from the same distribution, without assuming normality.

    Args:
        value_column (str): Column holding the values under test.

    Examples:
        ```python
        import pandas as pd
        import numpy as np
        from aboba.tests.multiple.kruskal import KruskalIndependentTest

        np.random.seed(42)
        frames = [
            pd.DataFrame({'target': np.random.exponential(scale, 50)})
            for scale in (2, 3, 2.5)
        ]

        test = KruskalIndependentTest(value_column='target')
        result = test.test(frames, {})
        print(f"P-value: {result.pvalue:.4f}")
        ```
    """
    super().__init__()
    self.value_column = value_column

average_ranks staticmethod

average_ranks(x: ndarray)

Average ranks with tie counts (for tie correction).

Source code in aboba/tests/multiple/kruskal.py
@staticmethod
def average_ranks(x: np.ndarray):
    """
    Compute mid-ranks of ``x`` and the sizes of its tie groups.

    Tied observations receive the average of the 1-based ranks they span;
    the sizes of all tie groups with at least two members are returned for
    use in the Kruskal-Wallis tie correction.
    """
    order = np.argsort(x, kind="mergesort")  # stable sort keeps equal values adjacent
    xs = x[order]
    n = xs.size

    ranks = np.empty(n, dtype=float)
    tie_sizes = []

    # Single scan over the sorted values: a run of equal values ends at the
    # array end or where the value changes.
    start = 0
    for stop in range(1, n + 1):
        if stop == n or xs[stop] != xs[start]:
            # Sorted positions start..stop-1 share the mean of the
            # 1-based ranks start+1 .. stop; write it straight back to
            # the original positions via `order`.
            ranks[order[start:stop]] = 0.5 * (start + 1 + stop)
            if stop - start >= 2:
                tie_sizes.append(stop - start)
            start = stop

    return ranks, np.asarray(tie_sizes, dtype=float)

test

test(groups: List[DataFrame], artefacts: Dict = {}) -> TestResult

Executes the Kruskal-Wallis H-test on the provided groups.

PARAMETER DESCRIPTION
groups

A list of DataFrames, each representing a group/sample.

TYPE: List[DataFrame]

RETURNS DESCRIPTION
TestResult

A TestResult object containing the p-value from the Kruskal-Wallis test.

TYPE: TestResult

Source code in aboba/tests/multiple/kruskal.py
def test(self, groups: List[pd.DataFrame], artefacts: Dict = {}) -> TestResult:
    """
    Executes the Kruskal-Wallis H-test on the provided groups.

    Args:
        groups (List[pd.DataFrame]): A list of DataFrames, each representing a group/sample.
        artefacts (Dict): Unused here; kept for interface compatibility.

    Returns:
        TestResult: A `TestResult` object containing the p-value from the Kruskal-Wallis test.

    Raises:
        ValueError: If fewer than two groups are given, if any group is empty
            after numeric coercion, or if every observation across all groups
            is identical (the statistic is undefined in that case).
    """

    # Coerce to numeric and drop NaNs so non-numeric entries cannot
    # silently distort the ranking.
    samples = [
        pd.to_numeric(g[self.value_column], errors="coerce")
          .dropna()
          .to_numpy()
        for g in groups
    ]

    k = len(samples)
    if k < 2:
        raise ValueError("Kruskal–Wallis test requires at least two groups.")

    ns = np.array([s.size for s in samples], dtype=int)
    if np.any(ns < 1):
        raise ValueError("All groups must contain at least one observation.")

    x = np.concatenate(samples)
    N = x.size

    ranks, tie_counts = self.average_ranks(x)

    # Tie-correction factor; 1.0 when there are no ties.
    if N > 1 and tie_counts.size > 0:
        gamma = 1.0 - np.sum(tie_counts**3 - tie_counts) / (N**3 - N)
    else:
        gamma = 1.0

    if gamma == 0.0:
        # gamma == 0 means every observation is identical, making the
        # statistic 0/0; previously this silently produced a NaN p-value.
        # Fail loudly instead (as scipy.stats.kruskal does).
        raise ValueError("All observations are identical; the Kruskal–Wallis statistic is undefined.")

    R_bar = (N + 1) / 2.0  # grand mean rank

    # Between-group rank dispersion: sum of n_j * (mean rank_j - grand mean)^2.
    idx = 0
    sum_term = 0.0
    for n in ns:
        Rj = ranks[idx:idx + n].mean()
        sum_term += n * (Rj - R_bar) ** 2
        idx += n

    W = (12.0 / (N * (N + 1))) * sum_term
    W /= gamma  # apply tie correction

    pvalue = chi2.sf(W, df=k - 1)
    return TestResult(pvalue=float(pvalue))