This calculator helps you examine how two different factors (independent variables) affect an outcome (dependent variable), both individually and in combination. Unlike simpler tests that only look at one factor at a time, Two-Way ANOVA reveals whether your factors work together in unexpected ways.
Ready to explore your data? Load the sample data to see how it works, or upload your own data to begin your analysis.
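Total Sum of Squares Decomposition:
$$SS_{Total} = SS_A + SS_B + SS_{AB} + SS_{Error}, \qquad SS_{Total} = \sum_{i=1}^{a}\sum_{j=1}^{b}\sum_{k=1}^{n} (Y_{ijk} - \bar{Y}_{...})^2$$
where $\bar{Y}_{...}$ is the grand mean.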
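$$SS_A = bn\sum_{i=1}^{a} (\bar{Y}_{i..} - \bar{Y}_{...})^2$$
where $\bar{Y}_{i..}$ is the mean of level $i$ of Factor A, $b$ is the number of levels in Factor B, and $n$ is the number of observations per cell.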
$$SS_B = an\sum_{j=1}^{b} (\bar{Y}_{.j.} - \bar{Y}_{...})^2$$
where $\bar{Y}_{.j.}$ is the mean of level $j$ of Factor B, and $a$ is the number of levels in Factor A.
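$$SS_{AB} = n\sum_{i=1}^{a}\sum_{j=1}^{b} (\bar{Y}_{ij.} - \bar{Y}_{i..} - \bar{Y}_{.j.} + \bar{Y}_{...})^2, \qquad SS_{Error} = \sum_{i=1}^{a}\sum_{j=1}^{b}\sum_{k=1}^{n} (Y_{ijk} - \bar{Y}_{ij.})^2$$
Where: $\bar{Y}_{ij.}$ is the mean of the cell at level $i$ of Factor A and level $j$ of Factor B, and $n$ is the number of observations per cell.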
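Mean Square: $MS = \dfrac{SS}{df}$, with $df_A = a - 1$, $df_B = b - 1$, $df_{AB} = (a - 1)(b - 1)$, and $df_{Error} = ab(n - 1)$.
F-Statistic for each factor: $F = \dfrac{MS_{effect}}{MS_{Error}}$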
F-statistics are calculated separately for each factor and interaction effect
Weight loss study examining effects of diet and exercise:
| Diet | Exercise | Weight Loss (pounds) |
|---|---|---|
| Low-fat | Yes | 8, 10, 9 |
| Low-fat | No | 6, 7, 8 |
| High-fat | Yes | 5, 7, 6 |
| High-fat | No | 3, 4, 5 |
| Diet | Exercise | Mean | N |
|---|---|---|---|
| Low-fat | Yes | 9.00 | 3 |
| Low-fat | No | 7.00 | 3 |
| High-fat | Yes | 6.00 | 3 |
| High-fat | No | 4.00 | 3 |
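Main Effects: Diet — Low-fat mean = 8.00 vs. High-fat mean = 5.00 (difference 3.00); Exercise — Yes mean = 7.50 vs. No mean = 5.50 (difference 2.00).
Interaction: exercise adds 2.00 pounds under both diets (9.00 − 7.00 for Low-fat, 6.00 − 4.00 for High-fat), so the effect of one factor does not depend on the other and there is no interaction.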
| Source | df | SS | MS | F | p-value |
|---|---|---|---|---|---|
| Diet | 1 | 27.0 | 27.0 | 27.0 | 0.000826 |
| Exercise | 1 | 12.0 | 12.0 | 12.0 | 0.008516 |
| Diet:Exercise | 1 | 0.0 | 0.0 | 0.0 | 1.0000 |
| Residuals | 8 | 8.0 | 1.0 | | |
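Partial Eta-squared: $\eta_p^2 = \dfrac{SS_{effect}}{SS_{effect} + SS_{Error}}$
For the example above, $\eta_p^2 = 27 / (27 + 8) \approx 0.771$ for Diet, $12 / (12 + 8) = 0.600$ for Exercise, and $0 / (0 + 8) = 0$ for the Diet:Exercise interaction.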
# Two-way ANOVA on the diet x exercise weight-loss example: fit the model
# with aov(), then reproduce the sums of squares and F statistics by hand.

# Create the data
library(tidyverse)

data <- tibble(
  Diet = factor(rep(c("Low-fat", "High-fat"), each = 6)),
  Exercise = factor(rep(c("Yes", "No"), each = 3, times = 2)),
  WeightLoss = c(8, 10, 9, 6, 7, 8, 5, 7, 6, 3, 4, 5)
)

# Perform two-way ANOVA (both main effects plus the Diet:Exercise interaction)
model <- aov(WeightLoss ~ Diet * Exercise, data = data)

# Get summary
summary(model)

#------ Manual calculations ------#

# Compute the grand mean
grand_mean <- data |>
  summarize(grand_mean = mean(WeightLoss)) |>
  pull(grand_mean)

# Compute SS Total: squared deviation of every observation from the grand mean
ss_total <- data |>
  summarize(ss_total = sum((WeightLoss - grand_mean)^2)) |>
  pull(ss_total)

# Compute SS for Diet: squared deviation of each Diet mean from the grand
# mean, weighted by the number of observations at that level
ss_diet <- data |>
  group_by(Diet) |>
  summarize(group_mean = mean(WeightLoss), n = n()) |>
  ungroup() |>
  summarize(ss_diet = sum((group_mean - grand_mean)^2 * n)) |>
  pull(ss_diet)

# Compute SS for Exercise (same construction as for Diet)
ss_exercise <- data |>
  group_by(Exercise) |>
  summarize(group_mean = mean(WeightLoss), n = n()) |>
  ungroup() |>
  summarize(ss_exercise = sum((group_mean - grand_mean)^2 * n)) |>
  pull(ss_exercise)

# Compute SS Interaction. mutate() keeps one row per observation, so summing
# the squared term over all rows weights each cell by its sample size.
ss_interaction <- data |>
  group_by(Diet, Exercise) |>
  mutate(group_mean = mean(WeightLoss)) |>
  ungroup() |>
  group_by(Diet) |>
  mutate(diet_mean = mean(WeightLoss)) |>
  ungroup() |>
  group_by(Exercise) |>
  mutate(exercise_mean = mean(WeightLoss)) |>
  ungroup() |>
  mutate(
    interaction_term =
      (group_mean - diet_mean - exercise_mean + grand_mean)^2
  ) |>
  summarize(ss_interaction = sum(interaction_term)) |>
  pull(ss_interaction)

# SS Error by subtraction; this decomposition holds for a balanced design
# (equal cell sizes), which is the case for this example.
ss_error <- ss_total - ss_diet - ss_exercise - ss_interaction

print(str_glue("SS total: {ss_total}"))
print(str_glue("SS diet: {ss_diet}"))
print(str_glue("SS exercise: {ss_exercise}"))
print(str_glue("SS interaction: {ss_interaction}"))
print(str_glue("SS error: {ss_error}"))

# Degrees of freedom derived from the data instead of hard-coded constants.
# df_error = N - a * b assumes every Diet x Exercise cell is observed.
n_diet_levels <- nlevels(data$Diet)
n_exercise_levels <- nlevels(data$Exercise)
df_diet <- n_diet_levels - 1
df_exercise <- n_exercise_levels - 1
df_error <- nrow(data) - n_diet_levels * n_exercise_levels

# Mean squares and F statistics for the two main effects
ms_diet <- ss_diet / df_diet
ms_exercise <- ss_exercise / df_exercise
ms_error <- ss_error / df_error
f_diet <- ms_diet / ms_error
f_exercise <- ms_exercise / ms_error
print(str_glue("F Diet: {f_diet}"))
print(str_glue("F Exercise: {f_exercise}"))
import pandas as pd
import numpy as np
from scipy import stats
# Create the data
data = pd.DataFrame({
'Diet': pd.Categorical(np.repeat(['Low-fat', 'High-fat'], 6)),
'Exercise': pd.Categorical(np.tile(np.repeat(['Yes', 'No'], 3), 2)),
'WeightLoss': [8, 10, 9, 6, 7, 8, 5, 7, 6, 3, 4, 5]
})
# Using statsmodels for ANOVA
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
# Fit the model using statsmodels
model = sm.OLS.from_formula('WeightLoss ~ Diet + Exercise + Diet:Exercise', data=data)
fit = model.fit()
anova_table = anova_lm(fit, typ=2)
print("ANOVA results from statsmodels:")
print(anova_table)
#------ Manual calculations ------#
grand_mean = data['WeightLoss'].mean()
ss_total = np.sum((data['WeightLoss'] - grand_mean) ** 2)
# Compute SS for Diet
diet_means = data.groupby('Diet', observed=True)['WeightLoss'].agg(['mean', 'size']).reset_index()
ss_diet = np.sum((diet_means['mean'] - grand_mean) ** 2 * diet_means['size'])
# Compute SS for Exercise
exercise_means = data.groupby('Exercise', observed=True)['WeightLoss'].agg(['mean', 'size']).reset_index()
ss_exercise = np.sum((exercise_means['mean'] - grand_mean) ** 2 * exercise_means['size'])
# Compute SS Interaction
cell_means = data.groupby(['Diet', 'Exercise'], observed=True)['WeightLoss'].mean().reset_index()
cell_means = cell_means.merge(
data.groupby('Diet', observed=True)['WeightLoss'].mean().reset_index().rename(columns={'WeightLoss': 'diet_mean'}),
on='Diet'
)
cell_means = cell_means.merge(
data.groupby('Exercise', observed=True)['WeightLoss'].mean().reset_index().rename(columns={'WeightLoss': 'exercise_mean'}),
on='Exercise'
)
cell_means['interaction_term'] = (
(cell_means['WeightLoss'] - cell_means['diet_mean'] -
cell_means['exercise_mean'] + grand_mean) ** 2
)
ss_interaction = cell_means['interaction_term'].sum()
ss_error = ss_total - ss_diet - ss_exercise - ss_interaction
# Calculate Mean Squares
df_diet = len(data['Diet'].unique()) - 1
df_exercise = len(data['Exercise'].unique()) - 1
df_interaction = df_diet * df_exercise
df_error = len(data) - (df_diet + 1) * (df_exercise + 1)
ms_diet = ss_diet / df_diet
ms_exercise = ss_exercise / df_exercise
ms_error = ss_error / df_error
# Calculate F statistics and p-values
f_diet = ms_diet / ms_error
f_exercise = ms_exercise / ms_error
p_diet = 1 - stats.f.cdf(f_diet, df_diet, df_error)
p_exercise = 1 - stats.f.cdf(f_exercise, df_exercise, df_error)
# Create ANOVA table
anova_manual = pd.DataFrame({
'df': [df_diet, df_exercise, df_interaction, df_error],
'sum_sq': [ss_diet, ss_exercise, ss_interaction, ss_error],
'mean_sq': [ms_diet, ms_exercise, ss_interaction/df_interaction, ms_error],
'F': [f_diet, f_exercise, (ss_interaction/df_interaction)/ms_error, np.nan],
'PR(>F)': [p_diet, p_exercise,
1 - stats.f.cdf((ss_interaction/df_interaction)/ms_error, df_interaction, df_error),
np.nan]
}, index=['Diet', 'Exercise', 'Diet:Exercise', 'Residuals'])
print("ANOVA Table:")
print(anova_manual.round(4))
This calculator helps you examine how two different factors (independent variables) affect an outcome (dependent variable), both individually and in combination. Unlike simpler tests that only look at one factor at a time, Two-Way ANOVA reveals whether your factors work together in unexpected ways.
Ready to explore your data? Load the sample data to see how it works, or upload your own data to begin your analysis.
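Total Sum of Squares Decomposition:
$$SS_{Total} = SS_A + SS_B + SS_{AB} + SS_{Error}, \qquad SS_{Total} = \sum_{i=1}^{a}\sum_{j=1}^{b}\sum_{k=1}^{n} (Y_{ijk} - \bar{Y}_{...})^2$$
where $\bar{Y}_{...}$ is the grand mean.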
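$$SS_A = bn\sum_{i=1}^{a} (\bar{Y}_{i..} - \bar{Y}_{...})^2$$
where $\bar{Y}_{i..}$ is the mean of level $i$ of Factor A, $b$ is the number of levels in Factor B, and $n$ is the number of observations per cell.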
$$SS_B = an\sum_{j=1}^{b} (\bar{Y}_{.j.} - \bar{Y}_{...})^2$$
where $\bar{Y}_{.j.}$ is the mean of level $j$ of Factor B, and $a$ is the number of levels in Factor A.
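$$SS_{AB} = n\sum_{i=1}^{a}\sum_{j=1}^{b} (\bar{Y}_{ij.} - \bar{Y}_{i..} - \bar{Y}_{.j.} + \bar{Y}_{...})^2, \qquad SS_{Error} = \sum_{i=1}^{a}\sum_{j=1}^{b}\sum_{k=1}^{n} (Y_{ijk} - \bar{Y}_{ij.})^2$$
Where: $\bar{Y}_{ij.}$ is the mean of the cell at level $i$ of Factor A and level $j$ of Factor B, and $n$ is the number of observations per cell.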
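Mean Square: $MS = \dfrac{SS}{df}$, with $df_A = a - 1$, $df_B = b - 1$, $df_{AB} = (a - 1)(b - 1)$, and $df_{Error} = ab(n - 1)$.
F-Statistic for each factor: $F = \dfrac{MS_{effect}}{MS_{Error}}$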
F-statistics are calculated separately for each factor and interaction effect
Weight loss study examining effects of diet and exercise:
| Diet | Exercise | Weight Loss (pounds) |
|---|---|---|
| Low-fat | Yes | 8, 10, 9 |
| Low-fat | No | 6, 7, 8 |
| High-fat | Yes | 5, 7, 6 |
| High-fat | No | 3, 4, 5 |
| Diet | Exercise | Mean | N |
|---|---|---|---|
| Low-fat | Yes | 9.00 | 3 |
| Low-fat | No | 7.00 | 3 |
| High-fat | Yes | 6.00 | 3 |
| High-fat | No | 4.00 | 3 |
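Main Effects: Diet — Low-fat mean = 8.00 vs. High-fat mean = 5.00 (difference 3.00); Exercise — Yes mean = 7.50 vs. No mean = 5.50 (difference 2.00).
Interaction: exercise adds 2.00 pounds under both diets (9.00 − 7.00 for Low-fat, 6.00 − 4.00 for High-fat), so the effect of one factor does not depend on the other and there is no interaction.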
| Source | df | SS | MS | F | p-value |
|---|---|---|---|---|---|
| Diet | 1 | 27.0 | 27.0 | 27.0 | 0.000826 |
| Exercise | 1 | 12.0 | 12.0 | 12.0 | 0.008516 |
| Diet:Exercise | 1 | 0.0 | 0.0 | 0.0 | 1.0000 |
| Residuals | 8 | 8.0 | 1.0 | | |
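Partial Eta-squared: $\eta_p^2 = \dfrac{SS_{effect}}{SS_{effect} + SS_{Error}}$
For the example above, $\eta_p^2 = 27 / (27 + 8) \approx 0.771$ for Diet, $12 / (12 + 8) = 0.600$ for Exercise, and $0 / (0 + 8) = 0$ for the Diet:Exercise interaction.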
# Two-way ANOVA on the diet x exercise weight-loss example: fit the model
# with aov(), then reproduce the sums of squares and F statistics by hand.

# Create the data
library(tidyverse)

data <- tibble(
  Diet = factor(rep(c("Low-fat", "High-fat"), each = 6)),
  Exercise = factor(rep(c("Yes", "No"), each = 3, times = 2)),
  WeightLoss = c(8, 10, 9, 6, 7, 8, 5, 7, 6, 3, 4, 5)
)

# Perform two-way ANOVA (both main effects plus the Diet:Exercise interaction)
model <- aov(WeightLoss ~ Diet * Exercise, data = data)

# Get summary
summary(model)

#------ Manual calculations ------#

# Compute the grand mean
grand_mean <- data |>
  summarize(grand_mean = mean(WeightLoss)) |>
  pull(grand_mean)

# Compute SS Total: squared deviation of every observation from the grand mean
ss_total <- data |>
  summarize(ss_total = sum((WeightLoss - grand_mean)^2)) |>
  pull(ss_total)

# Compute SS for Diet: squared deviation of each Diet mean from the grand
# mean, weighted by the number of observations at that level
ss_diet <- data |>
  group_by(Diet) |>
  summarize(group_mean = mean(WeightLoss), n = n()) |>
  ungroup() |>
  summarize(ss_diet = sum((group_mean - grand_mean)^2 * n)) |>
  pull(ss_diet)

# Compute SS for Exercise (same construction as for Diet)
ss_exercise <- data |>
  group_by(Exercise) |>
  summarize(group_mean = mean(WeightLoss), n = n()) |>
  ungroup() |>
  summarize(ss_exercise = sum((group_mean - grand_mean)^2 * n)) |>
  pull(ss_exercise)

# Compute SS Interaction. mutate() keeps one row per observation, so summing
# the squared term over all rows weights each cell by its sample size.
ss_interaction <- data |>
  group_by(Diet, Exercise) |>
  mutate(group_mean = mean(WeightLoss)) |>
  ungroup() |>
  group_by(Diet) |>
  mutate(diet_mean = mean(WeightLoss)) |>
  ungroup() |>
  group_by(Exercise) |>
  mutate(exercise_mean = mean(WeightLoss)) |>
  ungroup() |>
  mutate(
    interaction_term =
      (group_mean - diet_mean - exercise_mean + grand_mean)^2
  ) |>
  summarize(ss_interaction = sum(interaction_term)) |>
  pull(ss_interaction)

# SS Error by subtraction; this decomposition holds for a balanced design
# (equal cell sizes), which is the case for this example.
ss_error <- ss_total - ss_diet - ss_exercise - ss_interaction

print(str_glue("SS total: {ss_total}"))
print(str_glue("SS diet: {ss_diet}"))
print(str_glue("SS exercise: {ss_exercise}"))
print(str_glue("SS interaction: {ss_interaction}"))
print(str_glue("SS error: {ss_error}"))

# Degrees of freedom derived from the data instead of hard-coded constants.
# df_error = N - a * b assumes every Diet x Exercise cell is observed.
n_diet_levels <- nlevels(data$Diet)
n_exercise_levels <- nlevels(data$Exercise)
df_diet <- n_diet_levels - 1
df_exercise <- n_exercise_levels - 1
df_error <- nrow(data) - n_diet_levels * n_exercise_levels

# Mean squares and F statistics for the two main effects
ms_diet <- ss_diet / df_diet
ms_exercise <- ss_exercise / df_exercise
ms_error <- ss_error / df_error
f_diet <- ms_diet / ms_error
f_exercise <- ms_exercise / ms_error
print(str_glue("F Diet: {f_diet}"))
print(str_glue("F Exercise: {f_exercise}"))
import pandas as pd
import numpy as np
from scipy import stats
# Create the data
data = pd.DataFrame({
'Diet': pd.Categorical(np.repeat(['Low-fat', 'High-fat'], 6)),
'Exercise': pd.Categorical(np.tile(np.repeat(['Yes', 'No'], 3), 2)),
'WeightLoss': [8, 10, 9, 6, 7, 8, 5, 7, 6, 3, 4, 5]
})
# Using statsmodels for ANOVA
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
# Fit the model using statsmodels
model = sm.OLS.from_formula('WeightLoss ~ Diet + Exercise + Diet:Exercise', data=data)
fit = model.fit()
anova_table = anova_lm(fit, typ=2)
print("ANOVA results from statsmodels:")
print(anova_table)
#------ Manual calculations ------#
grand_mean = data['WeightLoss'].mean()
ss_total = np.sum((data['WeightLoss'] - grand_mean) ** 2)
# Compute SS for Diet
diet_means = data.groupby('Diet', observed=True)['WeightLoss'].agg(['mean', 'size']).reset_index()
ss_diet = np.sum((diet_means['mean'] - grand_mean) ** 2 * diet_means['size'])
# Compute SS for Exercise
exercise_means = data.groupby('Exercise', observed=True)['WeightLoss'].agg(['mean', 'size']).reset_index()
ss_exercise = np.sum((exercise_means['mean'] - grand_mean) ** 2 * exercise_means['size'])
# Compute SS Interaction
cell_means = data.groupby(['Diet', 'Exercise'], observed=True)['WeightLoss'].mean().reset_index()
cell_means = cell_means.merge(
data.groupby('Diet', observed=True)['WeightLoss'].mean().reset_index().rename(columns={'WeightLoss': 'diet_mean'}),
on='Diet'
)
cell_means = cell_means.merge(
data.groupby('Exercise', observed=True)['WeightLoss'].mean().reset_index().rename(columns={'WeightLoss': 'exercise_mean'}),
on='Exercise'
)
cell_means['interaction_term'] = (
(cell_means['WeightLoss'] - cell_means['diet_mean'] -
cell_means['exercise_mean'] + grand_mean) ** 2
)
ss_interaction = cell_means['interaction_term'].sum()
ss_error = ss_total - ss_diet - ss_exercise - ss_interaction
# Calculate Mean Squares
df_diet = len(data['Diet'].unique()) - 1
df_exercise = len(data['Exercise'].unique()) - 1
df_interaction = df_diet * df_exercise
df_error = len(data) - (df_diet + 1) * (df_exercise + 1)
ms_diet = ss_diet / df_diet
ms_exercise = ss_exercise / df_exercise
ms_error = ss_error / df_error
# Calculate F statistics and p-values
f_diet = ms_diet / ms_error
f_exercise = ms_exercise / ms_error
p_diet = 1 - stats.f.cdf(f_diet, df_diet, df_error)
p_exercise = 1 - stats.f.cdf(f_exercise, df_exercise, df_error)
# Create ANOVA table
anova_manual = pd.DataFrame({
'df': [df_diet, df_exercise, df_interaction, df_error],
'sum_sq': [ss_diet, ss_exercise, ss_interaction, ss_error],
'mean_sq': [ms_diet, ms_exercise, ss_interaction/df_interaction, ms_error],
'F': [f_diet, f_exercise, (ss_interaction/df_interaction)/ms_error, np.nan],
'PR(>F)': [p_diet, p_exercise,
1 - stats.f.cdf((ss_interaction/df_interaction)/ms_error, df_interaction, df_error),
np.nan]
}, index=['Diet', 'Exercise', 'Diet:Exercise', 'Residuals'])
print("ANOVA Table:")
print(anova_manual.round(4))