This Two-Sample Z-Test Calculator helps you compare means between two independent groups when both population standard deviations are known. For example, you could compare the average output between two production lines, given known variability in each line's process. The calculator performs comprehensive statistical analysis including descriptive statistics and hypothesis testing. It also generates publication-ready APA format reports. To learn about the required data format and to try this calculator, see the worked example and code sections below.
Two-Sample Z-Test is a statistical test used to determine whether the means of two populations are significantly different from each other when both population standard deviations are known. It's particularly useful for large samples and when working with known population parameters.
Test Statistic: $z = \dfrac{\bar{x}_1 - \bar{x}_2}{\sqrt{\dfrac{\sigma_1^2}{n_1} + \dfrac{\sigma_2^2}{n_2}}}$
Where:
Confidence Interval for Mean Difference: $(\bar{x}_1 - \bar{x}_2) \pm z_{\alpha/2} \sqrt{\dfrac{\sigma_1^2}{n_1} + \dfrac{\sigma_2^2}{n_2}}$
Comparing the efficiency of two production lines with known process variations:
Z-statistic:
For two-tailed test:
Critical value at 5% significance level:
Since $|z| = 0.801 < z_{0.025} = 1.960$ and $p = 0.423 > \alpha = 0.05$, we fail to reject $H_0$. There is no significant difference between the two production lines.
Cohen's d for two-sample z-test: $d = \dfrac{\bar{x}_1 - \bar{x}_2}{\sqrt{(\sigma_1^2 + \sigma_2^2)/2}}$
Interpretation guidelines:
Required sample size per group for equal sample sizes: $n = \dfrac{(z_{\alpha/2} + z_{\beta})^2 (\sigma_1^2 + \sigma_2^2)}{\Delta^2}$
Where:
Reject $H_0$ if: $|z| > z_{\alpha/2}$ (two-tailed); $z > z_{\alpha}$ (right-tailed); $z < -z_{\alpha}$ (left-tailed).
Standard format:
library(tidyverse)

# Sample data: output measurements from two independent production lines
group1 <- c(95.2, 94.8, 96.1, 95.7, 94.9, 95.5, 95.8, 95.3)
group2 <- c(93.8, 93.5, 94.2, 93.9, 93.6, 94.1, 93.7, 94.0)

# Known population standard deviations (a z-test requires these; with
# unknown SDs a two-sample t-test would be appropriate instead)
pop_sd1 <- 4.0
pop_sd2 <- 3.8

# Sample sizes and means
n1 <- length(group1)
n2 <- length(group2)
mean1 <- mean(group1)
mean2 <- mean(group2)

# Standard error of the difference in means: sqrt(sigma1^2/n1 + sigma2^2/n2)
se <- sqrt((pop_sd1^2 / n1) + (pop_sd2^2 / n2))

# Z test statistic for H0: mu1 == mu2
z_stat <- (mean1 - mean2) / se

# Two-tailed p-value. lower.tail = FALSE computes the upper tail directly,
# avoiding the cancellation error of 1 - pnorm(...) when |z| is large.
p_value <- 2 * pnorm(abs(z_stat), lower.tail = FALSE)

# 95% confidence interval for the mean difference
alpha <- 0.05
z_critical <- qnorm(1 - alpha / 2)
ci_lower <- (mean1 - mean2) - z_critical * se
ci_upper <- (mean1 - mean2) + z_critical * se

# Effect size (Cohen's d) based on the known population SDs
pooled_sd <- sqrt((pop_sd1^2 + pop_sd2^2) / 2)
cohens_d <- (mean1 - mean2) / pooled_sd

# Report results
cat("Two-Sample Z-Test Results\n")
cat("========================\n")
cat(sprintf("Group 1 Mean: %.2f (n=%d, σ=%.1f)\n", mean1, n1, pop_sd1))
cat(sprintf("Group 2 Mean: %.2f (n=%d, σ=%.1f)\n", mean2, n2, pop_sd2))
cat(sprintf("Mean Difference: %.2f\n", mean1 - mean2))
cat(sprintf("Standard Error: %.3f\n", se))
cat(sprintf("Z-statistic: %.3f\n", z_stat))
cat(sprintf("P-value (two-tailed): %.4f\n", p_value))
cat(sprintf("95%% CI: [%.2f, %.2f]\n", ci_lower, ci_upper))
cat(sprintf("Cohen's d: %.3f\n", cohens_d))

import numpy as np
from scipy import stats

# Sample data: output measurements from two independent production lines
group1 = np.array([95.2, 94.8, 96.1, 95.7, 94.9, 95.5, 95.8, 95.3])
group2 = np.array([93.8, 93.5, 94.2, 93.9, 93.6, 94.1, 93.7, 94.0])

# Known population standard deviations (a z-test requires these; with
# unknown SDs a two-sample t-test would be appropriate instead)
pop_sd1 = 4.0
pop_sd2 = 3.8

# Sample sizes and means
n1 = len(group1)
n2 = len(group2)
mean1 = np.mean(group1)
mean2 = np.mean(group2)

# Standard error of the difference in means: sqrt(sigma1^2/n1 + sigma2^2/n2)
se = np.sqrt((pop_sd1**2 / n1) + (pop_sd2**2 / n2))

# Z test statistic for H0: mu1 == mu2
z_stat = (mean1 - mean2) / se

# Two-tailed p-value. The survival function sf(x) = 1 - cdf(x) is computed
# directly, avoiding catastrophic cancellation for large |z|.
p_value = 2 * stats.norm.sf(abs(z_stat))

# 95% confidence interval for the mean difference
alpha = 0.05
z_critical = stats.norm.ppf(1 - alpha / 2)
ci_lower = (mean1 - mean2) - z_critical * se
ci_upper = (mean1 - mean2) + z_critical * se

# Effect size (Cohen's d) based on the known population SDs
pooled_sd = np.sqrt((pop_sd1**2 + pop_sd2**2) / 2)
cohens_d = (mean1 - mean2) / pooled_sd

# Report results
print("Two-Sample Z-Test Results")
print("========================")
print(f"Group 1 Mean: {mean1:.2f} (n={n1}, σ={pop_sd1:.1f})")
print(f"Group 2 Mean: {mean2:.2f} (n={n2}, σ={pop_sd2:.1f})")
print(f"Mean Difference: {mean1 - mean2:.2f}")
print(f"Standard Error: {se:.3f}")
print(f"Z-statistic: {z_stat:.3f}")
print(f"P-value (two-tailed): {p_value:.4f}")
print(f"95% CI: [{ci_lower:.2f}, {ci_upper:.2f}]")
print(f"Cohen's d: {cohens_d:.3f}")
# Visualization: box plots of the two groups, plus the standard normal
# reference distribution with the observed z and rejection regions marked.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
box_ax, dist_ax = axes

# Left panel: side-by-side box plots of the raw group values
box_ax.boxplot([group1, group2], labels=['Group 1', 'Group 2'])
box_ax.set_title('Group Comparison')
box_ax.set_ylabel('Values')
box_ax.grid(True, alpha=0.3)

# Right panel: standard normal density with the observed statistic and the
# two-tailed critical region shaded
grid = np.linspace(-4, 4, 1000)
density = stats.norm.pdf(grid)
dist_ax.plot(grid, density, 'b-', linewidth=2, label='Standard Normal')
dist_ax.axvline(z_stat, color='r', linestyle='--', linewidth=2, label=f'Z = {z_stat:.3f}')
dist_ax.axvline(-z_critical, color='g', linestyle=':', linewidth=2, label=f'Critical values (±{z_critical:.3f})')
dist_ax.axvline(z_critical, color='g', linestyle=':', linewidth=2)
lower_tail = grid <= -z_critical
upper_tail = grid >= z_critical
dist_ax.fill_between(grid[lower_tail], density[lower_tail], alpha=0.3, color='red')
dist_ax.fill_between(grid[upper_tail], density[upper_tail], alpha=0.3, color='red')
dist_ax.set_xlabel('Z-score')
dist_ax.set_ylabel('Probability Density')
dist_ax.set_title('Z-Test Distribution')
dist_ax.legend()
dist_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

This Two-Sample Z-Test Calculator helps you compare means between two independent groups when both population standard deviations are known. For example, you could compare the average output between two production lines, given known variability in each line's process. The calculator performs comprehensive statistical analysis including descriptive statistics and hypothesis testing. It also generates publication-ready APA format reports. To learn about the required data format and to try this calculator, see the worked example and code sections below.
Two-Sample Z-Test is a statistical test used to determine whether the means of two populations are significantly different from each other when both population standard deviations are known. It's particularly useful for large samples and when working with known population parameters.
Test Statistic:
Where:
Confidence Interval for Mean Difference:
Comparing the efficiency of two production lines with known process variations:
Z-statistic:
For two-tailed test:
Critical value at 5% significance level:
Since $|z| = 0.801 < z_{0.025} = 1.960$ and $p = 0.423 > \alpha = 0.05$, we fail to reject $H_0$. There is no significant difference between the two production lines.
Cohen's d for two-sample z-test:
Interpretation guidelines:
Required sample size per group for equal sample sizes:
Where:
Reject if:
Standard format:
library(tidyverse)

# Sample data: output measurements from two independent production lines
group1 <- c(95.2, 94.8, 96.1, 95.7, 94.9, 95.5, 95.8, 95.3)
group2 <- c(93.8, 93.5, 94.2, 93.9, 93.6, 94.1, 93.7, 94.0)

# Known population standard deviations (a z-test requires these; with
# unknown SDs a two-sample t-test would be appropriate instead)
pop_sd1 <- 4.0
pop_sd2 <- 3.8

# Sample sizes and means
n1 <- length(group1)
n2 <- length(group2)
mean1 <- mean(group1)
mean2 <- mean(group2)

# Standard error of the difference in means: sqrt(sigma1^2/n1 + sigma2^2/n2)
se <- sqrt((pop_sd1^2 / n1) + (pop_sd2^2 / n2))

# Z test statistic for H0: mu1 == mu2
z_stat <- (mean1 - mean2) / se

# Two-tailed p-value. lower.tail = FALSE computes the upper tail directly,
# avoiding the cancellation error of 1 - pnorm(...) when |z| is large.
p_value <- 2 * pnorm(abs(z_stat), lower.tail = FALSE)

# 95% confidence interval for the mean difference
alpha <- 0.05
z_critical <- qnorm(1 - alpha / 2)
ci_lower <- (mean1 - mean2) - z_critical * se
ci_upper <- (mean1 - mean2) + z_critical * se

# Effect size (Cohen's d) based on the known population SDs
pooled_sd <- sqrt((pop_sd1^2 + pop_sd2^2) / 2)
cohens_d <- (mean1 - mean2) / pooled_sd

# Report results
cat("Two-Sample Z-Test Results\n")
cat("========================\n")
cat(sprintf("Group 1 Mean: %.2f (n=%d, σ=%.1f)\n", mean1, n1, pop_sd1))
cat(sprintf("Group 2 Mean: %.2f (n=%d, σ=%.1f)\n", mean2, n2, pop_sd2))
cat(sprintf("Mean Difference: %.2f\n", mean1 - mean2))
cat(sprintf("Standard Error: %.3f\n", se))
cat(sprintf("Z-statistic: %.3f\n", z_stat))
cat(sprintf("P-value (two-tailed): %.4f\n", p_value))
cat(sprintf("95%% CI: [%.2f, %.2f]\n", ci_lower, ci_upper))
cat(sprintf("Cohen's d: %.3f\n", cohens_d))

import numpy as np
from scipy import stats

# Sample data: output measurements from two independent production lines
group1 = np.array([95.2, 94.8, 96.1, 95.7, 94.9, 95.5, 95.8, 95.3])
group2 = np.array([93.8, 93.5, 94.2, 93.9, 93.6, 94.1, 93.7, 94.0])

# Known population standard deviations (a z-test requires these; with
# unknown SDs a two-sample t-test would be appropriate instead)
pop_sd1 = 4.0
pop_sd2 = 3.8

# Sample sizes and means
n1 = len(group1)
n2 = len(group2)
mean1 = np.mean(group1)
mean2 = np.mean(group2)

# Standard error of the difference in means: sqrt(sigma1^2/n1 + sigma2^2/n2)
se = np.sqrt((pop_sd1**2 / n1) + (pop_sd2**2 / n2))

# Z test statistic for H0: mu1 == mu2
z_stat = (mean1 - mean2) / se

# Two-tailed p-value. The survival function sf(x) = 1 - cdf(x) is computed
# directly, avoiding catastrophic cancellation for large |z|.
p_value = 2 * stats.norm.sf(abs(z_stat))

# 95% confidence interval for the mean difference
alpha = 0.05
z_critical = stats.norm.ppf(1 - alpha / 2)
ci_lower = (mean1 - mean2) - z_critical * se
ci_upper = (mean1 - mean2) + z_critical * se

# Effect size (Cohen's d) based on the known population SDs
pooled_sd = np.sqrt((pop_sd1**2 + pop_sd2**2) / 2)
cohens_d = (mean1 - mean2) / pooled_sd

# Report results
print("Two-Sample Z-Test Results")
print("========================")
print(f"Group 1 Mean: {mean1:.2f} (n={n1}, σ={pop_sd1:.1f})")
print(f"Group 2 Mean: {mean2:.2f} (n={n2}, σ={pop_sd2:.1f})")
print(f"Mean Difference: {mean1 - mean2:.2f}")
print(f"Standard Error: {se:.3f}")
print(f"Z-statistic: {z_stat:.3f}")
print(f"P-value (two-tailed): {p_value:.4f}")
print(f"95% CI: [{ci_lower:.2f}, {ci_upper:.2f}]")
print(f"Cohen's d: {cohens_d:.3f}")
# Visualization: box plots of the two groups, plus the standard normal
# reference distribution with the observed z and rejection regions marked.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
box_ax, dist_ax = axes

# Left panel: side-by-side box plots of the raw group values
box_ax.boxplot([group1, group2], labels=['Group 1', 'Group 2'])
box_ax.set_title('Group Comparison')
box_ax.set_ylabel('Values')
box_ax.grid(True, alpha=0.3)

# Right panel: standard normal density with the observed statistic and the
# two-tailed critical region shaded
grid = np.linspace(-4, 4, 1000)
density = stats.norm.pdf(grid)
dist_ax.plot(grid, density, 'b-', linewidth=2, label='Standard Normal')
dist_ax.axvline(z_stat, color='r', linestyle='--', linewidth=2, label=f'Z = {z_stat:.3f}')
dist_ax.axvline(-z_critical, color='g', linestyle=':', linewidth=2, label=f'Critical values (±{z_critical:.3f})')
dist_ax.axvline(z_critical, color='g', linestyle=':', linewidth=2)
lower_tail = grid <= -z_critical
upper_tail = grid >= z_critical
dist_ax.fill_between(grid[lower_tail], density[lower_tail], alpha=0.3, color='red')
dist_ax.fill_between(grid[upper_tail], density[upper_tail], alpha=0.3, color='red')
dist_ax.set_xlabel('Z-score')
dist_ax.set_ylabel('Probability Density')
dist_ax.set_title('Z-Test Distribution')
dist_ax.legend()
dist_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()