This calculator helps you compute the probabilities of a hypergeometric distribution given the parameters N (population size), K (success states), and n (number of draws). You can find the probability of values being equal to, less than, greater than, or between specific values. The distribution chart shows the probability mass function (PMF) of the hypergeometric distribution, which models sampling without replacement from a finite population.
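Definition: The hypergeometric distribution describes the probability of obtaining exactly $k$ successes in $n$ draws without replacement from a finite population of size $N$ that contains exactly $K$ successes. Its probability mass function is:
$$P(X = k) = \frac{\binom{K}{k}\binom{N-K}{n-k}}{\binom{N}{n}}, \qquad \max(0,\, n + K - N) \le k \le \min(n, K)$$
Where:
- $N$ is the population size
- $K$ is the number of success states in the population
- $n$ is the number of draws
- $k$ is the number of observed successes
- $\binom{a}{b}$ is the binomial coefficient ("$a$ choose $b$")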
In this case:
Plugging these values into the formula:
Therefore, the probability of drawing exactly 3 blue marbles is about 23.8%.
library(tidyverse)
# Inputs ----
# A population of N items holds K successes; n items are drawn without
# replacement. x1 and x2 are the lower and upper comparison points.
N <- 100
K <- 20
n <- 10
x1 <- 2
x2 <- 5

# Probabilities ----
# dhyper()/phyper() expect (successes, failures, draws) after the quantile.
p_exact <- dhyper(x1, m = K, n = N - K, k = n)
p_cumulative <- phyper(x1, m = K, n = N - K, k = n)
# P(x1 <= X <= x2) = F(x2) - F(x1 - 1); diff() performs that subtraction.
p_between <- diff(phyper(c(x1 - 1, x2), m = K, n = N - K, k = n))

# Moments ----
# The variance is the mean scaled by the failure share and the
# finite-population correction (N - n) / (N - 1).
mean <- n * (K / N)
variance <- mean * ((N - K) / N) * ((N - n) / (N - 1))

# Report ----
print(str_glue("P(X = {x1}) = {round(p_exact, 4)}")) # P(X = 2) = 0.3182
print(str_glue("P(X ≤ {x1}) = {round(p_cumulative, 4)}")) # P(X ≤ 2) = 0.6812
print(str_glue("P({x1} ≤ X ≤ {x2}) = {round(p_between, 4)}")) # P(2 ≤ X ≤ 5) = 0.633
print(str_glue("Mean: {round(mean, 4)}")) # Mean: 2
print(str_glue("Variance: {round(variance, 4)}")) # Variance: 1.4545

# PMF table for plotting ----
# One row per possible success count from 0 up to min(n, K).
x_values <- seq(0, min(n, K))
pmf <- dhyper(x_values, m = K, n = N - K, k = n)
pmf_data <- tibble(x = x_values, probability = pmf)
# Bar chart of the PMF. Bars whose x lies in [x1, x2] receive a translucent
# red overlay, dashed red lines mark both comparison points, and the subtitle
# shows the interval probability stored in p_between.
ggplot(pmf_data, aes(x = x, y = probability)) +
  geom_col(fill = "blue", alpha = 0.7) +
  geom_vline(xintercept = c(x1, x2), linetype = "dashed", color = "red") +
  geom_rect(
    data = subset(pmf_data, x >= x1 & x <= x2),
    aes(xmin = x - 0.4, xmax = x + 0.4, ymin = 0, ymax = probability),
    fill = "red", alpha = 0.3
  ) +
  # X is integer-valued, so place one tick per success count instead of the
  # default continuous breaks (which include fractional values).
  scale_x_continuous(breaks = pmf_data$x) +
  labs(
    title = str_glue("Hypergeometric Distribution PMF (N={N}, K={K}, n={n})"),
    subtitle = str_glue("P({x1} ≤ X ≤ {x2}) = {round(p_between, 4)}"),
    x = "Number of Successes",
    y = "Probability"
  ) +
  theme_minimal()

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
# --- Inputs ---------------------------------------------------------------
# A population of N items holds K successes; n items are drawn without
# replacement. x1 and x2 are the lower and upper comparison points.
N, K, n = 100, 20, 10
x1, x2 = 2, 5

# Freeze the distribution once; scipy's argument order is
# (population size, success states, number of draws).
rv = stats.hypergeom(N, K, n)

# --- Point, cumulative and interval probabilities -------------------------
p_exact = rv.pmf(x1)
print(f"P(X = {x1}) = {p_exact:.4f}")

p_cumulative = rv.cdf(x1)
print(f"P(X ≤ {x1}) = {p_cumulative:.4f}")

# P(x1 <= X <= x2) = F(x2) - F(x1 - 1)
p_between = rv.cdf(x2) - rv.cdf(x1 - 1)
print(f"P({x1} ≤ X ≤ {x2}) = {p_between:.4f}")

# --- Moments --------------------------------------------------------------
# The variance is the mean scaled by the failure share and the
# finite-population correction (N - n) / (N - 1).
mean = n * (K / N)
variance = mean * ((N - K) / N) * ((N - n) / (N - 1))
print(f"Mean: {mean:.4f}")
print(f"Variance: {variance:.4f}")
# --- PMF bar chart --------------------------------------------------------
# One bar per possible success count from 0 up to min(n, K).
x_values = np.arange(min(n, K) + 1)
pmf = stats.hypergeom.pmf(x_values, N, K, n)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x_values, pmf, alpha=0.7, color='blue')

# Overlay the bars whose x lies in [x1, x2]; the mask is built once and
# reused for both the positions and the heights.
in_range = (x_values >= x1) & (x_values <= x2)
highlight_x = x_values[in_range]
highlight_pmf = pmf[in_range]
ax.bar(highlight_x, highlight_pmf, alpha=0.5, color='red')

# Dashed markers at both comparison points
for bound in (x1, x2):
    ax.axvline(x=bound, color='red', linestyle='--', alpha=0.7)

# Titles, axis labels, grid and integer ticks
ax.set_title(f'Hypergeometric Distribution PMF (N={N}, K={K}, n={n})')
ax.set_xlabel('Number of Successes')
ax.set_ylabel('Probability')
ax.grid(True, alpha=0.3)
ax.set_xticks(x_values)
fig.tight_layout()
plt.show()

This calculator helps you compute the probabilities of a hypergeometric distribution given the parameters N (population size), K (success states), and n (number of draws). You can find the probability of values being equal to, less than, greater than, or between specific values. The distribution chart shows the probability mass function (PMF) of the hypergeometric distribution, which models sampling without replacement from a finite population.
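Definition: The hypergeometric distribution describes the probability of obtaining exactly $k$ successes in $n$ draws without replacement from a finite population of size $N$ that contains exactly $K$ successes. Its probability mass function is:
$$P(X = k) = \frac{\binom{K}{k}\binom{N-K}{n-k}}{\binom{N}{n}}, \qquad \max(0,\, n + K - N) \le k \le \min(n, K)$$
Where:
- $N$ is the population size
- $K$ is the number of success states in the population
- $n$ is the number of draws
- $k$ is the number of observed successes
- $\binom{a}{b}$ is the binomial coefficient ("$a$ choose $b$")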
In this case:
Plugging these values into the formula:
Therefore, the probability of drawing exactly 3 blue marbles is about 23.8%.
library(tidyverse)
# Inputs ----
# A population of N items holds K successes; n items are drawn without
# replacement. x1 and x2 are the lower and upper comparison points.
N <- 100
K <- 20
n <- 10
x1 <- 2
x2 <- 5

# Probabilities ----
# dhyper()/phyper() expect (successes, failures, draws) after the quantile.
p_exact <- dhyper(x1, m = K, n = N - K, k = n)
p_cumulative <- phyper(x1, m = K, n = N - K, k = n)
# P(x1 <= X <= x2) = F(x2) - F(x1 - 1); diff() performs that subtraction.
p_between <- diff(phyper(c(x1 - 1, x2), m = K, n = N - K, k = n))

# Moments ----
# The variance is the mean scaled by the failure share and the
# finite-population correction (N - n) / (N - 1).
mean <- n * (K / N)
variance <- mean * ((N - K) / N) * ((N - n) / (N - 1))

# Report ----
print(str_glue("P(X = {x1}) = {round(p_exact, 4)}")) # P(X = 2) = 0.3182
print(str_glue("P(X ≤ {x1}) = {round(p_cumulative, 4)}")) # P(X ≤ 2) = 0.6812
print(str_glue("P({x1} ≤ X ≤ {x2}) = {round(p_between, 4)}")) # P(2 ≤ X ≤ 5) = 0.633
print(str_glue("Mean: {round(mean, 4)}")) # Mean: 2
print(str_glue("Variance: {round(variance, 4)}")) # Variance: 1.4545

# PMF table for plotting ----
# One row per possible success count from 0 up to min(n, K).
x_values <- seq(0, min(n, K))
pmf <- dhyper(x_values, m = K, n = N - K, k = n)
pmf_data <- tibble(x = x_values, probability = pmf)
# Bar chart of the PMF. Bars whose x lies in [x1, x2] receive a translucent
# red overlay, dashed red lines mark both comparison points, and the subtitle
# shows the interval probability stored in p_between.
ggplot(pmf_data, aes(x = x, y = probability)) +
  geom_col(fill = "blue", alpha = 0.7) +
  geom_vline(xintercept = c(x1, x2), linetype = "dashed", color = "red") +
  geom_rect(
    data = subset(pmf_data, x >= x1 & x <= x2),
    aes(xmin = x - 0.4, xmax = x + 0.4, ymin = 0, ymax = probability),
    fill = "red", alpha = 0.3
  ) +
  # X is integer-valued, so place one tick per success count instead of the
  # default continuous breaks (which include fractional values).
  scale_x_continuous(breaks = pmf_data$x) +
  labs(
    title = str_glue("Hypergeometric Distribution PMF (N={N}, K={K}, n={n})"),
    subtitle = str_glue("P({x1} ≤ X ≤ {x2}) = {round(p_between, 4)}"),
    x = "Number of Successes",
    y = "Probability"
  ) +
  theme_minimal()

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
# --- Inputs ---------------------------------------------------------------
# A population of N items holds K successes; n items are drawn without
# replacement. x1 and x2 are the lower and upper comparison points.
N, K, n = 100, 20, 10
x1, x2 = 2, 5

# Freeze the distribution once; scipy's argument order is
# (population size, success states, number of draws).
rv = stats.hypergeom(N, K, n)

# --- Point, cumulative and interval probabilities -------------------------
p_exact = rv.pmf(x1)
print(f"P(X = {x1}) = {p_exact:.4f}")

p_cumulative = rv.cdf(x1)
print(f"P(X ≤ {x1}) = {p_cumulative:.4f}")

# P(x1 <= X <= x2) = F(x2) - F(x1 - 1)
p_between = rv.cdf(x2) - rv.cdf(x1 - 1)
print(f"P({x1} ≤ X ≤ {x2}) = {p_between:.4f}")

# --- Moments --------------------------------------------------------------
# The variance is the mean scaled by the failure share and the
# finite-population correction (N - n) / (N - 1).
mean = n * (K / N)
variance = mean * ((N - K) / N) * ((N - n) / (N - 1))
print(f"Mean: {mean:.4f}")
print(f"Variance: {variance:.4f}")
# --- PMF bar chart --------------------------------------------------------
# One bar per possible success count from 0 up to min(n, K).
x_values = np.arange(min(n, K) + 1)
pmf = stats.hypergeom.pmf(x_values, N, K, n)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x_values, pmf, alpha=0.7, color='blue')

# Overlay the bars whose x lies in [x1, x2]; the mask is built once and
# reused for both the positions and the heights.
in_range = (x_values >= x1) & (x_values <= x2)
highlight_x = x_values[in_range]
highlight_pmf = pmf[in_range]
ax.bar(highlight_x, highlight_pmf, alpha=0.5, color='red')

# Dashed markers at both comparison points
for bound in (x1, x2):
    ax.axvline(x=bound, color='red', linestyle='--', alpha=0.7)

# Titles, axis labels, grid and integer ticks
ax.set_title(f'Hypergeometric Distribution PMF (N={N}, K={K}, n={n})')
ax.set_xlabel('Number of Successes')
ax.set_ylabel('Probability')
ax.grid(True, alpha=0.3)
ax.set_xticks(x_values)
fig.tight_layout()
plt.show()