# Silence messages/warnings across the doc
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(sf)
library(sp)
library(spdep)
library(ggplot2)
# If you ever see s2/geodesic messages for sf, you can disable s2 for this doc:
# sf::sf_use_s2(FALSE)
A spatial correlogram summarizes how spatial autocorrelation changes with distance. We compute a statistic (e.g., Moran’s I or Geary’s C) for pairs of points in successive distance bins (rings), then plot the statistic against the bin midpoints. This idea is closely related to a semivariogram, which also examines how similarity (or dissimilarity) changes with distance. The key difference is in the statistic being plotted:
Both correlograms and semivariograms therefore use distance bands to summarize spatial dependence. Geary’s C is especially similar to a semivariogram in that higher values indicate greater dissimilarity, whereas Moran’s I is more directly comparable to a correlation coefficient.
Note on dispersion:
One especially useful feature of Moran’s I is that it can take negative values, which indicate dispersion (sometimes called over-dispersion or checkerboard patterns). In this case, nearby observations tend to be more dissimilar than expected under randomness, reflecting a regular spacing of values. This is something that the semivariogram (always non-negative, variance-like) cannot show directly, and while Geary’s C values greater than 1 also reflect dissimilarity, they are less straightforward to interpret in terms of clear dispersion. In practice, Moran’s I is therefore often the easiest tool for diagnosing whether a spatial pattern is clustered, random, or dispersed.
The three examples below visualize dispersion (I ≈
−1), no spatial autocorrelation (I ≈ 0), and
clustering (I ≈ +1) on a simple 5×5 lattice.
Left panel: the grid (white = −1, dark blue = +1).
Right panel: Moran scatterplot with counts at each
unique point to avoid overlapping labels. The dashed line’s slope equals
Moran’s I and is labeled near the center.
# 5×5 checkerboard with alternating +/-1
nr <- 5; nc <- 5
grid_vals <- outer(1:nr, 1:nc, function(i, j) ifelse((i + j) %% 2 == 0, 1, -1))
z <- as.numeric(t(grid_vals))
# Rook neighbors & weights
nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
# Moran's I & spatial lag
mi <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)
# Standardized coords for Moran scatterplot
zs <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))
# Collapse duplicates and count multiplicities
zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)
op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
# (1) Grid
image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Dispersion (Moran's I ≈ %.2f)", mi$estimate[1]))
# (2) Moran scatterplot with counts
plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, -0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)
par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts
set.seed(123) # change to try other random layouts
nr <- 5; nc <- 5
grid_vals <- matrix(sample(c(-1, 1), nr*nc, replace = TRUE), nrow = nr, ncol = nc)
z <- as.numeric(t(grid_vals))
nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
mi <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)
zs <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))
zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)
op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Random (Moran's I ≈ %.2f)", mi$estimate[1]))
plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, 0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)
par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts
nr <- 5; nc <- 5
grid_vals <- matrix(-1, nrow = nr, ncol = nc)
grid_vals[1:3, ] <- 1 # large +1 block above, -1 block below
z <- as.numeric(t(grid_vals))
nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
mi <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)
zs <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))
zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)
op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Clustering (Moran's I ≈ %.2f)", mi$estimate[1]))
plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, 0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)
par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts
Why “Standardized z”?
The Moran scatterplot convention is to use standardized values (mean 0, variance 1) on both axes so results are comparable across datasets.
# Simulated, schematic curves for teaching/visualization only
# Distance bins (arbitrary units, e.g., meters)
d <- seq(0, 3000, by = 250)
# Correlogram-like curve (bounded, correlation-style; wiggles decay to ~0)
set.seed(1)
moran_I <- cos(d/800) * exp(-d/2000) + rnorm(length(d), 0, 0.05)
# Semivariogram-like curve (variance-style; rises to a sill ≈ 1)
semivar <- 1 - exp(-d/800) + rnorm(length(d), 0, 0.02)
op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
# Correlogram panel
plot(d, moran_I, type = "b", pch = 16, ylim = c(-1, 1),
xlab = "Distance", ylab = "Moran's I",
main = "Illustrative Correlogram")
abline(h = 0, lty = 2)
# Semivariogram panel
plot(d, semivar, type = "b", pch = 16, ylim = c(0, 1.2),
xlab = "Distance", ylab = "Semivariance",
main = "Illustrative Semivariogram")
abline(h = 1, lty = 2)
Illustrative (simulated) examples. LEFT: a correlogram (Moran’s I) oscillates around 0 and decays toward 0 with increasing distance (dashed line at 0). RIGHT: a semivariogram starts near 0 and rises toward a sill (here ≈1; dashed line), reflecting increasing dissimilarity with distance. Both are shown over distance bins; shapes are schematic and not fitted to data.
par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts
Note: These curves are simulated to highlight interpretation, not estimated from real data.
- Correlogram (left): dimensionless, bounded statistic (Moran’s I), typically decays toward 0 as spatial autocorrelation weakens with distance.
- Semivariogram (right): variance-like, non-negative statistic that usually increases with distance and levels off at a sill near the overall variance.
# 'meuse' ships with the {sp} package
data(meuse, package = "sp")
meuse_sf <- st_as_sf(meuse, coords = c("x","y"), crs = 28992) # RD Amersfoort
summary(meuse_sf$zinc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 113.0 198.0 326.0 469.7 674.5 1839.0
If your data are long/lat (EPSG:4326), project to meters first:
# meuse_sf <- st_transform(meuse_sf, 32633) # Example: UTM zone 33N
Choose a max distance and number of bins (equal width).
max_distance <- 3000 # meters
n_bins <- 12
edges <- seq(0, max_distance, length.out = n_bins + 1)
bins <- data.frame(
bin_id = seq_len(n_bins),
d_min = edges[-length(edges)],
d_max = edges[-1]
)
bins
## bin_id d_min d_max
## 1 1 0 250
## 2 2 250 500
## 3 3 500 750
## 4 4 750 1000
## 5 5 1000 1250
## 6 6 1250 1500
## 7 7 1500 1750
## 8 8 1750 2000
## 9 9 2000 2250
## 10 10 2250 2500
## 11 11 2500 2750
## 12 12 2750 3000
Here we define how to treat pairs of points within a given distance bin: if two points fall inside the same ring, we code them as neighbors (1); if they fall outside the ring (across bins), we code them as not neighbors (0). This creates a binary weights matrix for each ring.
# Build binary weights for a distance ring [d_min, d_max)
ring_listw <- function(coords, d_min, d_max, zero_ok = TRUE) {
nb <- spdep::dnearneigh(coords, d1 = d_min, d2 = d_max, longlat = FALSE)
lw <- spdep::nb2listw(nb, style = "B", zero.policy = zero_ok)
list(nb = nb, lw = lw)
}
moran_ring <- function(x, coords, d_min, d_max, nsim = 499, zero_ok = TRUE) {
w <- ring_listw(coords, d_min, d_max, zero_ok)
if (sum(unlist(w$lw$weights), na.rm = TRUE) == 0) {
return(list(I = NA_real_, p_mc = NA_real_, n_links = 0))
}
mc <- spdep::moran.mc(x, w$lw, nsim = nsim, zero.policy = zero_ok, alternative = "two.sided")
list(I = as.numeric(mc$statistic), p_mc = mc$p.value,
n_links = sum(unlist(w$lw$weights)))
}
coords <- st_coordinates(meuse_sf)
x <- meuse_sf$zinc
set.seed(123)
res_I <- do.call(rbind, lapply(seq_len(nrow(bins)), function(i) {
b <- bins[i,]
r <- moran_ring(x, coords, b$d_min, b$d_max, nsim = 499, zero_ok = TRUE)
data.frame(bin_id = b$bin_id, d_min = b$d_min, d_max = b$d_max,
I = r$I, p_mc = r$p_mc, n_links = r$n_links)
}))
res_I
## bin_id d_min d_max I p_mc n_links
## 1 1 0 250 0.39127989 0.000 988
## 2 2 250 500 0.13933742 0.000 2214
## 3 3 500 750 -0.05362660 0.036 2634
## 4 4 750 1000 -0.15226589 0.004 2682
## 5 5 1000 1250 -0.13342358 0.004 2380
## 6 6 1250 1500 -0.01373099 0.820 2114
## 7 7 1500 1750 0.02022258 0.360 1952
## 8 8 1750 2000 0.11461365 0.000 1776
## 9 9 2000 2250 -0.01550013 0.804 1478
## 10 10 2250 2500 -0.09088637 0.020 1224
## 11 11 2500 2750 -0.06786426 0.080 1128
## 12 12 2750 3000 -0.01714103 0.764 926
res_I$mid <- (res_I$d_min + res_I$d_max)/2
ggplot(res_I, aes(mid, I)) +
geom_hline(yintercept = 0, linetype = 2) +
geom_point() + geom_line() +
labs(x = "Distance (bin mid, m)", y = "Moran's I",
title = "Moran's I Correlogram (meuse: zinc)") +
theme_minimal()
geary_ring <- function(x, coords, d_min, d_max, nsim = 499, zero_ok = TRUE) {
w <- ring_listw(coords, d_min, d_max, zero_ok)
if (sum(unlist(w$lw$weights), na.rm = TRUE) == 0) {
return(list(C = NA_real_, p_mc = NA_real_, n_links = 0))
}
mc <- spdep::geary.mc(x, w$lw, nsim = nsim, zero.policy = zero_ok, alternative = "two.sided")
list(C = as.numeric(mc$statistic), p_mc = mc$p.value,
n_links = sum(unlist(w$lw$weights)))
}
res_C <- do.call(rbind, lapply(seq_len(nrow(bins)), function(i) {
b <- bins[i,]
r <- geary_ring(x, coords, b$d_min, b$d_max, nsim = 499, zero_ok = TRUE)
data.frame(bin_id = b$bin_id, d_min = b$d_min, d_max = b$d_max,
C = r$C, p_mc = r$p_mc, n_links = r$n_links)
}))
res_C
## bin_id d_min d_max C p_mc n_links
## 1 1 0 250 0.4746401 0.004 988
## 2 2 250 500 0.8107016 0.004 2214
## 3 3 500 750 1.0684984 0.212 2634
## 4 4 750 1000 1.1830636 0.000 2682
## 5 5 1000 1250 1.2664111 0.000 2380
## 6 6 1250 1500 1.1917233 0.016 2114
## 7 7 1500 1750 1.0711949 0.280 1952
## 8 8 1750 2000 0.9913117 0.892 1776
## 9 9 2000 2250 1.0716951 0.264 1478
## 10 10 2250 2500 1.0481101 0.552 1224
## 11 11 2500 2750 0.8946940 0.968 1128
## 12 12 2750 3000 0.5845133 0.112 926
res_C$mid <- (res_C$d_min + res_C$d_max)/2
ggplot(res_C, aes(mid, C)) +
geom_hline(yintercept = 1, linetype = 2) +
geom_point() + geom_line() +
labs(x = "Distance (bin mid, m)", y = "Geary's C",
title = "Geary's C Correlogram (meuse: zinc)") +
theme_minimal()
# Next Step
For practice with your own CSV data (and side-by-side comparison of correlograms and semivariograms), see: