# Silence messages/warnings across the doc
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(sf)
library(sp)
library(spdep)
library(ggplot2)

# If you ever see s2/geodesic messages for sf, you can disable s2 for this doc:
# sf::sf_use_s2(FALSE)

1) What is a spatial correlogram?

A spatial correlogram summarizes how spatial autocorrelation changes with distance. We compute a statistic (e.g., Moran’s I or Geary’s C) for pairs of points in successive distance bins (rings), then plot the statistic against the bin midpoints. This idea is closely related to a semivariogram, which also examines how similarity (or dissimilarity) changes with distance. The key difference is in the statistic being plotted:

Both correlograms and semivariograms therefore use distance bands to summarize spatial dependence. Geary’s C is especially similar to a semivariogram in that higher values indicate greater dissimilarity, whereas Moran’s I is more directly comparable to a correlation coefficient.

Note on dispersion:
One especially useful feature of Moran’s I is that it can take negative values, which indicate dispersion (sometimes called over-dispersion or checkerboard patterns). In this case, nearby observations tend to be more dissimilar than expected under randomness, reflecting a regular spacing of values. This is something that the semivariogram (always non-negative, variance-like) cannot show directly, and while Geary’s C values greater than 1 also reflect dissimilarity, they are less straightforward to interpret in terms of clear dispersion. In practice, Moran’s I is therefore often the easiest tool for diagnosing whether a spatial pattern is clustered, random, or dispersed.

Toy lattice examples of Moran’s I

The three examples below visualize dispersion (I ≈ −1), no spatial autocorrelation (I ≈ 0), and clustering (I ≈ +1) on a simple 5×5 lattice.
Left panel: the grid (white = −1, dark blue = +1).
Right panel: Moran scatterplot with counts at each unique point to avoid overlapping labels. The dashed line’s slope equals Moran’s I and is labeled near the center.

Dispersion (Moran’s I ≈ −1)

# 5×5 checkerboard with alternating +/-1
nr <- 5; nc <- 5
grid_vals <- outer(1:nr, 1:nc, function(i, j) ifelse((i + j) %% 2 == 0, 1, -1))
z <- as.numeric(t(grid_vals))

# Rook neighbors & weights
nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)

# Moran's I & spatial lag
mi   <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)

# Standardized coords for Moran scatterplot
zs   <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))

# Collapse duplicates and count multiplicities
zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)

op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))

# (1) Grid
image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Dispersion (Moran's I ≈ %.2f)", mi$estimate[1]))

# (2) Moran scatterplot with counts
plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
     main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, -0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)

par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts

No spatial autocorrelation (Moran’s I ≈ 0)

set.seed(123)  # change to try other random layouts
nr <- 5; nc <- 5
grid_vals <- matrix(sample(c(-1, 1), nr*nc, replace = TRUE), nrow = nr, ncol = nc)
z <- as.numeric(t(grid_vals))

nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)

mi   <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)

zs   <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))

zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)

op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))

image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Random (Moran's I ≈ %.2f)", mi$estimate[1]))

plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
     main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, 0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)

par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts

Clustering (Moran’s I ≈ +1)

nr <- 5; nc <- 5
grid_vals <- matrix(-1, nrow = nr, ncol = nc)
grid_vals[1:3, ] <- 1  # large +1 block above, -1 block below
z <- as.numeric(t(grid_vals))

nb <- spdep::cell2nb(nr, nc, type = "rook", torus = FALSE)
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)

mi   <- spdep::moran.test(z, lw, zero.policy = TRUE)
lagz <- spdep::lag.listw(lw, z, zero.policy = TRUE)

zs   <- as.numeric(scale(z))
lz_s <- as.numeric(scale(lagz))

zr <- round(zs, 4); lr <- round(lz_s, 4)
df <- data.frame(z = zr, l = lr)
counts <- aggregate(list(n = rep(1, nrow(df))), by = list(z = df$z, l = df$l), FUN = sum)

op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))

image(t(apply(grid_vals, 2, rev)), axes = FALSE, col = c("white", "#08306b"))
box()
title(sprintf("Clustering (Moran's I ≈ %.2f)", mi$estimate[1]))

plot(zs, lz_s, xlab = "Standardized z", ylab = "Spatial lag of z",
     main = "Moran scatterplot", xlim = c(-1.2, 1.2), ylim = c(-1.2, 1.2), type = "n")
abline(h = 0, v = 0, lty = 2)
fit <- lm(lz_s ~ zs)
abline(fit, lty = 2, lwd = 2)
text(0.1, 0.1, sprintf("Moran's I = %.2f", mi$estimate[1]), cex = 0.9)
points(counts$z, counts$l, pch = 16, cex = 1.2)
text(counts$z, counts$l, labels = counts$n, pos = 3, cex = 0.9)

par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts

Why “Standardized z”?
The Moran scatterplot convention is to use standardized values (mean 0, variance 1) on both axes so results are comparable across datasets.

Illustrative comparison: correlogram vs. semivariogram

# Simulated, schematic curves for teaching/visualization only

# Distance bins (arbitrary units, e.g., meters)
d <- seq(0, 3000, by = 250)

# Correlogram-like curve (bounded, correlation-style; wiggles decay to ~0)
set.seed(1)
moran_I <- cos(d/800) * exp(-d/2000) + rnorm(length(d), 0, 0.05)

# Semivariogram-like curve (variance-style; rises to a sill ≈ 1)
semivar <- 1 - exp(-d/800) + rnorm(length(d), 0, 0.02)

op <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))

# Correlogram panel
plot(d, moran_I, type = "b", pch = 16, ylim = c(-1, 1),
     xlab = "Distance", ylab = "Moran's I",
     main = "Illustrative Correlogram")
abline(h = 0, lty = 2)

# Semivariogram panel
plot(d, semivar, type = "b", pch = 16, ylim = c(0, 1.2),
     xlab = "Distance", ylab = "Semivariance",
     main = "Illustrative Semivariogram")
abline(h = 1, lty = 2)
Illustrative (simulated) examples. LEFT: a correlogram (Moran’s I) oscillates around 0 and decays toward 0 with increasing distance (dashed line at 0). RIGHT: a semivariogram starts near 0 and rises toward a sill (here ≈1; dashed line), reflecting increasing dissimilarity with distance. Both are shown over distance bins; shapes are schematic and not fitted to data.

Illustrative (simulated) examples. LEFT: a correlogram (Moran’s I) oscillates around 0 and decays toward 0 with increasing distance (dashed line at 0). RIGHT: a semivariogram starts near 0 and rises toward a sill (here ≈1; dashed line), reflecting increasing dissimilarity with distance. Both are shown over distance bins; shapes are schematic and not fitted to data.

par(op) # restore original settings; not strictly needed in Rmd, but prevents layout carry-over in scripts

Note: These curves are simulated to highlight interpretation, not estimated from real data.
- Correlogram (left): dimensionless, bounded statistic (Moran’s I), typically decays toward 0 as spatial autocorrelation weakens with distance.
- Semivariogram (right): variance-like, non-negative statistic that usually increases with distance and levels off at a sill near the overall variance.

2) Example data (meuse)

# 'meuse' ships with the {sp} package
data(meuse, package = "sp")
meuse_sf <- st_as_sf(meuse, coords = c("x","y"), crs = 28992)  # RD Amersfoort
summary(meuse_sf$zinc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   113.0   198.0   326.0   469.7   674.5  1839.0

If your data are long/lat (EPSG:4326), project to meters first:

# meuse_sf <- st_transform(meuse_sf, 32633)  # Example: UTM zone 33N

3) Create distance bins

Choose a max distance and number of bins (equal width).

max_distance <- 3000   # meters
n_bins       <- 12
edges <- seq(0, max_distance, length.out = n_bins + 1)
bins  <- data.frame(
  bin_id = seq_len(n_bins),
  d_min  = edges[-length(edges)],
  d_max  = edges[-1]
)
bins
##    bin_id d_min d_max
## 1       1     0   250
## 2       2   250   500
## 3       3   500   750
## 4       4   750  1000
## 5       5  1000  1250
## 6       6  1250  1500
## 7       7  1500  1750
## 8       8  1750  2000
## 9       9  2000  2250
## 10     10  2250  2500
## 11     11  2500  2750
## 12     12  2750  3000

4) Helper to build binary weights for a ring

Here we define how to treat pairs of points within a given distance bin: if two points fall inside the same ring, we code them as neighbors (1); if they fall outside the ring (across bins), we code them as not neighbors (0). This creates a binary weights matrix for each ring.

# Build binary weights for a distance ring [d_min, d_max)
ring_listw <- function(coords, d_min, d_max, zero_ok = TRUE) {
  nb <- spdep::dnearneigh(coords, d1 = d_min, d2 = d_max, longlat = FALSE)
  lw <- spdep::nb2listw(nb, style = "B", zero.policy = zero_ok)
  list(nb = nb, lw = lw)
}

5) Moran’s I correlogram with permutation p-values

moran_ring <- function(x, coords, d_min, d_max, nsim = 499, zero_ok = TRUE) {
  w <- ring_listw(coords, d_min, d_max, zero_ok)
  if (sum(unlist(w$lw$weights), na.rm = TRUE) == 0) {
    return(list(I = NA_real_, p_mc = NA_real_, n_links = 0))
  }
  mc <- spdep::moran.mc(x, w$lw, nsim = nsim, zero.policy = zero_ok, alternative = "two.sided")
  list(I = as.numeric(mc$statistic), p_mc = mc$p.value,
       n_links = sum(unlist(w$lw$weights)))
}

coords <- st_coordinates(meuse_sf)
x <- meuse_sf$zinc
set.seed(123)

res_I <- do.call(rbind, lapply(seq_len(nrow(bins)), function(i) {
  b <- bins[i,]
  r <- moran_ring(x, coords, b$d_min, b$d_max, nsim = 499, zero_ok = TRUE)
  data.frame(bin_id = b$bin_id, d_min = b$d_min, d_max = b$d_max,
             I = r$I, p_mc = r$p_mc, n_links = r$n_links)
}))
res_I
##    bin_id d_min d_max           I  p_mc n_links
## 1       1     0   250  0.39127989 0.000     988
## 2       2   250   500  0.13933742 0.000    2214
## 3       3   500   750 -0.05362660 0.036    2634
## 4       4   750  1000 -0.15226589 0.004    2682
## 5       5  1000  1250 -0.13342358 0.004    2380
## 6       6  1250  1500 -0.01373099 0.820    2114
## 7       7  1500  1750  0.02022258 0.360    1952
## 8       8  1750  2000  0.11461365 0.000    1776
## 9       9  2000  2250 -0.01550013 0.804    1478
## 10     10  2250  2500 -0.09088637 0.020    1224
## 11     11  2500  2750 -0.06786426 0.080    1128
## 12     12  2750  3000 -0.01714103 0.764     926

Plot Moran’s I correlogram

res_I$mid <- (res_I$d_min + res_I$d_max)/2
ggplot(res_I, aes(mid, I)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_point() + geom_line() +
  labs(x = "Distance (bin mid, m)", y = "Moran's I",
       title = "Moran's I Correlogram (meuse: zinc)") +
  theme_minimal()

6) Geary’s C correlogram (optional)

geary_ring <- function(x, coords, d_min, d_max, nsim = 499, zero_ok = TRUE) {
  w <- ring_listw(coords, d_min, d_max, zero_ok)
  if (sum(unlist(w$lw$weights), na.rm = TRUE) == 0) {
    return(list(C = NA_real_, p_mc = NA_real_, n_links = 0))
  }
  mc <- spdep::geary.mc(x, w$lw, nsim = nsim, zero.policy = zero_ok, alternative = "two.sided")
  list(C = as.numeric(mc$statistic), p_mc = mc$p.value,
       n_links = sum(unlist(w$lw$weights)))
}

res_C <- do.call(rbind, lapply(seq_len(nrow(bins)), function(i) {
  b <- bins[i,]
  r <- geary_ring(x, coords, b$d_min, b$d_max, nsim = 499, zero_ok = TRUE)
  data.frame(bin_id = b$bin_id, d_min = b$d_min, d_max = b$d_max,
             C = r$C, p_mc = r$p_mc, n_links = r$n_links)
}))
res_C
##    bin_id d_min d_max         C  p_mc n_links
## 1       1     0   250 0.4746401 0.004     988
## 2       2   250   500 0.8107016 0.004    2214
## 3       3   500   750 1.0684984 0.212    2634
## 4       4   750  1000 1.1830636 0.000    2682
## 5       5  1000  1250 1.2664111 0.000    2380
## 6       6  1250  1500 1.1917233 0.016    2114
## 7       7  1500  1750 1.0711949 0.280    1952
## 8       8  1750  2000 0.9913117 0.892    1776
## 9       9  2000  2250 1.0716951 0.264    1478
## 10     10  2250  2500 1.0481101 0.552    1224
## 11     11  2500  2750 0.8946940 0.968    1128
## 12     12  2750  3000 0.5845133 0.112     926

Plot Geary’s C correlogram

res_C$mid <- (res_C$d_min + res_C$d_max)/2
ggplot(res_C, aes(mid, C)) +
  geom_hline(yintercept = 1, linetype = 2) +
  geom_point() + geom_line() +
  labs(x = "Distance (bin mid, m)", y = "Geary's C",
       title = "Geary's C Correlogram (meuse: zinc)") +
  theme_minimal()

# Next Step

For practice with your own CSV data (and side-by-side comparison of correlograms and semivariograms), see:

8) References