Proxy geos update

I redid the usual proxy PUMA crosswalks in the cwi package, for both county-based PUMAs (2021 ACS) and COG-based PUMAs (2022). This document is a sanity check of those crosswalks and a reference for them.

library(dplyr)
library(purrr)
library(sf)
library(ggplot2)
# Merge all town polygons into one statewide shape; it is used below to clip
# the PUMA polygons.
ct_sf <- cwi::town_sf |>
  st_union()

# One PUMA layer per proxy basis: the 2021 vintage for the county-based PUMAs
# and the 2022 vintage for the COG-based PUMAs. Each layer keeps only the
# column matching "GEOID" (renamed to `puma`) before being clipped.
puma_sf <- list(county = 2021, cog = 2022) |>
  map(function(yr) {
    tigris::pumas(state = "09", year = yr) |>
      select(puma = matches("GEOID")) |>
      st_intersection(ct_sf) # cut to shorelines
  })

# Long table of region/town pairs, restricted to regions that appear in
# either the county-based or the COG-based proxy crosswalk.
reg_df <- cwi::regions |>
  tibble::enframe(name = "region", value = "town") |>
  tidyr::unnest(town) |>
  filter(
    region %in% union(
      cwi::proxy_pumas$county$region,
      cwi::proxy_pumas$cog$region
    )
  )

# Attach town geometries to the region/town table, then collapse to a single
# row per region; these are drawn as region outlines in the maps below.
reg_sf <- inner_join(reg_df, cwi::town_sf, by = c("town" = "name")) |>
  st_as_sf() |>
  group_by(region) |>
  summarise()
# Draw one faceted map per proxy basis (county / cog): each facet is a region,
# PUMAs are shaded by binned population weight, and the region's own outline
# is drawn on top in black.
imap(puma_sf, function(pumas, id) {
  proxy <- cwi::proxy_pumas[[id]]
  weight_breaks <- c(0, 0.25, 0.5, 0.75, 0.9, 1)

  # PUMA polygons joined to their proxy rows, with the weight cut into bins
  puma_weights <- pumas |>
    inner_join(proxy, by = "puma") |>
    mutate(
      popwt_brk = cut(pop_weight, breaks = weight_breaks, include.lowest = TRUE)
    )

  # Only outline the regions that have a proxy for this basis
  region_outlines <- filter(reg_sf, region %in% proxy$region)

  ggplot(puma_weights) +
    geom_sf(aes(fill = popwt_brk), color = "gray20", linewidth = 0.2) +
    geom_sf(
      data = region_outlines,
      fill = NA, color = "black", linewidth = 0.6
    ) +
    scale_fill_brewer(palette = "PuRd") +
    facet_wrap(vars(region)) +
    theme_void() +
    labs(title = stringr::str_glue("Proxies, {id}-based"))
})
$county


$cog

Extra towns added to each region’s COG-based proxies: these are towns that sit in a PUMA used in the region’s proxy but are not part of the original region, listed with that PUMA’s population-based weight.

# Town-to-PUMA lookup for the COG-based PUMAs.
town_puma <- distinct(cwi::xwalk, town, puma_fips_cog)

# For each region's proxy PUMAs, list the towns in the PUMA that are NOT in
# the region itself, together with that PUMA's population weight.
# NOTE(review): the first join looks intentionally many-to-many (several towns
# per PUMA, and a PUMA can serve more than one region) -- on dplyr >= 1.1
# consider declaring `relationship = "many-to-many"`; confirm before adding.
town_puma |>
  inner_join(cwi::proxy_pumas$cog, by = c("puma_fips_cog" = "puma")) |>
  # Flag towns that belong to the region; unmatched rows become FALSE
  left_join(reg_df |> mutate(in_reg = TRUE), by = c("region", "town")) |>
  tidyr::replace_na(list(in_reg = FALSE)) |>
  # Drop in-region towns before aggregating so only the extras are collapsed
  filter(!in_reg) |>
  group_by(region, in_reg, pop_weight) |>
  # .groups = "drop" returns an ungrouped table and silences the
  # "summarise() has grouped output by ..." message in the rendered document
  summarise(towns = toString(sort(town)), .groups = "drop") |>
  knitr::kable()
region in_reg pop_weight towns
Fairfield County FALSE 0.2579971 Ansonia, Beacon Falls, Derby, Naugatuck, Oxford, Seymour, Southbury
Fairfield County FALSE 0.8495731 Bridgewater, New Milford
Greater Hartford FALSE 0.1073474 Ashford, Brooklyn, Canterbury, Chaplin, Eastford, Hampton, Killingly, Plainfield, Pomfret, Putnam, Scotland, Sterling, Thompson, Union, Voluntown, Woodstock
Greater New Haven FALSE 0.3681606 Meriden, Wallingford
Greater Waterbury FALSE 0.1504269 Bethel, Brookfield, Danbury, New Fairfield, Newtown, Sherman
Greater Waterbury FALSE 0.1835735 Barkhamsted, Burlington, Canaan, Colebrook, Cornwall, Hartland, Harwinton, Kent, New Hartford, Norfolk, North Canaan, Salisbury, Sharon, Torrington, Winchester
Greater Waterbury FALSE 0.4411045 Ansonia, Derby, Seymour, Shelton
Greater Waterbury FALSE 0.5903079 Bristol, Plymouth
Hartford County FALSE 0.0528725 Andover, Bolton, Columbia, Coventry, Ellington, Hebron, Mansfield, Somers, Tolland, Willington
Hartford County FALSE 0.1008288 Barkhamsted, Canaan, Colebrook, Cornwall, Goshen, Harwinton, Kent, Litchfield, Morris, New Hartford, Norfolk, North Canaan, Roxbury, Salisbury, Sharon, Torrington, Warren, Washington, Winchester
Hartford County FALSE 0.3435833 Bethlehem, Cheshire, Middlebury, Plymouth, Prospect, Thomaston, Watertown, Wolcott, Woodbury
Hartford County FALSE 0.7847891 Vernon
Litchfield County FALSE 0.1504269 Bethel, Brookfield, Danbury, New Fairfield, Newtown, Sherman
Litchfield County FALSE 0.3069919 Bristol, Cheshire, Middlebury, Prospect, Wolcott
Litchfield County FALSE 0.8991712 Burlington, Hartland
Lower Naugatuck Valley FALSE 0.8753968 Southbury
Middlesex County FALSE 0.9427199 Lyme, Old Lyme
New Haven County FALSE 0.3492279 Bethlehem, Bristol, Plymouth, Thomaston, Watertown, Woodbury
New Haven County FALSE 0.7422221 Shelton
New London County FALSE 0.0239472 Ashford, Brooklyn, Canterbury, Chaplin, Eastford, Hampton, Killingly, Plainfield, Pomfret, Putnam, Scotland, Stafford, Sterling, Thompson, Union, Woodstock
New London County FALSE 0.0572801 Chester, Clinton, Cromwell, Deep River, Durham, East Haddam, East Hampton, Essex, Haddam, Killingworth, Middlefield, Middletown, Old Saybrook, Portland, Westbrook
New London County FALSE 0.8451581 Windham
Tolland County FALSE 0.1159020 Ashford, Brooklyn, Canterbury, Chaplin, Eastford, Hampton, Killingly, Plainfield, Pomfret, Putnam, Scotland, Sterling, Thompson, Voluntown, Woodstock
Tolland County FALSE 0.2152109 East Hartford, Manchester
Tolland County FALSE 0.9471275 Marlborough
Windham County FALSE 0.1548419 Bozrah, Colchester, Franklin, Griswold, Lebanon, Ledyard, Lisbon, Montville, North Stonington, Norwich, Preston, Salem, Sprague
Windham County FALSE 0.8601508 Stafford, Union, Voluntown

Cool that some of these fit better. Greater Hartford is almost exactly right—they had to steal Stafford for the Windham County-area PUMA to make it to 100k. The Valley is very close, and Southbury is similar enough to the other towns in the Valley. Greater Waterbury is still weird.

Once there’s 2022 PUMS data, it would be good to add a sanity check here to make sure the numbers match closely enough between ACS tables and PUMS-based estimates.