library(tidyverse)
library(sf)
library(here)
library(DT)
# Input files ----
# Paths are resolved relative to the project root via here().

# Shapefile with the Large Urban Areas (GAU) zone boundaries.
gau_boundaries_data_file <- here(
  "data/raw_data/v2/zonificacion/zonificacion_GAU/zonificacion_gaus.shp"
)

# CSV with the GAU zone names.
gau_names_file <- here(
  "data/raw_data/v2/zonificacion/zonificacion_GAU/nombres_gaus.csv"
)

# CSV with the GAU population counts (the file this report is about).
gau_population_file <- here(
  "data/raw_data/v2/zonificacion/zonificacion_GAU/poblacion_gaus.csv"
)

# General population CSV located one level up, in the zonificacion folder.
all_population_file <- here("data/raw_data/v2/zonificacion/poblacion.csv")
Large Urban Areas population CSV missing population counts
tabular data
missing data
importance: low
Status: ⚠️ active
Importance: 1 - low
Summary: The dataset contains missing population counts for 49 Large Urban Areas in the CSV file poblacion_gaus.csv in the zonificacion_GAU folder, as well as in the poblacion.csv file in the zonificacion folder.
Expected Results: The population data for all Large Urban Areas should be available in the CSV file poblacion_gaus.csv in the zonificacion_GAU folder and/or in the poblacion.csv file in the zonificacion folder.
Steps to Reproduce
- Load Data
Load libraries and define data files.
Load the data, then join to the boundaries the district names and the population counts from both the general population file (poblacion.csv) and the Large Urban Areas population file (poblacion_gaus.csv).
# Load data ----

# Read the GAU boundaries from the shapefile.
gau_boundaries <- read_sf(gau_boundaries_data_file)

# Drop every zone whose ID contains "FR", "PT" or "externo"
# (presumably French, Portuguese and external zones -- confirm against the
# zoning documentation).
gau_boundaries_spain_only <- gau_boundaries |>
  filter(!grepl("FR|PT|externo", ID))

# Zone names: pipe-delimited file, header taken from the first row.
gau_names <- read_delim(
  gau_names_file,
  delim = "|",
  show_col_types = FALSE,
  name_repair = "unique_quiet"
)

# GAU population: pipe-delimited; column names are supplied explicitly, so
# the first row of the file is read as data rather than as a header.
gau_population <- read_delim(
  gau_population_file,
  col_names = c("ID", "population"),
  delim = "|",
  show_col_types = FALSE,
  name_repair = "unique_quiet"
)

# General population file: pipe-delimited, header taken from the first row.
all_population <- read_delim(
  all_population_file,
  delim = "|",
  show_col_types = FALSE,
  name_repair = "unique_quiet"
)

# Total population per district from the general file. With na.rm = TRUE a
# district whose values are all NA sums to 0 rather than NA.
all_population_by_district <- all_population |>
  group_by(distrito) |>
  summarise(population_all = sum(poblacion, na.rm = TRUE), .groups = "drop") |>
  rename(ID = distrito)

# Attach names and both population sources to the boundaries. Left joins
# keep every boundary row; unmatched IDs get NA in the joined columns.
gau_boundaries_spain_only <- gau_boundaries_spain_only |>
  left_join(gau_names |> select(ID, name), by = "ID") |>
  left_join(gau_population, by = "ID") |>
  left_join(all_population_by_district, by = "ID")
Results
- Missing population (loaded from the poblacion_gaus.csv file in the zonificacion_GAU folder)
# Number of zones with no population value after joining the GAU
# population file (population is NA).
gau_boundaries_spain_only |>
  filter(is.na(population)) |>
  nrow()
[1] 49
- Population data for Large Urban Areas is also unavailable in the poblacion.csv file in the zonificacion folder
There are no population counts for these districts in the poblacion.csv file either.
# Number of zones that are present in the general population file but whose
# summed population is 0 (the per-district sum uses na.rm = TRUE, so
# all-missing districts show up as 0 rather than NA).
gau_boundaries_spain_only |>
  filter(!is.na(population_all)) |>
  filter(population_all == 0) |>
  nrow()
[1] 49
- Names of districts with missing population data
# Interactive table of the zones with no GAU population value. The geometry
# column is dropped first so that only the attribute columns are displayed.
DT::datatable(
  gau_boundaries_spain_only |>
    st_drop_geometry() |>
    filter(is.na(population))
)
Links to the original files
# Project helper script; expected to define load_latest_v2_xml()
# (defined outside this document -- see R/901-download-helpers.R).
source(here("R/901-download-helpers.R"))

files <- load_latest_v2_xml()

# Filter relevant files: keep the entries whose local file name matches one
# of the four input files used in this report.
relevant_files <- files |>
  filter(
    basename(local_path) %in% basename(c(
      gau_boundaries_data_file,
      gau_names_file,
      gau_population_file,
      all_population_file
    ))
  )

# Create HTML links: wrap each URL in an anchor that opens in a new tab.
relevant_files <- relevant_files |>
  mutate(
    target_url = paste0(
      "<a href='", target_url, "' target='_blank'>", target_url, "</a>"
    )
  )

# Render the DT table with links; escape = FALSE lets the anchor tags be
# rendered as HTML instead of being shown as literal text.
datatable(relevant_files, escape = FALSE, options = list(pageLength = 5))