Data wrangling and preparation of Gross Domestic Product (GDP) and Purchasing Power Parity (PPP) data.
# Project-relative directories for raw inputs and processed outputs.
# The trailing "/" component is kept on purpose: the import chunks build file
# paths with glue(dirRawData, <file name>), which simply concatenates.
dirRawData <- here("data-raw", "/")
dirProcData <- here("data-processed", "/")
# World Bank Country and Lending Groups:
# read cells A5:I224 of the "List of economies" sheet and normalise the
# column names with janitor::clean_names().
classification_raw <-
  janitor::clean_names(
    readxl::read_excel(
      path = glue(dirRawData, "WorldBank-Country-Income-Class.xls"),
      sheet = "List of economies",
      range = "A5:I224"
    )
  )

glimpse(classification_raw)
Rows: 219
Columns: 9
$ x_1 <chr> "x", "1", "2", "3", "4", "5", "6", "7", "8"…
$ x_2 <chr> "x", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ economy <chr> "x", "Afghanistan", "Albania", "Algeria", "…
$ code <chr> "x", "AFG", "ALB", "DZA", "ASM", "AND", "AG…
$ x <chr> "x", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ region <chr> "x", "South Asia", "Europe & Central Asia",…
$ income_group <chr> "x", "Low income", "Upper middle income", "…
$ lending_category <chr> "x", "IDA", "IBRD", "IBRD", "..", "..", "IB…
$ other <chr> "x", "HIPC", NA, NA, NA, NA, NA, NA, NA, NA…
# Purchasing Power Parity (ICP 2017 GDP/PPP data file).
# col_types "fffffcn": the first five columns are read as factors, the sixth
# as character and the seventh as a number.
gdp_by_ppp_raw <-
  janitor::clean_names(
    read_csv(
      glue(dirRawData, "ICP-2017-GDP-PPP-Data.csv"),
      col_types = "fffffcn"
    )
  )

glimpse(gdp_by_ppp_raw)
Rows: 449
Columns: 7
$ country_name <fct> "Afghanistan", "Afghanistan", "Albania",…
$ country_code <fct> AFG, AFG, ALB, ALB, DZA, DZA, ASM, ASM, …
$ classification_name <fct> "Expenditure per capita, PPP-based (US$)…
$ classification_code <fct> PCAP.PP, PPPGlob, PCAP.PP, PPPGlob, PCAP…
$ series_name <fct> 1000000:GROSS DOMESTIC PRODUCT, 1000000:…
$ series_code <chr> "1000000", "1000000", "1000000", "100000…
$ x2017_yr2017 <dbl> 2202.570851, 17.205558, 13093.660066, 41…
# Metadata file that accompanies the ICP GDP/PPP data.
# No col_types are supplied, so readr guesses the column types.
gdp_by_ppp_meta_raw <-
  janitor::clean_names(
    read_csv(glue(dirRawData, "ICP-2017-GDP-PPP-Metadata.csv"))
  )
# Tidy the World Bank classification table:
#   * drop the placeholder row whose cells are all "x" and the row-number
#     column x_1,
#   * drop rows and columns that are completely empty,
#   * rename the key columns to economy_name / economy_code,
#   * turn the grouping columns into factors.
classification <-
  classification_raw %>%
  filter(economy != "x") %>%
  select(-x_1) %>%
  janitor::remove_empty(c("rows", "cols")) %>%
  rename(economy_name = economy, economy_code = code) %>%
  mutate(
    # across() replaces the superseded mutate_at(); income_group is handled
    # separately below, so it is no longer converted twice.
    across(c(region, lending_category), factor),
    # Start from order-of-appearance levels so that any unexpected category
    # is kept (after the known ones) instead of being turned into NA ...
    income_group = factor(income_group, levels = unique(income_group)),
    # ... then pin all four income groups from poorest to richest, so the
    # level order does not depend on which economy comes first in the sheet.
    # fct_relevel() only warns if one of these levels is absent.
    income_group = fct_relevel(
      income_group,
      "Low income",
      "Lower middle income",
      "Upper middle income",
      "High income"
    )
  )

glimpse(classification)
Rows: 218
Columns: 6
$ economy_name <chr> "Afghanistan", "Albania", "Algeria", "Ameri…
$ economy_code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "…
$ region <fct> South Asia, Europe & Central Asia, Middle E…
$ income_group <fct> Low income, Upper middle income, Lower midd…
$ lending_category <fct> IDA, IBRD, IBRD, .., .., IBRD, IBRD, IBRD, …
$ other <chr> "HIPC", NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Clean the ICP GDP/PPP data:
#   * keep only rows that carry a country code,
#   * drop the series code,
#   * rename the country columns to the economy_* names used for the
#     classification table, and the 2017 column to `value`,
#   * round values to two decimals and shorten the GDP series label
#     (any other series label becomes NA).
gdp_by_ppp <-
  gdp_by_ppp_raw %>%
  filter(!is.na(country_code)) %>%
  select(-series_code) %>%
  rename(
    economy_name = country_name,
    economy_code = country_code,
    value = x2017_yr2017
  ) %>%
  mutate(
    value = round(value, 2),
    series_name = ifelse(
      series_name == "1000000:GROSS DOMESTIC PRODUCT",
      "GDP",
      NA
    )
  )

glimpse(gdp_by_ppp)
Rows: 444
Columns: 6
$ economy_name <fct> "Afghanistan", "Afghanistan", "Albania",…
$ economy_code <fct> AFG, AFG, ALB, ALB, DZA, DZA, ASM, ASM, …
$ classification_name <fct> "Expenditure per capita, PPP-based (US$)…
$ classification_code <fct> PCAP.PP, PPPGlob, PCAP.PP, PPPGlob, PCAP…
$ series_name <chr> "GDP", "GDP", "GDP", "GDP", "GDP", "GDP"…
$ value <dbl> 2202.57, 17.21, 13093.66, 41.23, 11560.5…
# Trim the classification table down to its identifying columns
# (name, code) and the two grouping columns (region, income group).
classification_trim <-
  select(classification, economy_name, economy_code, region, income_group)

glimpse(classification_trim)
Rows: 218
Columns: 4
$ economy_name <chr> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG"…
$ region <fct> South Asia, Europe & Central Asia, Middle East …
$ income_group <fct> Low income, Upper middle income, Lower middle i…
# Save the trimmed classification table twice: as Rds and as CSV.
write_rds(
  classification_trim,
  here(dirProcData, "economy-classification.Rds")
)
write_csv(
  classification_trim,
  here(dirProcData, "economy-classification.csv")
)
# Reshape the GDP/PPP data to one row per economy: each classification code
# becomes its own column, which is then given a shorter name
# (PCAP.PP -> PPP_GDPCap, PPPGlob -> PPP).
gdp_by_ppp_trim <-
  gdp_by_ppp %>%
  select(economy_name, economy_code, classification_code, value) %>%
  pivot_wider(
    names_from = classification_code,
    values_from = value
  ) %>%
  rename(PPP_GDPCap = PCAP.PP, PPP = PPPGlob)

glimpse(gdp_by_ppp_trim)
Rows: 222
Columns: 4
$ economy_name <fct> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <fct> AFG, ALB, DZA, ASM, AGO, AIA, ATG, ARG, ARM, AB…
$ PPP_GDPCap <dbl> 2202.57, 13093.66, 11560.52, NA, 7348.11, 22877…
$ PPP <dbl> 17.21, 41.23, 38.86, NA, 92.95, 2.29, 2.09, 10.…
# Save the reshaped GDP/PPP table twice: as Rds and as CSV.
write_rds(
  gdp_by_ppp_trim,
  here(dirProcData, "economy-GDP.Rds")
)
write_csv(
  gdp_by_ppp_trim,
  here(dirProcData, "economy-GDP.csv")
)
# Attach the GDP/PPP figures to every economy in the classification table.
#
# The join key is the economy code only. Without an explicit `by`, left_join()
# would match on every shared column, i.e. on economy_name as well, so any
# spelling difference between the World Bank and ICP names would silently
# leave the GDP columns NA even though the codes agree. The ICP name column
# is therefore dropped before joining, and the ICP code (read as a factor) is
# converted to character to match the classification table's key type.
gdp_ppp_full <-
  classification_trim %>%
  left_join(
    gdp_by_ppp_trim %>%
      mutate(economy_code = as.character(economy_code)) %>%
      select(-economy_name),
    by = "economy_code"
  )

glimpse(gdp_ppp_full)
Rows: 218
Columns: 6
$ economy_name <chr> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG"…
$ region <fct> South Asia, Europe & Central Asia, Middle East …
$ income_group <fct> Low income, Upper middle income, Lower middle i…
$ PPP_GDPCap <dbl> 2202.57, 13093.66, 11560.52, NA, NA, 7348.11, 2…
$ PPP <dbl> 17.21, 41.23, 38.86, NA, NA, 92.95, 2.09, 10.26…
# Save the combined classification + GDP/PPP table twice: as Rds and as CSV.
write_rds(
  gdp_ppp_full,
  here(dirProcData, "economy-classification-w-GDP.Rds")
)
write_csv(
  gdp_ppp_full,
  here(dirProcData, "economy-classification-w-GDP.csv")
)
If you see mistakes or want to suggest changes, please create an issue on the source repository.
Text and figures are licensed under Creative Commons Attribution CC BY-SA 4.0. Source code is available at https://github.com/spcanelon/useR2021-cost-conversion-tool, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".