Data Processing

Data wrangling and preparation of Gross Domestic Product (GDP) and Purchasing Power Parity (PPP) data.

Data import

# Setting data directories, rooted at the project via here().
# The trailing "/" component is deliberate: dirRawData is later combined with
# file names by plain string concatenation (glue), so it must end in a separator.
dirRawData <- here("data-raw", "/")
dirProcData <- here("data-processed", "/")

# World Bank Country and Lending Groups
# Read cells A5:I224 of the "List of economies" sheet, then standardise the
# column names with janitor::clean_names().
classification_raw <- janitor::clean_names(
  readxl::read_excel(
    path = glue(dirRawData, "WorldBank-Country-Income-Class.xls"),
    sheet = "List of economies",
    range = "A5:I224"
  )
)
glimpse(classification_raw)
Rows: 219
Columns: 9
$ x_1              <chr> "x", "1", "2", "3", "4", "5", "6", "7", "8"…
$ x_2              <chr> "x", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ economy          <chr> "x", "Afghanistan", "Albania", "Algeria", "…
$ code             <chr> "x", "AFG", "ALB", "DZA", "ASM", "AND", "AG…
$ x                <chr> "x", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ region           <chr> "x", "South Asia", "Europe & Central Asia",…
$ income_group     <chr> "x", "Low income", "Upper middle income", "…
$ lending_category <chr> "x", "IDA", "IBRD", "IBRD", "..", "..", "IB…
$ other            <chr> "x", "HIPC", NA, NA, NA, NA, NA, NA, NA, NA…
# Purchasing Power Parity
# Compact column spec "fffffcn": five factor columns, one character column,
# one numeric column. Column names are standardised afterwards.
gdp_by_ppp_raw <- janitor::clean_names(
  read_csv(
    glue(dirRawData, "ICP-2017-GDP-PPP-Data.csv"),
    col_types = "fffffcn"
  )
)
glimpse(gdp_by_ppp_raw)
Rows: 449
Columns: 7
$ country_name        <fct> "Afghanistan", "Afghanistan", "Albania",…
$ country_code        <fct> AFG, AFG, ALB, ALB, DZA, DZA, ASM, ASM, …
$ classification_name <fct> "Expenditure per capita, PPP-based (US$)…
$ classification_code <fct> PCAP.PP, PPPGlob, PCAP.PP, PPPGlob, PCAP…
$ series_name         <fct> 1000000:GROSS DOMESTIC PRODUCT, 1000000:…
$ series_code         <chr> "1000000", "1000000", "1000000", "100000…
$ x2017_yr2017        <dbl> 2202.570851, 17.205558, 13093.660066, 41…
# Metadata file shipped alongside the ICP 2017 data; column types are left for
# read_csv() to guess, and column names are standardised afterwards.
gdp_by_ppp_meta_raw <- janitor::clean_names(
  read_csv(glue(dirRawData, "ICP-2017-GDP-PPP-Metadata.csv"))
)

Data wrangling

# Clean the raw classification sheet:
#   * drop the placeholder row whose cells all contain "x" (and rows with a
#     missing economy, which filter() also discards),
#   * drop the running-number column x_1 and any fully empty rows/columns,
#   * give the identifier columns explicit economy_* names,
#   * convert the grouping columns to factors.
classification <-
  classification_raw %>%
  filter(economy != "x") %>%
  select(-x_1) %>%
  remove_empty(c("rows", "cols")) %>%
  rename(economy_name = economy, economy_code = code) %>%
  mutate(
    # across() replaces the superseded mutate_at(vars(...), factor).
    across(c(region, lending_category), factor),
    # income_group gets its levels in order of first appearance, then the two
    # lowest income bands are moved to the front; remaining levels keep that
    # order-of-appearance position.
    income_group = factor(income_group, levels = unique(income_group)),
    income_group = fct_relevel(
      income_group,
      "Low income", "Lower middle income"
    )
  )
glimpse(classification)
Rows: 218
Columns: 6
$ economy_name     <chr> "Afghanistan", "Albania", "Algeria", "Ameri…
$ economy_code     <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "…
$ region           <fct> South Asia, Europe & Central Asia, Middle E…
$ income_group     <fct> Low income, Upper middle income, Lower midd…
$ lending_category <fct> IDA, IBRD, IBRD, .., .., IBRD, IBRD, IBRD, …
$ other            <chr> "HIPC", NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Clean the raw PPP/GDP extract:
#   * drop series_code,
#   * discard rows without a country code,
#   * rename the identifier and value columns to match `classification`,
#   * round values to two decimals and shorten the series label.
gdp_by_ppp <-
  gdp_by_ppp_raw %>%
  select(-series_code) %>%
  filter(!is.na(country_code)) %>%
  rename(
    economy_name = country_name,
    economy_code = country_code,
    value = x2017_yr2017
  ) %>%
  mutate(
    value = round(value, 2),
    # if_else() with NA_character_ keeps series_name a character column even
    # when no row matches (or the input is empty); ifelse(..., "GDP", NA)
    # would silently yield a logical column in that case.
    series_name = if_else(
      series_name == "1000000:GROSS DOMESTIC PRODUCT",
      "GDP",
      NA_character_
    )
  )
glimpse(gdp_by_ppp)
Rows: 444
Columns: 6
$ economy_name        <fct> "Afghanistan", "Afghanistan", "Albania",…
$ economy_code        <fct> AFG, AFG, ALB, ALB, DZA, DZA, ASM, ASM, …
$ classification_name <fct> "Expenditure per capita, PPP-based (US$)…
$ classification_code <fct> PCAP.PP, PPPGlob, PCAP.PP, PPPGlob, PCAP…
$ series_name         <chr> "GDP", "GDP", "GDP", "GDP", "GDP", "GDP"…
$ value               <dbl> 2202.57, 17.21, 13093.66, 41.23, 11560.5…

Data filtering

Economy classification

# data trimming: keep only the identifier columns plus region and income group
classification_trim <- select(
  classification,
  economy_name,
  economy_code,
  region,
  income_group
)
glimpse(classification_trim)
Rows: 218
Columns: 4
$ economy_name <chr> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG"…
$ region       <fct> South Asia, Europe & Central Asia, Middle East …
$ income_group <fct> Low income, Upper middle income, Lower middle i…
# writing to Rds and CSV files
# dirProcData is already an absolute, project-rooted path, so it is combined
# with the file name via file.path() rather than being passed through here()
# a second time (which relies on here() passing absolute paths through).
write_rds(classification_trim,
          file.path(dirProcData, "economy-classification.Rds"))
write_csv(classification_trim,
          file.path(dirProcData, "economy-classification.csv"))

PPP-adjusted GDP

# data trimming: one row per economy, with one column per classification code
gdp_by_ppp_trim <-
  gdp_by_ppp %>%
  select(economy_name, economy_code, classification_code, value) %>%
  pivot_wider(
    names_from = classification_code,
    values_from = value
  ) %>%
  # friendlier names for the two pivoted measure columns
  rename(PPP_GDPCap = PCAP.PP, PPP = PPPGlob)
glimpse(gdp_by_ppp_trim)
Rows: 222
Columns: 4
$ economy_name <fct> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <fct> AFG, ALB, DZA, ASM, AGO, AIA, ATG, ARG, ARM, AB…
$ PPP_GDPCap   <dbl> 2202.57, 13093.66, 11560.52, NA, 7348.11, 22877…
$ PPP          <dbl> 17.21, 41.23, 38.86, NA, 92.95, 2.29, 2.09, 10.…
# writing to Rds and CSV files
# dirProcData is already an absolute, project-rooted path, so it is combined
# with the file name via file.path() rather than being passed through here()
# a second time (which relies on here() passing absolute paths through).
write_rds(gdp_by_ppp_trim,
          file.path(dirProcData, "economy-GDP.Rds"))
write_csv(gdp_by_ppp_trim,
          file.path(dirProcData, "economy-GDP.csv"))

Joining classification data with PPP data

# Attach the PPP measures to every classified economy. The join keys are
# spelled out instead of relying on the implicit natural join, so the match
# does not silently change if either table gains another shared column name.
# NOTE(review): matching on economy_name as well as economy_code means an
# economy whose name is spelled differently in the two sources gets NA
# measures; consider joining on economy_code alone once that is confirmed safe.
gdp_ppp_full <-
  classification_trim %>%
  left_join(gdp_by_ppp_trim, by = c("economy_name", "economy_code"))
glimpse(gdp_ppp_full)
Rows: 218
Columns: 6
$ economy_name <chr> "Afghanistan", "Albania", "Algeria", "American …
$ economy_code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG"…
$ region       <fct> South Asia, Europe & Central Asia, Middle East …
$ income_group <fct> Low income, Upper middle income, Lower middle i…
$ PPP_GDPCap   <dbl> 2202.57, 13093.66, 11560.52, NA, NA, 7348.11, 2…
$ PPP          <dbl> 17.21, 41.23, 38.86, NA, NA, 92.95, 2.09, 10.26…
# writing to Rds and CSV files
# dirProcData is already an absolute, project-rooted path, so it is combined
# with the file name via file.path() rather than being passed through here()
# a second time (which relies on here() passing absolute paths through).
write_rds(gdp_ppp_full,
          file.path(dirProcData, "economy-classification-w-GDP.Rds"))
write_csv(gdp_ppp_full,
          file.path(dirProcData, "economy-classification-w-GDP.csv"))

Corrections

If you see mistakes or want to suggest changes, please create an issue on the source repository.

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-SA 4.0. Source code is available at https://github.com/spcanelon/useR2021-cost-conversion-tool, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".