library(dplyr) # for data manipulation
library(readxl) # to read Excel files
library(ggplot2) # for plotting (optional)
library(tidyr) # for reshaping if needed
url <- "HDR25_Statistical_Annex_HDI_Table.xlsx" # Stores the file path or GitHub URL of the HDI Excel file in a variable called url
raw.data <- read_excel(url) # Reads the Excel file into R as a dataframe called raw.data
head(raw.data) # shows first few rows
str(raw.data) # checks column names and types
tibble [278 × 15] (S3: tbl_df/tbl/data.frame)
$ ...1 : chr [1:278] NA NA NA NA ...
$ Table 1. Human Development Index and its components: chr [1:278] NA NA NA NA ...
$ ...3 : chr [1:278] NA NA NA "Human Development Index (HDI)" ...
$ ...4 : logi [1:278] NA NA NA NA NA NA ...
$ ...5 : chr [1:278] NA "SDG3" NA "Life expectancy at birth" ...
$ ...6 : chr [1:278] NA NA NA NA ...
$ ...7 : chr [1:278] NA "SDG4.3" NA "Expected years of schooling" ...
$ ...8 : chr [1:278] NA NA NA NA ...
$ ...9 : chr [1:278] NA "SDG4.4" NA "Mean years of schooling" ...
$ ...10 : chr [1:278] NA NA NA NA ...
$ ...11 : chr [1:278] NA "SDG8.5" NA "Gross national income (GNI) per capita" ...
$ ...12 : chr [1:278] NA NA NA NA ...
$ ...13 : chr [1:278] NA NA NA "GNI per capita rank minus HDI rank" ...
$ ...14 : chr [1:278] NA NA NA NA ...
$ ...15 : chr [1:278] NA NA NA "HDI rank" ...
library(dplyr)
raw.data <- raw.data %>% # renames column names
rename(
Country = `table_1_human_development_index_and_its_components`,
'Human Development Index' = `_3`,
'HDI Rank' = `_1`,
'Life expectancy at birth' = `_5`,
'Expected years of schooling' = '_7',
'Mean years of schooling' = '_9',
'GNI per capita' = '_11',
'2022 HDI Rank' = '_15'
)
names(raw.data) <- tolower(gsub("[^0-9a-zA-Z]+", "_", names(raw.data))) #Converts all column names to lowercase and replace spaces or special characters with _ for consistency.
hdi.data <- raw.data[-c(1:7), ] # Removes the first 7 rows which contained headers or notes, not data
hdi.clean <- hdi.data[ , -c(4, 6, 8, 10, 12, 14)] # Removes specific unnecessary columns by their index
hdi.clean <- hdi.clean %>%
drop_na() # Removes any rows that have missing values in any column
str(hdi.clean) # Checks the structure of hdi.clean to confirm types and remaining columns
tibble [193 × 9] (S3: tbl_df/tbl/data.frame)
$ hdi_rank : chr [1:193] "1" "2" "2" "4" ...
$ country : chr [1:193] "Iceland" "Norway" "Switzerland" "Denmark" ...
$ human_development_index : chr [1:193] "0.97199999999999998" "0.97" "0.97" "0.96199999999999997" ...
$ life_expectancy_at_birth : chr [1:193] "82.691000000000003" "83.308000000000007" "83.953999999999994" "81.933000000000007" ...
$ expected_years_of_schooling: chr [1:193] "18.850589750000001" "18.792850489999999" "16.667530060000001" "18.704010010000001" ...
$ mean_years_of_schooling : chr [1:193] "13.908926279999999" "13.117962179999999" "13.94912109" "13.027320599999999" ...
$ gni_per_capita : chr [1:193] "69116.937359999996" "112710.0211" "81948.901769999997" "76007.856690000001" ...
$ _13 : chr [1:193] "12" "0" "5" "4" ...
$ 2022_hdi_rank : chr [1:193] "3" "1" "2" "4" ...
hdi.clean <- hdi.clean %>% # Starts a pipeline to clean and convert columns to numeric
mutate(
`life_expectancy_at_birth` = as.numeric(gsub(",", "", `life_expectancy_at_birth`)),
`expected_years_of_schooling` = as.numeric(gsub(",", "", `expected_years_of_schooling`)),
`mean_years_of_schooling` = as.numeric(gsub(",", "", `mean_years_of_schooling`)),
`gni_per_capita` = as.numeric(gsub(",", "", `gni_per_capita`))
)
hdi_means <- hdi.clean %>% # Starts a pipeline to calculate summary statistics
summarise(
life_expectancy = mean(`life_expectancy_at_birth`, na.rm = TRUE),
expected_years_schooling = mean(`expected_years_of_schooling`, na.rm = TRUE),
mean_years_schooling = mean(`mean_years_of_schooling`, na.rm = TRUE),
gni_per_capita = mean(`gni_per_capita`, na.rm = TRUE)
)
hdi_means # Displays the table of computed mean values
NA
LS0tCnRpdGxlOiAiRmluYWwgSFcgRXhlcmNpc2UgMiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZHBseXIpICAgICAjIGZvciBkYXRhIG1hbmlwdWxhdGlvbgpsaWJyYXJ5KHJlYWR4bCkgICAgIyB0byByZWFkIEV4Y2VsIGZpbGVzCmxpYnJhcnkoZ2dwbG90MikgICAjIGZvciBwbG90dGluZyAob3B0aW9uYWwpCmxpYnJhcnkodGlkeXIpICAgICAjIGZvciByZXNoYXBpbmcgaWYgbmVlZGVkCmBgYAoKYGBge3J9CnVybCA8LSAiSERSMjVfU3RhdGlzdGljYWxfQW5uZXhfSERJX1RhYmxlLnhsc3giICMgU3RvcmVzIHRoZSBmaWxlIHBhdGggb3IgR2l0SHViIFVSTCBvZiB0aGUgSERJIEV4Y2VsIGZpbGUgaW4gYSB2YXJpYWJsZSBjYWxsZWQgdXJsCnJhdy5kYXRhIDwtIHJlYWRfZXhjZWwodXJsKSAjIFJlYWRzIHRoZSBFeGNlbCBmaWxlIGludG8gUiBhcyBhIGRhdGFmcmFtZSBjYWxsZWQgcmF3LmRhdGEKYGBgCgpgYGB7cn0KaGVhZChyYXcuZGF0YSkgICAgICAgIyBzaG93cyBmaXJzdCBmZXcgcm93cwpzdHIocmF3LmRhdGEpICAgICAgICAjIGNoZWNrcyBjb2x1bW4gbmFtZXMgYW5kIHR5cGVzCmBgYApgYGB7cn0KbGlicmFyeShkcGx5cikKCnJhdy5kYXRhIDwtIHJhdy5kYXRhICU+JSAjIHJlbmFtZXMgY29sdW1uIG5hbWVzCiAgcmVuYW1lKAogICAgQ291bnRyeSA9IGB0YWJsZV8xX2h1bWFuX2RldmVsb3BtZW50X2luZGV4X2FuZF9pdHNfY29tcG9uZW50c2AsCiAgICAnSHVtYW4gRGV2ZWxvcG1lbnQgSW5kZXgnID0gYF8zYCwKICAgICdIREkgUmFuaycgPSBgXzFgLAogICAgJ0xpZmUgZXhwZWN0YW5jeSBhdCBiaXJ0aCcgPSBgXzVgLAogICAnRXhwZWN0ZWQgeWVhcnMgb2Ygc2Nob29saW5nJyA9ICdfNycsCiAgICdNZWFuIHllYXJzIG9mIHNjaG9vbGluZycgPSAnXzknLAogICAgJ0dOSSBwZXIgY2FwaXRhJyA9ICdfMTEnLAogICAgJzIwMjIgSERJIFJhbmsnID0gJ18xNScKICApCgpgYGAKCmBgYHtyfQpuYW1lcyhyYXcuZGF0YSkgPC0gdG9sb3dlcihnc3ViKCJbXjAtOWEtekEtWl0rIiwgIl8iLCBuYW1lcyhyYXcuZGF0YSkpKSAjQ29udmVydHMgYWxsIGNvbHVtbiBuYW1lcyB0byBsb3dlcmNhc2UgYW5kIHJlcGxhY2Ugc3BhY2VzIG9yIHNwZWNpYWwgY2hhcmFjdGVycyB3aXRoIF8gZm9yIGNvbnNpc3RlbmN5LgpgYGAKCmBgYHtyfQpoZGkuZGF0YSA8LSByYXcuZGF0YVstYygxOjcpLCBdICMgUmVtb3ZlcyB0aGUgZmlyc3QgNyByb3dzIHdoaWNoIGNvbnRhaW5lZCBoZWFkZXJzIG9yIG5vdGVzLCBub3QgZGF0YQpoZGkuY2xlYW4gPC0gaGRpLmRhdGFbICwgLWMoNCwgNiwgOCwgMTAsIDEyLCAxNCldICMgUmVtb3ZlcyBzcGVjaWZpYyB1bm5lY2Vzc2FyeSBjb2x1bW5zIGJ5IHRoZWlyIGluZGV4CmBgYAoKYGBge3J9CmhkaS5jbGVhbiA8LSBoZGkuY2xlYW4gJT4lIAogIGRyb3BfbmEoKSAjIFJlbW92ZXMgYW55IHJvd3MgdGhhdCBoYXZlIG1pc3NpbmcgdmFsdWVzIGluIGFueSBjb2x1bW4KYGBgCgpgYGB7cn0Kc3RyKGhkaS5jbGVhbikgICMgQ2hlY2tzIHRoZSBzdHJ1Y3R1cmUgb2YgaGRpLmNsZWFuIHRvIGNvbmZpcm0gdHlwZXMgYW5kIHJlbWFpbmluZyBjb2x1bW5zCmBgYAoKYGBge3J9CmhkaS5jbGVhbiA8LSBoZGkuY2xlYW4gJT4lICMgU3RhcnRzIGEgcGlwZWxpbmUgdG8gY2xlYW4gYW5kIGNvbnZlcnQgY29sdW1ucyB0byBudW1lcmljCiAgbXV0YXRlKAogICAgYGxpZmVfZXhwZWN0YW5jeV9hdF9iaXJ0aGAgPSBhcy5udW1lcmljKGdzdWIoIiwiLCAiIiwgYGxpZmVfZXhwZWN0YW5jeV9hdF9iaXJ0aGApKSwKICAgIGBleHBlY3RlZF95ZWFyc19vZl9zY2hvb2xpbmdgID0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIiIsIGBleHBlY3RlZF95ZWFyc19vZl9zY2hvb2xpbmdgKSksCiAgICBgbWVhbl95ZWFyc19vZl9zY2hvb2xpbmdgID0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIiIsIGBtZWFuX3llYXJzX29mX3NjaG9vbGluZ2ApKSwKICAgIGBnbmlfcGVyX2NhcGl0YWAgPSBhcy5udW1lcmljKGdzdWIoIiwiLCAiIiwgYGduaV9wZXJfY2FwaXRhYCkpCiAgKQpgYGAKCgpgYGB7cn0KaGRpX21lYW5zIDwtIGhkaS5jbGVhbiAlPiUgIyBTdGFydHMgYSBwaXBlbGluZSB0byBjYWxjdWxhdGUgc3VtbWFyeSBzdGF0aXN0aWNzCiAgc3VtbWFyaXNlKAogICAgbGlmZV9leHBlY3RhbmN5ID0gbWVhbihgbGlmZV9leHBlY3RhbmN5X2F0X2JpcnRoYCwgbmEucm0gPSBUUlVFKSwKICAgIGV4cGVjdGVkX3llYXJzX3NjaG9vbGluZyA9IG1lYW4oYGV4cGVjdGVkX3llYXJzX29mX3NjaG9vbGluZ2AsIG5hLnJtID0gVFJVRSksCiAgICBtZWFuX3llYXJzX3NjaG9vbGluZyA9IG1lYW4oYG1lYW5feWVhcnNfb2Zfc2Nob29saW5nYCwgbmEucm0gPSBUUlVFKSwKICAgIGduaV9wZXJfY2FwaXRhID0gbWVhbihgZ25pX3Blcl9jYXBpdGFgLCBuYS5ybSA9IFRSVUUpCiAgKQoKaGRpX21lYW5zICAjIERpc3BsYXlzIHRoZSB0YWJsZSBvZiBjb21wdXRlZCBtZWFuIHZhbHVlcwoKYGBgCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCg==