Holiday ideas

Yes, it is a first world problem, but we always struggle with holiday destinations. I prefer flying upside down towards somewhere different, whilst my wife and boy aren’t keen on heat or long flights. For that reason, I made this analysis to find somewhere that suits us all.

Robin Penfold
2019-02-15

You can find the code in the appendix below, but here’s the app:


For completeness (and reproducibility), here’s the code that I used to calculate what’s above.

Libraries and data


library(airportr)
library(crosstalk)
library(DT)
library(leaflet)
library(lubridate)
library(rvest)
suppressMessages(library(tidyverse))
library(xml2)

index_page_LHR <- 'https://www.flightsfrom.com/LHR/destinations'
index_page_LGW <- 'https://www.flightsfrom.com/LGW/destinations'

airports_LHR <- read_html(index_page_LHR) %>%
  html_nodes(".airport-content-destination-list-name") %>% 
  html_text() %>% 
  str_trim()

times_LHR <- read_html(index_page_LHR) %>%
  html_nodes(".airport-content-destination-list-time") %>% 
  html_text() %>% 
  str_trim()

airports_LGW <- read_html(index_page_LGW) %>%
  html_nodes(".airport-content-destination-list-name") %>% 
  html_text() %>% 
  str_trim()

times_LGW <- read_html(index_page_LGW) %>%
  html_nodes(".airport-content-destination-list-time") %>% 
  html_text() %>% 
  str_trim()

data <- bind_rows(
  tibble(airports = airports_LHR, times = times_LHR, home = 'LHR'),
  tibble(airports = airports_LGW, times = times_LGW, home = 'LGW')
) %>% 
  arrange(home) %>% 
  group_by(airports) %>% 
  slice(1) %>% 
  ungroup() %>% 
  arrange(airports) %>% 
  rowwise() %>% 
  mutate(
    code = str_extract(airports, '[[:upper:]][[:upper:]][[:upper:]]'),
    position = str_locate_all(airports, '[[:upper:]][[:upper:]][[:upper:]]')
    ) %>% 
  separate(col = times, into = c('split', 'detail'), sep = ": ") %>% 
  separate(col = detail, into = c('hours', 'mins'), sep = "h ") %>% 
  rowwise() %>% 
  mutate(
    mins = as.integer(str_sub(mins, start = 1L, end = -2L)),
    hours = as.integer(hours),
    minutes = (60 * hours) + mins,
    position1 = position[[1]] - 2, 
    position2 = position[[2]] + 2,
    city = str_trim(str_to_lower(str_sub(string = airports, start = 1L, end = position1))),
    country = str_trim(str_to_lower(str_sub(string = airports, start = position2, end = -1L))))

airport_data <- airportr::airports

output <- data[, c('code', 'city', 'home', 'country', 'minutes')] %>% 
  left_join(airport_data, by = c('code' = 'IATA')) %>% 
  filter(!is.na(ICAO)) %>% 
  rowwise() %>% 
  mutate(
    distance = airport_distance(home, code),
    City = str_to_lower(City), 
    Country = str_to_lower(Country)
    )

# 'GlobalLandTemperaturesByCity.csv' downloaded from https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data#GlobalLandTemperaturesByCity.csv

temp_data <- read_csv('GlobalLandTemperaturesByCity.csv') %>% 
  mutate(
    year = year(dt),
    month = month(dt),
    City = str_to_lower(City),
    Country = str_to_lower(Country)
  ) %>% 
  filter(
    year > 2000
    ) %>% 
  group_by(City, Country, month) %>% 
  summarise(temp_av = mean(AverageTemperature, na.rm = TRUE)) %>% 
  ungroup()

overall <- output %>% 
  left_join(temp_data, by = c('city' = 'City', 'country' = 'Country')) %>% 
  select(code, city, country, month, minutes, ICAO, Latitude, Longitude, UTC, distance, temp_av) %>% 
  filter(
    !is.na(temp_av)
    ) %>% 
  mutate(
    minutes = round(minutes, 0),
    temp_av = round(temp_av, 0),
    Latitude = round(Latitude, 2),
    Longitude = round(Longitude, 2),
    month_label = case_when(
      month == 1 ~ "01 - January",
      month == 2 ~ "02 - February",
      month == 3 ~ "03 - March",
      month == 4 ~ "04 - April",
      month == 5 ~ "05 - May",
      month == 6 ~ "06 - June",
      month == 7 ~ "07 - July",
      month == 8 ~ "08 - August",
      month == 9 ~ "09 - September",
      month == 10 ~ "10 - October",
      month == 11 ~ "11 - November",
      month == 12 ~ "12 - December",
      TRUE ~ NA_character_
      )
    ) %>% 
  arrange(code) %>% 
  group_by(country, city, month_label) %>% 
  slice(1) %>% 
  ungroup() %>% 
  arrange(city, month) %>% 
  select(country, city, temp_av, minutes, Longitude, Latitude, month_label)

# write_rds(x = overall, path = 'overall.rds')

overall <- read_rds('overall.rds')

pal <- colorNumeric(
  palette = "Reds",
  domain = overall$temp_av)

sd <- SharedData$new(overall)

bscols(
  filter_slider("temp", "Temperature", sd, column=~temp_av, step=1, width=100),
  filter_slider("duration", "Flight minutes", sd, column=~minutes, step=1, width=100),
  filter_select("mon", "Month", sd, group=~month_label, multiple = FALSE)
  )

Analysis


output_map <- leaflet(sd) %>% 
  addTiles() %>% 
  setView(
    lng = 15, 
    lat = 10, 
    zoom = 2) %>% 
  addCircles(
    lng = ~Longitude, 
    lat = ~Latitude, 
    weight = 2,
    radius = ~sqrt(minutes) * 10000, 
    popup = ~paste(city, "- time: ", round(minutes/60, 1), "hrs; temp: ", round(temp_av, 2), "C"), 
    color = ~pal(temp_av)
  )

output_table <- datatable(
  sd,
  rownames = FALSE,
  colnames = c('country' = 'country', 'city' = 'city', 'temp' = 'temp_av', 'minutes' = 'minutes', 'Long' = 'Longitude', 'Lat' = 'Latitude', 'month' = 'month_label'),
  extensions="Scroller", 
  style="bootstrap", 
  class="compact", 
  width="100%",
  options=list(
    dom = 'tip',
    deferRender=TRUE, 
    scrollY=300, 
    scroller=TRUE
    )
  ) %>% 
  formatRound(
    columns = c(3, 4, 5, 6), 
    digits = 0
    )

bscols(output_map, output_table)

System settings


R version 4.0.0 (2020-04-24)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.5

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
 [1] forcats_0.5.0     stringr_1.4.0     dplyr_1.0.0      
 [4] purrr_0.3.4       readr_1.3.1       tidyr_1.1.0      
 [7] tibble_3.0.1      ggplot2_3.3.2     tidyverse_1.3.0  
[10] rvest_0.3.5       xml2_1.3.2        lubridate_1.7.9  
[13] leaflet_2.0.3     DT_0.14           crosstalk_1.1.0.1
[16] airportr_0.1.3   

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4.6       lattice_0.20-41    assertthat_0.2.1  
 [4] digest_0.6.25      mime_0.9           R6_2.4.1          
 [7] cellranger_1.1.0   backports_1.1.6    reprex_0.3.0      
[10] evaluate_0.14      httr_1.4.1         pillar_1.4.4      
[13] rlang_0.4.6        lazyeval_0.2.2     readxl_1.3.1      
[16] rstudioapi_0.11    blob_1.2.1         rmarkdown_2.3     
[19] htmlwidgets_1.5.1  munsell_0.5.0      shiny_1.5.0       
[22] broom_0.5.6        compiler_4.0.0     httpuv_1.5.4      
[25] modelr_0.1.7       xfun_0.15          pkgconfig_2.0.3   
[28] htmltools_0.5.0    tidyselect_1.1.0   fansi_0.4.1       
[31] crayon_1.3.4       dbplyr_1.4.4       withr_2.2.0       
[34] later_1.1.0.1      grid_4.0.0         xtable_1.8-4      
[37] nlme_3.1-147       jsonlite_1.6.1     gtable_0.3.0      
[40] lifecycle_0.2.0    DBI_1.1.0          magrittr_1.5      
[43] scales_1.1.1       cli_2.0.2          stringi_1.4.6     
[46] farver_2.0.3       promises_1.1.1     fs_1.4.1          
[49] ellipsis_0.3.1     generics_0.0.2     vctrs_0.3.1       
[52] distill_0.8        RColorBrewer_1.1-2 tools_4.0.0       
[55] glue_1.4.1         hms_0.5.3          fastmap_1.0.1     
[58] yaml_2.2.1         colorspace_1.4-1   knitr_1.29        
[61] haven_2.3.0