Peter ahead

Welcome to this year’s symmetrical PremPredict, where there are twenty teams and twenty players … and so a massive £100 up for grabs.

Early congratulations go to Peter for leading the pack. Well done to Andrea, though, for stopping an all-Finnis top six.

 

Latest standings

Here’s what you really want to know:


Fortunately, there’s still a long way to go!


Collective expectations

But what are we collectively expecting from the Premier League this season? Are we expecting it to be a repeat of last season?

By the look of our picks, we seem to side with the bookies. As a group, we predict Manchester City to finish highest on average, with the recently-promoted teams struggling.

 

And, just like last year, we collectively feel that Huddersfield will come last. But will we be more accurate than before?!

Appendix

For completeness (and reproducibility), here’s the code that I used to calculate what’s above.

suppressMessages(library(tidyverse))
library(rvest)
library(xml2)
library(googlesheets)
library(htmlwidgets)
library(widgetframe)
library(DT)
library(ggridges)
  
# Each update, run this dimmed code for the Premier League table

# club_order <- read_html("http://www.theguardian.com/football/premierleague/table") %>%
#   html_nodes(".table--striped") %>%
#   .[[1]] %>%
#   html_table()
# write_rds(club_order, "data/club_order-230818.rds") # + date
    
club_order <- read_rds("data/club_order-230818.rds")  # Add date
 
   
# Run this dimmed code each season for the predictions [after running googlesheets::gs_auth() just once]

# gsheet <- gs_url("https://docs.google.com/spreadsheets/d/1KgzlXAdcH5dwFllIU0Nktq-_QTwWwBuj76Di-UYQbtA/edit?usp=sharing")
# data_early <- gs_read(gsheet, verbose = FALSE) %>% 
#   select(-Timestamp, -`If you want email updates on the competition, please provide your email address`) %>% 
#   arrange(`Who is making this prediction?`) %>% 
#   t
# data_rowname <- c("Club", row.names(data_early)[-1])
# data_full <- cbind(data_rowname, data_early)
# colnames(data_full) <- data_full[1,]
# data_input <- data_full %>% 
#   as_tibble() %>% 
#   slice(-1) %>% 
#   mutate(Club = str_sub(Club, start = 30, end = -2)) %>% 
#   mutate_at(
#     .vars = vars(-Club),
#     .funs = funs(as.integer(str_extract(., "([0-9]+)")))
#     ) %>% 
#   mutate(Club = recode(
#     .x = Club, 
#     `Bournemouth` = "AFC Bournemouth",
#     `Brighton and Hove Albion` = "Brighton",
#     `Cardiff City` = "Cardiff",
#     `Crystal Palace` = "C Palace",
#     `Huddersfield Town` = "Huddersfield",
#     `Leicester City` = "Leicester",
#     `Manchester City` = "Man City",
#     `Manchester United` = "Man Utd",
#     `Newcastle United` = "Newcastle",
#     `Tottenham Hotspur` = "Spurs",
#     `West Ham United` = "West Ham",
#     `Wolverhampton Wanderers` = "Wolves")
#     ) %>% 
#   arrange(Club)
# write_rds(x = data_input, path = "data/data_input_2018-19.rds")
  
data_input <- read_rds(path = "data/data_input_2018-19.rds")
  
  
# Make the league table consistent with our inputs

clubsABC <- sort(club_order$Team)
clubStandings <- match(data_input$Club, club_order$Team, 0)
topClub <- match(1, clubStandings, 0)
  
predictions <- data_input[,-1]
bonus <- -50*(predictions[topClub,]==1)
ssq <- function(x){(x-clubStandings)^2}
squares <- apply(predictions,2,ssq)
score <- colSums(squares) + bonus
names <- colnames(score)
names1 <- str_replace_all(names, "_", " ")
worst <- apply(squares,2,max)
findWorst <- function(y){match(worst[y],squares[,y],0)}
worstClubNo <- sapply(1:ncol(predictions),findWorst)
worstClub <- clubsABC[worstClubNo]
  
output <- rbind(score, bonus, worst, worstClubNo)
output1 <- data.frame(names1,t(output))
row.names(output1) <- NULL 
output2 <- output1 %>% 
  mutate(worstClub=clubsABC[worstClubNo]) %>%
  select(-worstClubNo) 
colnames(output2) <- c("Names", "Scores", "Bonus", "WorstCost", "WorstClub")
  
report <- output2 %>% 
  arrange(Scores) %>% 
  select(Names, Scores, Bonus, WorstClub, WorstCost)
  
dtReport <- datatable(
  report, 
  rownames = FALSE, 
  options = list(
    dom = 't', 
    pageLength = 23, 
    order = list(
      list(1, 'asc'), 
      list(0, 'asc')),
    columnDefs = list(
      list(
        className = 'dt-left', 
        targets = c(0)
        ),
      list(
        className = 'dt-right', 
        targets = c(1, 2, 3)
        )
      )
    )
  )
   
frameWidget(dtReport, width = '100%', height = 800)


… and …


data_input1 <- as_data_frame(data_input)
averageView <- round(rowMeans(data_input1[, -1]), 2)
views <- cbind(data_input1[,1], averageView)
  
data_input2 <- data_input1 %>% 
  gather(key = "Player", -Club, value = "Prediction") %>% 
  left_join(views, by = "Club")
  
ggplot(
  data = data_input2,
  mapping = aes(
    y = reorder(Club, -averageView), 
    x = Prediction, 
    fill = averageView, 
    color = averageView
    )
  ) + 
  geom_ridgeline(
    stat = "binline", 
    bins = 20, scale = 0.95, 
    draw_baseline = FALSE
    ) + 
  scale_x_continuous(
    breaks = c(5, 10, 15, 20), 
    labels = c(5, 10, 15, 20)
    ) + 
  labs(
    y = "", x = "", 
    title = "\n Our collective predictions for this season \n"
    ) + 
  scale_fill_gradient(
    low = "green", high = "red", 
    guide=FALSE
    ) + 
  scale_color_gradient(
    low = "green", high = "red", 
    guide=FALSE
    ) +
  theme(
    title = element_text(size = 10), 
    axis.text.y = element_text(size = 6)
    )

(Finally, here are your team-by-team predictions, alongside the latest team standings.)