options(vtree_count=NULL)

## ----load packages, echo = FALSE, message = FALSE--------------------------------------------
library(vtree)
library(vcd)
library(eulerr)
library(UpSetR)
library(readxl)
library(dplyr)
library(forcats)
library(httr)
library(stablelearner)

options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)


## --------------------------------------------------------------------------------------------
knitr::knit_hooks$set(document = function(x) {
  sub('\\usepackage[]{color}', 
    '\\usepackage{xcolor}', x, fixed = TRUE)})

if (is.null(knitr::opts_knit$get("out.format"))) {
  knitr::opts_knit$set(out.format ="latex")
}


## --------------------------------------------------------------------------------------------
data("titanic")
td <- titanic %>%
  rename(Survived=survived) %>%
  mutate(
    Age = ifelse(age<13, "Child", "Adult"),
    Gender = gender,
    Class = fct_collapse(class,
      Crew = c("deck crew", "engineering crew", "restaurant staff",
        "victualling crew")),
    Region = fct_collapse(country,
      "UK and Ireland" = c("England", "Scotland", "Wales", "Ireland",
        "Northern Ireland", "Channel Islands"),
      "Europe" = c("Norway", "France", "Finland", "Sweden", "Latvia",
        "Denmark", "Bulgaria", "Greece", "Hungary", "France", "Spain", 
        "Italy", "Belgium", "Germany", "Austria", "Poland", 
        "Switzerland", "Bosnia", "Croatia", "Croatia (Modern)", 
        "Yugoslavia", "Slovakia (Modern day)", "Slovenia", "Netherlands",
        "Russia"),
      "North America" = c("United States", "Canada", "Mexico", "Cuba"),
      other_level = "Other"))


## ----out.width="425px", message = FALSE------------------------------------------------------
vtree(td, "Region Age", horiz = FALSE)


## ----pRegion function, echo = FALSE----------------------------------------------------------
#
# Generate LaTeX string for % in a Region
#

pRegion <- function(Region,data=td,latex=FALSE) {
  REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA"
  
  if (Region=="NA") {
    group <- REGION==Region
    contents <- as.character(sum(group,na.rm=TRUE))
    contents
  } else {
    REGION <- REGION[REGION!="NA"]
    
    group <- REGION==Region
    if (latex) {
      paste0(sum(group,na.rm=TRUE)," (",
        round(100*sum(group,na.rm=TRUE)/length(group)),"\\%)")
    } else {
      paste0(sum(group,na.rm=TRUE)," (",
        round(100*sum(group,na.rm=TRUE)/length(group)),"%)")
    }
  }
}


## ----pRegionAge function, echo = FALSE-------------------------------------------------------
#
# Generate LaTeX string for % Age within a Region
#

pRegionAge <- function(Region,Age,data=td) {

  AGE <- as.character(data$Age); AGE[is.na(AGE)] <- "NA"
  REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA"
  
  if (Age=="NA") {
    group <- AGE==Age & REGION==Region
    paste0(sum(group,na.rm=TRUE))    
  } else {
    group <- AGE==Age & REGION==Region
      paste0(sum(group,na.rm=TRUE)," (",
        round(100*sum(group/sum(REGION==Region,na.rm=TRUE),na.rm=TRUE)),"%)")
  }
}


## ----pRegionAgeSurvived function, echo = FALSE-----------------------------------------------
#
# Generate LaTeX string for % Survived within an Age within a Region
#

pRegionAgeSurvived <- function(Region,Age,survived,data=td) {
  REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA"

  q <- data[REGION==Region,]
    
  AGE <- as.character(q$Age); AGE[is.na(AGE)] <- "NA"
  
  SURVIVED <- q$Survived
  
  group <- SURVIVED==survived & AGE==Age
  if (sum(AGE==Age,na.rm=TRUE)>0) {
    paste0(sum(group,na.rm=TRUE)," (",
      round(100*sum(group/sum(AGE==Age,na.rm=TRUE),na.rm=TRUE)),"%)")
  } else {
    paste0(sum(group,na.rm=TRUE))
  }
}


## --------------------------------------------------------------------------------------------
mat <- cbind(
  " "=c("Child","Adult","NA"),
  c(
    pRegionAge("UK and Ireland","Child"),
    pRegionAge("UK and Ireland","Adult"),
    pRegionAge("UK and Ireland","NA")),
  c(
    pRegionAge("Europe","Child"),
    pRegionAge("Europe","Adult"),
    pRegionAge("Europe","NA")),
  c(
    pRegionAge("North America","Child"),
    pRegionAge("North America","Adult"),
    pRegionAge("North America","NA")),
  c(
    pRegionAge("Other","Child"),
    pRegionAge("Other","Adult"),
    pRegionAge("Other","NA")),
  c(
    pRegionAge("NA","Child"),
    pRegionAge("NA","Adult"),
    pRegionAge("NA","NA")))

colnames(mat) <- c(
  "",
  pRegion("UK and Ireland"),
  pRegion("Europe"),
  pRegion("North America"),
  pRegion("Other"),
  pRegion("NA"))


## --------------------------------------------------------------------------------------------
kable(mat, booktabs = TRUE) %>%
  add_header_above(c("","UK and Ireland","Europe","North America","Other","NA"),bold=TRUE,align="l") %>%
  column_spec(1, bold = TRUE)


## --------------------------------------------------------------------------------------------
tabRegion <- function(Region,data=td) {
  mat <- cbind(
    c("Survived","Did not survive"),
    c(pRegionAgeSurvived(Region,"Child","yes",data=data),pRegionAgeSurvived(Region,"Child","no",data=data)),
    c(pRegionAgeSurvived(Region,"Adult","yes",data=data),pRegionAgeSurvived(Region,"Adult","no",data=data)),
    c(pRegionAgeSurvived(Region,"NA","yes",data=data),pRegionAgeSurvived(Region,"NA","no",data=data)))
  
  colnames(mat) <- c(
    "",
    pRegionAge(Region,"Child"),
    pRegionAge(Region,"Adult"),
    pRegionAge(Region,"NA"))
  
  mat
}


## --------------------------------------------------------------------------------------------
matUKandIreland <- tabRegion("UK and Ireland")
matNorthAmerica <- tabRegion("North America")
matEurope       <- tabRegion("Europe")
matOther        <- tabRegion("Other")
matNA           <- tabRegion("NA")


## --------------------------------------------------------------------------------------------
regionHeader <- function(Region) {
  if (knitr::opts_knit$get("out.format") %in% c("latex","sweave")) {
    cat(
      paste0(
        "\\vspace{0.2in}\n\\centerline{\\textbf{",
        Region,
        "} ",
        pRegion(Region,latex=TRUE),
        "}\n\\vspace{0.2in}\n"))
        # "\\textit{",
        # pRegion(Region,latex=TRUE),
        # "}}\n\\vspace{0.2in}\n"))
  } else {
    cat(
      paste0(
        Region," ",pRegion(Region,latex=FALSE),"\n"))
  }
}


## --------------------------------------------------------------------------------------------
kableRegion <- function(Region) {
  x <- tabRegion(Region)
  kable(x, booktabs = TRUE) %>%
    add_header_above(c("","Child","Adult","NA"),bold=TRUE,line=FALSE,align="l") %>%
    column_spec(1, bold = TRUE) %>%
    column_spec(2,width="0.8in") %>%
    column_spec(3,width="0.8in") %>%
    column_spec(4,width="0.8in") %>%
    sub("\\\\toprule", "", .) #%>%
    #sub("\\\\bottomrule", "", .)
}


## ----results="asis"--------------------------------------------------------------------------
regionHeader("UK and Ireland")


## --------------------------------------------------------------------------------------------
kableRegion("UK and Ireland")


## ----results="asis"--------------------------------------------------------------------------
regionHeader("North America")


## --------------------------------------------------------------------------------------------
kableRegion("North America")


## ----results="asis"--------------------------------------------------------------------------
regionHeader("Europe")


## --------------------------------------------------------------------------------------------
kableRegion("Europe")


## ----results="asis"--------------------------------------------------------------------------
regionHeader("Other")


## --------------------------------------------------------------------------------------------
kableRegion("Other")


## ----results="asis"--------------------------------------------------------------------------
regionHeader("NA")


## --------------------------------------------------------------------------------------------
kableRegion("NA")


## ----vtree3variables, out.width = "350px"----------------------------------------------------
vtree(td, "Region Age Survived",sameline=TRUE)


## ----mosaicTitanic,fig.height=6.5,fig.width=6, echo = FALSE----------------------------------
mosaic(Survived~ Region + Age, data = td,
  highlighting_fill=c("lightblue1","lightblue3"))


## ----eulerExample,fig.height=1.5, echo = FALSE-----------------------------------------------
wilkinson2012 <-  c(A = 4, B = 6, C = 3, D = 2, E = 7, F = 3,
                    "A&B" = 2, "A&F" = 2, "B&C" = 2, "B&D" = 1,
                    "B&F" = 2, "C&D" = 1, "D&E" = 1, "E&F" = 1,
                    "A&B&F" = 1, "B&C&D" = 1)
fit3 <- euler(wilkinson2012, shape = "ellipse")


## ----EulerPlot, fig.height=3-----------------------------------------------------------------
plot(fit3, quantities = TRUE)


## ----VennData, echo=FALSE--------------------------------------------------------------------
d2<- build.data.frame(
  c(   "A","B","C","D","E","F"),
  list( 1,  0,  0,  0,  0,  0,  4),
  list( 0,  1,  0,  0,  0,  0,  6),
  list( 0,  0,  1,  0,  0,  0,  3),
  list( 0,  0,  0,  1,  0,  0,  2),
  list( 0,  0,  0,  0,  1,  0,  7),
  list( 0,  0,  0,  0,  0,  1,  3),
  list( 1,  1,  0,  0,  0,  0,  2),
  list( 1,  0,  0,  0,  0,  1,  2),
  list( 0,  1,  1,  0,  0,  0,  2),
  list( 0,  1,  0,  1,  0,  0,  1),
  list( 0,  1,  0,  0,  0,  1,  2),
  list( 0,  0,  1,  1,  0,  0,  1),
  list( 0,  0,  0,  1,  1,  0,  1),
  list( 0,  0,  0,  0,  1,  1,  1),
  list( 1,  1,  0,  0,  0,  1,  1),
  list( 0,  1,  1,  1,  0,  0,  1))


## ----UpSetPlot,fig.height=6.5,fig.width=6, echo = FALSE--------------------------------------
upset(d2,nsets=6)


## ----out.width="350px"-----------------------------------------------------------------------
vtree(d2,"A B C D E F",Venn=TRUE,showlegend=TRUE)


## ----out.width="400px"-----------------------------------------------------------------------
vtree(d2,"A B C D E F",pattern=TRUE,showlegend=TRUE)


## ----echo=TRUE-------------------------------------------------------------------------------
library(vtree)
library(stablelearner)
library(dplyr)
library(forcats)
data("titanic")
td <- titanic %>%
  rename(Survived = survived) %>%
  mutate(
    Age = ifelse(age<13, "Child", "Adult"),
    Gender = gender,
    Class = fct_collapse(class,
      Crew = c("deck crew", "engineering crew", "restaurant staff",
        "victualling crew")),
    Region = fct_collapse(country,
      "UK and Ireland" = c("England", "Scotland", "Wales", "Ireland",
        "Northern Ireland", "Channel Islands"),
      "Europe" = c("Norway", "France", "Finland", "Sweden", "Latvia",
        "Denmark", "Bulgaria", "Greece", "Hungary", "France", "Spain", 
        "Italy", "Belgium", "Germany", "Austria", "Poland", 
        "Switzerland", "Bosnia", "Croatia", "Croatia (Modern)", 
        "Yugoslavia", "Slovakia (Modern day)", "Slovenia", "Netherlands",
        "Russia"),
      "North America" = c("United States", "Canada", "Mexico", "Cuba"),
      other_level = "Other"))


## ----echo = TRUE, out.width = "120px"--------------------------------------------------------
vtree(td, "Class")


## ----echo = TRUE, out.width = "400px"--------------------------------------------------------
vtree(td, "Class Age", horiz = FALSE)


## ----echo = TRUE, out.width = "400px"--------------------------------------------------------
library(dplyr)
td %>% filter(Gender == "female") %>% vtree(~ Class + Age, horiz = FALSE)


## ----echo = TRUE, out.width = "325px"--------------------------------------------------------
vtree(td, "Region Age", prune = list(Region = c("Europe", "Other")), 
  horiz = FALSE)


## ----echo = TRUE, out.width = "300px"--------------------------------------------------------
vtree(td, "Region Class Gender Age", 
  keep = list(Region = "Europe", Class = "3rd", Gender = "male"))


## ----echo = TRUE, out.width = "300px"--------------------------------------------------------
vtree(td, "Region Class Gender Age",
  keep = list(Region = "Europe", Class = "3rd", Gender = "male"),
  vp = FALSE)


## ----echo = TRUE, out.width = "175px"--------------------------------------------------------
vtree(td, "Region Age",
  prunebelow = list(Region =c("UK and Ireland", "North America", "Other")))


## ----echo = TRUE, out.width = "435px"--------------------------------------------------------
vtree(td, "Class Region", horiz = FALSE, splitwidth = 5)


## ----echo = TRUE, out.width = "435px"--------------------------------------------------------
vtree(td, "Class Region", horiz = FALSE, prunesmaller = 50,
  splitwidth = 5)


## ----echo = TRUE, out.width = "250px"--------------------------------------------------------
vtree(td, "Class embarked", labelvar = c("embarked" = "Port"),
  sameline = TRUE)


## ----echo = TRUE, out.width = "250px"--------------------------------------------------------
vtree(td, "Class", horiz = FALSE, labelnode = list(Class = c(
  "First Class" = "1st", "Second Class" = "2nd", "Third Class" = "3rd")))


## ----echo = TRUE, out.width = "350px"--------------------------------------------------------
vtree(td, "Class is.na:fare", horiz = FALSE)


## ----echo = TRUE, out.width = "400px"--------------------------------------------------------
vtree(td, "fare>50 Class", horiz = FALSE)


## ----echo = TRUE, out.width = "100px"--------------------------------------------------------
vtree(td, summary = "fare", horiz = FALSE)


## ----echo = TRUE, out.width = "425px"--------------------------------------------------------
vtree(td, "Region Class", summary = "fare \nmean %mean%", horiz = FALSE,
  prune = list(Region = "Other", Class = "Crew"), splitwidth = 5)


## ----echo = TRUE, out.width = "400px"--------------------------------------------------------
vtree(td, "Class Gender", horiz = FALSE, pattern = TRUE)


## ----echo = TRUE, out.width = "375px"--------------------------------------------------------
vtree(td, "Region Gender age fare", check.is.na = TRUE, 
  horiz = FALSE, showlegend = TRUE)


## ----Remdesivir,echo = TRUE------------------------------------------------------------------
rem <- build.data.frame(
  c(   "included","elig","consent","randgrp","started"),
  list(0,          0,     1,        0,       0,          13),
  list(0,          1,     0,        0,       0,          3),
  list(1,          1,     1,        1,       1,          193),
  list(1,          1,     1,        1,       0,          4),
  list(1,          1,     1,        2,       1,          191),
  list(1,          1,     1,        2,       0,          8),
  list(1,          1,     1,        3,       1,          200))


## ----Remdesivir2,echo = TRUE-----------------------------------------------------------------
nodelabels <- list(
  included = c("Randomized" = "1", "Excluded" = "0"),
  randgrp=c(
    "Randomized to receive 10 d of remdesivir" = "1",
    "Randomized to receive 5 d of remdesivir" = "2",
    "Randomized to continue standard care" = "3"),
  started=c(
    "Did not start remdesivir" = "0",
    "Started remdesivir" = "1"))


## ----CONSORTtree, echo = TRUE, out.width = "400px"-------------------------------------------
vtree(rem,"included randgrp started",
  labelnode = nodelabels,
  follow = list(included = "1", randgrp = c("1", "2")),
  summary = c(
    "consent=0 \n(Withdrew consent %sum%%var=included%%node=0%)",
    "elig=0 \n(Ineligible %sum%%var=included%%node=0%)"),
  cdigits = 0, showvarnames = FALSE, title = "patients screened",
  horiz = FALSE, fillcolor = "lightsteelblue1", showpct = FALSE)


## --------------------------------------------------------------------------------------------
## This data set is downloaded as a csv from
## https://data.ottawapolice.ca/datasets/76a54b7f7ef44ab1b78dd96f3061c7cb_0/explore
## and saved in the same folder as code.R
library(readr)
data <- read_csv("Data/TSRDCP_TrafficStops_v1_164883526439542163.csv",
                 col_select = c(`How Cleared`,`Driver Gender`,
                                `Driver Race`,`Driver Age`),
                 col_types = cols())


z <- data |>
  rename(race=`Driver Race`,age=`Driver Age`,gender=`Driver Gender`,
         how_cleared=`How Cleared`) |>
  filter(!is.na(race) & !is.na(age) & !is.na(gender)) |>
  filter(gender != "D", age != "-15")

## ----echo = TRUE, out.width = "425px"--------------------------------------------------------
vtree(z, "race=White age gender", splitwidth = Inf, sameline = TRUE,
      summary = "how_cleared=Final \nNo action %pct%",
      title = "Total stops", showlegend = TRUE, showlegendsum = TRUE)

sessionInfo()