options(vtree_count=NULL) ## ----load packages, echo = FALSE, message = FALSE-------------------------------------------- library(vtree) library(vcd) library(eulerr) library(UpSetR) library(readxl) library(dplyr) library(forcats) library(httr) library(stablelearner) options(kableExtra.latex.load_packages = FALSE) library(kableExtra) ## -------------------------------------------------------------------------------------------- knitr::knit_hooks$set(document = function(x) { sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)}) if (is.null(knitr::opts_knit$get("out.format"))) { knitr::opts_knit$set(out.format ="latex") } ## -------------------------------------------------------------------------------------------- data("titanic") td <- titanic %>% rename(Survived=survived) %>% mutate( Age = ifelse(age<13, "Child", "Adult"), Gender = gender, Class = fct_collapse(class, Crew = c("deck crew", "engineering crew", "restaurant staff", "victualling crew")), Region = fct_collapse(country, "UK and Ireland" = c("England", "Scotland", "Wales", "Ireland", "Northern Ireland", "Channel Islands"), "Europe" = c("Norway", "France", "Finland", "Sweden", "Latvia", "Denmark", "Bulgaria", "Greece", "Hungary", "France", "Spain", "Italy", "Belgium", "Germany", "Austria", "Poland", "Switzerland", "Bosnia", "Croatia", "Croatia (Modern)", "Yugoslavia", "Slovakia (Modern day)", "Slovenia", "Netherlands", "Russia"), "North America" = c("United States", "Canada", "Mexico", "Cuba"), other_level = "Other")) ## ----out.width="425px", message = FALSE------------------------------------------------------ vtree(td, "Region Age", horiz = FALSE) ## ----pRegion function, echo = FALSE---------------------------------------------------------- # # Generate LaTeX string for % in a Region # pRegion <- function(Region,data=td,latex=FALSE) { REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA" if (Region=="NA") { group <- REGION==Region contents <- as.character(sum(group,na.rm=TRUE)) contents } else { REGION <- REGION[REGION!="NA"] group <- REGION==Region if (latex) { paste0(sum(group,na.rm=TRUE)," (", round(100*sum(group,na.rm=TRUE)/length(group)),"\\%)") } else { paste0(sum(group,na.rm=TRUE)," (", round(100*sum(group,na.rm=TRUE)/length(group)),"%)") } } } ## ----pRegionAge function, echo = FALSE------------------------------------------------------- # # Generate LaTeX string for % Age within a Region # pRegionAge <- function(Region,Age,data=td) { AGE <- as.character(data$Age); AGE[is.na(AGE)] <- "NA" REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA" if (Age=="NA") { group <- AGE==Age & REGION==Region paste0(sum(group,na.rm=TRUE)) } else { group <- AGE==Age & REGION==Region paste0(sum(group,na.rm=TRUE)," (", round(100*sum(group/sum(REGION==Region,na.rm=TRUE),na.rm=TRUE)),"%)") } } ## ----pRegionAgeSurvived function, echo = FALSE----------------------------------------------- # # Generate LaTeX string for % Survived within an Age within a Region # pRegionAgeSurvived <- function(Region,Age,survived,data=td) { REGION <- as.character(data$Region); REGION[is.na(REGION)] <- "NA" q <- data[REGION==Region,] AGE <- as.character(q$Age); AGE[is.na(AGE)] <- "NA" SURVIVED <- q$Survived group <- SURVIVED==survived & AGE==Age if (sum(AGE==Age,na.rm=TRUE)>0) { paste0(sum(group,na.rm=TRUE)," (", round(100*sum(group/sum(AGE==Age,na.rm=TRUE),na.rm=TRUE)),"%)") } else { paste0(sum(group,na.rm=TRUE)) } } ## -------------------------------------------------------------------------------------------- mat <- cbind( " "=c("Child","Adult","NA"), c( pRegionAge("UK and Ireland","Child"), pRegionAge("UK and Ireland","Adult"), pRegionAge("UK and Ireland","NA")), c( pRegionAge("Europe","Child"), pRegionAge("Europe","Adult"), pRegionAge("Europe","NA")), c( pRegionAge("North America","Child"), pRegionAge("North America","Adult"), pRegionAge("North America","NA")), c( pRegionAge("Other","Child"), pRegionAge("Other","Adult"), pRegionAge("Other","NA")), c( pRegionAge("NA","Child"), pRegionAge("NA","Adult"), pRegionAge("NA","NA"))) colnames(mat) <- c( "", pRegion("UK and Ireland"), pRegion("Europe"), pRegion("North America"), pRegion("Other"), pRegion("NA")) ## -------------------------------------------------------------------------------------------- kable(mat, booktabs = TRUE) %>% add_header_above(c("","UK and Ireland","Europe","North America","Other","NA"),bold=TRUE,align="l") %>% column_spec(1, bold = TRUE) ## -------------------------------------------------------------------------------------------- tabRegion <- function(Region,data=td) { mat <- cbind( c("Survived","Did not survive"), c(pRegionAgeSurvived(Region,"Child","yes",data=data),pRegionAgeSurvived(Region,"Child","no",data=data)), c(pRegionAgeSurvived(Region,"Adult","yes",data=data),pRegionAgeSurvived(Region,"Adult","no",data=data)), c(pRegionAgeSurvived(Region,"NA","yes",data=data),pRegionAgeSurvived(Region,"NA","no",data=data))) colnames(mat) <- c( "", pRegionAge(Region,"Child"), pRegionAge(Region,"Adult"), pRegionAge(Region,"NA")) mat } ## -------------------------------------------------------------------------------------------- matUKandIreland <- tabRegion("UK and Ireland") matNorthAmerica <- tabRegion("North America") matEurope <- tabRegion("Europe") matOther <- tabRegion("Other") matNA <- tabRegion("NA") ## -------------------------------------------------------------------------------------------- regionHeader <- function(Region) { if (knitr::opts_knit$get("out.format") %in% c("latex","sweave")) { cat( paste0( "\\vspace{0.2in}\n\\centerline{\\textbf{", Region, "} ", pRegion(Region,latex=TRUE), "}\n\\vspace{0.2in}\n")) # "\\textit{", # pRegion(Region,latex=TRUE), # "}}\n\\vspace{0.2in}\n")) } else { cat( paste0( Region," ",pRegion(Region,latex=FALSE),"\n")) } } ## -------------------------------------------------------------------------------------------- kableRegion <- function(Region) { x <- tabRegion(Region) kable(x, booktabs = TRUE) %>% add_header_above(c("","Child","Adult","NA"),bold=TRUE,line=FALSE,align="l") %>% column_spec(1, bold = TRUE) %>% column_spec(2,width="0.8in") %>% column_spec(3,width="0.8in") %>% column_spec(4,width="0.8in") %>% sub("\\\\toprule", "", .) #%>% #sub("\\\\bottomrule", "", .) } ## ----results="asis"-------------------------------------------------------------------------- regionHeader("UK and Ireland") ## -------------------------------------------------------------------------------------------- kableRegion("UK and Ireland") ## ----results="asis"-------------------------------------------------------------------------- regionHeader("North America") ## -------------------------------------------------------------------------------------------- kableRegion("North America") ## ----results="asis"-------------------------------------------------------------------------- regionHeader("Europe") ## -------------------------------------------------------------------------------------------- kableRegion("Europe") ## ----results="asis"-------------------------------------------------------------------------- regionHeader("Other") ## -------------------------------------------------------------------------------------------- kableRegion("Other") ## ----results="asis"-------------------------------------------------------------------------- regionHeader("NA") ## -------------------------------------------------------------------------------------------- kableRegion("NA") ## ----vtree3variables, out.width = "350px"---------------------------------------------------- vtree(td, "Region Age Survived",sameline=TRUE) ## ----mosaicTitanic,fig.height=6.5,fig.width=6, echo = FALSE---------------------------------- mosaic(Survived~ Region + Age, data = td, highlighting_fill=c("lightblue1","lightblue3")) ## ----eulerExample,fig.height=1.5, echo = FALSE----------------------------------------------- wilkinson2012 <- c(A = 4, B = 6, C = 3, D = 2, E = 7, F = 3, "A&B" = 2, "A&F" = 2, "B&C" = 2, "B&D" = 1, "B&F" = 2, "C&D" = 1, "D&E" = 1, "E&F" = 1, "A&B&F" = 1, "B&C&D" = 1) fit3 <- euler(wilkinson2012, shape = "ellipse") ## ----EulerPlot, fig.height=3----------------------------------------------------------------- plot(fit3, quantities = TRUE) ## ----VennData, echo=FALSE-------------------------------------------------------------------- d2<- build.data.frame( c( "A","B","C","D","E","F"), list( 1, 0, 0, 0, 0, 0, 4), list( 0, 1, 0, 0, 0, 0, 6), list( 0, 0, 1, 0, 0, 0, 3), list( 0, 0, 0, 1, 0, 0, 2), list( 0, 0, 0, 0, 1, 0, 7), list( 0, 0, 0, 0, 0, 1, 3), list( 1, 1, 0, 0, 0, 0, 2), list( 1, 0, 0, 0, 0, 1, 2), list( 0, 1, 1, 0, 0, 0, 2), list( 0, 1, 0, 1, 0, 0, 1), list( 0, 1, 0, 0, 0, 1, 2), list( 0, 0, 1, 1, 0, 0, 1), list( 0, 0, 0, 1, 1, 0, 1), list( 0, 0, 0, 0, 1, 1, 1), list( 1, 1, 0, 0, 0, 1, 1), list( 0, 1, 1, 1, 0, 0, 1)) ## ----UpSetPlot,fig.height=6.5,fig.width=6, echo = FALSE-------------------------------------- upset(d2,nsets=6) ## ----out.width="350px"----------------------------------------------------------------------- vtree(d2,"A B C D E F",Venn=TRUE,showlegend=TRUE) ## ----out.width="400px"----------------------------------------------------------------------- vtree(d2,"A B C D E F",pattern=TRUE,showlegend=TRUE) ## ----echo=TRUE------------------------------------------------------------------------------- library(vtree) library(stablelearner) library(dplyr) library(forcats) data("titanic") td <- titanic %>% rename(Survived = survived) %>% mutate( Age = ifelse(age<13, "Child", "Adult"), Gender = gender, Class = fct_collapse(class, Crew = c("deck crew", "engineering crew", "restaurant staff", "victualling crew")), Region = fct_collapse(country, "UK and Ireland" = c("England", "Scotland", "Wales", "Ireland", "Northern Ireland", "Channel Islands"), "Europe" = c("Norway", "France", "Finland", "Sweden", "Latvia", "Denmark", "Bulgaria", "Greece", "Hungary", "France", "Spain", "Italy", "Belgium", "Germany", "Austria", "Poland", "Switzerland", "Bosnia", "Croatia", "Croatia (Modern)", "Yugoslavia", "Slovakia (Modern day)", "Slovenia", "Netherlands", "Russia"), "North America" = c("United States", "Canada", "Mexico", "Cuba"), other_level = "Other")) ## ----echo = TRUE, out.width = "120px"-------------------------------------------------------- vtree(td, "Class") ## ----echo = TRUE, out.width = "400px"-------------------------------------------------------- vtree(td, "Class Age", horiz = FALSE) ## ----echo = TRUE, out.width = "400px"-------------------------------------------------------- library(dplyr) td %>% filter(Gender == "female") %>% vtree(~ Class + Age, horiz = FALSE) ## ----echo = TRUE, out.width = "325px"-------------------------------------------------------- vtree(td, "Region Age", prune = list(Region = c("Europe", "Other")), horiz = FALSE) ## ----echo = TRUE, out.width = "300px"-------------------------------------------------------- vtree(td, "Region Class Gender Age", keep = list(Region = "Europe", Class = "3rd", Gender = "male")) ## ----echo = TRUE, out.width = "300px"-------------------------------------------------------- vtree(td, "Region Class Gender Age", keep = list(Region = "Europe", Class = "3rd", Gender = "male"), vp = FALSE) ## ----echo = TRUE, out.width = "175px"-------------------------------------------------------- vtree(td, "Region Age", prunebelow = list(Region =c("UK and Ireland", "North America", "Other"))) ## ----echo = TRUE, out.width = "435px"-------------------------------------------------------- vtree(td, "Class Region", horiz = FALSE, splitwidth = 5) ## ----echo = TRUE, out.width = "435px"-------------------------------------------------------- vtree(td, "Class Region", horiz = FALSE, prunesmaller = 50, splitwidth = 5) ## ----echo = TRUE, out.width = "250px"-------------------------------------------------------- vtree(td, "Class embarked", labelvar = c("embarked" = "Port"), sameline = TRUE) ## ----echo = TRUE, out.width = "250px"-------------------------------------------------------- vtree(td, "Class", horiz = FALSE, labelnode = list(Class = c( "First Class" = "1st", "Second Class" = "2nd", "Third Class" = "3rd"))) ## ----echo = TRUE, out.width = "350px"-------------------------------------------------------- vtree(td, "Class is.na:fare", horiz = FALSE) ## ----echo = TRUE, out.width = "400px"-------------------------------------------------------- vtree(td, "fare>50 Class", horiz = FALSE) ## ----echo = TRUE, out.width = "100px"-------------------------------------------------------- vtree(td, summary = "fare", horiz = FALSE) ## ----echo = TRUE, out.width = "425px"-------------------------------------------------------- vtree(td, "Region Class", summary = "fare \nmean %mean%", horiz = FALSE, prune = list(Region = "Other", Class = "Crew"), splitwidth = 5) ## ----echo = TRUE, out.width = "400px"-------------------------------------------------------- vtree(td, "Class Gender", horiz = FALSE, pattern = TRUE) ## ----echo = TRUE, out.width = "375px"-------------------------------------------------------- vtree(td, "Region Gender age fare", check.is.na = TRUE, horiz = FALSE, showlegend = TRUE) ## ----Remdesivir,echo = TRUE------------------------------------------------------------------ rem <- build.data.frame( c( "included","elig","consent","randgrp","started"), list(0, 0, 1, 0, 0, 13), list(0, 1, 0, 0, 0, 3), list(1, 1, 1, 1, 1, 193), list(1, 1, 1, 1, 0, 4), list(1, 1, 1, 2, 1, 191), list(1, 1, 1, 2, 0, 8), list(1, 1, 1, 3, 1, 200)) ## ----Remdesivir2,echo = TRUE----------------------------------------------------------------- nodelabels <- list( included = c("Randomized" = "1", "Excluded" = "0"), randgrp=c( "Randomized to receive 10 d of remdesivir" = "1", "Randomized to receive 5 d of remdesivir" = "2", "Randomized to continue standard care" = "3"), started=c( "Did not start remdesivir" = "0", "Started remdesivir" = "1")) ## ----CONSORTtree, echo = TRUE, out.width = "400px"------------------------------------------- vtree(rem,"included randgrp started", labelnode = nodelabels, follow = list(included = "1", randgrp = c("1", "2")), summary = c( "consent=0 \n(Withdrew consent %sum%%var=included%%node=0%)", "elig=0 \n(Ineligible %sum%%var=included%%node=0%)"), cdigits = 0, showvarnames = FALSE, title = "patients screened", horiz = FALSE, fillcolor = "lightsteelblue1", showpct = FALSE) ## -------------------------------------------------------------------------------------------- ## This data set is downloaded as a csv from ## https://data.ottawapolice.ca/datasets/76a54b7f7ef44ab1b78dd96f3061c7cb_0/explore ## and saved in the same folder as code.R library(readr) data <- read_csv("Data/TSRDCP_TrafficStops_v1_164883526439542163.csv", col_select = c(`How Cleared`,`Driver Gender`, `Driver Race`,`Driver Age`), col_types = cols()) z <- data |> rename(race=`Driver Race`,age=`Driver Age`,gender=`Driver Gender`, how_cleared=`How Cleared`) |> filter(!is.na(race) & !is.na(age) & !is.na(gender)) |> filter(gender != "D", age != "-15") ## ----echo = TRUE, out.width = "425px"-------------------------------------------------------- vtree(z, "race=White age gender", splitwidth = Inf, sameline = TRUE, summary = "how_cleared=Final \nNo action %pct%", title = "Total stops", showlegend = TRUE, showlegendsum = TRUE) sessionInfo()