## Replication material for "missSBM: An R Package for Handling Missing
## Values in the Stochastic Block Model"
## --- Replication of the results saved in "main_analysis.RData" ---

## required libraries
library("missSBM")
library("aricode")
library("tidyverse"); theme_set(theme_bw())
library("igraph")
library("pROC")
library("parallel")
library("future")

## Parallel backend used by the model fits below.
## Forked ("multicore") futures are explicitly enabled so that the script can
## also be run from RStudio.
options(future.fork.enable = TRUE) # so it can be used from Rstudio
## NOTE(review): the number of workers is hard-coded to 10; it is kept as is
## because changing it may change how the work is split between processes.
future::plan("multicore", workers = 10)

## Load the blog network shipped with missSBM and discard its isolated
## vertices (degree 0) before anything else is computed.
data("frenchblog2007", package = "missSBM")
frenchblog2007 <- frenchblog2007 %>%
  delete_vertices(which(degree(frenchblog2007) == 0))

## Adjacency matrix and party label of the remaining vertices; both are taken
## after the deletion so that they stay aligned with the cleaned graph.
blog  <- as_adjacency_matrix(frenchblog2007)
party <- V(frenchblog2007)$party

## SBM with no missing edge or node
## The fully observed network is fitted for every candidate number of blocks
## in `blocks` (1 to 18), declaring "node" sampling. The selected model
## (sbm_full$bestModel) is reused below for the sampling design and as the
## reference clustering. `blocks` is redefined later for the covariate study.
blocks <- 1:18
sbm_full <- estimateMissSBM(blog, blocks, "node")

## Seed for the random steps that follow. R parses this literal as the number
## 3052008: the leading zero is ignored, it is not an octal constant.
## NOTE(review): the seed is set only after the fit above; if that fit draws
## random numbers, sbm_full (and everything derived from it) is not covered by
## this seed -- TODO confirm before moving the call.
set.seed(03052008)

## sampling in the original network to get a partly observed blog network
## One sampling parameter per block of the full fit: 0.2 for blocks whose
## estimated proportion is below 0.1, 0.8 for all the others
## (presumably the per-block probability of observing a node -- see the
## missSBM documentation of "block-node" sampling).
samplingParameters <- ifelse(sbm_full$bestModel$fittedSBM$blockProp <  0.1, 0.2, 0.8)
## The block of each node is given by the memberships of the full fit.
blog_obs <- observeNetwork(adjacencyMatrix = blog, sampling = "block-node",
  parameters = samplingParameters,
  clusters = sbm_full$bestModel$fittedSBM$memberships)

## try MAR: fit the partly observed network declaring "node" sampling
sbm_node <- estimateMissSBM(blog_obs, blocks, "node", control = list(iterates = 5))

## try MNAR with block-node sampling, i.e., the design that was actually used
## above to generate blog_obs
sbm_block <- estimateMissSBM(blog_obs, blocks, "block-node", control = list(iterates = 5))

## Imputation and AUC
## Reference clustering and number of blocks, both taken from the model
## selected on the fully observed network.
cl0 <- sbm_full$bestModel$fittedSBM$memberships
nBlocks <- sbm_full$bestModel$fittedSBM$nbBlocks

## The replicates are distributed with mclapply() and each fit is asked to
## use a single core, so the future backend is set to sequential here
## (presumably to avoid nested parallelism).
future::plan("sequential")

## 500 replicates. Each one
##   1. hides part of the network with block-node sampling, using one random
##      parameter per block,
##   2. refits an SBM with nBlocks blocks on what remains observed,
##   3. compares the imputed values of the hidden dyads with their true
##      values through the AUC.
## Returns, per replicate, c(rate = share of observed entries, auc = AUC).
##
## Every replicate seeds the RNG from its own index. The workers of mclapply()
## are forked processes, and under the default RNG kind they do not inherit a
## reproducible stream from the global set.seed() call, so without this the
## sampled sub-networks (and the AUCs) would differ from one run to the next.
## Seeding by index also makes the result independent of how replicates are
## scheduled on the workers. mc.cores is kept at 10 so that the work always
## runs in forks and the RNG state of the main session is left untouched.
res_auc <- mclapply(seq_len(500), function(i) {
  set.seed(3052008 + i)
  sub_graph <- observeNetwork(blog, "block-node", runif(nBlocks), cl0)
  ## linear indices of the entries hidden by the sampling
  hidden <- which(as.matrix(is.na(sub_graph)))
  true_dyads <- blog[hidden]
  fit <- estimateMissSBM(sub_graph, nBlocks, "block-node",
    control = list(cores = 1, trace = 0))
  imputed_dyads <- fit$bestModel$imputedNetwork[hidden]
  c(rate = 1 - length(hidden) / length(blog),
    auc  = pROC::auc(true_dyads, imputed_dyads, quiet = TRUE))
}, mc.cores = 10)

## Second study: a subgraph and a nodal covariate.
## The multicore backend is switched back on; it was set to sequential for
## the mclapply() replicates above.
future::plan("multicore", workers = 10)

## Keep only the blogs labelled "right" or "left" ...
blog_subgraph <- igraph::induced_subgraph(
  frenchblog2007,
  V(frenchblog2007)$party %in% c("right", "left")
)
## ... and drop the vertices left with degree 0 in the induced subgraph.
blog_subgraph <- blog_subgraph %>%
  delete_vertices(which(degree(blog_subgraph) == 0))

## Numeric indicator: 1 for a "left" blog, 0 otherwise.
dummy_party <- 1 * (V(blog_subgraph)$party == "left")

## Observe this graph according to this covariate ("covar-node" sampling).
blog_subgraph_obs <- missSBM::observeNetwork(
  as_adjacency_matrix(blog_subgraph),
  sampling = "covar-node",
  parameters = 3,
  covariates = list(dummy_party)
)

## Candidate numbers of blocks for the covariate study (replaces the 1:18
## grid used for the full network).
blocks <- 2:8

## Fully observed subgraph, covariate used in the SBM.
## NOTE(review): the result of this first fit is discarded -- sbm_covar_full
## is assigned again at the end of this section by a call without `control`,
## and that later value is the one written to the saved workspace. Both calls
## are kept as they are; confirm which one is intended and drop the other.
sbm_covar_full <- estimateMissSBM(
  as_adjacency_matrix(blog_subgraph), blocks, "node",
  covariates = list(dummy_party),
  control = list(useCov = TRUE, iterates = 2)
)

## Partly observed subgraph, covariate used in the SBM and in the sampling.
sbm_covar1 <- estimateMissSBM(
  blog_subgraph_obs, blocks, "covar-node",
  covariates = list(dummy_party),
  control = list(useCov = TRUE, iterates = 2)
)

## Partly observed subgraph, covariate used in the sampling only.
sbm_covar2 <- estimateMissSBM(
  blog_subgraph_obs, blocks, "covar-node",
  covariates = list(dummy_party),
  control = list(useCov = FALSE, iterates = 2)
)

## Partly observed subgraph, covariate used in the SBM only.
sbm_covar3 <- estimateMissSBM(
  blog_subgraph_obs, blocks, "node",
  covariates = list(dummy_party),
  control = list(useCov = TRUE, iterates = 2)
)

## Partly observed subgraph, covariate not used at all.
sbm_covar4 <- estimateMissSBM(
  blog_subgraph_obs, blocks, "node",
  control = list(useCov = FALSE, iterates = 2)
)

## Fully observed subgraph again, this time with the default control
## settings; this assignment replaces the sbm_covar_full computed above.
sbm_covar_full <- estimateMissSBM(
  as_adjacency_matrix(blog_subgraph), blocks, "node",
  covariates = list(dummy_party)
)

## All fits are done: switch the future backend back to sequential.
future::plan("sequential")

## Save output for reproducibility in the Rnw
## save.image() writes every object of the global workspace, so all the
## top-level variables created above end up in the file, which is created
## relative to the current working directory.
save.image(file = "main_analysis.RData")

