################################################################################
#                                                                              #
#   econet: An R package for parameter-dependent network centrality measures   #
#                                                                              #
#                     Marco Battaglini (Cornell University)                    #
#           Valerio Leone Sciabolazza (University of Naples Parthenope)        #
#                     Eleonora Patacchini (Cornell University)                 #
#                          Sida Peng (Microsoft Research)                      #
#                                                                              #
################################################################################

# Load library
library("econet")

# Fix the state of the random number generator so that repeated runs of this
# script start from the same random-number sequence
set.seed(2)

#-------------------------------------------------------------------------------
#
# Exercise 1: Katz-Bonacich centrality with parameter constant across agents
#
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Implementing Model A

# Select data from the 111th Congress.
# Load exactly the two objects used below: the data frame `a_db_alumni` and
# the network object `a_G_alumni_111` that is passed to net_dep() as `G`.
data("a_db_alumni", package = "econet")
data("a_G_alumni_111", package = "econet")
db_model_A <- a_db_alumni
G_model_A <- a_G_alumni_111

# Specify which variables are factors
are_factors <- c("party", "gender", "nchair", "isolate")
db_model_A[are_factors] <- lapply(db_model_A[are_factors], factor)

# Rescale the dependent variable: divide PAC by 1e+06
db_model_A$PAC <- db_model_A$PAC / 1e+06

# Specify formula
f_model_A <- formula("PAC ~ gender + party + nchair + isolate")

# Specify starting values (one named entry per parameter to be estimated)
starting <- c(alpha = 0.47325, beta_gender1 = -0.26991, beta_party1 = 0.55883,
              beta_nchair1 = -0.17409, beta_isolate1 = 0.18813, phi = 0.21440)

# Run net_dep: Model A estimated by NLLS under hypothesis "lim"
lim_model_A <- net_dep(formula = f_model_A, data = db_model_A, G = G_model_A,
                       model = "model_A", estimation = "NLLS",
                       hypothesis = "lim", start.val = starting)

# Print results
summary(lim_model_A)
head(lim_model_A$centrality)

#-------------------------------------------------------------------------------
# Implementing Model B

# Data for the 111th Congress: `db_cosponsor` and `G_cosponsor_111` feed the
# model, while `G_alumni_111` is set aside to serve as exclusion restriction
data("db_cosponsor", package = "econet")
data("G_alumni_111", package = "econet")
G_exclusion_restriction <- G_alumni_111
G_model_B <- G_cosponsor_111
db_model_B <- db_cosponsor

# Convert the categorical regressors to factors
are_factors <- c("gender", "party", "nchair")
db_model_B[are_factors] <- lapply(
  db_model_B[are_factors],
  function(column) factor(column)
)

# Formula of the outcome equation
f_model_B <- formula("les ~gender + party + nchair")

# Named starting values handed to net_dep() through `start.val`
starting <- c(
  alpha = 0.23952,
  beta_gender1 = -0.22024,
  beta_party1 = 0.42947,
  beta_nchair1 = 3.09615,
  phi = 0.40038,
  unobservables = 0.07714
)

# Estimate Model B by NLLS under hypothesis "lim", with the "heckman"
# correction for endogeneity switched on
lim_model_B <- net_dep(
  formula = f_model_B,
  data = db_model_B,
  G = G_model_B,
  model = "model_B",
  estimation = "NLLS",
  hypothesis = "lim",
  endogeneity = TRUE,
  correction = "heckman",
  first_step = "standard",
  exclusion_restriction = G_exclusion_restriction,
  start.val = starting
)

# Show the main estimates, the first-step estimates, and the first centralities
summary(lim_model_B)
summary(lim_model_B, print = "first.step")
head(lim_model_B$centrality)

# Bootstrap the fitted Model B (1000 iterations, no grouping, no weights) and
# display the outcome
boot_lim_estimate <- boot(
  object = lim_model_B,
  hypothesis = "lim",
  group = NULL,
  niter = 1000,
  weights = FALSE
)
boot_lim_estimate

# Marginal effects implied by the fitted Model B
quantify(object = lim_model_B)

# Plot figure 1
library("ggplot2")

# Associate congressmen centrality to party affiliation
df <- data.frame(parameter.dependent = lim_model_B$centrality,
                 party = lim_model_B$second_step$data$party1)

# Specify party categories and colors: a value of 0 in `party1` is labelled
# "Republican" (drawn in red), any other value "Democrat" (drawn in blue)
df[, "party"] <- ifelse(df[, "party"] == 0, "Republican", "Democrat")
df[, "colour"] <- ifelse(df[, "party"] == "Republican", "red", "blue")

# Plot the distributions, one panel per party.
# The fill scale uses *named* values, so every level of `colour` is drawn in
# its own colour regardless of the order (or absence) of the other level.
# No `colour` argument is passed to ggplot() itself: it would be swallowed by
# `...` and have no effect on the plot.
ggplot(data = df, aes(parameter.dependent)) +
  geom_histogram(aes(fill = factor(colour)), binwidth = 0.25,
                 colour = "black", alpha = 0.7) +
  facet_grid(party ~ .) +
  theme_bw() +
  labs(x = "Parameter-Dependent Centrality", y = "Frequency") +
  scale_fill_manual(values = c(blue = "blue", red = "red")) +
  theme(legend.position = "none")

#-------------------------------------------------------------------------------
#
# Exercise 2: Katz-Bonacich centrality with heterogeneous by node parameter
#
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Implementing Model A

# Gender as a numeric vector: the factor labels are read back as numbers
z <- as.numeric(as.character(db_model_A[["gender"]]))

# Formula without gender, which is supplied to net_dep() through `z` instead
f_het_model_A <- formula("PAC ~ party + nchair + isolate")

# Named starting values handed to net_dep() through `start.val`
starting <- c(
  alpha = 0.44835,
  beta_party1 = 0.56004,
  beta_nchair1 = -0.16349,
  beta_isolate1 = 0.21011,
  beta_z = -0.26015,
  phi = 0.34212,
  gamma = -0.49960
)

# Estimate Model A by NLLS under hypothesis "het"
het_model_A <- net_dep(
  formula = f_het_model_A,
  data = db_model_A,
  G = G_model_A,
  model = "model_A",
  estimation = "NLLS",
  hypothesis = "het",
  z = z,
  start.val = starting
)

# Show the estimates and the first centralities
summary(het_model_A)
head(het_model_A$centrality)

#-------------------------------------------------------------------------------
# Implementing Model B

# Gender as a numeric vector: the factor labels are read back as numbers
z <- as.numeric(as.character(db_model_B[["gender"]]))

# Formula without gender, which is supplied to net_dep() through `z` instead
f_het_model_B <- formula("les ~ party + nchair")

# Starting values for hypothesis "het_l" (parameters theta_0 and theta_1)
starting <- c(
  alpha = 0.23952,
  beta_party1 = 0.42947,
  beta_nchair1 = 3.09615,
  beta_z = -0.12749,
  theta_0 = 0.42588,
  theta_1 = 0.08007
)

# Estimate Model B by NLLS under hypothesis "het_l"
het_model_B_l <- net_dep(
  formula = f_het_model_B,
  data = db_model_B,
  G = G_model_B,
  model = "model_B",
  estimation = "NLLS",
  hypothesis = "het_l",
  z = z,
  start.val = starting
)

# Starting values for hypothesis "het_r" (parameters eta_0 and eta_1)
starting <- c(
  alpha = 0.04717,
  beta_party1 = 0.51713,
  beta_nchair1 = 3.12683,
  beta_z = 0.01975,
  eta_0 = 1.02789,
  eta_1 = 2.71825
)

# Estimate Model B by NLLS under hypothesis "het_r"
het_model_B_r <- net_dep(
  formula = f_het_model_B,
  data = db_model_B,
  G = G_model_B,
  model = "model_B",
  estimation = "NLLS",
  hypothesis = "het_r",
  z = z,
  start.val = starting
)

# Show the estimates of both specifications
summary(het_model_B_l)
summary(het_model_B_r)

#-------------------------------------------------------------------------------
#
# Exercise 3: Katz-Bonacich centrality with heterogeneous by link parameter
#
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Implementing Model B

# Partition vector: party affiliation, with the factor labels read back as
# numbers
z <- as.numeric(as.character(db_model_B[["party"]]))

# Starting values, including separate phi_within and phi_between parameters
starting <- c(
  alpha = 0.242486,
  beta_gender1 = -0.229895,
  beta_party1 = 0.42848,
  beta_nchair1 = 3.0959,
  phi_within = 0.396371,
  phi_between = 0.414135
)

# Estimate Model B by NLLS under hypothesis "par", using z as partition
party_model_B <- net_dep(
  formula = f_model_B,
  data = db_model_B,
  G = G_model_B,
  model = "model_B",
  estimation = "NLLS",
  hypothesis = "par",
  z = z,
  start.val = starting
)

# Show the estimates
summary(party_model_B)

#-------------------------------------------------------------------------------
#
# Centrality measure comparison
#
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Implementing Model B

# Starting values, including the coefficient on the betweenness regressor
starting <- c(
  alpha = 0.214094,
  beta_gender1 = -0.212706,
  beta_party1 = 0.478518,
  beta_nchair1 = 3.09234,
  beta_betweenness = 7.06287e-05,
  phi = 0.344787
)

# Horse race on Model B with "betweenness" as the competing centrality,
# computed on a directed and weighted network, without normalization
horse_model_B <- horse_race(
  formula = f_model_B,
  centralities = "betweenness",
  directed = TRUE,
  weighted = TRUE,
  normalization = NULL,
  data = db_model_B,
  G = G_model_B,
  model = "model_B",
  estimation = "NLLS",
  start.val = starting
)

# Show the summary for the betweenness specification, the default summary,
# and the first centralities
summary(horse_model_B, centrality = "betweenness")
summary(horse_model_B)
head(horse_model_B$centrality)