# load package "laeken"
library("laeken")
# load example data for the European Union Statistics
# on Income and Living Conditions (EU-SILC)
data("eusilc")
head(eusilc[, 1:10], 3)
# load example data for the Structure of Earnings Survey (SES)
data("ses")
head(ses[, 1:7], 3)
# estimate the at-risk-of poverty rate
arpr("eqIncome", weights = "rb050", data = eusilc)
# estimate the dispersion around the at-risk-of poverty rate
arpr("eqIncome", weights = "rb050", p = c(0.4, 0.5, 0.7), data = eusilc)
# estimate the quintile share ratio
qsr("eqIncome", weights = "rb050", data = eusilc)
# estimate the relative median at-risk-of-poverty gap
rmpg("eqIncome", weights = "rb050", data = eusilc)
# estimate the Gini coefficient
gini("eqIncome", weights = "rb050", data = eusilc)
# estimate the gender pay gap based on weighted means
gpg("earningsHour", gender = "sex", weigths = "weights", data = ses)
# estimate the gender pay gap based on weighted medians
gpg("earningsHour", gender = "sex", weigths = "weights", data = ses,
method = "median")
# estimate the Gini coefficient by supplying a data frame
gini("eqIncome", weights = "rb050", data = eusilc)
# estimate the Gini coefficient by supplying vectors
gini(eusilc$eqIncome, weights = eusilc$rb050)
# estimate the at-risk-of poverty rate broken down by region
a <- arpr("eqIncome", weights = "rb050", breakdown = "db040", data = eusilc)
a
# take a subset of the object computed above
subset(a, strata = c("Lower Austria", "Vienna"))
# replace the equivalized disposable income of the household with the highest
# income with a large outlier and store the resulting vector in a new variable
hID <- eusilc$db030[which.max(eusilc$eqIncome)]
eqIncomeOut <- eusilc$eqIncome
eqIncomeOut[eusilc$db030 == hID] <- 10000000
# create a data set that only contains the equivalized disposable income
# with the outlier and the sample weights on the household level
keep <- !duplicated(eusilc$db030)
eusilcH <- data.frame(eqIncome=eqIncomeOut, db090=eusilc$db090)[keep,]
# produce a Pareto quantile plot
# select the threshold interactively by clicking on a data point: this
# process can be repeated until the interactive session is terminated
# (typically by a secondary mouse click or by pressing Esc)
paretoQPlot(eusilcH$eqIncome, w = eusilcH$db090)
# estimate the threshold for the Pareto distribution with Van Kerm's formula
ts <- paretoScale(eusilcH$eqIncome, w = eusilcH$db090)
ts
# compute the weighted integrated squared error (wISE) estimator for the shape
# parameter of the Pareto distribution by supplying either the number of
# observations in the tail or the threshold
thetaISE(eusilcH$eqIncome, k = ts$k, w = eusilcH$db090)
thetaISE(eusilcH$eqIncome, x0 = ts$x0, w = eusilcH$db090)
# compute the weighted partial density component (wPDC) estimator for the shape
# parameter of the Pareto distribution by supplying either the number of
# observations in the tail or the threshold
thetaPDC(eusilcH$eqIncome, k = ts$k, w = eusilcH$db090)
thetaPDC(eusilcH$eqIncome, x0 = ts$x0, w = eusilcH$db090)
# a better way to fit the Pareto distribution that returns an object
# containing all necessary information for further analysis
fit <- paretoTail(eqIncomeOut, k = ts$k, w = eusilc$db090,
groups = eusilc$db030)
# produce a diagnostic Pareto quantile plot
plot(fit)
# downweight the detected outliers and recalibrate the remaining observations
w <- reweightOut(fit, calibVars(eusilc$db040))
gini(eqIncomeOut, w)
# replace the detected outliers with values drawn from the fitted distribution
set.seed(123)
eqIncomeRN <- replaceOut(fit)
gini(eqIncomeRN, weights = eusilc$rb050)
# shrink outliers to the theoretical quantile used for outlier detection
eqIncomeSN <- shrinkOut(fit)
gini(eqIncomeSN, weights = eusilc$rb050)
# compute the standard estimate for comparison
gini(eqIncomeOut, weights = eusilc$rb050)
# estimate the at-risk-of poverty rate, including variance estimates
# and confidence intervals obtained via naive bootstrap
arpr("eqIncome", weights = "rb050", design = "db040", cluster = "db030",
data = eusilc, var = "bootstrap", bootType = "naive", seed = 1234)
# estimate the at-risk-of poverty rate, including variance estimates and
# confidence intervals obtained via calibrated bootstrap
aux <- cbind(calibVars(eusilc$db040), calibVars(eusilc$rb090))
arpr("eqIncome", weights = "rb050", design = "db040", cluster = "db030",
data = eusilc, var = "bootstrap", X = aux, seed = 1234)