This function computes a nested hierarchy from a character vector or a (named) list.
hier_compute(inp, dim_spec = NULL, root = NULL, method = "len", as = "network")
`inp`: a character vector (for methods "len" and "endpos") containing the
codes of a hierarchical variable, or a list for method "list".
In the latter case, the input is expected to be a
named list where each list-element contains the codes belonging to the
node that has the name of this specific list element. In the examples
below, the required input formats are further explained.
`dim_spec`: an (integerish) vector containing either the length
(in terms of characters) of each level or the end-positions of
these levels. In the latter case, one needs to set argument
`method` to "endpos". This argument is ignored in case the
hierarchy should be created from a named list.
`root`: NULL or a scalar character specifying the name
of the overall total in case it is not encoded at the first
positions of `inp`.
`method`: one of "len" (the default), "endpos" or "list":
"len": the number of characters for each of the levels needs to be specified
"endpos": the end-positions for each of the levels need to be specified
"list": the hierarchy is specified as a named list (see argument `inp`); argument `dim_spec` is ignored
`as`: (character) specifies the type of the return object. Possible choices are:
"network": the default; a data.table representing the hierarchy as a network.
The table contains a "root" column holding the name of the parent node of the
label in the "leaf" column (the printed example below additionally shows a
"level" column).
"df"
: a data.frame
in "@; label"
-format.
"dt"
: a data.table
in "@; label"
-format.
"code"
: returns the R-code that is required to build
the tree
"sdc"
: the tree is structured as a list
"argus"
: suitable input for hier_export()
to
write "hrc"
-files for tau argus.
"json"
: a character-vector encoded as json-string.
Value: a hierarchical data structure whose type depends on the choice of
argument `as`
## Example: regional codes (NUTS)
# The hierarchy is encoded by character positions within each code:
# digits 1-2 (len=2, endpos=2) --> level 1
# digit 3 (len=1, endpos=3) --> level 2
# digits 4-5 (len=2, endpos=5) --> level 3
# all strings have equal length but the total is not encoded in these values
geo_m <- c(
"01051", "01053", "01054", "01055",
"01056", "01057", "01058", "01059", "01060",
"01061", "01062",
"02000",
"03151", "03152", "03153", "03154", "03155", "03156", "03157", "03158",
"03251", "03252", "03254", "03255", "03256", "03257",
"03351", "03352", "03353", "03354", "03355",
"03356", "03357", "03358", "03359",
"03360", "03361",
"03451", "03452", "03453", "03454", "03455", "03456",
"10155")
# variant 1: describe the levels by their end-positions; since the total is
# not part of the codes, its name is supplied via argument "root"
a <- hier_compute(
inp = geo_m,
dim_spec = c(2, 3, 5),
root = "Tot",
method = "endpos"
)
# variant 2: describe the same levels by their number of characters
b <- hier_compute(
inp = geo_m,
dim_spec = c(2, 1, 2),
root = "Tot",
method = "len"
)
# compare both results after converting them to the data.frame representation
identical(
hier_convert(a, as = "df"),
hier_convert(b, as = "df")
)
#> [1] TRUE
# the total ("Tot") is contained in the first 3 positions of the input values
# --> the name of the overall total (argument "root") is left at
# NULL (the default)
geo_m_with_tot <- paste0("Tot", geo_m)
# level widths: 3 characters for the total, then 2, 1 and 2 characters
a <- hier_compute(
inp = geo_m_with_tot,
dim_spec = c(3, 2, 1, 2),
method = "len"
)
# the same levels expressed as (cumulative) end-positions
b <- hier_compute(
inp = geo_m_with_tot,
dim_spec = c(3, 5, 6, 8),
method = "endpos"
)
# both specifications give identical results
identical(a, b)
#> [1] TRUE
# example where inputs have unequal length (between 2 and 6 characters)
# the overall total is not included in the input vector
yae_h <- c(
"1.1.1.", "1.1.2.",
"1.2.1.", "1.2.2.", "1.2.3.", "1.2.4.", "1.2.5.", "1.3.1.",
"1.3.2.", "1.3.3.", "1.3.4.", "1.3.5.",
"1.4.1.", "1.4.2.", "1.4.3.", "1.4.4.", "1.4.5.",
"1.5.", "1.6.", "1.7.", "1.8.", "1.9.", "2.", "3.")
# every level occupies two characters (a digit followed by "."), so the
# levels end at positions 2, 4 and 6
a <- hier_compute(
inp = yae_h,
dim_spec = c(2, 4, 6),
root = "Tot",
method = "endpos"
)
# the same levels expressed as number of characters per level
b <- hier_compute(
inp = yae_h,
dim_spec = c(2, 2, 2),
root = "Tot",
method = "len"
)
# compare both results after converting them to the data.frame representation
identical(
hier_convert(a, as = "df"),
hier_convert(b, as = "df")
)
#> [1] TRUE
# Same example, but the overall total ("Tot") is contained in the first
# 3 positions of the input values --> argument "root" needs to be
# left at NULL (the default)
yae_h_with_tot <- paste0("Tot", yae_h)

# level widths: 3 characters for the total, then 2 characters per level
# (no trailing comma after the last argument: it would pass an extra,
# empty argument to the call)
a <- hier_compute(
  inp = yae_h_with_tot,
  dim_spec = c(3, 2, 2, 2),
  method = "len"
)

# the same levels expressed as (cumulative) end-positions
b <- hier_compute(
  inp = yae_h_with_tot,
  dim_spec = c(3, 5, 7, 9),
  method = "endpos"
)

# both specifications give identical results
identical(a, b)
#> [1] TRUE
# An example using a named list as input (same NUTS hierarchy as above)
# Hierarchy: digits 1-2 (nuts1), digit 3 (nuts2), digits 4-5 (nuts3)
# The order of the list-elements is not important, but the name of each
# list-element must be the (subtotal/level) name of the node whose
# children it contains
geo_ll <- list()
geo_ll[["Total"]] <- c("01", "02", "03", "10")
geo_ll[["010"]] <- c(
  "01051", "01053", "01054", "01055",
  "01056", "01057", "01058", "01059",
  "01060", "01061", "01062"
)
geo_ll[["031"]] <- c(
  "03151", "03152", "03153", "03154",
  "03155", "03156", "03157", "03158"
)
geo_ll[["032"]] <- c(
  "03251", "03252", "03254",
  "03255", "03256", "03257"
)
geo_ll[["033"]] <- c(
  "03351", "03352", "03353", "03354", "03355",
  "03356", "03357", "03358", "03359",
  "03360", "03361"
)
geo_ll[["034"]] <- c(
  "03451", "03452", "03453",
  "03454", "03455", "03456"
)
geo_ll[["01"]] <- "010"
geo_ll[["02"]] <- "020"
geo_ll[["020"]] <- "02000"
geo_ll[["03"]] <- c("031", "032", "033", "034")
geo_ll[["10"]] <- "101"
geo_ll[["101"]] <- "10155"

# build the hierarchy from the list; "dim_spec" is not needed for this method
d <- hier_compute(
  inp = geo_ll,
  root = "Total",
  method = "list"
)
# print the resulting network (columns: root, leaf, level)
d
#> Argument 'dim_spec' is ignored when constructing a hierarchy from a nested list.
#> root leaf level
#> 1: Total Total 1
#> 2: Total 01 2
#> 3: Total 02 2
#> 4: Total 03 2
#> 5: Total 10 2
#> 6: 01 010 3
#> 7: 010 01051 4
#> 8: 010 01053 4
#> 9: 010 01054 4
#> 10: 010 01055 4
#> 11: 010 01056 4
#> 12: 010 01057 4
#> 13: 010 01058 4
#> 14: 010 01059 4
#> 15: 010 01060 4
#> 16: 010 01061 4
#> 17: 010 01062 4
#> 18: 02 020 3
#> 19: 020 02000 4
#> 20: 03 031 3
#> 21: 03 032 3
#> 22: 03 033 3
#> 23: 03 034 3
#> 24: 031 03151 4
#> 25: 031 03152 4
#> 26: 031 03153 4
#> 27: 031 03154 4
#> 28: 031 03155 4
#> 29: 031 03156 4
#> 30: 031 03157 4
#> 31: 031 03158 4
#> 32: 032 03251 4
#> 33: 032 03252 4
#> 34: 032 03254 4
#> 35: 032 03255 4
#> 36: 032 03256 4
#> 37: 032 03257 4
#> 38: 033 03351 4
#> 39: 033 03352 4
#> 40: 033 03353 4
#> 41: 033 03354 4
#> 42: 033 03355 4
#> 43: 033 03356 4
#> 44: 033 03357 4
#> 45: 033 03358 4
#> 46: 033 03359 4
#> 47: 033 03360 4
#> 48: 033 03361 4
#> 49: 034 03451 4
#> 50: 034 03452 4
#> 51: 034 03453 4
#> 52: 034 03454 4
#> 53: 034 03455 4
#> 54: 034 03456 4
#> 55: 10 101 3
#> 56: 101 10155 4
#> root leaf level
## Rebuild the "yae_h" example from above with the input defined as a named list
# Each list-element is named after a node and contains the codes of its children.
# Note: node "1.4." gets six children here ("1.4.1." to "1.4.6."), i.e. one
# more than in the character vector `yae_h` used above.
yae_ll <- list()
yae_ll[["Total"]] <- c("1.", "2.", "3.")
yae_ll[["1."]] <- paste0("1.", 1:9, ".")
yae_ll[["1.1."]] <- paste0("1.1.", 1:2, ".")
yae_ll[["1.2."]] <- paste0("1.2.", 1:5, ".")
yae_ll[["1.3."]] <- paste0("1.3.", 1:5, ".")
yae_ll[["1.4."]] <- paste0("1.4.", 1:6, ".")

# return result as data.frame
d <- hier_compute(
  inp = yae_ll,
  root = "Total",
  method = "list",
  as = "df"
)
# print the data.frame; the number of "@" characters marks the level
d
#> Argument 'dim_spec' is ignored when constructing a hierarchy from a nested list.
#> level name
#> 1 @ Total
#> 2 @@ 1.
#> 3 @@@ 1.1.
#> 4 @@@@ 1.1.1.
#> 5 @@@@ 1.1.2.
#> 6 @@@ 1.2.
#> 7 @@@@ 1.2.1.
#> 8 @@@@ 1.2.2.
#> 9 @@@@ 1.2.3.
#> 10 @@@@ 1.2.4.
#> 11 @@@@ 1.2.5.
#> 12 @@@ 1.3.
#> 13 @@@@ 1.3.1.
#> 14 @@@@ 1.3.2.
#> 15 @@@@ 1.3.3.
#> 16 @@@@ 1.3.4.
#> 17 @@@@ 1.3.5.
#> 18 @@@ 1.4.
#> 19 @@@@ 1.4.1.
#> 20 @@@@ 1.4.2.
#> 21 @@@@ 1.4.3.
#> 22 @@@@ 1.4.4.
#> 23 @@@@ 1.4.5.
#> 24 @@@@ 1.4.6.
#> 25 @@@ 1.5.
#> 26 @@@ 1.6.
#> 27 @@@ 1.7.
#> 28 @@@ 1.8.
#> 29 @@@ 1.9.
#> 30 @@ 2.
#> 31 @@ 3.