This function computes a nested hierarchy from a character vector or a (named) list.

hier_compute(inp, dim_spec = NULL, root = NULL, method = "len", as = "network")

Arguments

inp

a character vector (for methods "len" and "endpos") containing the codes of a hierarchical variable, or a named list (for method "list"). In the latter case, each list element contains the codes belonging to the node whose name is the name of that list element. The required input formats are explained further in the examples below.

dim_spec

an (integerish) vector containing either the length (in number of characters) of each level or the end positions of these levels. In the latter case, argument method must be set to "endpos". This argument is ignored if the hierarchy is created from a named list.

root

NULL or a scalar character specifying the name of the overall total in case it is not already encoded in the first positions of the values in inp.

method

one of "len" (the default), "endpos" or "list"

  • "len": the number of characters for each of the levels needs to be specified

  • "endpos": the end position of each level needs to be specified

  • "list": the hierarchy is built from a named list in which each element contains the codes belonging to the node named by that element; argument dim_spec is ignored

as

(character) specifies the type of the return object. Possible choices are:

  • "network": the default; a data.table describing the tree as a network. The "root" column holds the name of the parent node of the label in the "leaf" column; as the examples below show, a "level" column additionally gives the depth of each node.

  • "df": a data.frame in "@; label"-format.

  • "dt": a data.table in "@; label"-format.

  • "code": returns the R-code that is required to build the tree

  • "sdc": the tree is structured as a list

  • "argus": suitable input for hier_export() to write "hrc"-files for tau argus.

  • "json": a character-vector encoded as json-string.

Value

a hierarchical data structure whose type depends on the choice of argument as

Examples

## Example Regional Codes (NUTS)
# Each code is split into three levels by character position:
# digits 1-2 (len=2, endpos=2) --> level 1
# digit 3 (len=1, endpos=3) --> level 2
# digits 4-5 (len=2, endpos=5) --> level 3

# all strings have equal length but total is not encoded in these values
geo_m <- c(
  "01051", "01053", "01054", "01055",
  "01056", "01057", "01058", "01059", "01060",
  "01061", "01062",
  "02000",
  "03151", "03152", "03153", "03154", "03155", "03156", "03157", "03158",
  "03251", "03252", "03254", "03255", "03256", "03257",
  "03351", "03352", "03353", "03354", "03355",
  "03356", "03357", "03358", "03359",
  "03360", "03361",
  "03451", "03452", "03453", "03454", "03455", "03456",
  "10155")

# a: levels described by their end positions (2, 3, 5); the name of the
# overall total is supplied via "root" because it is not part of the codes
a <- hier_compute(
  inp = geo_m,
  dim_spec = c(2, 3, 5),
  root = "Tot",
  method = "endpos"
)
# b: the same levels described by their lengths (2, 1, 2)
b <- hier_compute(
  inp = geo_m,
  dim_spec = c(2, 1, 2),
  root = "Tot",
  method = "len"
)
# both specifications yield the same hierarchy
identical(
  hier_convert(a, as = "df"),
  hier_convert(b, as = "df")
)
#> [1] TRUE

# total is contained in the first 3 positions of the input values
# --> we need to set name of the overall total (argument "root")
# to NULL (the default)
# (geo_m is the character vector defined in the previous example)
geo_m_with_tot <- paste0("Tot", geo_m)
# a: level lengths; the leading 3 characters ("Tot") form the total
a <- hier_compute(
  inp = geo_m_with_tot,
  dim_spec = c(3, 2, 1, 2),
  method = "len"
)
# b: the same levels given as end positions (3, 5, 6, 8)
b <- hier_compute(
  inp = geo_m_with_tot,
  dim_spec = c(3, 5, 6, 8),
  method = "endpos"
)
identical(a, b)
#> [1] TRUE

# example where inputs have unequal length
# the overall total is not included in input vector
# (codes such as "2." or "1.5." are shorter than the full 6 characters)
yae_h <- c(
  "1.1.1.", "1.1.2.",
  "1.2.1.", "1.2.2.", "1.2.3.", "1.2.4.", "1.2.5.", "1.3.1.",
  "1.3.2.", "1.3.3.", "1.3.4.", "1.3.5.",
  "1.4.1.", "1.4.2.", "1.4.3.", "1.4.4.", "1.4.5.",
  "1.5.", "1.6.", "1.7.", "1.8.", "1.9.", "2.", "3.")

# a: levels described by their end positions (2, 4, 6)
a <- hier_compute(
  inp = yae_h,
  dim_spec = c(2, 4, 6),
  root = "Tot",
  method = "endpos"
)
# b: the same levels described by their lengths (2 characters each)
b <- hier_compute(
  inp = yae_h,
  dim_spec = c(2, 2, 2),
  root = "Tot",
  method = "len"
)
# both specifications yield the same hierarchy
identical(
  hier_convert(a, as = "df"),
  hier_convert(b, as = "df")
)
#> [1] TRUE

# Same example, but overall total is contained in the first 3 positions
# of the input values --> argument "root" needs to be
# set to NULL (the default)
# (yae_h is the character vector defined in the previous example)
yae_h_with_tot <- paste0("Tot", yae_h)
# a: level lengths; the leading 3 characters ("Tot") form the total
a <- hier_compute(
  inp = yae_h_with_tot,
  dim_spec = c(3, 2, 2, 2),
  method = "len"
)
# b: the same levels given as end positions (3, 5, 7, 9)
b <- hier_compute(
  inp = yae_h_with_tot,
  dim_spec = c(3, 5, 7, 9),
  method = "endpos"
)
identical(a, b)
#> [1] TRUE

# An example using a list as input (same regional codes as in geo_m above)
# Hierarchy: digits 1-2 (nuts1), digit 3 (nuts2), digits 4-5 (nuts3)
# The order of the list-elements is not important but the
# names of input-list correspond to (subtotal/level) names:
# each element holds the codes directly below the node it is named after
geo_ll <- list()
geo_ll[["Total"]] <- c("01", "02", "03", "10")
geo_ll[["010"]]   <- c(
  "01051", "01053", "01054", "01055",
  "01056", "01057", "01058", "01059",
  "01060", "01061", "01062"
)
geo_ll[["031"]]   <- c(
  "03151", "03152", "03153", "03154",
  "03155", "03156", "03157", "03158"
)
geo_ll[["032"]]   <- c(
  "03251", "03252", "03254",
  "03255", "03256", "03257"
)
geo_ll[["033"]]   <- c(
  "03351", "03352", "03353", "03354", "03355",
  "03356", "03357", "03358", "03359",
  "03360", "03361"
)
geo_ll[["034"]]   <- c(
  "03451", "03452", "03453",
  "03454", "03455","03456"
)
# remaining nodes, given in arbitrary order: node name -> its direct children
geo_ll[["01"]]    <- "010"
geo_ll[["02"]]    <- "020"
geo_ll[["020"]]   <- "02000"
geo_ll[["03"]]    <- c("031", "032", "033", "034")
geo_ll[["10"]]    <- "101"
geo_ll[["101"]]   <- "10155"

# build the hierarchy from the named list and print it; "root" is the name
# of the list element that holds the top-level codes
d <- hier_compute(
  inp = geo_ll,
  root = "Total",
  method = "list"
); d
#> Argument 'dim_spec' is ignored when constructing a hierarchy from a nested list.
#>      root  leaf level
#>  1: Total Total     1
#>  2: Total    01     2
#>  3: Total    02     2
#>  4: Total    03     2
#>  5: Total    10     2
#>  6:    01   010     3
#>  7:   010 01051     4
#>  8:   010 01053     4
#>  9:   010 01054     4
#> 10:   010 01055     4
#> 11:   010 01056     4
#> 12:   010 01057     4
#> 13:   010 01058     4
#> 14:   010 01059     4
#> 15:   010 01060     4
#> 16:   010 01061     4
#> 17:   010 01062     4
#> 18:    02   020     3
#> 19:   020 02000     4
#> 20:    03   031     3
#> 21:    03   032     3
#> 22:    03   033     3
#> 23:    03   034     3
#> 24:   031 03151     4
#> 25:   031 03152     4
#> 26:   031 03153     4
#> 27:   031 03154     4
#> 28:   031 03155     4
#> 29:   031 03156     4
#> 30:   031 03157     4
#> 31:   031 03158     4
#> 32:   032 03251     4
#> 33:   032 03252     4
#> 34:   032 03254     4
#> 35:   032 03255     4
#> 36:   032 03256     4
#> 37:   032 03257     4
#> 38:   033 03351     4
#> 39:   033 03352     4
#> 40:   033 03353     4
#> 41:   033 03354     4
#> 42:   033 03355     4
#> 43:   033 03356     4
#> 44:   033 03357     4
#> 45:   033 03358     4
#> 46:   033 03359     4
#> 47:   033 03360     4
#> 48:   033 03361     4
#> 49:   034 03451     4
#> 50:   034 03452     4
#> 51:   034 03453     4
#> 52:   034 03454     4
#> 53:   034 03455     4
#> 54:   034 03456     4
#> 55:    10   101     3
#> 56:   101 10155     4
#>      root  leaf level

## Reproduce example from above with input defined as named list
# NOTE: node "1.4." is given six children here ("1.4.1." to "1.4.6."),
# one more than in the character vector yae_h used above
yae_ll <- list()
yae_ll[["Total"]] <- c("1.", "2.", "3.")
yae_ll[["1."]] <- paste0("1.", 1:9, ".")
yae_ll[["1.1."]] <- paste0("1.1.", 1:2, ".")
yae_ll[["1.2."]] <- paste0("1.2.", 1:5, ".")
yae_ll[["1.3."]] <- paste0("1.3.", 1:5, ".")
yae_ll[["1.4."]] <- paste0("1.4.", 1:6, ".")

# return result as data.frame
# (one row per node; the number of "@" characters encodes the node's level)
d <- hier_compute(
  inp = yae_ll,
  root = "Total",
  method = "list",
  as = "df"
); d
#> Argument 'dim_spec' is ignored when constructing a hierarchy from a nested list.
#>    level   name
#> 1      @  Total
#> 2     @@     1.
#> 3    @@@   1.1.
#> 4   @@@@ 1.1.1.
#> 5   @@@@ 1.1.2.
#> 6    @@@   1.2.
#> 7   @@@@ 1.2.1.
#> 8   @@@@ 1.2.2.
#> 9   @@@@ 1.2.3.
#> 10  @@@@ 1.2.4.
#> 11  @@@@ 1.2.5.
#> 12   @@@   1.3.
#> 13  @@@@ 1.3.1.
#> 14  @@@@ 1.3.2.
#> 15  @@@@ 1.3.3.
#> 16  @@@@ 1.3.4.
#> 17  @@@@ 1.3.5.
#> 18   @@@   1.4.
#> 19  @@@@ 1.4.1.
#> 20  @@@@ 1.4.2.
#> 21  @@@@ 1.4.3.
#> 22  @@@@ 1.4.4.
#> 23  @@@@ 1.4.5.
#> 24  @@@@ 1.4.6.
#> 25   @@@   1.5.
#> 26   @@@   1.6.
#> 27   @@@   1.7.
#> 28   @@@   1.8.
#> 29   @@@   1.9.
#> 30    @@     2.
#> 31    @@     3.