The function used to carry out this process is defined as follows:

dd_oag_compute(data, age_span = c(1, 5))1

To show how this function works, we shall use the oag_compute_df dataset that is embedded on this package. This is an example of an abridged series so age_span == 5.

## Load the packages required
library(rddharmony)
library(kableExtra)
library(dplyr)
library(purrr)

## Create a function to be used to generate the table output
tab_output <- function(tab){
kable(tab, booktabs=TRUE, align = "c",table.envir = "capctable", longtable = TRUE) %>%
  kable_styling() %>%
  row_spec(0, bold = T, color = "white", background = "#6ebed8") %>% 
  kable_paper(html_font = "helvetica") %>%
  scroll_box(width = "100%", height = "300px")
  
}
oag_compute_df %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan DataSourceYear DataValue
10 15 10-14 5 NA 4
15 20 15-19 5 2017 1124
20 25 20-24 5 2017 14048
25 30 25-29 5 2017 36525
30 35 30-34 5 2017 38273
35 40 35-39 5 2017 20046
40 45 40-44 5 2017 4544
45 50 45-49 5 2017 278
50 0 50+ -1 2017 27
0 -1 Total -1 2017 114870
-2 -2 Unknown -2 2017 1

From the data, the open age needed to close this series is present (50+ with a data value of 27), so we will drop it for now and show how it can be re-calculated.

new_df <- oag_compute_df %>% 
  filter(!AgeLabel %in% grep("\\+", AgeLabel, value = TRUE, ignore.case = TRUE))

new_df %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan DataSourceYear DataValue
10 15 10-14 5 NA 4
15 20 15-19 5 2017 1124
20 25 20-24 5 2017 14048
25 30 25-29 5 2017 36525
30 35 30-34 5 2017 38273
35 40 35-39 5 2017 20046
40 45 40-44 5 2017 4544
45 50 45-49 5 2017 278
0 -1 Total -1 2017 114870
-2 -2 Unknown -2 2017 1

We start by defining the standard age groups using the std_age_function()2 function.

std_ages <- std_age_function()

std_ages %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan abridged complete AgeSort
0 1 0 1 TRUE TRUE 1
1 5 1-4 4 TRUE FALSE 2
0 5 0-4 5 TRUE FALSE 3
5 10 5-9 5 TRUE FALSE 4
10 15 10-14 5 TRUE FALSE 5
15 20 15-19 5 TRUE FALSE 6
20 25 20-24 5 TRUE FALSE 7
25 30 25-29 5 TRUE FALSE 8
30 35 30-34 5 TRUE FALSE 9
35 40 35-39 5 TRUE FALSE 10
40 45 40-44 5 TRUE FALSE 11
45 50 45-49 5 TRUE FALSE 12
50 55 50-54 5 TRUE FALSE 13
55 60 55-59 5 TRUE FALSE 14
60 65 60-64 5 TRUE FALSE 15
65 70 65-69 5 TRUE FALSE 16
70 75 70-74 5 TRUE FALSE 17
75 80 75-79 5 TRUE FALSE 18
80 85 80-84 5 TRUE FALSE 19
85 90 85-89 5 TRUE FALSE 20
90 95 90-94 5 TRUE FALSE 21
95 100 95-99 5 TRUE FALSE 22
100 105 100-104 5 TRUE FALSE 23
105 110 105-109 5 TRUE FALSE 24
110 115 110-114 5 TRUE FALSE 25
115 120 115-119 5 TRUE FALSE 26
120 125 120-124 5 TRUE FALSE 27
125 130 125-129 5 TRUE FALSE 28
1 2 1 1 FALSE TRUE 29
2 3 2 1 FALSE TRUE 30
3 4 3 1 FALSE TRUE 31
4 5 4 1 FALSE TRUE 32
5 6 5 1 FALSE TRUE 33
6 7 6 1 FALSE TRUE 34
7 8 7 1 FALSE TRUE 35
8 9 8 1 FALSE TRUE 36
9 10 9 1 FALSE TRUE 37
10 11 10 1 FALSE TRUE 38
11 12 11 1 FALSE TRUE 39
12 13 12 1 FALSE TRUE 40
13 14 13 1 FALSE TRUE 41
14 15 14 1 FALSE TRUE 42
15 16 15 1 FALSE TRUE 43
16 17 16 1 FALSE TRUE 44
17 18 17 1 FALSE TRUE 45
18 19 18 1 FALSE TRUE 46
19 20 19 1 FALSE TRUE 47
20 21 20 1 FALSE TRUE 48
21 22 21 1 FALSE TRUE 49
22 23 22 1 FALSE TRUE 50
23 24 23 1 FALSE TRUE 51
24 25 24 1 FALSE TRUE 52
25 26 25 1 FALSE TRUE 53
26 27 26 1 FALSE TRUE 54
27 28 27 1 FALSE TRUE 55
28 29 28 1 FALSE TRUE 56
29 30 29 1 FALSE TRUE 57
30 31 30 1 FALSE TRUE 58
31 32 31 1 FALSE TRUE 59
32 33 32 1 FALSE TRUE 60
33 34 33 1 FALSE TRUE 61
34 35 34 1 FALSE TRUE 62
35 36 35 1 FALSE TRUE 63
36 37 36 1 FALSE TRUE 64
37 38 37 1 FALSE TRUE 65
38 39 38 1 FALSE TRUE 66
39 40 39 1 FALSE TRUE 67
40 41 40 1 FALSE TRUE 68
41 42 41 1 FALSE TRUE 69
42 43 42 1 FALSE TRUE 70
43 44 43 1 FALSE TRUE 71
44 45 44 1 FALSE TRUE 72
45 46 45 1 FALSE TRUE 73
46 47 46 1 FALSE TRUE 74
47 48 47 1 FALSE TRUE 75
48 49 48 1 FALSE TRUE 76
49 50 49 1 FALSE TRUE 77
50 51 50 1 FALSE TRUE 78
51 52 51 1 FALSE TRUE 79
52 53 52 1 FALSE TRUE 80
53 54 53 1 FALSE TRUE 81
54 55 54 1 FALSE TRUE 82
55 56 55 1 FALSE TRUE 83
56 57 56 1 FALSE TRUE 84
57 58 57 1 FALSE TRUE 85
58 59 58 1 FALSE TRUE 86
59 60 59 1 FALSE TRUE 87
60 61 60 1 FALSE TRUE 88
61 62 61 1 FALSE TRUE 89
62 63 62 1 FALSE TRUE 90
63 64 63 1 FALSE TRUE 91
64 65 64 1 FALSE TRUE 92
65 66 65 1 FALSE TRUE 93
66 67 66 1 FALSE TRUE 94
67 68 67 1 FALSE TRUE 95
68 69 68 1 FALSE TRUE 96
69 70 69 1 FALSE TRUE 97
70 71 70 1 FALSE TRUE 98
71 72 71 1 FALSE TRUE 99
72 73 72 1 FALSE TRUE 100
73 74 73 1 FALSE TRUE 101
74 75 74 1 FALSE TRUE 102
75 76 75 1 FALSE TRUE 103
76 77 76 1 FALSE TRUE 104
77 78 77 1 FALSE TRUE 105
78 79 78 1 FALSE TRUE 106
79 80 79 1 FALSE TRUE 107
80 81 80 1 FALSE TRUE 108
81 82 81 1 FALSE TRUE 109
82 83 82 1 FALSE TRUE 110
83 84 83 1 FALSE TRUE 111
84 85 84 1 FALSE TRUE 112
85 86 85 1 FALSE TRUE 113
86 87 86 1 FALSE TRUE 114
87 88 87 1 FALSE TRUE 115
88 89 88 1 FALSE TRUE 116
89 90 89 1 FALSE TRUE 117
90 91 90 1 FALSE TRUE 118
91 92 91 1 FALSE TRUE 119
92 93 92 1 FALSE TRUE 120
93 94 93 1 FALSE TRUE 121
94 95 94 1 FALSE TRUE 122
95 96 95 1 FALSE TRUE 123
96 97 96 1 FALSE TRUE 124
97 98 97 1 FALSE TRUE 125
98 99 98 1 FALSE TRUE 126
99 100 99 1 FALSE TRUE 127
100 101 100 1 FALSE TRUE 128
101 102 101 1 FALSE TRUE 129
102 103 102 1 FALSE TRUE 130
103 104 103 1 FALSE TRUE 131
104 105 104 1 FALSE TRUE 132
105 106 105 1 FALSE TRUE 133
106 107 106 1 FALSE TRUE 134
107 108 107 1 FALSE TRUE 135
108 109 108 1 FALSE TRUE 136
109 110 109 1 FALSE TRUE 137
110 111 110 1 FALSE TRUE 138
111 112 111 1 FALSE TRUE 139
112 113 112 1 FALSE TRUE 140
113 114 113 1 FALSE TRUE 141
114 115 114 1 FALSE TRUE 142
115 116 115 1 FALSE TRUE 143
116 117 116 1 FALSE TRUE 144
117 118 117 1 FALSE TRUE 145
118 119 118 1 FALSE TRUE 146
119 120 119 1 FALSE TRUE 147
120 121 120 1 FALSE TRUE 148
121 122 121 1 FALSE TRUE 149
122 123 122 1 FALSE TRUE 150
123 124 123 1 FALSE TRUE 151
124 125 124 1 FALSE TRUE 152
125 126 125 1 FALSE TRUE 153
126 127 126 1 FALSE TRUE 154
127 128 127 1 FALSE TRUE 155
128 129 128 1 FALSE TRUE 156
129 130 129 1 FALSE TRUE 157
5 0 5+ -1 TRUE TRUE 158
10 0 10+ -1 TRUE TRUE 159
15 0 15+ -1 TRUE TRUE 160
20 0 20+ -1 TRUE TRUE 161
25 0 25+ -1 TRUE TRUE 162
30 0 30+ -1 TRUE TRUE 163
35 0 35+ -1 TRUE TRUE 164
40 0 40+ -1 TRUE TRUE 165
45 0 45+ -1 TRUE TRUE 166
50 0 50+ -1 TRUE TRUE 167
55 0 55+ -1 TRUE TRUE 168
60 0 60+ -1 TRUE TRUE 169
65 0 65+ -1 TRUE TRUE 170
70 0 70+ -1 TRUE TRUE 171
75 0 75+ -1 TRUE TRUE 172
80 0 80+ -1 TRUE TRUE 173
85 0 85+ -1 TRUE TRUE 174
90 0 90+ -1 TRUE TRUE 175
95 0 95+ -1 TRUE TRUE 176
100 0 100+ -1 TRUE TRUE 177
105 0 105+ -1 TRUE TRUE 178
110 0 110+ -1 TRUE TRUE 179
115 0 115+ -1 TRUE TRUE 180
120 0 120+ -1 TRUE TRUE 181
125 0 125+ -1 TRUE TRUE 182
130 0 130+ -1 TRUE TRUE 183
0 -1 Total -1 TRUE TRUE 184
-2 -2 Unknown -2 TRUE TRUE 185

Since the data we are dealing with is an abridged series, we subset the standard age groups to only contain standard abridged age groups, and drop the variables that specify whether the records are abridged or complete.

age_std <- std_ages %>%
             filter(abridged == TRUE) %>%
             select(-abridged, -complete)

age_std %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan AgeSort
0 1 0 1 1
1 5 1-4 4 2
0 5 0-4 5 3
5 10 5-9 5 4
10 15 10-14 5 5
15 20 15-19 5 6
20 25 20-24 5 7
25 30 25-29 5 8
30 35 30-34 5 9
35 40 35-39 5 10
40 45 40-44 5 11
45 50 45-49 5 12
50 55 50-54 5 13
55 60 55-59 5 14
60 65 60-64 5 15
65 70 65-69 5 16
70 75 70-74 5 17
75 80 75-79 5 18
80 85 80-84 5 19
85 90 85-89 5 20
90 95 90-94 5 21
95 100 95-99 5 22
100 105 100-104 5 23
105 110 105-109 5 24
110 115 110-114 5 25
115 120 115-119 5 26
120 125 120-124 5 27
125 130 125-129 5 28
5 0 5+ -1 158
10 0 10+ -1 159
15 0 15+ -1 160
20 0 20+ -1 161
25 0 25+ -1 162
30 0 30+ -1 163
35 0 35+ -1 164
40 0 40+ -1 165
45 0 45+ -1 166
50 0 50+ -1 167
55 0 55+ -1 168
60 0 60+ -1 169
65 0 65+ -1 170
70 0 70+ -1 171
75 0 75+ -1 172
80 0 80+ -1 173
85 0 85+ -1 174
90 0 90+ -1 175
95 0 95+ -1 176
100 0 100+ -1 177
105 0 105+ -1 178
110 0 110+ -1 179
115 0 115+ -1 180
120 0 120+ -1 181
125 0 125+ -1 182
130 0 130+ -1 183
0 -1 Total -1 184
-2 -2 Unknown -2 185

We then subset the data in question to remove any missing data values

age_span = 5
df <- new_df %>%
    dplyr::filter(!is.na(DataValue) & AgeSpan %in% c(age_span, -1, -2))

df %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan DataSourceYear DataValue
10 15 10-14 5 NA 4
15 20 15-19 5 2017 1124
20 25 20-24 5 2017 14048
25 30 25-29 5 2017 36525
30 35 30-34 5 2017 38273
35 40 35-39 5 2017 20046
40 45 40-44 5 2017 4544
45 50 45-49 5 2017 278
0 -1 Total -1 2017 114870
-2 -2 Unknown -2 2017 1

We later identify the start age of the open age group needed to close the series using the dd_oag_agestart()3 function

oag_start <- df %>% dd_oag_agestart
oag_start
#> [1] 50

From there, we list the possible standard open age groups that can be computed from the standard age groups.

  if (!is_empty(oag_start)) {
    age_std_open <- age_std %>%
      dplyr::filter(AgeSpan < 0 & AgeStart <= oag_start & !(AgeStart %in% c(-2,0)))
  } else {
    age_std_open <- NULL
  }
age_std_open %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan AgeSort
5 0 5+ -1 158
10 0 10+ -1 159
15 0 15+ -1 160
20 0 20+ -1 161
25 0 25+ -1 162
30 0 30+ -1 163
35 0 35+ -1 164
40 0 40+ -1 165
45 0 45+ -1 166
50 0 50+ -1 167

We proceed to look for an open age group record on input file that closes out the series. If an open age group already exists, the number of records for this output should be greater than 0 but since we already dropped the age group needed to close the series at the beginning for demonstration purposes, the result of this will be NULL.

oag_indata <- df %>%
      dplyr::filter(AgeLabel == paste0(oag_start,"+")) %>%
      select(AgeStart, AgeEnd, AgeSpan, AgeLabel, DataValue)
nrow(oag_indata)
#> [1] 0

## check if an open age group already exists
oag_check <- nrow(oag_indata) > 0
oag_check
#> [1] FALSE

We then check if there is a Total age label present in the data, and whether it is valid i.e is it an actual total or is it different from the sum of the data values of the age labels present in the data?

# check if there is a "Total" age label in the data
total_check <- "Total" %in% df$AgeLabel

# and whether it is valid
if (total_check) {
 total_value <- df$DataValue[df$AgeLabel=="Total"]
 total_value_valid <- total_value >= suppressWarnings(sum(df$DataValue[df$AgeSpan == age_span]))
} else 
{ total_value_valid <- FALSE 
}
total_value_valid
#> [1] TRUE

We also check if there is an unknown age label that exists in the data, and if it does, we extract its value, if it does not, we set the value to 0

unknown_check <- "Unknown" %in% df$AgeLabel
unknown_check
#> [1] TRUE
unknown_value <- ifelse(unknown_check, df$DataValue[df$AgeLabel=="Unknown"], 0)
unknown_value
#> [1] 1

In this case, an unknown exists and its data value is 1.

If an open age group record already exists, we use this to compute other open age groups.

    if (oag_check) {

      data.out <- NULL
      for (i in 1:nrow(age_std_open)) {
        df.out <- df %>%
          dplyr::filter((AgeStart >= age_std_open$AgeStart[i] & AgeSpan > 0) | AgeLabel==paste0(oag_start,"+")) %>%
          select(AgeStart,AgeEnd,AgeSpan,AgeLabel,DataValue) %>%
          summarise(DataValue = sum(DataValue)) %>%
          mutate(AgeStart = age_std_open$AgeStart[i],
                 AgeEnd   = age_std_open$AgeEnd[i],
                 AgeSpan  = age_std_open$AgeSpan[i],
                 AgeLabel = age_std_open$AgeLabel[i],
                 AgeSort  = age_std_open$AgeSort[i])
        data.out <- rbind(data.out,df.out)
      }

    }

But in our case, we know that the open age does not exist in the series since we already dropped it at the beginning, but there is a total, so we use this to compute the possible oag values.

    # if the needed open age group is not on the series, but there is a total, then use this to compute oag values
    if (!(oag_check) & total_check & total_value_valid) {

      data.out <- NULL
      for (i in 1:nrow(age_std_open)) {
        df.out <- df %>%
          dplyr::filter(AgeStart < age_std_open$AgeStart[i] & AgeSpan > 0) %>%
          select(AgeStart,AgeEnd,AgeSpan,AgeLabel,DataValue) %>%
          summarise(DataValue = total_value - unknown_value - sum(DataValue)) %>%
          mutate(AgeStart = age_std_open$AgeStart[i],
                 AgeEnd   = age_std_open$AgeEnd[i],
                 AgeSpan  = age_std_open$AgeSpan[i],
                 AgeLabel = age_std_open$AgeLabel[i],
                 AgeSort  = age_std_open$AgeSort[i])
        data.out <- rbind(data.out,df.out)
      }
    }

data.out %>% 
  relocate(DataValue , .after = last_col()) %>% 
  tab_output()
AgeStart AgeEnd AgeSpan AgeLabel AgeSort DataValue
5 0 -1 5+ 158 114869
10 0 -1 10+ 159 114869
15 0 -1 15+ 160 114865
20 0 -1 20+ 161 113741
25 0 -1 25+ 162 99693
30 0 -1 30+ 163 63168
35 0 -1 35+ 164 24895
40 0 -1 40+ 165 4849
45 0 -1 45+ 166 305
50 0 -1 50+ 167 27

As shown in DDharmonize_Vitals5 4, the result of this function is integrated back into the data to end up with only one open age group for the series. This is done by picking the AgeLabel in the output above whose AgeStart corresponds to the open age group needed to close the series, in this case 50.

fin_df <- df %>%
        bind_rows(data.out[!(data.out$AgeLabel %in% df$AgeLabel) &
                              data.out$AgeStart == oag_start,]) %>%
       arrange(AgeSort)

fin_df %>% arrange(AgeLabel) %>%  tab_output()
AgeStart AgeEnd AgeLabel AgeSpan DataSourceYear DataValue AgeSort
10 15 10-14 5 NA 4 NA
15 20 15-19 5 2017 1124 NA
20 25 20-24 5 2017 14048 NA
25 30 25-29 5 2017 36525 NA
30 35 30-34 5 2017 38273 NA
35 40 35-39 5 2017 20046 NA
40 45 40-44 5 2017 4544 NA
45 50 45-49 5 2017 278 NA
50 0 50+ -1 NA 27 167
0 -1 Total -1 2017 114870 NA
-2 -2 Unknown -2 2017 1 NA

From the data above, the open age group needed to close the series is “50+” with a data value of 27, similar to the record that we dropped in the beginning.

NB: if neither an open age group nor a Total value exists, then we don’t compute open age group values.