The purpose of this function is to reconcile Totals, in cases where reported values are different from calculated values, after having standardized age series. If the computed total is greater than reported total, the reported total is replaced with the computed total. If computed is less than reported, the difference is added to “Unknown” age

This function is defined as follows:

dd_validate_totals_over_age(data) 1

To show how it works, we shall use the vitals_std_full_sample dataset that is embedded on this package. This dataset represents a scenario where we have both abridged and complete series for the same unique id.

## Load the packages required
library(rddharmony)
library(kableExtra)
library(dplyr)
library(purrr)

## Create a function to be used to generate the table output
tab_output <- function(tab) {
  kable(tab, booktabs = TRUE, align = "c", table.envir = "capctable", longtable = TRUE, row.names = FALSE) %>%
    kable_styling() %>%
    row_spec(0, bold = T, color = "white", background = "#6ebed8") %>%
    kable_paper(html_font = "helvetica") %>%
    scroll_box(width = "100%", height = "300px")
}
data <- vitals_std_full_sample %>% 
       select(starts_with("Age"), SexID, DataValue, abridged, complete)
data %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan AgeSort SexID DataValue abridged complete
0 1 0 1 1 3 0.0 TRUE FALSE
1 5 1-4 4 2 3 0.0 TRUE FALSE
0 5 0-4 5 3 3 0.0 TRUE FALSE
5 10 5-9 5 4 3 0.0 TRUE FALSE
10 15 10-14 5 5 3 4.0 TRUE FALSE
15 20 15-19 5 6 3 1124.0 TRUE FALSE
20 25 20-24 5 7 3 14048.0 TRUE FALSE
25 30 25-29 5 8 3 36525.0 TRUE FALSE
30 35 30-34 5 9 3 38273.0 TRUE FALSE
35 40 35-39 5 10 3 20046.0 TRUE FALSE
40 45 40-44 5 11 3 4544.0 TRUE FALSE
45 50 45-49 5 12 3 278.0 TRUE FALSE
50 0 50+ -1 167 3 27.0 TRUE FALSE
0 -1 Total -1 184 3 114870.0 TRUE FALSE
-2 -2 Unknown -2 185 3 1.0 TRUE FALSE
0 1 0 1 1 3 0.0 FALSE TRUE
1 2 1 1 29 3 0.0 FALSE TRUE
2 3 2 1 30 3 0.0 FALSE TRUE
3 4 3 1 31 3 0.0 FALSE TRUE
4 5 4 1 32 3 0.0 FALSE TRUE
5 6 5 1 33 3 0.0 FALSE TRUE
6 7 6 1 34 3 0.0 FALSE TRUE
7 8 7 1 35 3 0.0 FALSE TRUE
8 9 8 1 36 3 0.0 FALSE TRUE
9 10 9 1 37 3 0.0 FALSE TRUE
10 11 10 1 38 3 0.0 FALSE TRUE
11 12 11 1 39 3 0.0 FALSE TRUE
12 13 12 1 40 3 0.2 FALSE TRUE
13 14 13 1 41 3 1.0 FALSE TRUE
14 15 14 1 42 3 2.8 FALSE TRUE
15 16 15 1 43 3 14.0 FALSE TRUE
16 17 16 1 44 3 55.0 FALSE TRUE
17 18 17 1 45 3 149.0 FALSE TRUE
18 19 18 1 46 3 287.0 FALSE TRUE
19 20 19 1 47 3 619.0 FALSE TRUE
20 21 20 1 48 3 1174.0 FALSE TRUE
21 22 21 1 49 3 1759.0 FALSE TRUE
22 23 22 1 50 3 2717.0 FALSE TRUE
23 24 23 1 51 3 3615.0 FALSE TRUE
24 25 24 1 52 3 4783.0 FALSE TRUE
25 26 25 1 53 3 6069.0 FALSE TRUE
26 27 26 1 54 3 6803.0 FALSE TRUE
27 28 27 1 55 3 7360.0 FALSE TRUE
28 29 28 1 56 3 8029.0 FALSE TRUE
29 30 29 1 57 3 8264.0 FALSE TRUE
30 31 30 1 58 3 8482.0 FALSE TRUE
31 32 31 1 59 3 8149.0 FALSE TRUE
32 33 32 1 60 3 7756.0 FALSE TRUE
33 34 33 1 61 3 7116.0 FALSE TRUE
34 35 34 1 62 3 6770.0 FALSE TRUE
35 36 35 1 63 3 5919.0 FALSE TRUE
36 37 36 1 64 3 4805.0 FALSE TRUE
37 38 37 1 65 3 3771.0 FALSE TRUE
38 39 38 1 66 3 3187.0 FALSE TRUE
39 40 39 1 67 3 2364.0 FALSE TRUE
40 41 40 1 68 3 1853.0 FALSE TRUE
41 42 41 1 69 3 1220.0 FALSE TRUE
42 43 42 1 70 3 802.0 FALSE TRUE
43 44 43 1 71 3 432.0 FALSE TRUE
44 45 44 1 72 3 237.0 FALSE TRUE
45 46 45 1 73 3 129.0 FALSE TRUE
46 47 46 1 74 3 75.0 FALSE TRUE
47 48 47 1 75 3 35.0 FALSE TRUE
48 49 48 1 76 3 19.0 FALSE TRUE
49 50 49 1 77 3 20.0 FALSE TRUE
50 0 50+ -1 167 3 27.0 FALSE TRUE
0 -1 Total -1 184 3 114870.0 FALSE TRUE
-2 -2 Unknown -2 185 3 1.0 FALSE TRUE

This process begins by subsetting the data to have abridged records for each particular sex id. The reported totals are extracted and actual totals calculated. If the reported and actual totals exist and if the computed total is greater than reported total, then the reported total is replaced with the computed. If the computed total is less than the reported, then the difference is added to the “Unknown” age label.

sex <- 3 

    ## subset the data to have abridged records for each particular sex id
    df_abr <- data %>%
      dplyr::filter(SexID == sex & abridged == TRUE)

    ## extract the reported totals and the actual totals
    total_reported_abr <- df_abr$DataValue[df_abr$AgeLabel == "Total"]
    total_computed_abr <- sum(df_abr$DataValue[df_abr$AgeSpan %in% c(-1,-2, 5) & df_abr$AgeLabel != "Total"])

    ## if the reported and actual totals exist...
    if (!is_empty(total_reported_abr) & !is_empty(total_computed_abr)) {

      # if computed is greater than reported, then replace reported with computed
      diff <- total_computed_abr - total_reported_abr
      if (diff > 0 ) {
        df_abr$DataValue[df_abr$AgeLabel == "Total"] <- total_computed_abr
      }
      # if computed is less than reported, then add difference to "Unknown" age
      if (diff < 0 ) {
        df_abr$DataValue[df_abr$AgeLabel == "Unknown"] <- df_abr$DataValue[df_abr$AgeLabel == "Unknown"] +
          (total_reported_abr-total_computed_abr)
      }

    }

The same process is repeated for complete records

    ## subset the data to have complete records for each particular sex id
    df_cpl <- data %>%
      dplyr::filter(SexID == sex & complete == TRUE)

    total_reported_cpl <- df_cpl$DataValue[df_cpl$AgeLabel == "Total"]
    total_computed_cpl <- sum(df_cpl$DataValue[df_cpl$AgeLabel != "Total"])

    if (!is_empty(total_reported_cpl) & !is_empty(total_computed_cpl)) {

      # if computed is greater than reported, then replace reported with computed
      diff <- total_computed_cpl - total_reported_cpl
      if (diff > 0 ) {
        df_cpl$DataValue[df_cpl$AgeLabel == "Total"] <- total_computed_cpl
      }
      # if computed is less than reported, then add difference to "Unknown" age
      if (diff < 0 ) {
        df_cpl$DataValue[df_cpl$AgeLabel == "Unknown"] <- df_cpl$DataValue[df_cpl$AgeLabel == "Unknown"] +
          (total_reported_cpl-total_computed_cpl)
      }

    }

Eventually, both datasets are appended to form the final output

out.data <- NULL
out.data <- rbind(out.data, df_abr, df_cpl)
out.data %>% tab_output()
AgeStart AgeEnd AgeLabel AgeSpan AgeSort SexID DataValue abridged complete
0 1 0 1 1 3 0.0 TRUE FALSE
1 5 1-4 4 2 3 0.0 TRUE FALSE
0 5 0-4 5 3 3 0.0 TRUE FALSE
5 10 5-9 5 4 3 0.0 TRUE FALSE
10 15 10-14 5 5 3 4.0 TRUE FALSE
15 20 15-19 5 6 3 1124.0 TRUE FALSE
20 25 20-24 5 7 3 14048.0 TRUE FALSE
25 30 25-29 5 8 3 36525.0 TRUE FALSE
30 35 30-34 5 9 3 38273.0 TRUE FALSE
35 40 35-39 5 10 3 20046.0 TRUE FALSE
40 45 40-44 5 11 3 4544.0 TRUE FALSE
45 50 45-49 5 12 3 278.0 TRUE FALSE
50 0 50+ -1 167 3 27.0 TRUE FALSE
0 -1 Total -1 184 3 114870.0 TRUE FALSE
-2 -2 Unknown -2 185 3 1.0 TRUE FALSE
0 1 0 1 1 3 0.0 FALSE TRUE
1 2 1 1 29 3 0.0 FALSE TRUE
2 3 2 1 30 3 0.0 FALSE TRUE
3 4 3 1 31 3 0.0 FALSE TRUE
4 5 4 1 32 3 0.0 FALSE TRUE
5 6 5 1 33 3 0.0 FALSE TRUE
6 7 6 1 34 3 0.0 FALSE TRUE
7 8 7 1 35 3 0.0 FALSE TRUE
8 9 8 1 36 3 0.0 FALSE TRUE
9 10 9 1 37 3 0.0 FALSE TRUE
10 11 10 1 38 3 0.0 FALSE TRUE
11 12 11 1 39 3 0.0 FALSE TRUE
12 13 12 1 40 3 0.2 FALSE TRUE
13 14 13 1 41 3 1.0 FALSE TRUE
14 15 14 1 42 3 2.8 FALSE TRUE
15 16 15 1 43 3 14.0 FALSE TRUE
16 17 16 1 44 3 55.0 FALSE TRUE
17 18 17 1 45 3 149.0 FALSE TRUE
18 19 18 1 46 3 287.0 FALSE TRUE
19 20 19 1 47 3 619.0 FALSE TRUE
20 21 20 1 48 3 1174.0 FALSE TRUE
21 22 21 1 49 3 1759.0 FALSE TRUE
22 23 22 1 50 3 2717.0 FALSE TRUE
23 24 23 1 51 3 3615.0 FALSE TRUE
24 25 24 1 52 3 4783.0 FALSE TRUE
25 26 25 1 53 3 6069.0 FALSE TRUE
26 27 26 1 54 3 6803.0 FALSE TRUE
27 28 27 1 55 3 7360.0 FALSE TRUE
28 29 28 1 56 3 8029.0 FALSE TRUE
29 30 29 1 57 3 8264.0 FALSE TRUE
30 31 30 1 58 3 8482.0 FALSE TRUE
31 32 31 1 59 3 8149.0 FALSE TRUE
32 33 32 1 60 3 7756.0 FALSE TRUE
33 34 33 1 61 3 7116.0 FALSE TRUE
34 35 34 1 62 3 6770.0 FALSE TRUE
35 36 35 1 63 3 5919.0 FALSE TRUE
36 37 36 1 64 3 4805.0 FALSE TRUE
37 38 37 1 65 3 3771.0 FALSE TRUE
38 39 38 1 66 3 3187.0 FALSE TRUE
39 40 39 1 67 3 2364.0 FALSE TRUE
40 41 40 1 68 3 1853.0 FALSE TRUE
41 42 41 1 69 3 1220.0 FALSE TRUE
42 43 42 1 70 3 802.0 FALSE TRUE
43 44 43 1 71 3 432.0 FALSE TRUE
44 45 44 1 72 3 237.0 FALSE TRUE
45 46 45 1 73 3 129.0 FALSE TRUE
46 47 46 1 74 3 75.0 FALSE TRUE
47 48 47 1 75 3 35.0 FALSE TRUE
48 49 48 1 76 3 19.0 FALSE TRUE
49 50 49 1 77 3 20.0 FALSE TRUE
50 0 50+ -1 167 3 27.0 FALSE TRUE
0 -1 Total -1 184 3 114870.0 FALSE TRUE
-2 -2 Unknown -2 185 3 1.0 FALSE TRUE