To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.
library(outbreaks)
library(incidence2)
library(i2extras)
raw_dat <- ebola_sim_clean$linelistFor this example we will restrict ourselves to the first 20 weeks of data:
dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week"
)[1:20, ]
dat
#> An incidence object: 20 x 2
#> date range: [2014-W15] to [2014-W34]
#> cases: 783
#> interval: 1 (Monday) week 
#> 
#>    date_index count
#>        <yrwk> <int>
#>  1   2014-W15     1
#>  2   2014-W16     1
#>  3   2014-W17     5
#>  4   2014-W18     4
#>  5   2014-W19    12
#>  6   2014-W20    17
#>  7   2014-W21    15
#>  8   2014-W22    19
#>  9   2014-W23    23
#> 10   2014-W24    21
#> 11   2014-W25    30
#> 12   2014-W26    22
#> 13   2014-W27    34
#> 14   2014-W28    38
#> 15   2014-W29    61
#> 16   2014-W30    59
#> 17   2014-W31    80
#> 18   2014-W32    86
#> 19   2014-W33   116
#> 20   2014-W34   139
plot(dat)We can use fit_curve() to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.
out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 1 x 8
#>   count_variable           data model  estimates   fitting_warning fitting_error
#>   <chr>          <list<tibble[> <list> <list>      <list>          <list>       
#> 1 count                [20 × 2] <glm>  <df [20 × … <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>
plot(out)growth_rate(out)
#> # A tibble: 1 x 9
#>   count_variable model      r r_lower r_upper growth_or_decay  time time_lower
#>   <chr>          <list> <dbl>   <dbl>   <dbl> <chr>           <dbl>      <dbl>
#> 1 count          <glm>  0.184   0.168   0.200 doubling         3.77       3.46
#> # … with 1 more variable: time_upper <dbl>To unnest the data we can use unnest() (a function that has been reexported from the tidyr package.
unnest(out, estimates)
#> # A tibble: 20 x 14
#>    count_variable         data model count date_index estimate lower_ci upper_ci
#>    <chr>          <list<tibbl> <lis> <int>     <yrwk>    <dbl>    <dbl>    <dbl>
#>  1 count              [20 × 2] <glm>     1   2014-W15     4.09     3.20     5.23
#>  2 count              [20 × 2] <glm>     1   2014-W16     4.92     3.91     6.19
#>  3 count              [20 × 2] <glm>     5   2014-W17     5.92     4.77     7.33
#>  4 count              [20 × 2] <glm>     4   2014-W18     7.11     5.82     8.68
#>  5 count              [20 × 2] <glm>    12   2014-W19     8.55     7.11    10.3 
#>  6 count              [20 × 2] <glm>    17   2014-W20    10.3      8.67    12.2 
#>  7 count              [20 × 2] <glm>    15   2014-W21    12.3     10.6     14.4 
#>  8 count              [20 × 2] <glm>    19   2014-W22    14.8     12.9     17.1 
#>  9 count              [20 × 2] <glm>    23   2014-W23    17.8     15.7     20.3 
#> 10 count              [20 × 2] <glm>    21   2014-W24    21.4     19.1     24.0 
#> 11 count              [20 × 2] <glm>    30   2014-W25    25.8     23.3     28.5 
#> 12 count              [20 × 2] <glm>    22   2014-W26    31.0     28.3     33.9 
#> 13 count              [20 × 2] <glm>    34   2014-W27    37.2     34.3     40.4 
#> 14 count              [20 × 2] <glm>    38   2014-W28    44.8     41.5     48.2 
#> 15 count              [20 × 2] <glm>    61   2014-W29    53.8     50.1     57.7 
#> 16 count              [20 × 2] <glm>    59   2014-W30    64.7     60.3     69.4 
#> 17 count              [20 × 2] <glm>    80   2014-W31    77.7     72.2     83.7 
#> 18 count              [20 × 2] <glm>    86   2014-W32    93.4     86.2    101.  
#> 19 count              [20 × 2] <glm>   116   2014-W33   112.     103.     123.  
#> 20 count              [20 × 2] <glm>   139   2014-W34   135.     122.     149.  
#> # … with 6 more variables: lower_pi <int>, upper_pi <int>,
#> #   fitting_warning <list>, fitting_error <list>, prediction_warning <list>,
#> #   prediction_error <list>fit_curve() also works nicely with grouped incidence2 objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.
grouped_dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week",
    groups = hospital
)[1:120, ]
grouped_dat
#> An incidence object: 120 x 3
#> date range: [2014-W15] to [2014-W38]
#> cases: 1621
#> interval: 1 (Monday) week 
#> 
#>    date_index hospital                                     count
#>        <yrwk> <fct>                                        <int>
#>  1   2014-W15 Military Hospital                                1
#>  2   2014-W16 Connaught Hospital                               1
#>  3   2014-W17 <NA>                                             2
#>  4   2014-W17 other                                            3
#>  5   2014-W18 <NA>                                             1
#>  6   2014-W18 Connaught Hospital                               1
#>  7   2014-W18 Princess Christian Maternity Hospital (PCMH)     1
#>  8   2014-W18 Rokupa Hospital                                  1
#>  9   2014-W19 <NA>                                             1
#> 10   2014-W19 Connaught Hospital                               3
#> # … with 110 more rows
out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 x 9
#>   count_variable hospital     data model estimates fitting_warning fitting_error
#>   <chr>          <fct>    <list<t> <lis> <list>    <list>          <list>       
#> 1 count          Connaug… [22 × 2] <glm> <df [22 … <NULL>          <NULL>       
#> 2 count          Militar… [21 × 2] <glm> <df [21 … <NULL>          <NULL>       
#> 3 count          other    [20 × 2] <glm> <df [20 … <NULL>          <NULL>       
#> 4 count          Princes… [17 × 2] <glm> <df [17 … <NULL>          <NULL>       
#> 5 count          Rokupa … [18 × 2] <glm> <df [18 … <NULL>          <NULL>       
#> 6 count          <NA>     [22 × 2] <glm> <df [22 … <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>
# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE)growth_rate(out)
#> # A tibble: 6 x 10
#>   count_variable hospital      model     r r_lower r_upper growth_or_decay  time
#>   <chr>          <fct>         <lis> <dbl>   <dbl>   <dbl> <chr>           <dbl>
#> 1 count          Connaught Ho… <glm> 0.197   0.177   0.217 doubling         3.53
#> 2 count          Military Hos… <glm> 0.173   0.147   0.200 doubling         4.00
#> 3 count          other         <glm> 0.170   0.141   0.200 doubling         4.09
#> 4 count          Princess Chr… <glm> 0.142   0.101   0.188 doubling         4.87
#> 5 count          Rokupa Hospi… <glm> 0.178   0.133   0.228 doubling         3.89
#> 6 count          <NA>          <glm> 0.184   0.164   0.205 doubling         3.77
#> # … with 2 more variables: time_lower <dbl>, time_upper <dbl>We provide helper functions, is_ok(), is_warning() and is_error() to help filter the output as necessary.
out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 x 7
#>   count_variable hospital                  data model estimates  fitting_warning
#>   <chr>          <fct>              <list<tibb> <lis> <list>     <list>         
#> 1 count          Connaught Hospital    [22 × 2] <neg… <df [22 ×… <chr [2]>      
#> 2 count          other                 [20 × 2] <neg… <df [20 ×… <chr [2]>      
#> 3 count          Princess Christia…    [17 × 2] <neg… <df [17 ×… <chr [2]>      
#> 4 count          Rokupa Hospital       [18 × 2] <neg… <df [18 ×… <chr [2]>      
#> 5 count          <NA>                  [22 × 2] <neg… <df [22 ×… <chr [2]>      
#> # … with 1 more variable: prediction_warning <list>
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 x 7
#>    count_variable hospital                data model  estimates fitting_warning 
#>    <chr>          <fct>            <list<tibb> <list> <list>    <chr>           
#>  1 count          Connaught Hospi…    [22 × 2] <negb… <df [22 … iteration limit…
#>  2 count          Connaught Hospi…    [22 × 2] <negb… <df [22 … iteration limit…
#>  3 count          other               [20 × 2] <negb… <df [20 … iteration limit…
#>  4 count          other               [20 × 2] <negb… <df [20 … iteration limit…
#>  5 count          Princess Christ…    [17 × 2] <negb… <df [17 … iteration limit…
#>  6 count          Princess Christ…    [17 × 2] <negb… <df [17 … iteration limit…
#>  7 count          Rokupa Hospital     [18 × 2] <negb… <df [18 … iteration limit…
#>  8 count          Rokupa Hospital     [18 × 2] <negb… <df [18 … iteration limit…
#>  9 count          <NA>                [22 × 2] <negb… <df [22 … iteration limit…
#> 10 count          <NA>                [22 × 2] <negb… <df [22 … iteration limit…
#> # … with 1 more variable: prediction_warning <list>We can add a rolling average, across current and previous intervals, to an incidence2 object with the add_rolling_average() function:
ra <- add_rolling_average(grouped_dat, before = 2) # group observations with the 2 prior
ra
#> # A tibble: 6 x 3
#>   count_variable hospital                                     rolling_average  
#>   <chr>          <fct>                                        <list>           
#> 1 count          Connaught Hospital                           <tibble [22 × 3]>
#> 2 count          Military Hospital                            <tibble [21 × 3]>
#> 3 count          other                                        <tibble [20 × 3]>
#> 4 count          Princess Christian Maternity Hospital (PCMH) <tibble [17 × 3]>
#> 5 count          Rokupa Hospital                              <tibble [18 × 3]>
#> 6 count          <NA>                                         <tibble [22 × 3]>
unnest(ra, rolling_average)
#> # A tibble: 120 x 5
#>    count_variable hospital           date_index count rolling_average
#>    <chr>          <fct>                  <yrwk> <int>           <dbl>
#>  1 count          Connaught Hospital   2014-W16     1           NA   
#>  2 count          Connaught Hospital   2014-W18     1           NA   
#>  3 count          Connaught Hospital   2014-W19     3            1.67
#>  4 count          Connaught Hospital   2014-W20     2            2   
#>  5 count          Connaught Hospital   2014-W21     5            3.33
#>  6 count          Connaught Hospital   2014-W22     4            3.67
#>  7 count          Connaught Hospital   2014-W23     6            5   
#>  8 count          Connaught Hospital   2014-W24     6            5.33
#>  9 count          Connaught Hospital   2014-W25    12            8   
#> 10 count          Connaught Hospital   2014-W26     8            8.67
#> # … with 110 more rows
plot(ra, color = "white")
#> Warning: Removed 12 rows containing missing values (position_stack).
#> Warning: Removed 12 rows containing missing values (position_stack).