Skip to contents

ecostate is an R package for fitting the mass-balance dynamics specified by EcoSim as a state-space model. We here highlight a few features in particular.

Simulation demonstration

We first simulate new data. To do so, we specify a 5-species ecosystem:

# Time-interval
years = 1981:2020
n_years = length(years)

# Name taxa (optional, for illustration)
taxa = c("Consumer_1", "Consumer_2", "Predator_1", "Predator_2", "Producer", "Detritus")
n_species = length(taxa)

# Ecopath-with-EcoSim parameters
# Diet matrix
DC_ij = matrix( c(
  0,     0,     0.8,  0.4,  0,   0,
  0,     0,     0.2,  0.6,  0,   0,
  0,     0,     0,    0,    0,   0,
  0,     0,     0,    0,    0,   0,
  0.9,   0.3,   0,    0,    0,   0,
  0.1,   0.7,   0,    0,    0,   0
), byrow=TRUE, ncol=n_species)
PB_i = c( 4, 1, 0.2, 0.1, 90, 0.5 )         # Reciprocal of mean age according to Polovina-1984 ~= M
QB_i = c( 10, 4, 3, 1, NA, NA )
EE_i = c( 0.9, 0.9, NA, NA, 0.9, 0.9 )
B_i = c( NA, NA, 1, 1, NA, NA )
U_i = rep( 0.2, n_species )
type_i = c( "hetero", "hetero", "hetero", "hetero", "auto", "detritus" )
which_primary = which(type_i=="auto")
which_detritus = which(type_i=="detritus")
X_ij = matrix( 2, nrow=n_species, ncol=n_species )
X_ij[,which_primary] = 91

We first define a function that simulates data, using the same functions for mass-balance and simulating dynamics are then used by EcoState during parameter estimation:

simulate_data = function(){
  # Simulate process errors
  set.seed(101)
  rarray = \(x,dims=dim(x),sd) array( sd*rnorm(prod(dims)), dim=dims )
  epsilon_ti = rarray(dims=c(n_years,n_species),sd=1) * outer(rep(1,n_years),sigmaB_i)
  
  # Choose method to integrate instantaneous rates for annual dynamics
  n_steps = 10
  project_vars = abm3pc_sys
  
  # Define parameters
  Pars = list( logB_i = log( B_i ), 
                     logPB_i = log(PB_i),
                     logQB_i = log(QB_i),
                     Xprime_ij = log(X_ij - 1),
                     EE_i = EE_i,
                     DC_ij = DC_ij,
                     U_i = U_i,
                     type_i = type_i,
                     noB_i = ifelse( is.na(B_i), 1, 0),
                     epsilon_i = rep(0,n_species),
                     logF_i = rep(-Inf,n_species) )
                     #which_detritus = which_detritus,
                     #which_primary = which_primary,
                     #F_type = "integrated" )
  Pars_full = add_equilibrium( Pars,
                               scale_solver = "joint",
                               noB_i = ifelse(is.na(Pars$logB_i),1,0),
                               type_i = type_i )

  # Project forward
  Bobs_ti = Cobs_ti = B_ti = C_ti = array( NA, dim=c(n_years,n_species), 
                                    dimnames=list("Year"=years,"Taxon"=taxa) )
  B_ti[1,] = exp(Pars_full$logB_i)
  C_ti[1,] = NA
  
  for( t in seq_along(years)[-1] ){
    # Fishing mortality ramps up for two predators
    F_t = c( 0, 0, t/n_years*0.2, t/n_years*0.1, 0, 0 )
  
    #
    pars_t = Pars_full
    pars_t$logF_i = log(F_t)
    pars_t$epsilon_i = epsilon_ti[t,]
    pars_t$nu_i = rep(0,n_species)

    # Integrate dynamics annually
    #data2 = local({
    #                n_species = n_species
    #                environment()
    #})
    #environment(dBdt) <- data2
    sim = project_vars(
          f = dBdt,
          a = years[t-1], 
          b = years[t],
          n = n_steps,
          Pars = pars_t,
          type_i = type_i,
          n_species = n_species,
          F_type = "integrated",
          y0 = c( B_ti[t-1,], rep(0,n_species) ) )
    
    # Record results
    B_ti[t,] = sim$y[nrow(sim$y),seq_len(n_species)]
    C_ti[t,] = sim$y[nrow(sim$y),n_species+seq_len(n_species)]
  
    # Simulate measurement errors
    Bobs_ti[t,] = B_ti[t,] * exp(0.1*rnorm(n_species))
    Cobs_ti[t,] = ifelse(C_ti[t,]==0,NA,C_ti[t,]) * exp(0.1*rnorm(n_species))
  }
  return( list( "B_ti"=B_ti, "C_ti"=C_ti, "Bobs_ti"=Bobs_ti, 
                "Cobs_ti"=Cobs_ti, "Pars_full"=Pars_full) )
}

Comparison with Rpath

We first compare the functions used by EcoState with existing implementations of the model. One script-based implementation available in R is Rpath, and we therefore show the syntax and model-output for this comparison. For this comparison, we first simulate a data set that does not have any process errors:

sigmaB_i = c(0, 0, 0, 0, 0, 0) # Taxa 1-2 crashes solver if sigmaB > 0.02
sim = simulate_data()

We then load Rpath and reformat inputs in the format that it expects to calculate mass-balance:

# Rpath needs types in ascending order (EcoState doesn't care)
types  <- sapply( c(type_i,"fishery"), FUN=switch, 
                  "hetero"==0, "auto"=1, "detritus"=2, "fishery"=3 )
groups <- c( taxa, "fishery" )
stgroups = rep(NA, length(groups) )
REco.params <- create.rpath.params(group = groups, type = types, stgroup = stgroups)

# Fill in biomass
#REco.params$model$Biomass = c( exp(Pars_full$logB_i), NA )
REco.params$model$Biomass = c( B_i, NA )
REco.params$model$EE = c( ifelse(type_i=="detritus",NA,EE_i), NA )
REco.params$model$PB = c( PB_i, NA )
REco.params$model$QB = c( QB_i, NA )

#Biomass accumulation and unassimilated consumption
REco.params$model$BioAcc = c( rep(0,length(taxa)), NA )
REco.params$model$Unassim = c( ifelse(type_i=="hetero",0.2,0), NA )

#Detrital Fate
REco.params$model$Detritus = c( ifelse(type_i=="detritus",0,1), 0 )
REco.params$model$fishery = c( rep(0,length(taxa)), NA )
REco.params$model$fishery.disc = c( rep(0,length(taxa)), NA )

# Diet
for(j in 1:5) REco.params$diet[seq_along(taxa),j+1] = DC_ij[,j]
REco.params$diet[seq_along(taxa),2] = DC_ij[,1]

# Balance using Ecopath equations
check.rpath.params( REco.params)
#> Rpath parameter file is functional.
REco <- rpath(REco.params, eco.name = 'R Ecosystem')

We can then simulate forward in time using Rpath:

# Create simulation object
REco.sim <- rsim.scenario(REco, REco.params, years = 1:40)

#
REco.sim = adjust.fishing( Rsim.scenario=REco.sim, 
                           parameter='ForcedFRate', 
                           group = 'Predator_1', # Group is which species to apply
                           sim.year = 1:40, 
                           sim.month = 0, 
                           value = (1:40/40)*0.2 )
REco.sim = adjust.fishing( Rsim.scenario=REco.sim, 
                           parameter='ForcedFRate', 
                           group = 'Predator_2', # Group is which species to apply
                           sim.year = 1:40, 
                           sim.month = 0, 
                           value = (1:40/40)*0.1 )

# Match vulnerability for self-limitation in Producers
REco.sim$params$VV[which(REco.sim$params$PreyFrom==0 & REco.sim$params$PreyTo==5)] = 91

# 
REco.run1 <- rsim.run(REco.sim, method = 'RK4', years = 1:40)

Comparing the scenario forecasted with Rpath against the simulated time-series, we see that the two closely match. EcoState uses the same functions during fitting, and so it also closely matches Rpath.

# Calculate annual simulated catch
Year = rep( years, each=12)
Bsim_ti = apply( REco.run1$out_Biomass[,-1], MARGIN=2, FUN=\(x) tapply(x,INDEX=Year,FUN=mean) )

#
par(mfrow=c(2,1), mar=c(3,3,1,1), mgp=c(2,0.5,0) )
matplot( x=years, y=Bsim_ti / outer(rep(1,n_years),Bsim_ti[1,]), 
         type="l", lwd=3, lty="solid", col=rainbow(n_species), log="y",
         ylab="Relative biomass (Rpath)", xlab="Year" )
Brel_ti = sim$B_ti / outer(rep(1,n_years),sim$B_ti[1,])
matplot( x=years, y=sim$B_ti / outer(rep(1,n_years),sim$B_ti[1,]), 
         type="l", lwd=3, lty="solid", col=rainbow(n_species), log="y",
         ylab="Relative biomass (Simulated)", xlab="Year" )
legend("bottomleft", fill=rainbow(n_species), legend=taxa, ncol=2, bty="n")

Fitting the model using EcoState

We next want to show how EcoState performs when fitting to simulated data. We simulate a data set that has process errors, and plot it to compare with the previous simulation that did not have process errors:

# Simulate new data
sigmaB_i = c(0.02, 0.02, 0.1, 0.1, 0.1, 0.1) # Taxa 1-2 crashes solver if sigmaB > 0.02
sim = simulate_data()

# Unload simulated data
Bobs_ti = sim$Bobs_ti
Cobs_ti = sim$Cobs_ti
B_ti = sim$B_ti
Pars_full = sim$Pars_full

# Plot simulation with process errors
matplot( x=years, y=B_ti / outer(rep(1,n_years),B_ti[1,]), 
         type="l", lwd=3, lty="solid", col=rainbow(n_species), log="y",
         ylab="Relative biomass (simulated)", xlab="Year" )

We then reformat simulated biomass and catch time-series into long-form data frames and fit them with ecostate

# reformat to longform data-frame
Catch = na.omit(data.frame(expand.grid(dimnames(Cobs_ti)), "Mass"=as.vector(Cobs_ti)))
Biomass = na.omit(data.frame(expand.grid(dimnames(Bobs_ti)), "Mass"=as.vector(Bobs_ti)))

# Settings: specify what parameters to estimate
fit_eps = c("Producer", "Detritus", "Predator_1", "Predator_2")   # process errors
fit_Q = c()       # catchability coefficient
fit_B0 = c()      # non-equilibrium initial condition
fit_B = taxa       # equilibrium biomass

# Solving for EE and giving uninformed initial values for biomass
fittedB_i = exp(Pars_full$logB_i)
fittedEE_i = rep(NA, n_species)

# Label EwE inputs for each taxon as expected (so users can easily change taxa)
names(PB_i) = names(QB_i) = names(fittedB_i) = names(fittedEE_i) = names(type_i) = names(U_i) = taxa
  dimnames(DC_ij) = dimnames(X_ij) = list("Prey"=taxa, "Predator"=taxa)
  
# Define priors
log_prior = function(p){
  # Prior on process-error log-SD to stabilize model
  logp = sum(dnorm( p$logtau_i, mean=log(0.2), sd=1, log=TRUE ), na.rm=TRUE)
}

# Run model
out = ecostate( taxa = taxa,
                years = years,
                catch = Catch,
                biomass = Biomass,
                PB = PB_i,
                QB = QB_i,
                DC = DC_ij,
                B = fittedB_i,
                EE = fittedEE_i,
                X = X_ij,
                type = type_i,
                U = U_i,
                fit_B = fit_B,
                fit_Q = fit_Q,
                fit_eps = fit_eps,
                fit_B0 = fit_B0,
                log_prior = log_prior,
                control = ecostate_control( # Much faster to turn off
                                            tmbad.sparse_hessian_compress = 0,
                                            # Faster to profile these
                                            profile = c("logF_ti","logB_i"),
                                            # More stable when starting low
                                            start_tau = 0.001 ))

# print output to terminal
out
#> Dynamics integrated using  ABM  with  10  time-steps
#> Run time: Time difference of 3.585301 mins
#> Negative log-likelihood: -257.8807
#> 
#> EcoSim parameters:
#>                type QB   PB         B        EE   U
#> Consumer_1   hetero 10  4.0 0.7862833 0.8547194 0.2
#> Consumer_2   hetero  4  1.0 1.2979525 0.8762152 0.2
#> Predator_1   hetero  3  0.2 0.9650080 0.0000000 0.2
#> Predator_2   hetero  1  0.1 0.9304681 0.0000000 0.2
#> Producer       auto NA 90.0 0.1006972 0.9527011 0.2
#> Detritus   detritus NA  0.5 9.5823731 0.9226421 0.2
#> 
#> EcoSim diet matrix:
#>             Predator
#> Prey         Consumer_1 Consumer_2 Predator_1 Predator_2 Producer Detritus
#>   Consumer_1        0.0        0.0        0.8        0.4        0        0
#>   Consumer_2        0.0        0.0        0.2        0.6        0        0
#>   Predator_1        0.0        0.0        0.0        0.0        0        0
#>   Predator_2        0.0        0.0        0.0        0.0        0        0
#>   Producer          0.9        0.3        0.0        0.0        0        0
#>   Detritus          0.1        0.7        0.0        0.0        0        0
#> 
#> EcoSim vulnerability matrix:
#>             Predator
#> Prey         Consumer_1 Consumer_2 Predator_1 Predator_2 Producer Detritus
#>   Consumer_1          2          2          2          2       91        2
#>   Consumer_2          2          2          2          2       91        2
#>   Predator_1          2          2          2          2       91        2
#>   Predator_2          2          2          2          2       91        2
#>   Producer            2          2          2          2       91        2
#>   Detritus            2          2          2          2       91        2
#> 
#> Estimates: sdreport(.) result
#>           Estimate Std. Error
#> logtau_i -2.395457  0.1999421
#> logtau_i -2.528423  0.2809718
#> logtau_i -1.815466  0.7800328
#> logtau_i -2.160598  0.2680706
#> Maximum gradient component: 1.367582e-06

Finally, we can extract elements from the fitted model, and plot them easily using ggplot2 to compare them with known (simulated) values. This exercise shows that EcoState can accurately estimate biomass trends:

# Extract estimated biomass
Bhat_ti = out$derived$Est$B_ti
Bse_ti = out$derived$SE$B_ti

# Reformat to long-form data frame for ggplot
results = expand.grid(dimnames(Bobs_ti))
results = cbind( results, 
                 "True" = as.vector(B_ti),
                 "Est" = as.vector(Bhat_ti),
                 "SE" = as.vector(Bse_ti) )

# Plot using ggplot
library(ggplot2)
ggplot(results) + 
  geom_line( aes(x=as.numeric(Year), y=True) ) + 
  facet_wrap( vars(Taxon), scale="free" ) +
  geom_line( aes(x=as.numeric(Year), y=Est), linetype="dotted" ) +
  geom_ribbon( aes(x=as.numeric(Year), ymin=Est-SE, ymax=Est+SE), alpha=0.2)                

Advanced: estimating vulnerability parameters

We can also explore estimating additional parameters. Here, we explore estimating vulnerability:

# Define priors
log_prior = function(p){
  # Prior on process-error log-SD to stabilize model
  logp = sum(dnorm( p$logtau_i, mean=log(0.2), sd=1, log=TRUE ), na.rm=TRUE)
  # Prior on vulnerability to stabilize model (single value mirrored below)
  logp = logp + dnorm( p$Xprime_ij[1,1], mean=0, sd=1, log=TRUE )
}

# Run model
out0 = ecostate( taxa = taxa,
                years = years,
                catch = Catch,
                biomass = Biomass,
                PB = PB_i,
                QB = QB_i,
                DC = DC_ij,
                B = fittedB_i,
                EE = fittedEE_i,
                X = X_ij,
                type = type_i,
                U = U_i,
                fit_B = fit_B,
                fit_Q = fit_Q,
                fit_eps = fit_eps,
                fit_B0 = fit_B0,
                log_prior = log_prior,
                control = ecostate_control( inverse_method = "Standard",
                                            nlminb_loops = 0,
                                            tmbad.sparse_hessian_compress = 0,
                                            getsd = FALSE,
                                            process_error = "epsilon",
                                            profile = c("logF_ti","logB_i"),
                                            start_tau = 0.1 ))

# Change tmb_par
tmb_par = out0$tmb_inputs$p
  tmb_par$Xprime_ij[,which(type_i!="auto")] = log(1.5 - 1)
map = out0$tmb_inputs$map
  map$Xprime_ij = array(NA, dim=dim(tmb_par$Xprime_ij))
  map$Xprime_ij[,which(type_i!="auto")] = 1
  map$Xprime_ij = factor(map$Xprime_ij)

# Run model
out = ecostate( taxa = taxa,
                years = years,
                catch = Catch,
                biomass = Biomass,
                PB = PB_i,
                QB = QB_i,
                DC = DC_ij,
                B = fittedB_i,
                EE = fittedEE_i,
                X = X_ij,
                type = type_i,
                U = U_i,
                fit_B = fit_B,
                fit_Q = fit_Q,
                fit_eps = fit_eps,
                fit_B0 = fit_B0,
                log_prior = log_prior,
                control = ecostate_control( inverse_method = "Standard",
                                            nlminb_loops = 1,
                                            tmbad.sparse_hessian_compress = 0,
                                            getsd = TRUE,
                                            process_error = "epsilon",
                                            profile = c("logF_ti","logB_i"),
                                            start_tau = 0.001,
                                            tmb_par = tmb_par,
                                            map = map ))   # alpha is faster than epsilon
#> Using `control$tmb_par`, so be cautious in constructing it
#> Using `control$map`, so be cautious in constructing it

# print output to terminal
out
#> Dynamics integrated using  ABM  with  10  time-steps
#> Run time: Time difference of 4.339845 mins
#> Negative log-likelihood: -256.9749
#> 
#> EcoSim parameters:
#>                type QB   PB         B        EE   U
#> Consumer_1   hetero 10  4.0 0.7859114 0.8583339 0.2
#> Consumer_2   hetero  4  1.0 1.2992375 0.8788253 0.2
#> Predator_1   hetero  3  0.2 0.9685480 0.0000000 0.2
#> Predator_2   hetero  1  0.1 0.9344566 0.0000000 0.2
#> Producer       auto NA 90.0 0.1008355 0.9511963 0.2
#> Detritus   detritus NA  0.5 9.5883393 0.9227409 0.2
#> 
#> EcoSim diet matrix:
#>             Predator
#> Prey         Consumer_1 Consumer_2 Predator_1 Predator_2 Producer Detritus
#>   Consumer_1        0.0        0.0        0.8        0.4        0        0
#>   Consumer_2        0.0        0.0        0.2        0.6        0        0
#>   Predator_1        0.0        0.0        0.0        0.0        0        0
#>   Predator_2        0.0        0.0        0.0        0.0        0        0
#>   Producer          0.9        0.3        0.0        0.0        0        0
#>   Detritus          0.1        0.7        0.0        0.0        0        0
#> 
#> EcoSim vulnerability matrix:
#>             Predator
#> Prey         Consumer_1 Consumer_2 Predator_1 Predator_2 Producer Detritus
#>   Consumer_1   1.977243   1.977243   1.977243   1.977243       91 1.977243
#>   Consumer_2   1.977243   1.977243   1.977243   1.977243       91 1.977243
#>   Predator_1   1.977243   1.977243   1.977243   1.977243       91 1.977243
#>   Predator_2   1.977243   1.977243   1.977243   1.977243       91 1.977243
#>   Producer     1.977243   1.977243   1.977243   1.977243       91 1.977243
#>   Detritus     1.977243   1.977243   1.977243   1.977243       91 1.977243
#> 
#> Estimates: sdreport(.) result
#>              Estimate Std. Error
#> Xprime_ij -0.02302001  0.1539942
#> logtau_i  -2.39718954  0.2012940
#> logtau_i  -2.51981549  0.2867294
#> logtau_i  -1.77412399  0.8408468
#> logtau_i  -2.15986779  0.2684473
#> Maximum gradient component: 2.810887e-05

Runtime for this vignette: 8.02 mins