Data Preparation

Load and format modeling data for analysis

Load Libraries

Load the R packages needed to execute the code.

Code
library(tidyverse)
library(usefun)
library(ggplot2)
library(here)
library(patchwork)
library(scales)
library(geosphere)
library(terra)
library(raster)
library(sf)
library(viridis)
library(pals)

## Pin select() to the dplyr implementation. Packages attached after tidyverse
## above (e.g. raster) also export a select(), which would otherwise mask it.
select <- dplyr::select

Custom Functions

Load custom functions to process InterSpread Plus outputs and generate figures. These are available from the linked repository at the top of the script.

Code
## Source the utilities file first, using a project-root path built by here().
source(here("./R/utilities.R"))
## source_dir() is not defined in this script; presumably it sources every
## script found in ./R -- TODO confirm where it is defined.
## NOTE(review): "./R" is resolved against the working directory, unlike the
## here() call above, so this line assumes the script runs from the project root.
source_dir("./R")

Import Data

Code
## Assign file path
## Directory passed to import_infection_data() and import_detection_data() below.
## NOTE(review): this is a relative path, so it depends on the working directory;
## the reference file further down is located with here() instead.
file_path <- "./script-inputs/isp-outputs"

Infection Data

import_infection_data() reads the Outputs_Infection.txt files that include data on simulated infection events.

Code
## Read the simulated infection events found under file_path.
## import_infection_data() is a project helper that is not defined in this script.
infection <- import_infection_data(file_path)

Check the number of infection events

Code
## Row count of the imported infection table
nrow(infection)
[1] 4214927

Check infect_day statistics

Code
## Mean, minimum, median and maximum of infect_day over all infection events
summarise(
  infection,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)
mean min median max
141.7015 10 136 365

Check the number of infection events in the central region

Code
## Keep only the infection events whose region is "central"
infection_central <- filter(infection, region == "central")

## Number of rows retained for the central region
nrow(infection_central)
[1] 2224274

Check infect_day statistics for the central region

Code
## Mean, minimum, median and maximum of infect_day for the central subset
summarise(
  infection_central,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)
mean min median max
139.6225 10 132 365

Check the number of infection events in the eastern region

Code
## Keep only the infection events whose region is "eastern"
infection_eastern <- filter(infection, region == "eastern")

## Number of rows retained for the eastern region
nrow(infection_eastern)
[1] 1990653

Check infect_day statistics for the eastern region

Code
## Mean, minimum, median and maximum of infect_day for the eastern subset
summarise(
  infection_eastern,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)
mean min median max
144.0246 10 140 365

Detection Data

import_detection_data() reads the Outputs_Detection.txt files that include data on simulated detection events.

Code
## Read the simulated detection events found under file_path.
## import_detection_data() is a project helper that is not defined in this script.
detection <- import_detection_data(file_path)

Check the number of detection events

Code
## Row count of the imported detection table
nrow(detection)
[1] 4154347

Check detect_day statistics

Code
## Mean, minimum, median and maximum of detect_day over all detection events
summarise(
  detection,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)
mean min median max
149.9366 15 145 365

Check the number of detection events in the central region

Code
## Keep only the detection events whose region is "central"
detection_central <- filter(detection, region == "central")

## Number of rows retained for the central region
nrow(detection_central)
[1] 2190070

Check detect_day statistics for the central region

Code
## Mean, minimum, median and maximum of detect_day for the central subset
summarise(
  detection_central,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)
mean min median max
147.7716 15 141 365

Check the number of detection events in the eastern region

Code
## Keep only the detection events whose region is "eastern"
detection_eastern <- filter(detection, region == "eastern")

## Number of rows retained for the eastern region
nrow(detection_eastern)
[1] 1964277

Check detect_day statistics for the eastern region

Code
## Mean, minimum, median and maximum of detect_day for the eastern subset
summarise(
  detection_eastern,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)
mean min median max
152.3506 15 150 365

Read Farm Reference File

The FMD Farm Reference File used for this project contains geographic coordinates for theoretical farm locations. Premises data were generated using the Farm Location and Agricultural Production Simulator (FLAPS, see Burdett et al. 2015) and do not reflect true farm locations. Farm demographics are based upon data collected in the U.S. Census of Agriculture conducted by the USDA National Agricultural Statistics Service (NASS).

Code
## Read the farm reference file.
## The file is comma-separated, so read.csv() is the matching reader.
## read.csv2() assumes "," as the decimal mark, which stops "."-decimal
## coordinates from being parsed as numbers. stringsAsFactors = FALSE keeps text
## columns as character on R < 4.0, so as.numeric() below can never return
## factor level codes.
reference <- read.csv(
  here("script-inputs/reference-files/FMD_Reference_File.csv"),
  header = TRUE,
  stringsAsFactors = FALSE
)

## The columns are renamed by position, so fail early if the file does not have
## the expected 10-column layout instead of silently mislabelling columns.
stopifnot(ncol(reference) == 10L)

colnames(reference) <- c(
  "premises",
  "class",
  "cattle",
  "weight",
  "x",
  "y",
  "state",
  "longitude",
  "latitude",
  "geo_region"
)

## Select columns needed for spread analyses
## as.numeric() is kept as a guard so the coordinates are always numeric, even
## if a stray non-numeric entry made read.csv() import a column as character.
reference <- reference %>%
  mutate(
    longitude = as.numeric(longitude),
    latitude = as.numeric(latitude)
  ) %>%
  select(-c(weight, cattle, x, y, geo_region))

## Number of premises (one row per premises)
nrow(reference)
[1] 889759

What does this mean?

Each row in reference contains data for a unique, theoretical cattle premises. These data are used to simulate FMDV spread between cattle farms throughout the United States.