Data Preparation

Load and format modeling data for analysis

Load Libraries

Load the R packages needed to execute the code

Code

library(tidyverse)
library(usefun)
library(ggplot2)
library(here)
library(patchwork)
library(scales)
library(geosphere)
library(terra)
library(raster)
library(sf)
library(viridis)
library(pals)

## Bind the bare name select() to dplyr's implementation so later pipelines
## always use it. NOTE(review): presumably needed because a package attached
## after tidyverse (e.g. raster) exports its own select() -- confirm.
select <- dplyr::select

Custom Functions

Load custom functions to process InterSpread Plus outputs and generate figures. These are available from the linked repository at the top of the script.

Code

## Load the utilities script first, located from the project root via here()
source(here("./R/utilities.R"))
## Source the scripts in ./R. NOTE(review): source_dir() is presumably defined
## in utilities.R, and this path is relative to the working directory rather
## than anchored with here() like the call above -- confirm both.
source_dir("./R")

Import Data

Code

## Directory passed to the import_*_data() calls below as the location of the
## InterSpread Plus output files.
## NOTE(review): relative to the working directory, not anchored with here()
## like the other paths in this script -- confirm this is intended.
file_path <- "./script-inputs/isp-outputs"

Infection Data

import_infection_data() reads the Outputs_Infection.txt files that include data on simulated infection events.

Code

## Read the simulated infection events from the outputs under file_path
infection <- file_path %>%
  import_infection_data()

Check the number of infection events

Code

## Total number of simulated infection events (one row per event)
infection %>%
  nrow()

[1] 4214927

Check infect_day statistics

Code

## Summary statistics for infect_day across all infection events
summarize(
  infection,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)

mean	min	median	max
141.7015	10	136	365

Check the number of infection events in the central region

Code

## Keep only the infection events recorded for the central region
infection_central <- filter(infection, region == "central")

## Number of central-region infection events
infection_central %>%
  nrow()

[1] 2224274

Check infect_day statistics for the central region

Code

## Summary statistics for infect_day within the central region
summarize(
  infection_central,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)

mean	min	median	max
139.6225	10	132	365

Check the number of infection events in the eastern region

Code

## Keep only the infection events recorded for the eastern region
infection_eastern <- filter(infection, region == "eastern")

## Number of eastern-region infection events
infection_eastern %>%
  nrow()

[1] 1990653

Check infect_day statistics for the eastern region

Code

## Summary statistics for infect_day within the eastern region
summarize(
  infection_eastern,
  mean   = mean(infect_day),
  min    = min(infect_day),
  median = median(infect_day),
  max    = max(infect_day)
)

mean	min	median	max
144.0246	10	140	365

Detection Data

import_detection_data() reads the Outputs_Detection.txt files that include data on simulated detection events.

Code

## Read the simulated detection events from the outputs under file_path
detection <- file_path %>%
  import_detection_data()

Check the number of detection events

Code

## Total number of simulated detection events (one row per event)
detection %>%
  nrow()

[1] 4154347

Check detect_day statistics

Code

## Summary statistics for detect_day across all detection events
summarize(
  detection,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)

mean	min	median	max
149.9366	15	145	365

Check the number of detection events in the central region

Code

## Keep only the detection events recorded for the central region
detection_central <- filter(detection, region == "central")

## Number of central-region detection events
detection_central %>%
  nrow()

[1] 2190070

Check detect_day statistics for the central region

Code

## Summary statistics for detect_day within the central region
summarize(
  detection_central,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)

mean	min	median	max
147.7716	15	141	365

Check the number of detection events in the eastern region

Code

## Keep only the detection events recorded for the eastern region
detection_eastern <- filter(detection, region == "eastern")

## Number of eastern-region detection events
detection_eastern %>%
  nrow()

[1] 1964277

Check detect_day statistics for the eastern region

Code

## Summary statistics for detect_day within the eastern region
summarize(
  detection_eastern,
  mean   = mean(detect_day),
  min    = min(detect_day),
  median = median(detect_day),
  max    = max(detect_day)
)

mean	min	median	max
152.3506	15	150	365

Read Farm Reference File

The FMD Farm Reference File used for this project contains geographic coordinates for theoretical farm locations. Premises data were generated using the Farm Location and Agricultural Production Simulator (FLAPS, see Burdett et al. 2015) and do not reflect true farm locations. Farm demographics are based upon data collected in the U.S. Census of Agriculture conducted by the USDA National Agricultural Statistics Service (NASS).

Code

## Read the farm reference file (comma-separated).
## read.csv() is used instead of read.csv2(): read.csv2() defaults to
## dec = ",", so even with sep = "," any column written with "." decimals
## is imported as text rather than as numbers.
reference <- read.csv(
  here("script-inputs/reference-files/FMD_Reference_File.csv"),
  header = TRUE
)

## Columns are renamed by position, so fail early if the file layout differs
## from the ten columns expected here
reference_cols <- c(
  "premises",
  "class",
  "cattle",
  "weight",
  "x",
  "y",
  "state",
  "longitude",
  "latitude",
  "geo_region"
)
stopifnot(ncol(reference) == length(reference_cols))
colnames(reference) <- reference_cols

## Select columns needed for spread analyses
## (as.numeric() is kept as a safeguard in case the coordinates were still
## imported as text)
reference <- reference %>%
  mutate(longitude = as.numeric(longitude),
         latitude = as.numeric(latitude)) %>%
  select(-c(weight, cattle, x, y, geo_region))

## Number of premises (rows) in the reference file
nrow(reference)

[1] 889759

What does this mean?

Each row in reference contains data for a unique, theoretical cattle premises. These data are used to simulate FMDV spread between cattle farms throughout the United States.