Preprocessing

Wrangling and preparing date for analyses in other sections

Revise Nexus Files

The function nexus_label_swap() edits the Nexus file to move the descriptive text with each sample to the label position to be shown as tip labels. The revised file is saved with a _rev added to the end.

Hide code

tree_files <- list.files(here("local/paktrees/original"))

# swap labels
for(i in 1:length(tree_files)){
  nexus_label_swap(here("local/paktrees/original/", tree_files[i]))
}

View Tree

Tree visualization to confirm nexus_label_swap() worked

Hide code

# list of revised nexus files
tree_files <- list.files(here("local/paktrees/original"), pattern="_rev\\.nex$")

# choose a tree
tree_tmp <- read.nexus(here("local/paktrees/original", tree_files[4]))

Hide code

plot_tree_serotype(tree_tmp)

Figure 1: Figure is only a test plot to verify labels were correctly swapped in the Nexus file, but represents Segment 3D.

Geography

Extract, clean, and join geographic coordinates. The function convert_dms_to_dd() was coded to clean special characters and mistyped text from the raw file before conversion.

Hide code

farm_file <- read.csv(here("local/farm_clean2.csv")) %>%
   mutate(farm_code = if_else(animal >= 146 & animal <= 155, "DD",  farm_code))

farm_file  <- farm_file %>%
  mutate(coord_x = as.numeric(sapply(coord_x, convert_dms_to_dd)),
         coord_y = as.numeric(sapply(coord_y, convert_dms_to_dd)))

farms_locs <- farm_file %>% # unique locations
  group_by(farm_code) %>%
  slice(1) %>%
  ungroup() %>%
  as.data.frame()

# Faroog et al, 2018 states 136-145 are farm code N & 146-155 are DD.
# However, data (Pakistan FARM LIST DATA- BArbarita.xls) has N and DD lumped as same farm, Tarlai.
# coordinates for N and DD are different.  Likely same owner, different locations.
# splitting them here consistent with Farooq et al.
animal_locs <- farm_file %>%
  distinct(animal, farm_code)

Hide code

write.csv(farms_locs, here("local/farms_locs.csv"), row.names = FALSE)
write.csv(animal_locs, here("local/animals_locs.csv"), row.names = FALSE)

Format data for mapping

The function calculate_bounding_box() helps with conversion and padding dtermination for the map.

Hide code

# use api to access background maps  
map_api <- yaml::read_yaml(here("local", "secrets.yaml"))
register_stadiamaps(key = map_api$stadi_api)
# has_stadiamaps_key()

bbox <- calculate_bounding_box(farms_locs, 1)

bbox_coords <- c(left = bbox$min_lon, bottom = bbox$min_lat, 
                 right = bbox$max_lon, top = bbox$max_lat)

background_map <- get_map(location = bbox_coords, 
                          source = "stadia", maptype = "stamen_terrain")

ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.

Farm Locations (1)

Hide code

plot_study_area(background_map, farms_locs)

Farm Locations (2)

Try a different background

Hide code

new_background <- get_map(location = bbox_coords, 
                          source = "stadia", maptype = "stamen_toner")

plot_study_area(new_background, farms_locs)

Subclinical Summary

Data wrangling to summarize subclinical cases. The convert_dates() function is used to standardize dates recorded using different formats.

Hide code

all_serotypes_tree <- read.nexus(here("local/paktrees/original", tree_files[5]))

sero_df <- as.data.frame(
  all_serotypes_tree$tip.label
)

names(sero_df) <- "label"

# get animal and sample number from tip label
sero_df <- sero_df  %>%
  filter(str_count(label, "/") >= 4) %>%
  mutate(parts = str_split(label, "/")) %>%
  mutate(string = sapply(parts, function(x) if (length(x) > 3) x[4] else NA)) %>%
  mutate(animal = as.integer(
    if_else(str_detect(string, "-"), word(string, 1, sep = "-"), string))) %>%
  mutate(sample = if_else(str_detect(string, "-"), word(string, 2, sep = "-"), NA_character_)) %>%
  select(-parts)

# fix mixed date formats
farm_file <- farm_file %>%
    mutate(across(starts_with("samp_date_"), ~ format(dmy(.), "%Y-%m-%d")))
 
# table by sample and animal
samp_date_table <- farm_file %>%
  select(animal, samp_date_1, samp_date_2, samp_date_3, samp_date_4) %>%
  pivot_longer(
    cols = -animal,
    names_to = "sample_txt",
    values_to = "samp_date"
  ) %>%
  mutate(sample = substr(sample_txt, 11, 11)) %>%
  select(-sample_txt)

sero_df <- left_join(sero_df, samp_date_table, by = c("animal", "sample"))
sero_df$samp_date <- as.Date(sero_df$samp_date)

#farm_file$farm_code <- with(farms_locs, ###########
#                            farm_code[match(
#                              farm_file$farm_name,
#                              farm_name)])

sero_df$farm_code <- with(animal_locs,
                          farm_code[match(
                             sero_df$animal,
                             animal)])

sero_df <- sero_df %>%
  mutate(status = ifelse(grepl("_pro$", label), "Subclinical", "Clinical"),
         serotype = sub("/.*", "", label))

sub_only <- sero_df %>%
  filter(status == "Subclinical")

sub_only$serotype <- sub("/.*", "", sub_only$label)

save a copy

Hide code

saveRDS(sero_df, here("local/assets/sero_df.rds"))
write.csv(farms_locs, here("local/farms_locs.csv"), row.names = FALSE)
write.csv(animal_locs, here("local/animal_locs.csv"), row.names = FALSE)

Co-Infected Animals

The below table lists samples with sequentially, or concurrent co-infection

Hide code

coinf_set <- sub_only %>%
  group_by(animal) %>%
  summarise(Infections = length(serotype)) %>%
  filter(Infections > 1) %>%
  ungroup() %>%
  arrange(animal) %>%
  mutate(Item = row_number()) %>%
  select(Item, animal, Infections)

Hide code

coinf_set %>%
  gt() %>%
  tab_header(
    title = md("Sampled FMDV by Animal")) %>%
  cols_width(starts_with("animal") ~ px(100),
             starts_with("Infections") ~ px(90),
             everything() ~ px(95)) %>%
  tab_options(table.font.size = "small",
              row_group.font.size = "small",
              stub.font.size = "small",
              column_labels.font.size = "medium",
              heading.title.font.size = "large",
              data_row.padding = px(2),
              heading.title.font.weight = "bold",
              column_labels.font.weight = "bold") %>%
  opt_stylize(style = 6, color = 'gray')  %>%
  tab_caption(caption = md("Table lists subclinical animals sampled to have multiple infections.  May be sequential or coinfections, and may be from the same or different FMDVs. The Infections columns lits the number sampled.  Detailed records for these animals are in the next table."))

Table lists subclinical animals sampled to have multiple infections. May be sequential or coinfections, and may be from the same or different FMDVs. The Infections columns lits the number sampled. Detailed records for these animals are in the next table.
Item	animal	Infections
Sampled FMDV by Animal
1	8	2
2	59	2
3	161	2
4	168	3
5	177	2
6	178	2
7	183	2
8	184	3
9	185	2
10	189	3
11	192	3
12	208	2
13	229	2
14	231	2
15	237	2
16	254	2
17	263	2
18	272	2
19	277	2
20	284	2

Hide code

sub_only %>%
  filter(animal %in% coinf_set$animal) %>%
  select(-string) %>%
  arrange(animal) %>%
  gt() %>%
  tab_header(
    title = md("Subclinical with Multiple Infections")) %>%
  cols_width(starts_with("label") ~ px(250),
             starts_with("animal") ~ px(90),
             starts_with("sample") ~ px(90),
             starts_with("farm_code") ~ px(100),
             starts_with("samp_date") ~ px(100),
             everything() ~ px(95)) %>%
  tab_options(table.font.size = "small",
              row_group.font.size = "small",
              stub.font.size = "small",
              column_labels.font.size = "medium",
              heading.title.font.size = "large",
              data_row.padding = px(2),
              heading.title.font.weight = "bold",
              column_labels.font.weight = "bold") %>%
  opt_stylize(style = 6, color = 'gray') %>%
  tab_caption(caption = md("Table lists indiviual records for animals sampled to have multiple infections.  Detailed version of previous table showing counts."))

Table lists indiviual records for animals sampled to have multiple infections. Detailed version of previous table showing counts.
label	animal	sample	samp_date	farm_code	status	serotype
Subclinical with Multiple Infections
Asia1/PAK/ICT/008-1/2011_pro	8	1	2011-12-28	A	Subclinical	Asia1
A/PAK/ICT/008-3/2012_pro	8	3	2012-05-17	A	Subclinical	A
A/PAK/ICT/059-1/2012_pro	59	1	2012-01-10	F	Subclinical	A
Asia1/PAK/ICT/059-2/2012_pro	59	2	2012-03-14	F	Subclinical	Asia1
O/PAK/ICT/161-1/2012_pro	161	1	2012-02-02	O	Subclinical	O
O/PAK/ICT/161-2/2012_pro	161	2	2012-03-29	O	Subclinical	O
A/PAK/ICT/168-2/2012_pro	168	2	2012-04-02	P	Subclinical	A
A/PAK/ICT/168-3/2012_pro	168	3	2012-07-03	P	Subclinical	A
Asia1/PAK/ICT/168-1/2012_pro	168	1	2012-02-07	P	Subclinical	Asia1
A/PAK/ICT/177-4/2012_pro	177	4	2012-11-21	Q	Subclinical	A
Asia1/PAK/ICT/177-1/2012_pro	177	1	2012-02-08	Q	Subclinical	Asia1
Asia1/PAK/ICT/178-2/2012_pro	178	2	2012-04-03	Q	Subclinical	Asia1
Asia1/PAK/ICT/178-3/2012_pro	178	3	2012-07-09	Q	Subclinical	Asia1
Asia1/PAK/ICT/183-2/2012_pro	183	2	2012-04-03	Q	Subclinical	Asia1
Asia1/PAK/ICT/183-1/2012_pro	183	1	2012-02-08	Q	Subclinical	Asia1
Asia1/PAK/ICT/184-3/2012_pro	184	3	2012-07-09	Q	Subclinical	Asia1
Asia1/PAK/ICT/184-2/2012_pro	184	2	2012-04-03	Q	Subclinical	Asia1
Asia1/PAK/ICT/184-1/2012_pro	184	1	2012-02-08	Q	Subclinical	Asia1
Asia1/PAK/ICT/185-1/2012_pro	185	1	2012-02-08	Q	Subclinical	Asia1
Asia1/PAK/ICT/185-3/2012_pro	185	3	2012-07-09	Q	Subclinical	Asia1
Asia1/PAK/ICT/189-2/2012_pro	189	2	2012-04-04	R	Subclinical	Asia1
Asia1/PAK/ICT/189-3/2012_pro	189	3	2012-07-11	R	Subclinical	Asia1
Asia1/PAK/ICT/189-4/2012_pro	189	4	2012-11-22	R	Subclinical	Asia1
Asia1/PAK/ICT/192-4/2012_pro	192	4	2012-11-22	R	Subclinical	Asia1
Asia1/PAK/ICT/192-3/2012_pro	192	3	2012-07-11	R	Subclinical	Asia1
Asia1/PAK/ICT/192-2/2012_pro	192	2	2012-04-04	R	Subclinical	Asia1
A/PAK/ICT/208-1/2012_pro	208	1	2012-02-15	T	Subclinical	A
Asia1/PAK/ICT/208-4/2012_pro	208	4	2012-11-27	T	Subclinical	Asia1
A/PAK/ICT/229-1/2012_pro	229	1	2012-02-21	V	Subclinical	A
Asia1/PAK/ICT/229-4/2012_pro	229	4	2012-12-03	V	Subclinical	Asia1
Asia1/PAK/ICT/231-4/2012_pro	231	4	2012-12-03	V	Subclinical	Asia1
A/PAK/ICT/231-1/2012_pro	231	1	2012-02-21	V	Subclinical	A
A/PAK/ICT/237-1/2012_pro	237	1	2012-02-22	W	Subclinical	A
Asia1/PAK/ICT/237-4/2012_pro	237	4	2012-12-03	W	Subclinical	Asia1
Asia1/PAK/ICT/254-4/2012_pro	254	4	2012-12-05	Y	Subclinical	Asia1
Asia1/PAK/ICT/254-1/2012_pro	254	1	2012-02-23	Y	Subclinical	Asia1
Asia1/PAK/ICT/263-1/2012_pro	263	1	2012-02-28	Z	Subclinical	Asia1
Asia1/PAK/ICT/263-4/2012_pro	263	4	2012-12-10	Z	Subclinical	Asia1
Asia1/PAK/ICT/272-4/2012_pro	272	4	2012-12-11	AA	Subclinical	Asia1
O/PAK/ICT/272-1/2012_pro	272	1	2012-02-29	AA	Subclinical	O
Asia1/PAK/ICT/277-1/2012_pro	277	1	2012-02-29	AA	Subclinical	Asia1
Asia1/PAK/ICT/277-4/2012_pro	277	4	2012-12-11	AA	Subclinical	Asia1
Asia1/PAK/ICT/284-3/2012_pro	284	3	2012-08-28	BB	Subclinical	Asia1
Asia1/PAK/ICT/284-1/2012_pro	284	1	2012-03-01	BB	Subclinical	Asia1

Subclinical Infections by Serotype and Date

Figure showing timeline of infections by subclinical animal.

Hide code

sero_df_red <- sero_df %>%
  filter(status == "Subclinical")  %>%
  mutate(animal_farm = paste(farm_code, animal, sep = ": "))

sero_df_red <- sero_df_red %>%
  arrange(desc(farm_code), animal) %>%
  mutate(animal_farm = factor(animal_farm, levels = unique(animal_farm)))

background <- sero_df_red %>%
  group_by(farm_code) %>%
  summarize(ymin = min(which(levels(animal_farm) %in% animal_farm)) - 0.5,
            ymax = max(which(levels(animal_farm) %in% animal_farm)) + 0.5) %>%
  ungroup()

palette_colors <- pals::glasbey(length(unique(sero_df_red$farm_code)))

Hide code

ggplot() +
  geom_rect(data = background, aes(xmin = as.Date("2011-11-30"), 
                                   xmax = as.Date("2012-12-31"), 
                                   ymin = ymin, ymax = ymax, 
                                   fill = farm_code), col="transparent", alpha = 0.2) +
  geom_point(data = sero_df_red, aes(x = samp_date, y = animal_farm, 
                                   color = serotype, shape = serotype), size = 3) + 
  labs(title = " ",
       x = " ",
       y = "Animal",
       color = "Serotype",
       shape = "Serotype",
       fill = "Farm") +
  theme_minimal() +
   guides(color = guide_legend(order = 1, nrow = 1, title.position = "top", title.hjust = 0.5),
         shape = guide_legend(order = 1, nrow = 1, title.position = "top", title.hjust = 0.5),
         fill = guide_legend(order = 2, nrow = 3, title.position = "top", title.hjust = 0.5)) +
  scale_y_discrete(labels = function(x) gsub(".*: ", "", x)) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_manual(values = palette_colors) + 
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm"),
        panel.grid.major = element_line(linewidth = 0.15),
        panel.grid.minor = element_line(linewidth = 0.05),
        legend.direction = "horizontal",
        legend.position="bottom",
        strip.text = element_text(size=16, face="bold"),
        strip.background = element_blank(),
        legend.key.size = unit(2,"line"),
        legend.key.width = unit(3,"line"),
        legend.text = element_text(size=16, face="bold"),
        legend.title = element_text(size=18, face="bold"),
        axis.title.x = element_text(size=18, face="bold"),
        axis.title.y = element_text(size=22, face="bold"),
        axis.text.x = element_text(face="bold", size=15, vjust=0.5, 
                                   hjust=0.5, angle=0),
        axis.text.y = element_text(size=10, face="bold"),
        plot.title = element_text(size=10, face="bold"),
        legend.spacing = unit(4, "cm"), 
        legend.margin = margin(t = 2, b = 1))

Figure 2: Figure summarizes the serotypes and timiming of FMDV sampled from animals listed along vertical axis. More than one point on a line indicates multiple infections were detected.

Clades

Simplify and add clades from Farooq, et al. 2018.

Hide code

clades_farooq <- read.csv(here("local/clades_farooq.csv")) %>%
  mutate(Clade = paste0(serotype, " (", clade, ")")) %>%
  select(animal, sample, farm_code, Clade)

sero_df_red <- sero_df_red %>%
  mutate(sample = as.integer(sample))

sero_df_red <- left_join(sero_df_red, clades_farooq, by = c("animal", "sample", "farm_code")) %>%
  mutate(Clade = if_else(is.na(Clade) & serotype == "A", "A (C1)", Clade),
         Clade = if_else(is.na(Clade) & serotype == "Asia1", "Asia1 (C1)", Clade),
         Clade = if_else(is.na(Clade), serotype, Clade))

Hide code

sero_df_red <- sero_df_red %>%
  mutate(samp_date = factor(samp_date, 
                            levels = unique(samp_date[order(as.Date(samp_date))]))) %>%
  mutate(animal_farm = fct_reorder(animal_farm, desc(animal)))

A_col <- rev(brewer.reds(4))[1:3]
Asia1_col <- rev(brewer.blues(5))[1:4]

clade_colors <- c(
  "Asia1 (C1)" = Asia1_col[4],
  "Asia1 (C2)" = Asia1_col[3],
  "Asia1 (C4)" = Asia1_col[1],
  "A (C1)"     = A_col[3],
  "O"          = "gray40",
  "A (C2)"     = A_col[2],
  "Asia1 (C3)" = Asia1_col[2],
  "A (C3)"     = A_col[1]
)


# no clade designation
clade_colors <- c(clade_colors, "NA" = "black")

ggplot() +
  geom_point(data = sero_df_red, 
             aes(x = samp_date, y = animal_farm, 
                 color = Clade, shape = serotype), 
             size = 3.5) +
  scale_color_manual(
    values = clade_colors,
    na.value = "black",
    breaks = names(clade_colors)[names(clade_colors) != "NA"] 
  ) +
  scale_x_discrete(labels = function(x) format(as.Date(x), "%b %d")) + 
  theme_minimal() +
  labs(title = " ",
       x = " ",
       y = "Farm: Animal",
       color = "Clade",
       shape = "Serotype") +
  theme(
    axis.text.x = element_text(face = "bold", size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 22, face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray80", linetype = "dotted", linewidth = 0.15),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.key.size = unit(2, "line"),
    legend.key.width = unit(3, "line"),
    legend.text = element_text(size = 16, face = "bold"),
    legend.title = element_text(size = 18, face = "bold"),
    plot.margin = unit(c(0.01, 0.5, 0.01, 0.5), "cm")
  ) +
  guides(
    color = guide_legend(ncol = 3),
    shape = guide_legend(ncol = 3)
  )

Tree Dates

The (ugly) code below used to create date files for serotype-specific time calibrated trees in Beast. Dates for clinical samples used as outgroups are queried at GenBank using the custom functions get_isolate_collection_date() and get_accession_date_meta() that search the samples metadata for date related information. The rentrez package is used for metadata retrieval.

Query NCBI

Hide code

iso_names <- c("A/PAK/KCH/7/2009", "A/PAK/ICT/1/2008", "A/PAK/ICT/276/2012", 
               "A/PAK/SGD/12/2012", "A/PAK/PSH/34/2012", "A/PAK/FSD/4/2012")

isolate_dates <- get_isolate_collection_date(iso_names)

Loading required package: rentrez

Hide code

isolate_dates %>% # only years available...  The middle June 1 will be used for month and day
  gt()

IsolateName	CollectionDate
A/PAK/KCH/7/2009	2009
A/PAK/ICT/1/2008	2008
A/PAK/ICT/276/2012	2012
A/PAK/SGD/12/2012	2012
A/PAK/PSH/34/2012	2012
A/PAK/FSD/4/2012	2012

Hide code

acc_numbers <- c("KF112900", "JF721440", "MT981310", "MF140445", "JN006719", 
                 "KM268898", "JX170756", "MT944981", "KR149704")

access_meta <- get_accession_date_meta(acc_numbers) # only year for some...

access_meta %>%
  gt()

AccessionNumber	CollectionDate	IsolateName	StrainName
KF112900	14-Jan-2009	A/BAR/2/2009	NA
JF721440	NA	NA	A/KUN/AFG/L1577/2009
MT981310	2012	A/PAK/6/2012	NA
MF140445	Mar-2017	Asia1/NIAB/PUN/PAK/203/2016	NA
JN006719	NA	As/SIN/PAK/L5/2008	NA
KM268898	09-Feb-2013	TUR/13/2013	NA
JX170756	Feb-2011	O/KHI/PAK/41/2011	NA
MT944981	14-Feb-2016	O/IRN/9/2016	NA
KR149704	2010	O/PAK/1/2010	NA

Creating dates table for Beast.

Hide code

tree_tmp <- read.nexus(here("local/paktrees", tree_files[6]))

tree_tips <- as.data.frame(
  cbind(
    label = tree_tmp$tip.label
  )
)

dates_file <- sero_df %>%
  select(label, samp_date)

dates_file <- left_join(tree_tips, dates_file, by="label") 

dates_file <- dates_file %>%
  mutate(samp_date = case_when(
    label == "KF112900" ~ as_date("2009-01-14", format = "%Y-%m-%d"),
    label == "JF721440" ~ as_date("2009-06-01", format = "%Y-%m-%d"),
    label == "MT981310" ~ as_date("2012-06-01", format = "%Y-%m-%d"),
    
    label == "A/PAK/KCH/7/2009" ~ as_date("2009-06-01", format = "%Y-%m-%d"),
    label == "A/PAK/ICT/1/2008" ~ as_date("2008-06-01", format = "%Y-%m-%d"),
    label == "A/PAK/ICT/276/2012" ~ as_date("2012-06-01", format = "%Y-%m-%d"),
    
    label == "A/PAK/SGD/12/2012" ~ as_date("2012-06-01", format = "%Y-%m-%d"),
    label == "A/PAK/PSH/34/2012" ~ as_date("2012-06-01", format = "%Y-%m-%d"),
    label == "A/PAK/FSD/4/2012" ~ as_date("2012-06-01", format = "%Y-%m-%d"),
    
    TRUE ~ samp_date 
  ))

write.table(dates_file, file = here("local/beast/a_1/fmd_a_dates.tsv"), 
            sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)

Creating dates table for Beast.

Hide code

tree_tmp <- read.nexus(here("local/paktrees", tree_files[7]))

tree_tips <- as.data.frame(
  cbind(
    label = tree_tmp$tip.label
  )
)

tree_tips <- tree_tips  %>%
  mutate(parts = str_split(label, "/")) %>%
  mutate(string = sapply(parts, function(x) if (length(x) > 3) x[4] else NA)) %>%
  mutate(animal = as.integer(
    if_else(str_detect(string, "-"), word(string, 1, sep = "-"), string))) %>%
  mutate(sample = if_else(str_detect(string, "-"), word(string, 2, sep = "-"), NA_character_)) %>%
  select(-parts)

dates_lu <- sero_df %>%
  mutate(date = samp_date) %>%
  select(label, date) 

tree_tips <- left_join(tree_tips, dates_lu, by = c("label"))

is_year <- function(x) {
  sapply(x, function(y) {
    if (grepl("^[0-9]{4}$", y)) {
      year <- as.numeric(y)
      return(year >= 1950 && year <= 2015)
    }
    return(FALSE)
  })
}

tree_tips <- tree_tips %>%
  mutate(
    label_year = substr(label, nchar(label)-3, nchar(label)),
    is_year = is_year(label_year),
    date = ifelse(
      is_year == TRUE & is.na(date), 
      as.Date(paste(label_year, "06", "01", sep="-")),
      date
    ),
    date = as.Date(date, origin="1970-01-01")
  ) %>%
  select(-c(string, animal, sample, label_year, is_year))

tree_tips <- tree_tips %>%
  mutate(date = case_when(
    label == "MF140445" ~ as_date("2017-03-01", format = "%Y-%m-%d"),
    label == "JN006719" ~ as_date("2008-06-01", format = "%Y-%m-%d"),
    label == "KM268898" ~ as_date("2013-02-09", format = "%Y-%m-%d"),
    
    TRUE ~ date 
  ))

write.table(tree_tips, file = here("local/beast/asia1_1/fmd_asia1_dates.tsv"), 
            sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)

Creating dates table for Beast.

Hide code

tree_tmp <- read.nexus(here("local/paktrees", tree_files[9]))

tree_tips <- as.data.frame(
  cbind(
    label = tree_tmp$tip.label
  )
)

tree_tips <- tree_tips  %>%
  mutate(parts = str_split(label, "/")) %>%
  mutate(string = sapply(parts, function(x) if (length(x) > 3) x[4] else NA)) %>%
  mutate(animal = as.integer(
    if_else(str_detect(string, "-"), word(string, 1, sep = "-"), string))) %>%
  mutate(sample = if_else(str_detect(string, "-"), word(string, 2, sep = "-"), NA_character_)) %>%
  select(-parts)

tree_tips <- left_join(tree_tips, dates_lu, by = c("label"))

tree_tips <- tree_tips %>%
  mutate(
    label_year = substr(label, nchar(label)-3, nchar(label)),
    is_year = is_year(label_year),
    date = ifelse(
      is_year == TRUE & is.na(date), 
      as.Date(paste(label_year, "06", "01", sep="-")),
      date
    ),
    date = as.Date(date, origin="1970-01-01")
  ) %>%
  select(-c(string, animal, sample, label_year, is_year))

tree_tips <- tree_tips %>%
  mutate(date = case_when(
    label == "JX170756" ~ as_date("2011-02-01", format = "%Y-%m-%d"),
    label == "MT944981" ~ as_date("2016-02-14", format = "%Y-%m-%d"),
    label == "KR149704" ~ as_date("2010-01-01", format = "%Y-%m-%d"),
    
    TRUE ~ date
  ))

write.table(tree_tips, file = here("local/beast/o_1/fmd_0_dates.tsv"), 
            sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)