Code
library(hms)
library(tidyverse)
source("R/FUNCTIONS.R")
This document describes the process of checking date and time consistency in the camera trap dataset. The goal is to identify records with inconsistent sampling periods, future dates, or species records that fall outside the sampling interval.
We start by loading the required functions and packages.
library(hms)
library(tidyverse)
source("R/FUNCTIONS.R")
We load the camera trap setup data for further checks.
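ct <- read_sheet(path = "Example", sheet = "Camera_trap", na = c("NA", "na"))
We identify camera deployments with a sampling duration of less than 24 hours (86,400 seconds). Deployments whose end precedes their start have a negative duration and are therefore flagged here as well.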
# Flag camera deployments whose sampling duration is shorter than one day
# (86,400 seconds). A negative duration (end before start) also satisfies the
# filter and is therefore reported.
# NOTE(review): dttm_update() is defined in R/FUNCTIONS.R; it presumably merges
# each *_time column into the matching *_date column -- confirm in the helper.
check_date_before <- ct |>
  map(\(sheet) {
    sheet |>
      dttm_update(date_col = "Start_date", time_col = "Start_time") |>
      dttm_update(date_col = "End_date", time_col = "End_time") |>
      select(!ends_with("_time")) |>
      mutate(duration = as.duration(interval(Start_date, End_date))) |>
      filter(duration < ddays(1))
  }) |>
  # Stack the per-dataset results, keeping the list name as a `dataset` column.
  bind_rows(.id = "dataset") |>
  select(dataset, Camera_id, Start_date:duration)

check_date_before
# A tibble: 4 × 30
dataset Camera_id Start_date End_date Camera_problem
<chr> <chr> <dttm> <dttm> <chr>
1 Example8 cam5 2013-03-28 10:19:00 2013-03-29 00:31:00 Sí
2 Example8 cam5 2013-03-28 10:19:00 2013-03-29 00:31:00 Sí
3 Example1 cam06_26 2014-08-02 12:30:00 2013-09-01 10:51:00 Não
4 Example8 cam5 2013-03-28 10:19:00 2013-03-29 00:31:00 Sí
# ℹ 25 more variables: Problem1_from <dttm>, Problem1_to <dttm>,
# Problem2_from <dttm>, Problem2_to <dttm>, Problem3_from <dttm>,
# Problem3_to <dttm>, Problem4_from <dttm>, Problem4_to <dttm>,
# Problem5_from <dttm>, Problem5_to <dttm>, Problem6_from <dttm>,
# Problem6_to <dttm>, Problem7_from <dttm>, Problem7_to <dttm>,
# Problem8_from <dttm>, Problem8_to <dttm>, Problem9_from <dttm>,
# Problem9_to <dttm>, Problem10_from <dttm>, Problem10_to <dttm>, …
We identify deployments with a sampling duration longer than 90 days (roughly 3 months; 7,776,000 seconds).
# Flag camera deployments whose sampling duration exceeds 90 days
# (7,776,000 seconds).
# NOTE(review): dttm_update() is defined in R/FUNCTIONS.R; it presumably merges
# each *_time column into the matching *_date column -- confirm in the helper.
check_date_after <- ct |>
  map(\(sheet) {
    with_duration <- sheet |>
      dttm_update(date_col = "Start_date", time_col = "Start_time") |>
      dttm_update(date_col = "End_date", time_col = "End_time") |>
      select(!ends_with("_time")) |>
      mutate(duration = as.duration(interval(Start_date, End_date)))

    filter(with_duration, duration > ddays(90))
  }) |>
  # Stack the per-dataset results, keeping the list name as a `dataset` column.
  bind_rows(.id = "dataset") |>
  select(dataset, Camera_id, Start_date:duration)

check_date_after
# A tibble: 77 × 30
dataset Camera_id Start_date End_date Camera_problem
<chr> <chr> <dttm> <dttm> <chr>
1 Example4 MX2_004 2018-09-08 14:50:00 2019-09-29 12:21:00 No
2 Example4 MX2_003 2018-09-08 15:29:00 2019-09-08 13:13:00 No
3 Example4 MX2_010 2018-09-09 15:19:00 2019-03-04 11:24:00 Sí
4 Example4 MX007 2018-09-08 17:58:00 2019-03-05 11:54:00 Sí
5 Example4 MX2_009 2018-09-09 14:36:00 2019-03-05 12:16:00 Sí
6 Example4 MX2_001 2018-09-09 12:31:00 2019-07-28 11:38:00 Sí
7 Example4 MX2_006 2018-09-08 11:51:00 2019-03-05 13:22:00 Sí
8 Example4 MX2_013 2018-08-15 13:55:00 2019-01-27 13:52:00 Sí
9 Example4 MX2_014 2018-08-15 14:20:00 2019-03-14 14:24:00 Sí
10 Example4 MX2_015 2018-08-15 15:15:00 2019-05-24 13:38:00 No
# ℹ 67 more rows
# ℹ 25 more variables: Problem1_from <dttm>, Problem1_to <dttm>,
# Problem2_from <dttm>, Problem2_to <dttm>, Problem3_from <dttm>,
# Problem3_to <dttm>, Problem4_from <dttm>, Problem4_to <dttm>,
# Problem5_from <dttm>, Problem5_to <dttm>, Problem6_from <dttm>,
# Problem6_to <dttm>, Problem7_from <dttm>, Problem7_to <dttm>,
# Problem8_from <dttm>, Problem8_to <dttm>, Problem9_from <dttm>, …
We flag deployments whose start or end date lies in the future. We define the threshold date as April 30, 2025: any date after it is flagged.
# Flag deployments whose start or end date is later than the threshold date.
check_date_future <- ct |>
  map(\(sheet) {
    # Kept as a character string: the comparison below relies on R coercing it
    # when it is compared with the parsed Date columns.
    cutoff <- "2025-04-30"

    parsed <- mutate(
      sheet,
      date_start = ymd(as.character(Start_date)),
      date_end = ymd(as.character(End_date))
    )

    # Keep a row when any column whose name starts with "date_" is past the
    # cutoff.
    filter(parsed, if_any(starts_with("date_"), \(d) d > cutoff))
  }) |>
  bind_rows(.id = "dataset") |>
  select(dataset, Camera_id, date_start:date_end)

check_date_future
# A tibble: 0 × 4
# ℹ 4 variables: dataset <chr>, Camera_id <chr>, date_start <date>,
# date_end <date>
We load the species records data for cross-checking with camera trap intervals.
# Read the "Species_records_camera" sheet, passing the strings "NA" and "na"
# as missing-value markers.
# NOTE(review): read_sheet() is called with `path =`, so it is presumably the
# project helper from R/FUNCTIONS.R that returns one table per dataset --
# confirm against the helper.
rec <- read_sheet(
path = "Example",
sheet = "Species_records_camera",
na = c("NA", "na")
)
datasets <- names(rec)
We check whether each species record falls within the sampling interval of the corresponding camera. First, we show whether any errors occurred while running the code. There were none.
# Cross-check every species record against the sampling interval of its camera.
#
# For each dataset, records are joined to their camera setup row (matching on
# Camera_id and Structure_id) and those whose Record_date falls outside
# Start_date--End_date are kept in `species_records_within_ct_date[[dataset]]`.
# Any error raised while processing a dataset is recorded in `error_log`
# instead of stopping the loop.
#
# `excel_row` is assigned BEFORE the join so that it always refers to the
# record's position in the records sheet (row 1 being the header). The join
# drops records without a matching camera and repeats records when a camera
# has several setup rows, so numbering after the join would shift the rows.
#
# NOTE(review): all columns ending in "date" are converted with as_datetime();
# if they carry no time of day, End_date becomes midnight and records taken
# later on the last sampling day are flagged -- confirm whether the *_time
# columns should be merged in first.
species_records_within_ct_date <- list()
error_log <- tibble(dataset = character(), error_message = character())

for (dataset in datasets) {
  message(str_glue("Starting dataset {dataset}\n"))

  # Return the condition object on failure so it can be logged in this scope,
  # without super-assignment from inside the handler.
  outcome <- tryCatch(
    {
      camera <- ct[[dataset]] |>
        select(Structure_id, Camera_id, Start_date, End_date)

      rec[[dataset]] |>
        mutate(excel_row = row_number() + 1) |>
        inner_join(camera, by = c("Camera_id", "Structure_id")) |>
        mutate(
          across(ends_with("date"), as_datetime),
          check = case_when(
            Record_date %within% (Start_date %--% End_date) ~ "YES",
            # Anything else, including missing dates, is reported.
            TRUE ~ "NO"
          )
        ) |>
        filter(check == "NO") |>
        select(excel_row, Species, Camera_id, ends_with("date"), check)
    },
    error = function(e) e
  )

  if (inherits(outcome, "error")) {
    msg <- conditionMessage(outcome)
    error_log <- bind_rows(
      error_log,
      tibble(dataset = dataset, error_message = msg)
    )
    message(str_glue("Error in dataset {dataset}: {msg}\n"))
    next
  }

  species_records_within_ct_date[[dataset]] <- outcome
  message(str_glue("Finalizing dataset {dataset}\n"))
}

error_log
# A tibble: 0 × 2
# ℹ 2 variables: dataset <chr>, error_message <chr>
Next, we summarize and print the number of records outside the sampling interval for each dataset.
# Keep only the datasets that have at least one record outside the sampling
# interval.
clean_species_records_within_ct_date <- keep(
  species_records_within_ct_date,
  \(flagged) nrow(flagged) != 0
)

# One row per remaining dataset with its number of flagged records.
clean_species_records_within_ct_date |>
  imap(\(flagged, dataset_name) {
    tibble(dataset = dataset_name, n = nrow(flagged))
  }) |>
  bind_rows()
# A tibble: 7 × 2
dataset n
<chr> <int>
1 Example1 304
2 Example2 1
3 Example3 1
4 Example4 104
5 Example8 50
6 Example9 53
7 Example13 38
Finally, we export the records outside the sampling interval to an Excel file.
# Export the flagged records to an Excel workbook, written as formatted tables
# with automatic column widths.
# NOTE(review): presumably the "Output" folder must already exist -- confirm.
openxlsx::write.xlsx(
  x = clean_species_records_within_ct_date,
  file = "Output/REGISTROS_SP_FORA_DA_DATA.xlsx",
  asTable = TRUE,
  colWidths = "auto"
)