1. Data Preprocessing

Open the code snippet below and to the right to see all data transformations executed to clean the dataset.

################################
####### OCTOBER DATASETS #######
################################

library(plyr); library(dplyr)

# FIRST JOIN, FOCAL DATA
# Add the group of each session to every October behaviour record.

Oct <- read.csv(file = "./rawData/Oct30_2021_Ipad Susana/behaviorsTable.csv")

# Sessions table reduced to the join key and the group label.
sessionsTableOct <- read.csv(
  file = "./rawData/Oct30_2021_Ipad Susana/sessionsTable.csv"
)[, c("session_start_timeStamp", "group_ID")]

# join() comes from plyr; its defaults (left join, all matches) are spelled
# out so that every row of `Oct` is kept.
FocalOctJoin <- join(
  x = Oct,
  y = sessionsTableOct,
  by = "session_start_timeStamp",
  type = "left",
  match = "all"
)

# Label every October record as research data.
FocalOctJoin[["USE"]] <- "Research"


# SECOND JOIN, SCAN DATA

OctScan <- read.csv(file = "./rawData/Oct30_2021_Ipad Susana/scansTable.csv") # 110 obs

HabitatOct <- read.csv(file = "./rawData/Oct30_2021_Ipad Susana/scanVarsTable.csv") # 99 obs

# Keep the focal key and the terrain for rows with a non-empty terrain, then
# drop repeated (focal, terrain) pairs.
HabitatOct <- HabitatOct[
  HabitatOct[["TERRAIN."]] != "",
  c("focal_start_timeStamp", "TERRAIN.")
]
HabitatOct <- HabitatOct[!duplicated(HabitatOct), ]

# NOTE(review): a focal with two different terrains would duplicate its scan
# rows here, because all matches are kept.
ScanOctJoin <- join(
  x = OctScan,
  y = HabitatOct,
  by = "focal_start_timeStamp",
  type = "left",
  match = "all"
)


### SCAN + HabitatOct + GROUP

scan_final_oct <- join(
  x = ScanOctJoin,
  y = sessionsTableOct,
  by = "session_start_timeStamp",
  type = "left",
  match = "all"
)

# NOTE(review): write.csv() writes comma-separated text, so this file is a CSV
# despite its .xlsx extension. The name is left unchanged in case other
# scripts read it; consider renaming it to .csv.
write.csv(x = scan_final_oct, file = "scan_final_oct.xlsx")

################################
####### JULY DATASETS #########
################################

# FIRST JOIN, FOCAL DATA

Jul <- read.csv(file = "./rawData/Julho17_2022_Ipad Jacinto/behaviorsTable.csv")

# Store the values of the "..1" columns under the FOOD_ITEM. / PART_EATEN.
# names used by the October table, then drop the "..1" columns.
Jul[c("FOOD_ITEM.", "PART_EATEN.")] <- Jul[c("FOOD_ITEM..1", "PART_EATEN..1")]
Jul <- Jul[, !(names(Jul) %in% c("FOOD_ITEM..1", "PART_EATEN..1")), drop = FALSE]

# Sessions table reduced to the join key and the group label.
sessionsTableJul <- read.csv(
  file = "./rawData/Julho17_2022_Ipad Jacinto/sessionsTable.csv"
)[, c("session_start_timeStamp", "group_ID")]

# join() comes from plyr; defaults (left join, all matches) are spelled out.
FocalJul <- join(
  x = Jul,
  y = sessionsTableJul,
  by = "session_start_timeStamp",
  type = "left",
  match = "all"
)

# Sessions flagged as "Research" in the focal variables, one row per session.
# which() drops rows whose USE is NA instead of turning them into NA rows.
focalVarsJul <- read.csv(file = "./rawData/Julho17_2022_Ipad Jacinto/focalVarsTable.csv")
focalVarsJul <- focalVarsJul[
  which(focalVarsJul[["USE"]] == "Research"),
  c("session_start_timeStamp", "USE")
]
focalVarsJul <- focalVarsJul[!duplicated(focalVarsJul), ]

# Records from sessions without a "Research" flag get USE = NA here.
FocalJulJoin <- left_join(
  x = FocalJul,
  y = focalVarsJul,
  by = "session_start_timeStamp"
)

write.csv(x = FocalJulJoin, file = "./tables/FocalJulJoin.csv")

# SECOND JOIN: scans + HabitatJul, SCAN DATA

JulScan <- read.csv(file = "./rawData/Julho17_2022_Ipad Jacinto/scansTable.csv") # 1445 obs

HabitatJul <- read.csv(file = "./rawData/Julho17_2022_Ipad Jacinto/scanVarsTable.csv") # 1437 obs

# Keep the focal key and the terrain for rows with a non-empty terrain, then
# drop repeated (focal, terrain) pairs.
HabitatJul <- HabitatJul[
  HabitatJul[["TERRAIN."]] != "",
  c("focal_start_timeStamp", "TERRAIN.")
]
HabitatJul <- HabitatJul[!duplicated(HabitatJul), ]

ScanJulJoin <- join(
  x = JulScan,
  y = HabitatJul,
  by = "focal_start_timeStamp",
  type = "left",
  match = "all"
)

write.csv(x = ScanJulJoin, file = "./tables/ScanJulJoin.csv")

### SCAN + HABITAT + GROUP

scan_final_jul <- join(
  x = ScanJulJoin,
  y = sessionsTableJul,
  by = "session_start_timeStamp",
  type = "left",
  match = "all"
)

# NOTE(review): write.csv() writes comma-separated text, so this file is a CSV
# despite its .xlsx extension. The name is left unchanged in case other
# scripts read it; consider renaming it to .csv.
write.csv(x = scan_final_jul, file = "scan_final_jul.xlsx")

### FOCAL OCT + FOCAL JULY

focalFinal <- rbind(FocalJulJoin, FocalOctJoin) # 2290 obs
# Timestamps are stored as "YYYY-MM-DD,HH:MM:SS". No `tz` is given, so they
# are interpreted in the time zone of the R session.
focalFinal$session_start_timeStamp <- as.POSIXct(
  focalFinal$session_start_timeStamp,
  format = "%Y-%m-%d,%H:%M:%S"
)

### SCAN OCT + SCAN JULY

scanFinal <- rbind(scan_final_jul, scan_final_oct) # 1555 obs
scanFinal$session_start_timeStamp <- as.POSIXct(
  scanFinal$session_start_timeStamp,
  format = "%Y-%m-%d,%H:%M:%S"
)


### Get Habitat values into FOCAL DATASET

# One terrain per session: drop scans without a terrain, then keep the first
# terrain listed for each session.
scanHabitats <- scanFinal[, c("session_start_timeStamp", "TERRAIN.")]
scanHabitats <- unique(scanHabitats[!is.na(scanHabitats$TERRAIN.), ])
scanHabitats <- scanHabitats[!duplicated(scanHabitats$session_start_timeStamp), ]

# Focal sessions that have no terrain from the scans. anti_join() returns all
# rows of x without a match in y; they are reduced to one row per session so
# that the nearest-scan search below runs once per session instead of once
# per behaviour record.
missing_values <- anti_join(focalFinal, scanHabitats, by = "session_start_timeStamp")
missing_values <- missing_values[
  !duplicated(missing_values$session_start_timeStamp),
  "session_start_timeStamp",
  drop = FALSE
]
# rep() keeps this valid when no session is missing a terrain (zero rows).
missing_values$TERRAIN. <- rep(NA_character_, nrow(missing_values))

# Give each of those sessions the terrain of the scan session closest in
# time. Comparing seconds since the epoch gives the same nearest scan as
# difftime(); ties go to the first scan, as with which.min() before.
scan_secs <- as.numeric(scanHabitats$session_start_timeStamp)
focal_secs <- as.numeric(missing_values$session_start_timeStamp)

for (i in seq_along(focal_secs)) {
  closest_index <- which.min(abs(focal_secs[i] - scan_secs))
  # which.min() is empty when the timestamp failed to parse (NA) or there are
  # no scans at all; the terrain then stays NA instead of stopping the script.
  if (length(closest_index) == 1L) {
    missing_values$TERRAIN.[i] <- scanHabitats$TERRAIN.[closest_index]
  }
}

# Observed and imputed terrains, restricted to sessions that occur in the
# focal data: remove_set holds the sessions that only exist in the scans.
scan_habitats <- rbind(scanHabitats, missing_values)
remove_set <- anti_join(scan_habitats, focalFinal, by = "session_start_timeStamp")
habitatFinal <- anti_join(scan_habitats, remove_set, by = "session_start_timeStamp")

# join() comes from plyr (left join): every focal record keeps its row and
# gains the terrain of its session.
final_set <- join(focalFinal, unique(habitatFinal), by = "session_start_timeStamp")

# Remove columns that are NA in every row. drop = FALSE keeps a data frame
# even if a single column remains.
final_set <- final_set[, colSums(is.na(final_set)) != nrow(final_set), drop = FALSE]

#########################################
######## EDIT TERRAIN VARIABLE ##########
#########################################

# Recode terrain labels in one pass. Each name below is a label as recorded
# (most contain two entries separated by ";") and its value is the single
# habitat class that replaces it. Labels not listed, and NA, are unchanged.
final_set$TERRAIN. <- local({
  recode_table <- c(
    "Closed woodland (50-75%);Road" = "Closed woodland (50-75%)",
    "Floodplain Grass" = "Floodplain",
    "Floodplain;Floodplain Grass" = "Floodplain",
    "Moderate woodland (10-50%);Road" = "Moderate woodland (10-50%)",
    "Open woodland (1-10%);Moderate woodland (10-50%)" = "Moderate woodland (10-50%)",
    "Sparse woodland (<1%);Floodplain" = "Sparse woodland (<1%)",
    "Sparse woodland (<1%);Floodplain Grass" = "Sparse woodland (<1%)",
    "Sparse woodland (<1%);Open woodland (1-10%)" = "Sparse woodland (<1%)"
  )

  terrain <- final_set$TERRAIN.
  # Position of each value in the table; NA when the value is not listed.
  table_row <- match(terrain, names(recode_table))
  listed <- !is.na(table_row)
  terrain[listed] <- unname(recode_table[table_row[listed]])
  terrain
})

#########################################
########### ADD AGE AND SEX #############
#########################################

# Attach per-individual attributes to every record by matching on `actor`.
# The code below uses the `Sex` column afterwards; presumably it (and Age)
# comes from demographics.csv -- TODO confirm.
# NOTE(review): if an actor appears more than once in demographics.csv this
# left join duplicates that actor's records.
demographics <- read.csv("demographics.csv")
final_set <- left_join(final_set, demographics, by = "actor")

#########################################
########### FINAL CLEANING ##############
#########################################

# Keep only records whose USE is not NA.
final_set <- final_set[!is.na(final_set$USE),] # remove non-research instances
# NOTE(review): in the next two filters a comparison with NA gives NA, and
# indexing rows with NA yields a row filled with NA rather than dropping it.
# Records with a missing group_ID or Sex would therefore survive as all-NA
# rows. Confirm whether such records exist before relying on the row count.
final_set <- final_set[final_set$group_ID != "Bones of Predation",] # remove non-research instances
final_set <- final_set[final_set$Sex != "",] # remove Empty "" obsv in Sex

# Two-class version of Activity: "Object Use" is kept, everything else
# becomes "Other" (NA stays NA).
final_set$ObjectUse <- ifelse(final_set$Activity != "Object Use", "Other", final_set$Activity)

# Rows with an empty object type; remembered so that they stay empty below.
idx <- which(final_set$Obj_Type == "")

# Classify the object type: anything other than "Man-made" is
# "Non-anthropogenic"; rows without an object type are reset to "".
final_set$Anthropogenic <- ifelse(final_set$Obj_Type != "Man-made", "Non-anthropogenic", final_set$Obj_Type)
final_set$Anthropogenic[idx] <- ""

# Rename the remaining "Man-made" values to "Anthropogenic".
idx2 <- final_set$Anthropogenic == "Man-made"
final_set$Anthropogenic[idx2] <- "Anthropogenic"

# NOTE(review): this drops the row at POSITION 2211 (not row name 2211).
# The position depends on every join and filter above, so any change in the
# raw data or in the earlier steps silently removes a different record.
# Prefer selecting the test record by its content.
final_set <- final_set[-2211, ] # Remove line with Tool Use obsv test

library(rio)
library(dplyr)
library(lme4)

# Load the cleaned data set; the first CSV column holds the row names.
ObjectUse <- read.csv("final_set.csv", row.names = 1)

# Parse focal start times stored as "YYYY-MM-DD,HH:MM:SS". No `tz` is given,
# so they are interpreted in the time zone of the R session.
ObjectUse$focal_start_timeStamp <- as.POSIXct(
  ObjectUse$focal_start_timeStamp,
  format = "%Y-%m-%d,%H:%M:%S"
)

# Calendar day ("YYYY-MM-DD") of each record's focal. format() reads the day
# in the same time zone used for parsing. The previous
# as.Date(<POSIXct>, format = ...) ignored `format` and converted the instant
# in UTC, which can move a focal to the neighbouring day when the session
# time zone is not UTC.
days <- format(ObjectUse$focal_start_timeStamp, "%Y-%m-%d")


# Age classes as a factor with explicit level order (Infant first, Adult
# last) so that plots list them in this order; other values become NA.
ObjectUse[["Age"]] <- factor(
  x = ObjectUse[["Age"]],
  levels = c("Infant", "Juvenile", "Subadult", "Adult")
)

# Sex as a factor with the default (sorted) level order.
ObjectUse[["Sex"]] <- factor(x = ObjectUse[["Sex"]])

# "Other" is the first level and "Object Use" the second.
ObjectUse[["ObjectUse"]] <- factor(
  x = ObjectUse[["ObjectUse"]],
  levels = c("Other", "Object Use")
)

# Habitat: copy of TERRAIN. under a new name, with levels ordered from
# Floodplain to Closed woodland for plotting; other values become NA.
ObjectUse[["Habitat"]] <- factor(
  x = ObjectUse[["TERRAIN."]],
  levels = c(
    "Floodplain",
    "Sparse woodland (<1%)",
    "Open woodland (1-10%)",
    "Moderate woodland (10-50%)",
    "Closed woodland (50-75%)"
  )
)

# Behaviour: Activity with some categories merged, and "Object Use" split by
# the function of the object.

library(forcats)

ObjectUse$Behaviour <- local({
  # Merge categories one group at a time.
  merged <- fct_collapse(ObjectUse$Activity, "Vocalization" = c("Bark", "Wahoo")) # low n
  merged <- fct_collapse(merged, "Moving" = c("Locomotion", "Move", "Travel")) # similar
  merged <- fct_collapse(merged, "Forage" = c("Forage", "Hunt")) # low n

  # Plain strings are needed to write labels that are not factor levels.
  merged <- as.character(merged)

  # Replace "Object Use" by "Object Use: <function>" to break object use
  # into one category per object function.
  uses_object <- ObjectUse$Activity == "Object Use"
  merged[uses_object] <- paste("Object Use:", ObjectUse$ObjFunction[uses_object])
  merged
})

ObjectUse$ObjFunction <- as.factor(ObjectUse$ObjFunction)

# Number of distinct focal start times in the data.
n_focals <- length(unique(ObjectUse$focal_start_timeStamp))
min_focal <- 10 # each focal took 10 minutes

2. Data Structure

# Print the structure of the cleaned data set: its dimensions and, for each
# column, the type and the first few values.
str(ObjectUse)
## 'data.frame':    2262 obs. of  40 variables:
##  $ device_ID               : chr  "9C6AA987-85B3-4DEB-90DE-2F7611880AA4" "9C6AA987-85B3-4DEB-90DE-2F7611880AA4" "9C6AA987-85B3-4DEB-90DE-2F7611880AA4" "9C6AA987-85B3-4DEB-90DE-2F7611880AA4" ...
##  $ session_start_timeStamp : chr  "2021-11-02 07:16:28" "2021-11-02 07:16:28" "2021-11-02 07:16:28" "2021-11-02 07:16:28" ...
##  $ focal_start_timeStamp   : POSIXct, format: "2021-11-02 08:08:56" "2021-11-02 08:08:56" ...
##  $ behavior_timeStamp      : chr  "2021-11-02,08:09:19" "2021-11-02,08:10:09" "2021-11-02,08:10:37" "2021-11-02,08:10:50" ...
##  $ actor                   : chr  "AFV1" "AFV1" "AFV1" "AFV1" ...
##  $ subject                 : chr  "AFV1" "AFV1" "AFV1" "AFV1" ...
##  $ FOOD_ITEM.              : chr  "" "Grass" "" "Grass" ...
##  $ PART_EATEN.             : chr  "" "Root" "" "Root" ...
##  $ Activity                : chr  "Object Use" "Forage" "Locomotion" "Forage" ...
##  $ Locomotion_Type         : chr  "" "" "Quadrupedal" "" ...
##  $ Bipedal_Type            : chr  "" "" "" "" ...
##  $ Location                : chr  "Ground" "Ground" "Ground" "Ground" ...
##  $ Vigilant                : chr  "No" "Yes" "No" "No" ...
##  $ Shade                   : chr  "Sun" "Sun" "Sun" "Sun" ...
##  $ Vig_At                  : chr  "" "Monkey" "" "" ...
##  $ Height                  : chr  "" "" "" "" ...
##  $ Carrying_Type           : chr  "" "" "" "" ...
##  $ Object                  : chr  "" "" "" "" ...
##  $ ObjFunction             : Factor w/ 5 levels "","Display","Food processing",..: 3 1 1 1 3 3 1 3 1 1 ...
##  $ Obj_Type                : chr  "Roots/Bulbs" "" "" "" ...
##  $ Obj_N                   : chr  "2 or +" "" "" "" ...
##  $ Obj_Mob                 : chr  "Fixed" "" "" "" ...
##  $ Obj_Lat                 : chr  "Both" "" "" "" ...
##  $ Observers               : chr  "No" "" "" "" ...
##  $ Play_Type               : chr  "" "" "" "" ...
##  $ Speed                   : chr  "" "" "" "" ...
##  $ Direction.              : chr  "" "" "" "" ...
##  $ latitude                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ longitude               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gps_horizontal_precision: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ altitude                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ group_ID                : chr  "Palm Tree" "Palm Tree" "Palm Tree" "Palm Tree" ...
##  $ USE                     : chr  "Research" "Research" "Research" "Research" ...
##  $ TERRAIN.                : chr  "Floodplain" "Floodplain" "Floodplain" "Floodplain" ...
##  $ Sex                     : Factor w/ 3 levels "F","M","Unknown": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Age                     : Factor w/ 4 levels "Infant","Juvenile",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ ObjectUse               : Factor w/ 2 levels "Other","Object Use": 2 1 1 1 2 2 1 2 1 1 ...
##  $ Anthropogenic           : chr  "Non-anthropogenic" "" "" "" ...
##  $ Habitat                 : Factor w/ 5 levels "Floodplain","Sparse woodland (<1%)",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Behaviour               : chr  "Object Use: Food processing" "Forage" "Moving" "Forage" ...

After processing the data we obtained a cleaned data set, with 2262 observations (rows), and a total of 40 variables (columns). The variables are the following: device_ID, session_start_timeStamp, focal_start_timeStamp, behavior_timeStamp, actor, subject, FOOD_ITEM., PART_EATEN., Activity, Locomotion_Type, Bipedal_Type, Location, Vigilant, Shade, Vig_At, Height, Carrying_Type, Object, ObjFunction, Obj_Type, Obj_N, Obj_Mob, Obj_Lat, Observers, Play_Type, Speed, Direction., latitude, longitude, gps_horizontal_precision, altitude, group_ID, USE, TERRAIN., Sex, Age, ObjectUse, Anthropogenic, Habitat, Behaviour

There were 197 focals in total, each lasting 10 minutes, so the total observation time was 1970 min (32 hours and 50 minutes), sampled on 57 distinct days.

library(ggplot2)

# Calendar date of each behaviour record. as.Date() reads the leading
# "YYYY-MM-DD" of the "YYYY-MM-DD,HH:MM:SS" strings and ignores the rest.
ObjectUse$behaviorDays <- as.Date(ObjectUse$behavior_timeStamp)

# "Mon-DD" label of each record, with levels in chronological order so the
# x axis runs from the first to the last sampling day.
# The order is taken from a sorted copy of the dates. The column itself must
# not be sorted: sorting one column of a data frame on its own detaches the
# dates from the rows they belong to.
ObjectUse$month_and_day <- factor(
  format(ObjectUse$behaviorDays, "%b-%d"),
  levels = unique(format(sort(ObjectUse$behaviorDays), "%b-%d"))
)

# Number of recorded behaviours per day, coloured by year.
ggplot(ObjectUse, aes(x = month_and_day, fill = as.factor(format(behaviorDays, "%Y")))) +
  geom_bar() +
  labs(x = "Month and Day", y = "# of recorded behaviours") +
  theme_minimal() +
  scale_fill_viridis_d(option = "D", name = "Year") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(breaks = seq(0, 120, by = 10))

List of days with Focals:

# Positions that put the distinct focal days in ascending order.
indDays <- order(unique(days))

# Distinct focal days, earliest first.
unique(days)[indDays]
##  [1] "2021-10-21" "2021-10-25" "2021-10-27" "2021-10-28" "2021-10-29"
##  [6] "2021-10-30" "2021-11-02" "2021-11-03" "2021-11-10" "2021-11-11"
## [11] "2021-11-12" "2021-11-13" "2021-11-15" "2021-11-16" "2021-11-19"
## [16] "2021-11-21" "2021-11-22" "2021-11-24" "2021-11-25" "2022-03-31"
## [21] "2022-04-01" "2022-04-02" "2022-04-04" "2022-04-06" "2022-04-07"
## [26] "2022-04-08" "2022-04-11" "2022-04-12" "2022-04-13" "2022-04-14"
## [31] "2022-04-15" "2022-04-18" "2022-04-19" "2022-04-20" "2022-04-21"
## [36] "2022-04-30" "2022-05-03" "2022-05-06" "2022-05-09" "2022-05-10"
## [41] "2022-05-11" "2022-05-12" "2022-05-13" "2022-05-14" "2022-05-17"
## [46] "2022-05-18" "2022-06-15" "2022-06-18" "2022-06-23" "2022-06-24"
## [51] "2022-06-27" "2022-07-02" "2022-07-04" "2022-07-06" "2022-07-07"
## [56] "2022-07-12" "2022-07-13"
# One row per focal: the distinct focal start times, earliest first
# (NA, if any, is kept at the end). The column gets an explicit name instead
# of one generated from the indexing expression.
uniqueFocals <- data.frame(
  focal_start_timeStamp = sort(
    unique(ObjectUse[, c("focal_start_timeStamp")]),
    na.last = TRUE
  )
)

# Calendar day of each focal, read in the time zone of the timestamps.
# as.Date() on a POSIXct value converts in UTC by default, which can move a
# focal to the neighbouring day when the session time zone is not UTC.
uniqueFocals$Days <- as.Date(format(uniqueFocals[, 1], "%Y-%m-%d"))

# "Mon-DD" label of each focal; rows are already chronological, so the order
# of first appearance gives chronological levels for the x axis.
uniqueFocals$fmonth_and_day <- format(uniqueFocals$Days, "%b-%d")
uniqueFocals$fmonth_and_day <- factor(
  uniqueFocals$fmonth_and_day,
  levels = unique(uniqueFocals$fmonth_and_day)
)

# Number of focals per day, coloured by year.
ggplot(uniqueFocals, aes(x = fmonth_and_day, fill = as.factor(format(Days, "%Y")))) +
  geom_bar() +
  labs(x = "Month and Day", y = "# of focals") +
  theme_minimal() +
  scale_fill_viridis_d(option = "D", name = "Year") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(breaks = seq(0, 8, by = 1))

Mean n of focals per day = 3.456 *

*this only considers days where at least 1 focal occurred.

Individuals observed:

# Create a new variable "age_and_sex" with readable labels.
# Labels are built from the values themselves (F -> Female, M -> Male), not
# by overwriting the factor levels in alphabetical position, so a new or
# missing age/sex combination cannot shift the labels of the others.
ObjectUse$age_and_sex <- local({
  sex_labels <- c("F" = "Female", "M" = "Male", "Unknown" = "Unknown")
  # Display order: males, then females, then unknown sex, infants last.
  display_levels <- c(
    "Adult Male", "Subadult Male", "Juvenile Male",
    "Adult Female", "Juvenile Female",
    "Adult Unknown", "Subadult Unknown", "Juvenile Unknown",
    "Infant"
  )

  age <- as.character(ObjectUse$Age)
  sex <- unname(sex_labels[as.character(ObjectUse$Sex)])
  # Infants form a single class regardless of sex.
  label <- ifelse(age == "Infant", "Infant", paste(age, sex))

  # Anything outside the expected classes becomes NA below; say so instead
  # of hiding it.
  unexpected <- setdiff(unique(label), display_levels)
  if (length(unexpected) > 0) {
    warning(
      "age_and_sex: unexpected age/sex combination(s) set to NA: ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }

  factor(label, levels = display_levels)
})

# Number of recorded behaviours per individual, coloured by age and sex.
ggplot(ObjectUse, aes(x = actor, fill = age_and_sex)) +
  geom_bar() +
  labs(x = "ID", y = "# of recorded behaviours") +
  theme_minimal() +
  scale_fill_viridis_d(option = "C", name = "Age and Sex") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 200, by = 20))

3. Data visualization

Behaviour per Habitat (%) barplot

# Share of each behaviour within each habitat.
ggplot(ObjectUse, aes(x = Habitat, fill = Behaviour)) +
  # position = "fill" rescales every bar to 1, i.e. proportions corrected
  # for the number of observations in each habitat.
  geom_bar(position = "fill") +
  # `labels` is written in full; `label` only worked through partial
  # argument matching.
  scale_y_continuous(breaks = seq(0, 1, .2), labels = scales::percent) +
  scale_fill_viridis_d(option = "C") +
  labs(y = "Percent") + theme_minimal()

Behaviour per Habitat (count) barplot

# Number of records of each behaviour within each habitat, bars side by side.
ggplot(data = ObjectUse, mapping = aes(x = Habitat, fill = Behaviour)) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  scale_fill_viridis_d(option = "C")

Behaviour per Age class (%) barplot

# Share of each behaviour within each age class.
ggplot(ObjectUse, aes(x = Age, fill = Behaviour)) +
  # position = "fill" rescales every bar to 1 (proportions within age class).
  geom_bar(position = "fill") +
  # `labels` is written in full; `label` only worked through partial
  # argument matching.
  scale_y_continuous(breaks = seq(0, 1, .2), labels = scales::percent) +
  scale_fill_viridis_d(option = "C") +
  labs(fill = "Behaviour", y = "Percent") + theme_minimal() +
  # Larger text; must come after theme_minimal(), which resets the theme.
  theme(text = element_text(size = 20))

Behaviour per Age class (count) barplot

# Number of records of each behaviour within each age class, bars side by
# side.
ggplot(data = ObjectUse, mapping = aes(x = Age, fill = Behaviour)) +
  geom_bar(position = "dodge") +
  labs(fill = "Behaviour") +
  scale_fill_viridis_d(option = "C") +
  theme_minimal() +
  # Larger text; kept after theme_minimal(), which resets the theme.
  theme(text = element_text(size = 20))

Behaviour per Sex (%) barplot

# Share of each behaviour within each sex.
ggplot(ObjectUse, aes(x = Sex, fill = Behaviour)) +
  # position = "fill" rescales every bar to 1 (proportions within sex).
  geom_bar(position = "fill") +
  # `labels` is written in full; `label` only worked through partial
  # argument matching.
  scale_y_continuous(breaks = seq(0, 1, .2), labels = scales::percent) +
  scale_fill_viridis_d(option = "C") +
  labs(y = "Percent") + theme_minimal() +
  # Larger text; must come after theme_minimal(), which resets the theme.
  theme(text = element_text(size = 20))