Setting up the file structure and sourcing the barracudar helper functions.
setwd("C:/Users/doug/Desktop/GrassoBio6100/Homework_11")
source("barracudar/AddFolder.R")
source("barracudar/BuildFunction.R")
source("barracudar/CreatePaddedLabel.R")
source("barracudar/DataTableTemplate.R")
source("barracudar/InitiateSeed.R")
source("barracudar/MetaDataTemplate.R")
source("barracudar/QBox.R")
source("barracudar/QBub.R")
source("barracudar/QCon1.R")
source("barracudar/QCon2.R")
source("barracudar/QContour.R")
source("barracudar/QHist.R")
source("barracudar/QLogis.R")
source("barracudar/SetUpLog.R")
source("barracudar/SourceBatch.R")
# Collect the names of the download folders stored inside OriginalData
filelist <- list.files(path = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/OriginalData")
print(filelist) # folder names
## [1] "NEON.D01.BART.DP1.10003.001.2015-06.basic.20240127T000425Z.RELEASE-2024"
## [2] "NEON.D01.BART.DP1.10003.001.2016-06.basic.20240127T000425Z.RELEASE-2024"
## [3] "NEON.D01.BART.DP1.10003.001.2017-06.basic.20240127T000425Z.RELEASE-2024"
## [4] "NEON.D01.BART.DP1.10003.001.2018-06.basic.20240127T000425Z.RELEASE-2024"
## [5] "NEON.D01.BART.DP1.10003.001.2019-06.basic.20240127T000425Z.RELEASE-2024"
## [6] "NEON.D01.BART.DP1.10003.001.2020-06.basic.20240127T000425Z.RELEASE-2024"
## [7] "NEON.D01.BART.DP1.10003.001.2021-06.basic.20240127T000425Z.RELEASE-2024"
## [8] "NEON.D01.BART.DP1.10003.001.2022-06.basic.20240127T000425Z.RELEASE-2024"
# Find the count-data CSV inside each folder listed in `filelist`.
# Paths are built with file.path() instead of setwd(), so the working directory
# is left untouched, and every folder in `filelist` is visited rather than
# assuming there are exactly eight of them.
raw_dir <- "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/OriginalData"
filenames <- vapply(
  filelist,
  function(folder) {
    hits <- list.files(file.path(raw_dir, folder), pattern = "countdata")
    if (length(hits) == 0L) {
      stop("no countdata file found in folder: ", folder, call. = FALSE)
    }
    if (length(hits) > 1L) {
      # Several matches: keep the first one, but make the choice visible
      warning(
        "several countdata files found in folder ", folder,
        "; using ", hits[1L],
        call. = FALSE
      )
    }
    hits[1L]
  },
  character(1),
  USE.NAMES = FALSE
)
filenames # count-data file name (.csv) for each folder
## [1] "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv"
## [2] "NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv"
## [3] "NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv"
## [4] "NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv"
## [5] "NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv"
## [6] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv"
## [7] "NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv"
## [8] "NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv"
Creating my functions
########################
# FUNCTION: clean_data()
# Description: drops every row whose scientificName is empty/missing from each
#   raw count file and writes the result to CleanData_<year>.csv
# Inputs:
#   filelist   - folder names inside raw_dir, one per raw file
#   filenames  - raw CSV file name inside each folder (same length as filelist)
#   file_years - labels used in the output file names; defaults to the global
#                `years` vector defined just below
#   raw_dir    - directory that holds the folders named in filelist
#   clean_dir  - directory that receives the cleaned CSV files
# Outputs: nothing is returned (invisible NULL); the cleaned CSVs are written
#   to clean_dir and the working directory is left at clean_dir
years <- c(2015,2016,2017,2018,2019,2020,2021,2022)
clean_data <- function(filelist, filenames,
                       file_years = years,
                       raw_dir = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/OriginalData",
                       clean_dir = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData") {
  stopifnot(
    length(filenames) == length(filelist),
    length(file_years) >= length(filelist)
  )
  for (i in seq_along(filelist)) {
    raw_data <- read.csv(
      file.path(raw_dir, filelist[i], filenames[i]),
      na.strings = c("", "NA")
    )
    if (!"scientificName" %in% names(raw_data)) {
      stop("no scientificName column in ", filenames[i], call. = FALSE)
    }
    # Empty strings were read in as NA above, so one NA test covers both cases
    has_name <- !is.na(raw_data[["scientificName"]])
    cleaned <- raw_data[has_name, , drop = FALSE]
    write.csv(
      cleaned,
      file.path(clean_dir, paste0("CleanData_", file_years[i], ".csv"))
    )
  }
  # Later steps of this script read the cleaned files by bare file name, so the
  # working directory is deliberately left pointing at clean_dir.
  setwd(clean_dir)
  invisible(NULL)
}
# Clean every raw file, then record the names of the files now in CleanedData
clean_data(filelist, filenames)
clean_files <- list.files(path = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData/")
##########################
#FUNCTION: extract_years()
#Description: extracts the first run of four digits (the year) from each file name
#Inputs: clean_files - character vector of file names
#Outputs: character vector the same length as clean_files holding the years;
#  NA where a name contains no four-digit run
extract_years <- function(clean_files) {
  clean_files <- as.character(clean_files)
  # Base R regex is used so the function does not need stringr to be attached
  hit <- regexpr("\\d{4}", clean_files)
  found <- !is.na(hit) & hit > 0L
  years <- rep(NA_character_, length(clean_files))
  # regmatches() returns only the matched elements, so fill just those slots
  years[found] <- regmatches(clean_files, hit)
  years
}
extract_years(clean_files) # run my function: pull the year out of each cleaned file name
## [1] "2015" "2016" "2017" "2018" "2019" "2020" "2021" "2022"
################################
#FUNCTION: calculate_abundance()
#Description: calculates the abundance (number of data rows) of each cleaned file
#Inputs: clean_files - path(s) to one or more cleaned CSV files
#Outputs: integer vector with one row count per file; the value is returned
#  rather than printed, so it can be stored without cluttering the console
calculate_abundance <- function(clean_files) {
  vapply(
    clean_files,
    function(path) nrow(read.csv(path, header = TRUE)),
    integer(1),
    USE.NAMES = FALSE
  )
}
###############################
#FUNCTION: calculate_richness()
#Description: calculates species richness (number of distinct scientificName
#  values) of each cleaned file
#Inputs: clean_files - path(s) to one or more cleaned CSV files
#Outputs: integer vector with one richness value per file; the value is
#  returned rather than printed
calculate_richness <- function(clean_files) {
  vapply(
    clean_files,
    function(path) {
      species <- read.csv(path, header = TRUE)[["scientificName"]]
      if (is.null(species)) {
        stop("no scientificName column in ", path, call. = FALSE)
      }
      # A missing name is not a species, so NA is left out of the count
      length(unique(species[!is.na(species)]))
    },
    integer(1),
    USE.NAMES = FALSE
  )
}
# creating my initial empty data frame: zero rows, with the four result columns
# declared up front (rep(NULL, 8) is just NULL, which data.frame() drops, so the
# columns have to be given as zero-length typed vectors to exist at all)
finaldf <- data.frame(
  File = character(),
  Year = character(),
  Abundance = integer(),
  SpeciesRichness = integer(),
  stringsAsFactors = FALSE
)
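Some of my working directories behaved unexpectedly when I knit this R Markdown file (knitr resets the working directory at the end of each chunk, so `setwd()` calls do not carry over to later chunks). Below is the code that I used: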
# Build one summary row per cleaned file and stack the rows onto finaldf.
# Each file is read through its full path, so this step does not depend on the
# working directory still pointing at CleanedData, and every file in
# clean_files is summarised rather than assuming there are exactly eight.
clean_dir <- "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData"
summary_rows <- lapply(clean_files, function(f) {
  path <- file.path(clean_dir, f)
  data.frame(
    File = f,
    Year = extract_years(f), # year comes from the bare file name, not the path
    Abundance = calculate_abundance(path),
    SpeciesRichness = calculate_richness(path),
    stringsAsFactors = FALSE
  )
})
# Bind once at the end instead of growing finaldf inside a loop
finaldf <- do.call(rbind, c(list(finaldf), summary_rows))
finaldf