Question 1:

Setting up the file structure and sourcing the barracudar helper scripts.

# Point the session at the homework folder; the relative "barracudar/..."
# paths in the source() calls below are resolved against this directory.
setwd("C:/Users/doug/Desktop/GrassoBio6100/Homework_11")

# Evaluate each barracudar script in this session, one file per call.
source("barracudar/AddFolder.R")
source("barracudar/BuildFunction.R")
source("barracudar/CreatePaddedLabel.R")
source("barracudar/DataTableTemplate.R")
source("barracudar/InitiateSeed.R")
source("barracudar/MetaDataTemplate.R")
source("barracudar/QBox.R")
source("barracudar/QBub.R")
source("barracudar/QCon1.R")
source("barracudar/QCon2.R")
source("barracudar/QContour.R")
source("barracudar/QHist.R")
source("barracudar/QLogis.R")
source("barracudar/SetUpLog.R")
source("barracudar/SourceBatch.R")

Question 2:

# Folder that holds the yearly sub-folders listed below.
original_dir <- "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/OriginalData"
filelist <- list.files(original_dir)
filelist # names of the sub-folders inside OriginalData
## [1] "NEON.D01.BART.DP1.10003.001.2015-06.basic.20240127T000425Z.RELEASE-2024"
## [2] "NEON.D01.BART.DP1.10003.001.2016-06.basic.20240127T000425Z.RELEASE-2024"
## [3] "NEON.D01.BART.DP1.10003.001.2017-06.basic.20240127T000425Z.RELEASE-2024"
## [4] "NEON.D01.BART.DP1.10003.001.2018-06.basic.20240127T000425Z.RELEASE-2024"
## [5] "NEON.D01.BART.DP1.10003.001.2019-06.basic.20240127T000425Z.RELEASE-2024"
## [6] "NEON.D01.BART.DP1.10003.001.2020-06.basic.20240127T000425Z.RELEASE-2024"
## [7] "NEON.D01.BART.DP1.10003.001.2021-06.basic.20240127T000425Z.RELEASE-2024"
## [8] "NEON.D01.BART.DP1.10003.001.2022-06.basic.20240127T000425Z.RELEASE-2024"

########################
# FUNCTION: find_countdata_file()
# Description: finds the file whose name contains "countdata" in one folder
# Inputs: folder - path to a single folder
# Outputs: one file name (no directory part); stops if nothing matches
find_countdata_file <- function(folder) {
  matches <- list.files(folder, pattern = "countdata")
  if (length(matches) == 0) {
    stop("no file matching 'countdata' found in ", folder, call. = FALSE)
  }
  if (length(matches) > 1) {
    # Keep the first match, but say so instead of picking silently.
    warning(
      "more than one 'countdata' file in ", folder, "; using ", matches[1],
      call. = FALSE
    )
  }
  matches[1]
}

# Look inside every sub-folder by its full path. This works for any number
# of folders and does not change the working directory along the way.
filenames <- vapply(
  file.path(original_dir, filelist),
  find_countdata_file,
  character(1),
  USE.NAMES = FALSE
)

filenames # count-data .csv file name found in each sub-folder
## [1] "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv"
## [2] "NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv"
## [3] "NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv"
## [4] "NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv"
## [5] "NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv"
## [6] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv"
## [7] "NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv"
## [8] "NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv"

Question 3:

Creating my functions

########################
# FUNCTION: clean_data()
# Description: reads each raw count-data file, drops the rows whose
#   scientificName is missing (empty string or NA), and writes the remaining
#   rows to "CleanData_<year>.csv" in the output folder
# Inputs: filelist   - sub-folder names inside input_dir, one per file
#         filenames  - .csv file name inside each sub-folder (same order)
#         file_years - label used in each output file name (same order);
#                      defaults to the global `years` vector
#         input_dir  - folder containing the sub-folders
#         output_dir - folder that receives the cleaned files (created if
#                      it does not exist yet)
# Outputs: (invisibly) the paths of the files that were written
# Side effect: the working directory is left at output_dir, because later
#   code in this file reads the cleaned files by bare file name
years <- c(2015,2016,2017,2018,2019,2020,2021,2022)
clean_data <- function(filelist, filenames, file_years = years,
                       input_dir = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/OriginalData",
                       output_dir = "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData") {
  # All three vectors are walked in parallel, so they must line up.
  stopifnot(
    length(filelist) == length(filenames),
    length(file_years) == length(filenames)
  )
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  output_paths <- character(length(filenames))
  for (i in seq_along(filenames)) {
    raw_path <- file.path(input_dir, filelist[i], filenames[i])
    # Empty strings are read as NA so they are dropped together with real NAs.
    raw_data <- read.csv(raw_path, na.strings = c("", "NA"))
    if (!"scientificName" %in% names(raw_data)) {
      stop("no 'scientificName' column in ", raw_path, call. = FALSE)
    }
    has_name <- !is.na(raw_data[["scientificName"]])
    cleaned <- raw_data[has_name, , drop = FALSE]

    output_paths[i] <- file.path(
      output_dir,
      paste0("CleanData_", file_years[i], ".csv")
    )
    write.csv(cleaned, output_paths[i])
  }

  setwd(output_dir)
  invisible(output_paths)
}

clean_data(filelist,filenames) # run my function: writes one CleanData_<year>.csv per raw file
clean_files <- list.files("C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData/") # bare file names (no directory part) of everything in CleanedData
##########################
#FUNCTION: extract_years()
#Description: pulls the first run of four digits out of each file name
#Inputs: clean_files - character vector of file names
#Outputs: character vector, same length as the input, holding the four
#         digits found in each name (NA where a name has none)

extract_years <- function(clean_files){
  # Base-R matching is used so the function needs no extra package loaded.
  year_match <- regexpr("\\d{4}", clean_files)
  found_years <- rep(NA_character_, length(clean_files))
  # regexpr() reports -1 for "no match"; regmatches() returns matches only,
  # so they are written back into the positions that actually matched.
  has_year <- !is.na(year_match) & year_match != -1
  found_years[has_year] <- regmatches(clean_files, year_match)
  found_years
}
extract_years(clean_files) # run my function on the cleaned file names; the result is printed below
## [1] "2015" "2016" "2017" "2018" "2019" "2020" "2021" "2022"
################################
#FUNCTION: calculate_abundance()
#Description: counts the rows in one cleaned file and prints that count
#Inputs: clean_files - path to one cleaned .csv file
#Outputs: the number of rows (printed, and returned by print())
calculate_abundance <- function(clean_files){
  records <- read.csv(file = clean_files, header = TRUE)
  n_records <- nrow(records)
  # print() shows the count and hands the same value back to the caller.
  print(n_records)
}
###############################
#FUNCTION: calculate_richness()
#Description: counts the distinct scientificName values in one cleaned file
#             and prints that count
#Inputs: clean_files - path to one cleaned .csv file
#Outputs: the number of distinct names (printed, and returned by print())


calculate_richness <- function(clean_files){
  records <- read.csv(file = clean_files, header = TRUE)
  distinct_names <- unique(records$scientificName)
  # print() shows the count and hands the same value back to the caller.
  print(length(distinct_names))
}

Question 4:

# creating my initial empty data frame: zero rows, with every column declared
# up front (rep(NULL, 8) is just NULL, which would leave the frame with no
# columns at all)
finaldf <- data.frame(
  File = character(),
  Year = character(),
  Abundance = integer(),
  SpeciesRichness = integer(),
  stringsAsFactors = FALSE
)

Question 5:

Some of my working directories behaved unexpectedly when I knit my R Markdown file. However, the code that I used is below:

# Folder that holds the cleaned files named in clean_files. Reading by full
# path means the results do not depend on the current working directory.
cleaned_dir <- "C:/Users/doug/Desktop/GrassoBio6100/Homework_11/CleanedData"

# Build one single-row data frame per cleaned file, then bind them all at
# once instead of growing finaldf inside a loop.
summary_rows <- lapply(clean_files, function(clean_file) {
  clean_path <- file.path(cleaned_dir, clean_file)
  data.frame(
    File = clean_file,
    Year = extract_years(clean_file),
    Abundance = calculate_abundance(clean_path),
    SpeciesRichness = calculate_richness(clean_path),
    stringsAsFactors = FALSE
  )
})
# do.call() gives NULL when there are no files, which rbind() simply ignores.
finaldf <- rbind(finaldf, do.call(rbind, summary_rows))
finaldf