I used the Bombus Bumblebee Seasonal Variation data where samples
were collected to assay viruses (DWV and BQCV) and Nosema from around
300 bumblebees of various species from 5 sites at 4 different time
points
bee <- read.csv("BeeData.csv",sep=",",comment.char='#')
View(bee) # view in new tab - make sure to use capital V
bee <- na.omit(bee) # omit the rows that contain an NA
str(bee)
## 'data.frame': 865 obs. of 11 variables:
## $ id : int 1 3 4 5 6 8 10 11 12 13 ...
## $ target_name : chr "BQCV" "BQCV" "BQCV" "BQCV" ...
## $ pathogen_binary: int 1 1 0 0 1 1 1 1 0 1 ...
## $ sampling_date : chr "9/9/2020" "9/10/2020" "9/10/2020" "9/9/2020" ...
## $ site_code : chr "BOST" "MUDGE" "MUDGE" "BOST" ...
## $ field_id : int 6 18 11 11 14 4 13 38 5 12 ...
## $ bombus_spp : chr "impatiens" "impatiens" "impatiens" "impatiens" ...
## $ host_plant : chr "solidago" "crown vetch" "crown vetch" "solidago" ...
## $ bee_caste : chr "worker" "worker" "worker" "male" ...
## $ sampling_event : int 4 4 4 4 4 4 4 2 4 4 ...
## $ pathogen_load : num 2414169 760793 0 0 124237 ...
## - attr(*, "na.action")= 'omit' Named int [1:41] 22 43 57 77 83 172 207 233 256 289 ...
## ..- attr(*, "names")= chr [1:41] "22" "43" "57" "77" ...
# make a column called myVar and copy over the pathogen_load column
bee$myVar <- bee$pathogen_load
# filter out rows where 'pathogen_load' is 0, and log() the myVar column
bee <- bee%>%filter(pathogen_load>0)%>%
mutate(myVar=log(pathogen_load))
# plot this data
p1 <- ggplot(data=bee, aes(x=myVar, y=after_stat(density))) + geom_histogram(color="grey60",fill="cornsilk",linewidth=0.2)
print(p1)
# add empirical density curve
p1 <- p1 + geom_density(linetype="dotted",linewidth=0.75)
print(p1)
# get maximum likelihood parameters for normal
normPars <- fitdistr(bee$myVar,"normal")
print(normPars)
## mean sd
## 13.3863402 2.4396499
## ( 0.1153915) ( 0.0815941)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 13.39 2.44
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.1154 0.0816
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.01332 0 0 0.00666
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 447
## $ loglik : num -1033
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 13.38634
# plot normal probability density
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(bee$myVar),len=length(bee$myVar))
stat <- stat_function(aes(x = xval, y = after_stat(y)), fun = dnorm, colour="red", n = length(bee$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
# plot exponential probability density
expoPars <- fitdistr(bee$myVar,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = after_stat(y)), fun = dexp, colour="blue", n = length(bee$myVar), args = list(rate=rateML))
p1 + stat + stat2
# plot uniform probability density
stat3 <- stat_function(aes(x = xval, y = after_stat(y)), fun = dunif, colour="darkgreen", n = length(bee$myVar), args = list(min=min(bee$myVar), max=max(bee$myVar)))
p1 + stat + stat2 + stat3
# plot gamma probability density
gammaPars <- fitdistr(bee$myVar,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = after_stat(y)), fun = dgamma, colour="brown", n = length(bee$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
Both the normal and gamma distributions fit this dataset pretty
well. The gamma looks like it is a slightly better fit.
# plot beta probability density
pSpecial <- ggplot(data=bee, aes(x=myVar/(max(myVar + 0.1)), y=after_stat(density))) +
geom_histogram(color="grey60",fill="cornsilk",linewidth=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=bee$myVar/max(bee$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = after_stat(y)), fun = dbeta, colour="orchid", n = length(bee$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial