#HW Data
library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
z <- rnorm(n=3000,mean=0.2)
z <- data.frame(1:3000,z)
names(z) <- list("ID","myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame': 1737 obs. of 2 variables:
## $ ID : int 1 2 3 4 6 8 13 15 16 18 ...
## $ myVar: num 1.257 0.252 1.502 0.753 0.646 ...
summary(z$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001479 0.364689 0.744630 0.869471 1.246718 3.695058
This codes for some varibles to work with for learning about probabilities
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
this codes for a histogram to be produced with the data curated earlier, this has axis’ labeled
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
this adds a dotted line fitted to the histogram created previously
normPars <- fitdistr(z$myVar,"normal")
print(normPars)
## mean sd
## 0.86947116 0.63399940
## (0.01521209) (0.01075657)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 0.869 0.634
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.0152 0.0108
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.000231 0 0 0.000116
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 1737
## $ loglik : num -1673
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 0.8694712
This gives you a read out of the mean and the standard deviation of the created data set
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$myVar),len=length(z$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
this is giving you the mean probability of each possibly density point
expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
this function adds an exponential curve for the probability of the created data
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
This adds a uniform distribution of the min and the max of the data( a straigt line)
gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
this code adds a line better fit then the mean probability line, this is because zeros are not taken into account
xval <- seq(0,max(z$myVar+0.1),len=length(z$myVar))
pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
This is an example of the example data set with a beta probability line, this estimates the uncertainty of the dataset
#Practice data
library(ggplot2)
library(MASS)
new_data<- read.table(file='./Data_Homework_6/JAEDataArchiveNewsomeRipple.csv' , header=TRUE, sep=",", comment.char="#")
str(new_data)
## 'data.frame': 4680 obs. of 13 variables:
## $ ï..ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Coyote : int 4 8 0 0 15 8 0 0 2 0 ...
## $ Fox : int 5 30 9 11 14 5 29 42 10 28 ...
## $ Site : chr "Manitoba" "Manitoba" "Manitoba" "Manitoba" ...
## $ Block : chr "Bloodvein" "Bonnet" "BreensRiver" "Brochet" ...
## $ Year : int 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 ...
## $ WolfPresent : chr "Wolf" "Wolf" "Wolf" "Wolf" ...
## $ EdgeDistance : chr "31.90" "51.90" "40.32" "506.95" ...
## $ Zone : chr "Transition" "Transition" "Transition" "Wolf" ...
## $ Coyote.FoxRatio: chr "0.31" "0.28" "0.58" "0.01" ...
## $ Fox.CoyoteRatio: chr "3.18" "3.54" "1.72" "199.50" ...
## $ CoyotePrice : int 37 37 37 37 37 37 37 37 37 37 ...
## $ FoxPrice : int 25 25 25 25 25 25 25 25 25 25 ...
addition of data set from suggested website
p1 <- ggplot(data=new_data, aes(x=CoyotePrice, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
normPars <- fitdistr(new_data$CoyotePrice,"normal")
print(normPars)
## mean sd
## 37.5145299 13.8236838
## ( 0.2020695) ( 0.1428847)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 37.5 13.8
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.202 0.143
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.0408 0 0 0.0204
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 4680
## $ loglik : num -18932
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 37.51453
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(new_data$CoyotePrice),len=length(new_data$CoyotePrice))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(new_data$CoyotePrice), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#{r} stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(new_data$CoyotePrice), args = list(min=min(new_data$CoyotePrice), max=max(new_data$CoyotePrice))) p1 + stat + stat2 + stat3 #
gammaPars <- fitdistr(new_data$CoyotePrice,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(new_data$CoyotePrice), args = list(shape=shapeML, rate=rateML))
p1+ stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Gamma fits this data the best.
s<-rnorm(n=4680,mean=37.5)
s <- data.frame(1:4680,s)
names(s) <- list("A","B")
s <- s[s$B>0,]
str(s)
## 'data.frame': 4680 obs. of 2 variables:
## $ A: int 1 2 3 4 5 6 7 8 9 10 ...
## $ B: num 36.4 37.2 36.9 36.5 35.8 ...
summary(s$B)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34.20 36.82 37.49 37.49 38.15 40.93
xval
p2 <- ggplot(data=s, aes(x=B, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
qval <- seq(0,max(s$B),len=length(s$B))
gammaPars <- fitdistr(s$B,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = qval, y = ..y..), fun = dgamma, colour="brown", n = length(s$B), args = list(shape=shapeML, rate=rateML))
p2 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.