#HW Data

library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation

z <- rnorm(n=3000,mean=0.2)
z <- data.frame(1:3000,z)
names(z) <- list("ID","myVar")
z <- z[z$myVar>0,]
str(z)

## 'data.frame':    1737 obs. of  2 variables:
##  $ ID   : int  1 2 3 4 6 8 13 15 16 18 ...
##  $ myVar: num  1.257 0.252 1.502 0.753 0.646 ...

summary(z$myVar)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001479 0.364689 0.744630 0.869471 1.246718 3.695058

This codes for some varibles to work with for learning about probabilities

p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

this codes for a histogram to be produced with the data curated earlier, this has axis’ labeled

p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

this adds a dotted line fitted to the histogram created previously

normPars <- fitdistr(z$myVar,"normal")
print(normPars)

##       mean          sd    
##   0.86947116   0.63399940 
##  (0.01521209) (0.01075657)

str(normPars)

## List of 5
##  $ estimate: Named num [1:2] 0.869 0.634
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.0152 0.0108
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 0.000231 0 0 0.000116
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 1737
##  $ loglik  : num -1673
##  - attr(*, "class")= chr "fitdistr"

normPars$estimate["mean"] # note structure of getting a named attribute

##      mean 
## 0.8694712

This gives you a read out of the mean and the standard deviation of the created data set

meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(z$myVar),len=length(z$myVar))

 stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
 p1 + stat

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

this is giving you the mean probability of each possibly density point

expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]

stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
 p1 + stat + stat2

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

this function adds an exponential curve for the probability of the created data

stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
 p1 + stat + stat2 + stat3

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

This adds a uniform distribution of the min and the max of the data( a straigt line)

gammaPars <- fitdistr(z$myVar,"gamma")

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
 p1 + stat + stat2 + stat3 + stat4

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

this code adds a line better fit then the mean probability line, this is because zeros are not taken into account

xval <- seq(0,max(z$myVar+0.1),len=length(z$myVar))
pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]

statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 2 rows containing missing values (geom_bar).

This is an example of the example data set with a beta probability line, this estimates the uncertainty of the dataset

#Practice data

library(ggplot2)
library(MASS)
new_data<- read.table(file='./Data_Homework_6/JAEDataArchiveNewsomeRipple.csv' , header=TRUE, sep=",", comment.char="#")
str(new_data)

## 'data.frame':    4680 obs. of  13 variables:
##  $ ï..ID          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Coyote         : int  4 8 0 0 15 8 0 0 2 0 ...
##  $ Fox            : int  5 30 9 11 14 5 29 42 10 28 ...
##  $ Site           : chr  "Manitoba" "Manitoba" "Manitoba" "Manitoba" ...
##  $ Block          : chr  "Bloodvein" "Bonnet" "BreensRiver" "Brochet" ...
##  $ Year           : int  1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 ...
##  $ WolfPresent    : chr  "Wolf" "Wolf" "Wolf" "Wolf" ...
##  $ EdgeDistance   : chr  "31.90" "51.90" "40.32" "506.95" ...
##  $ Zone           : chr  "Transition" "Transition" "Transition" "Wolf" ...
##  $ Coyote.FoxRatio: chr  "0.31" "0.28" "0.58" "0.01" ...
##  $ Fox.CoyoteRatio: chr  "3.18" "3.54" "1.72" "199.50" ...
##  $ CoyotePrice    : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ FoxPrice       : int  25 25 25 25 25 25 25 25 25 25 ...

addition of data set from suggested website

p1 <- ggplot(data=new_data, aes(x=CoyotePrice, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

normPars <- fitdistr(new_data$CoyotePrice,"normal")
print(normPars)

##       mean          sd    
##   37.5145299   13.8236838 
##  ( 0.2020695) ( 0.1428847)

str(normPars)

## List of 5
##  $ estimate: Named num [1:2] 37.5 13.8
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.202 0.143
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 0.0408 0 0 0.0204
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 4680
##  $ loglik  : num -18932
##  - attr(*, "class")= chr "fitdistr"

normPars$estimate["mean"] # note structure of getting a named attribute

##     mean 
## 37.51453

meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(new_data$CoyotePrice),len=length(new_data$CoyotePrice))

 stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(new_data$CoyotePrice), args = list(mean = meanML, sd = sdML))
 p1 + stat

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#{r} stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(new_data$CoyotePrice), args = list(min=min(new_data$CoyotePrice), max=max(new_data$CoyotePrice))) p1 + stat + stat2 + stat3 #

gammaPars <- fitdistr(new_data$CoyotePrice,"gamma")

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(new_data$CoyotePrice), args = list(shape=shapeML, rate=rateML))
 p1+ stat4

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Gamma fits this data the best.

Simulate new data

s<-rnorm(n=4680,mean=37.5)
s <- data.frame(1:4680,s)
names(s) <- list("A","B")
s <- s[s$B>0,]
str(s)

## 'data.frame':    4680 obs. of  2 variables:
##  $ A: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ B: num  36.4 37.2 36.9 36.5 35.8 ...

summary(s$B)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   34.20   36.82   37.49   37.49   38.15   40.93

xval

p2 <- ggplot(data=s, aes(x=B, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

qval <- seq(0,max(s$B),len=length(s$B))

gammaPars <- fitdistr(s$B,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = qval, y = ..y..), fun = dgamma, colour="brown", n = length(s$B), args = list(shape=shapeML, rate=rateML))
 p2  + stat4

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.