Homework 8

This is an example of how the data would normally look.

This includes eyesize estimations from Stergas et al.

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(dplyr)

# Example data set of Eye- Diameter at 72hpf
wt_e <- rnorm(n=100, mean=290 , sd=50)


null_e <-rnorm( n=100, mean=220 , sd=50)




eye_size_df <- data.frame(WTeyesize = wt_e , Nulleyesize = null_e)

str(eye_size_df)

## 'data.frame':    100 obs. of  2 variables:
##  $ WTeyesize  : num  307 300 257 266 251 ...
##  $ Nulleyesize: num  182 217 213 202 251 ...

long_df <- eye_size_df %>%
      pivot_longer(cols=WTeyesize:Nulleyesize, names_to= "Genotype" , values_to= "EyeSize")

head(long_df)

## # A tibble: 6 x 2
##   Genotype    EyeSize
##   <chr>         <dbl>
## 1 WTeyesize      307.
## 2 Nulleyesize    182.
## 3 WTeyesize      300.
## 4 Nulleyesize    217.
## 5 WTeyesize      257.
## 6 Nulleyesize    213.

test<-t.test(wt_e,null_e, paired=TRUE)

print(test)

## 
##  Paired t-test
## 
## data:  wt_e and null_e
## t = 11.172, df = 99, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  54.19692 77.60597
## sample estimates:
## mean of the differences 
##                65.90145

#p-value1(n=30): 5.8e-07, p-value(n=50):2.5e-09, p-value(n=100):2.2e-16

p1 <- ggplot(data=long_df, aes(x=Genotype, y=EyeSize)) + geom_point(color="white" , fill="purple", size=0.2) +
  stat_boxplot()

print(p1)

In retinal development the retina is finished being laminated by 72hpf, here we simulate data for a this time point, for organisms with proper lamination, leading to proper eye size (wt), and organisms missing a protein integral to eye development, causing improper lamination, and smaller eye size(null)

Here we begin to adjust the means

library(tidyverse)
library(ggplot2)
library(MASS)
library(dplyr)

# Example data set of Eye- Diameter at 72hpf
wt_e <- rnorm(n=30 , mean=270 , sd=50)


null_e <-rnorm( n=30 , mean=240 , sd=50)




eye_size_df <- data.frame(WTeyesize = wt_e , Nulleyesize = null_e)

str(eye_size_df)

## 'data.frame':    30 obs. of  2 variables:
##  $ WTeyesize  : num  280 274 198 251 276 ...
##  $ Nulleyesize: num  218 290 249 254 247 ...

long_df <- eye_size_df %>%
      pivot_longer(cols=WTeyesize:Nulleyesize, names_to= "Genotype" , values_to= "EyeSize")

head(long_df)

## # A tibble: 6 x 2
##   Genotype    EyeSize
##   <chr>         <dbl>
## 1 WTeyesize      280.
## 2 Nulleyesize    218.
## 3 WTeyesize      274.
## 4 Nulleyesize    290.
## 5 WTeyesize      198.
## 6 Nulleyesize    249.

test<-t.test(wt_e,null_e, paired=TRUE)

print(test)

## 
##  Paired t-test
## 
## data:  wt_e and null_e
## t = 0.47856, df = 29, p-value = 0.6358
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.10987  29.17368
## sample estimates:
## mean of the differences 
##                5.531906

#p-value(250-240):0.2558 
#p-value(260-240):0.07137
#p-value(270-240):0.006474

p1 <- ggplot(data=long_df, aes(x=Genotype, y=EyeSize)) + geom_point(color="white" , fill="purple", size=0.2) +
  stat_boxplot()

print(p1)

Adjusting the mean of each group to be values closer to eachother causes the groups to lose significant difference, moving up in groups of 10 we find that the means need to be 30 integers apart.

Here we adjust the sample sizes

library(tidyverse)
library(ggplot2)
library(MASS)
library(dplyr)

# Example data set of Eye- Diameter at 72hpf
wt_e <- rnorm(n=10 , mean=290 , sd=50)


null_e <-rnorm( n=10 , mean=240 , sd=50)




eye_size_df <- data.frame(WTeyesize = wt_e , Nulleyesize = null_e)

str(eye_size_df)

## 'data.frame':    10 obs. of  2 variables:
##  $ WTeyesize  : num  225 336 225 226 347 ...
##  $ Nulleyesize: num  230 305 233 244 299 ...

long_df <- eye_size_df %>%
      pivot_longer(cols=WTeyesize:Nulleyesize, names_to= "Genotype" , values_to= "EyeSize")

head(long_df)

## # A tibble: 6 x 2
##   Genotype    EyeSize
##   <chr>         <dbl>
## 1 WTeyesize      225.
## 2 Nulleyesize    230.
## 3 WTeyesize      336.
## 4 Nulleyesize    305.
## 5 WTeyesize      225.
## 6 Nulleyesize    233.

test<-t.test(wt_e,null_e, paired=TRUE)

print(test)

## 
##  Paired t-test
## 
## data:  wt_e and null_e
## t = 1.8093, df = 9, p-value = 0.1039
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.65556 86.80566
## sample estimates:
## mean of the differences 
##                38.57505

#p-value(5):0.2921-0.2338
#p-value(10):0.07516-0.001271
#p-value(15):0.0001277

p1 <- ggplot(data=long_df, aes(x=Genotype, y=EyeSize)) + geom_point(color="white" , fill="purple", size=0.2) +
  stat_boxplot()

print(p1)

we find that the smallest sample size for an appropriate p-value would be a sample of 10 fish each, anything lower allows for significance to be lost