# Load the resampling results; the parse message that follows lists the
# columns read (name, lda_error, qda_error, seed).
errors <- read_csv(file = "data/Resampling.csv")
## Parsed with column specification:
## cols(
## name = col_character(),
## lda_error = col_double(),
## qda_error = col_double(),
## seed = col_double()
## )
# Reshape to long form: stack the LDA and QDA error columns into one vector
# `e`, with a parallel `type` label recording which column each value came
# from ("lda" repeated once per LDA value, then "qda" once per QDA value).
e <- c(errors$lda_error, errors$qda_error)
type <- rep(
  c("lda", "qda"),
  times = lengths(list(errors$lda_error, errors$qda_error))
)
errors_tidy <- data.frame(e = e, type = type)
# Histogram of the error values, filled by model type. position = "identity"
# overlays the two groups instead of stacking them, and the alpha setting
# makes the fills semi-transparent so both remain visible.
ggplot(data = errors_tidy, mapping = aes(x = e, fill = type)) +
  geom_histogram(position = "identity", bins = 15, alpha = 0.75) +
  labs(title = "LDA vs QDA Error", x = "Error") +
  theme_bw()
# Density curves of the same error values, one semi-transparent fill per
# model type.
ggplot(data = errors_tidy, mapping = aes(x = e, fill = type)) +
  geom_density(alpha = 0.5) +
  labs(title = "LDA vs QDA Error", x = "Error") +
  theme_bw()
# Mean and standard deviation of the error for each model type.
errors_tidy %>%
  group_by(type) %>%
  summarize(
    mean = mean(e),
    sd = sd(e)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
## type mean sd
## <fct> <dbl> <dbl>
## 1 lda 0.204 0.0347
## 2 qda 0.193 0.0310
# Row-wise paired comparison: diff_error = qda_error - lda_error, so a
# negative value means QDA had the lower error on that row and a positive
# value means LDA did. Rows where the two errors are equal fall into neither
# count.
errors %>%
  mutate(diff_error = qda_error - lda_error) %>%
  summarize(
    mean_diff = mean(diff_error),
    QDA_better = sum(diff_error < 0),
    LDA_better = sum(diff_error > 0)
  )
## # A tibble: 1 x 3
## mean_diff QDA_better LDA_better
## <dbl> <int> <int>
## 1 -0.0113 29 13