library(tidyverse)
library(e1071)
- Use
naiveBayes
to classify the iris
flowers into their three classes.
nb_iris <- naiveBayes(Species ~ ., data = iris)
- What information does the
$tables
from the fit contain?
nb_iris$tables %>% lapply(function(x){
colnames(x) <- c("mean", "sd")
x
})
$Sepal.Length
Sepal.Length
Y mean sd
setosa 5.006 0.3524897
versicolor 5.936 0.5161711
virginica 6.588 0.6358796
$Sepal.Width
Sepal.Width
Y mean sd
setosa 3.428 0.3790644
versicolor 2.770 0.3137983
virginica 2.974 0.3224966
$Petal.Length
Petal.Length
Y mean sd
setosa 1.462 0.1736640
versicolor 4.260 0.4699110
virginica 5.552 0.5518947
$Petal.Width
Petal.Width
Y mean sd
setosa 0.246 0.1053856
versicolor 1.326 0.1977527
virginica 2.026 0.2746501
- Try to identify the most informative variables for predicting the classes.
pars_iris <- nb_iris$tables %>%
lapply(function(x){
x %>% as.data.frame() %>%
mutate(Species = rownames(.)) %>%
set_names(c("mean", "sd", "species"))
}) %>%
bind_rows(.id = "variable")
pars_iris_ <- pars_iris %>%
mutate(x = list(seq(0, 12, len = 100))) %>%
unnest() %>%
mutate(d_x = dnorm(x = x, mean = mean , sd = sd))
ggplot(pars_iris_, aes(x = x, y = d_x, colour = species)) +
facet_wrap(~variable, scales = "free_y") + geom_line()
- Are the assumptions about normality satisfied for each of \(x_i\mid y\)?
qq_iris <- function(column){
par(mfrow = c(1,3))
for(i in unique(iris$Species)){
ii <- iris %>% filter(Species == i) %>% pull(column)
qqnorm(ii, main = i)
mtext(side = 3, line = 0.7, column, cex = 0.8)
qqline(ii)
}
}
qq_iris("Petal.Length")
qq_iris("Petal.Width")
qq_iris("Sepal.Length")
qq_iris("Sepal.Length")
- Can you visualise the decision boundaries in the
Petal
-plane (i.e x = Petal.Length, y = Petal.Width
)? Tip:* Create a grid (cf below), make the prediction for each* point in the grid and plot this.
iris_petal_grid <- iris %>%
expand(
Petal.Length = seq(min(Petal.Length), max(Petal.Length), len = 100),
Petal.Width = seq(min(Petal.Width), max(Petal.Width), len = 100)
) %>%
mutate(Sepal.Length = NA, Sepal.Width = NA)
iris_nb <- naiveBayes(Species ~ ., data = iris)
iris_petal_grid$pred <- predict(iris_nb, newdata = iris_petal_grid)
iris_petal_grid %>%
ggplot(aes(x = Petal.Length, y = Petal.Width, colour = pred)) + geom_point(alpha = 0.5) +
geom_point(data = iris, aes(fill = Species, colour = NULL), pch = 21, colour = "black")