# 2 Arbres CART

# 2.4 Le package rpart

# CART trees on the spam training set with rpart: a default tree, a maximal
# tree, two pruned versions of the maximal tree (minimum cross-validated error
# and 1-SE rule), then the test and training error rates of the maximal tree.
# NOTE(review): spamApp and spamTest are not defined in this section; they are
# presumably the training / test splits of the spam data -- confirm upstream.
library("rpart")

# Tree grown with rpart's default control settings.
treeDef <- rpart(formula = type ~ ., data = spamApp)
print(treeDef, digits = 2)
plot(treeDef)
text(treeDef, xpd = TRUE)

# Maximal tree: nodes may be split down to 2 observations (minsplit = 2) and
# no complexity penalty is applied (cp = 0). The seed is set right before the
# fit, presumably so the cross-validated columns of cptable are reproducible.
set.seed(601334)
treeMax <- rpart(
  formula = type ~ .,
  data = spamApp,
  minsplit = 2,
  cp = 0
)
plot(treeMax)
spamCpTable <- treeMax$cptable
spamCpTable
plotcp(treeMax)

# Pruning at the cp value whose cross-validated error ("xerror") is minimal.
spamIndMin <- which.min(spamCpTable[, "xerror"])
cpOpt <- spamCpTable[spamIndMin, "CP"]
treeOpt <- prune(treeMax, cp = cpOpt)
plot(treeOpt)
text(treeOpt, xpd = TRUE, cex = 0.8)

# 1-SE rule: threshold = minimal xerror + its standard error ("xstd"); keep
# the first row of cptable whose xerror does not exceed that threshold.
thres1SE <- sum(spamCpTable[spamIndMin, c("xerror", "xstd")])
spamInd1SE <- which(spamCpTable[, "xerror"] <= thres1SE)[1L]
cp1SE <- spamCpTable[spamInd1SE, "CP"]
tree1SE <- prune(treeMax, cp = cp1SE)
plot(tree1SE)
text(tree1SE, xpd = TRUE, cex = 0.8)

# Misclassification rates of the maximal tree: test set, then training set.
predTestTreeMax <- predict(treeMax, spamTest, type = "class")
errTestTreeMax <- mean(predTestTreeMax != spamTest$type)
predEmpTreeMax <- predict(treeMax, spamApp, type = "class")
errEmpTreeMax <- mean(predEmpTreeMax != spamApp$type)

# 2.5 Découpes concurrentes et de substitution

# 2.5.2 Découpes de substitution

# Depth-one tree (maxdepth = 1, i.e. a single split of the root) fitted on the
# spam training set; summary() then prints the detailed description of the fit.
# NOTE(review): spamApp is defined outside this section.
treeStump <- rpart(
  formula = type ~ .,
  data = spamApp,
  maxdepth = 1
)
summary(treeStump)

# 2.5.3 Interprétabilité

# Bar chart of the variable importance scores stored in the maximal tree
# (treeMax is fitted earlier in the script).
# The bottom margin is enlarged to leave room for the perpendicular axis
# labels (las = 2). par() returns the previous settings, which are restored
# right after the plot so the modified margins do not leak into later figures.
oldPar <- par(mar = c(7, 3, 1, 1) + 0.1)
barplot(treeMax$variable.importance, las = 2, cex.names = 0.8)
par(oldPar)

# 2.6 Exemples

# 2.6.1 Prédire la concentration d’ozone

# Regression trees for the Ozone data (mlbench package), with V4 as response
# and all remaining columns as predictors: default tree, maximal tree, and the
# maximal tree pruned at the cp value minimising the cross-validated error.
library(rpart)
data("Ozone", package = "mlbench")

# Tree grown with rpart's default control settings.
OzTreeDef <- rpart(formula = V4 ~ ., data = Ozone)
print(OzTreeDef, digits = 3)
plot(OzTreeDef)
text(OzTreeDef, xpd = TRUE, cex = 0.9)

# Maximal tree (minsplit = 2, cp = 0). The seed is set right before the fit,
# presumably so the cross-validated columns of cptable are reproducible.
set.seed(727325)
OzTreeMax <- rpart(
  formula = V4 ~ .,
  data = Ozone,
  minsplit = 2,
  cp = 0
)
plotcp(OzTreeMax)

# Row of cptable with the smallest cross-validated error ("xerror"), and the
# complexity parameter ("CP") found on that row, used for pruning.
OzCpTable <- OzTreeMax$cptable
OzIndcpOpt <- which.min(OzCpTable[, "xerror"])
OzcpOpt <- OzCpTable[OzIndcpOpt, "CP"]
OzTreeOpt <- prune(OzTreeMax, cp = OzcpOpt)
plot(OzTreeOpt)
text(OzTreeOpt, xpd = TRUE)

# 2.6.2 Analyser des données génomiques

# Classification trees for the vac18 data (mixOmics package): the columns of
# vac18$genes are the predictors and vac18$stimulation, stored as `stimu`, is
# the response. Fits a default tree, a maximal tree with rpart's default
# cross-validation, a maximal tree with leave-one-out cross-validation, and
# prunes the latter at the cp value minimising the cross-validated error.
library(rpart)
data("vac18", package = "mixOmics")
VAC18 <- data.frame(vac18$genes, stimu = vac18$stimulation)

# Tree grown with rpart's default control settings.
VacTreeDef <- rpart(stimu ~ ., data = VAC18)
print(VacTreeDef)
plot(VacTreeDef)
text(VacTreeDef, use.n = TRUE, xpd = TRUE)

# Maximal tree (minsplit = 2, cp = 0) with the default number of
# cross-validation folds.
set.seed(788182)
VacTreeMax <- rpart(stimu ~ ., data = VAC18, minsplit = 2, cp = 0)
plot(VacTreeMax)
text(VacTreeMax, use.n = TRUE, xpd = TRUE)

# Same maximal tree, but with as many folds as observations
# (xval = nrow(VAC18)), i.e. leave-one-out cross-validation.
set.seed(413745)
VacTreeMaxLoo <- rpart(
  stimu ~ .,
  data = VAC18,
  minsplit = 2,
  cp = 0,
  xval = nrow(VAC18)
)

# Side-by-side comparison of the two cross-validation curves. par() returns
# the previous layout, which is restored afterwards so that the pruned tree
# below is drawn on a full page instead of in one half of a 1 x 2 layout.
oldPar <- par(mfrow = c(1, 2))
plotcp(VacTreeMax)
plotcp(VacTreeMaxLoo)
par(oldPar)

# Prune the leave-one-out tree at the complexity parameter ("CP") of the
# cptable row with the smallest cross-validated error ("xerror").
VacIndcpOpt <- which.min(VacTreeMaxLoo$cptable[, "xerror"])
VaccpOpt <- VacTreeMaxLoo$cptable[VacIndcpOpt, "CP"]
VacTreeOpt <- prune(VacTreeMaxLoo, cp = VaccpOpt)
plot(VacTreeOpt)
text(VacTreeOpt, use.n = TRUE, xpd = TRUE)