4 Importance des variables

4.1 Notions d’importance

# Random forest with default tuning parameters, predicting `type` from every
# other column of spamApp (defined earlier in the file, outside this section).
# importance = TRUE makes randomForest() also compute the permutation-based
# importance measures needed by the plot below.
RFDefImp <- randomForest(
  type ~ .,
  data = spamApp,
  importance = TRUE
)

# Importance plot restricted to measure type 1 (mean decrease in accuracy),
# left unscaled. n.var is set to the number of predictors (all columns minus
# the response) so that every variable is displayed.
varImpPlot(
  RFDefImp,
  type = 1,
  scale = FALSE,
  n.var = ncol(spamApp) - 1,
  main = "Importance des variables",
  cex = 0.8
)

4.3 Diversité des arbres et importance des variables

# Bagging of stumps: mtry equals the number of predictors, so every split
# considers all variables, and maxnodes = 2 caps each tree at two terminal
# nodes (a single split).
bagStumpImp <- randomForest(
  type ~ .,
  data = spamApp,
  mtry = ncol(spamApp) - 1,
  maxnodes = 2,
  importance = TRUE
)
# Show the 20 highest-ranked variables, unscaled importance of type 1.
varImpPlot(
  bagStumpImp,
  type = 1,
  scale = FALSE,
  n.var = 20,
  main = "Importance des variables",
  cex = 0.8
)

# Random forest of stumps: same two-leaf trees, but mtry is left at its
# default so each split only sees a random subset of the predictors.
RFStumpImp <- randomForest(
  type ~ .,
  data = spamApp,
  maxnodes = 2,
  importance = TRUE
)
varImpPlot(
  RFStumpImp,
  type = 1,
  scale = FALSE,
  n.var = 20,
  main = "Importance des variables",
  cex = 0.8
)

4.5 Exemples

4.5.1 Une illustration par simulation en régression

library(mlbench)

# Simulate 500 observations from the Friedman 1 benchmark and gather the
# inputs and the response into a single data frame (response column: y).
fried1Simu <- mlbench.friedman1(n = 500)
fried1Data <- data.frame(
  fried1Simu$x,
  y = fried1Simu$y
)

# Regression forest on all inputs, with importance measures computed.
fried1RFimp <- randomForest(
  y ~ .,
  data = fried1Data,
  importance = TRUE
)

# Unscaled importance of type 1 for every input.
varImpPlot(
  fried1RFimp,
  type = 1,
  scale = FALSE,
  main = "Importance des variables"
)

# Partial dependence of the forest prediction on the input X1.
partialPlot(
  fried1RFimp,
  pred.data = fried1Data,
  x.var = "X1",
  main = "X1"
)

4.5.2 Prédire la concentration d’ozone

library("randomForest")

# Ozone data set from the mlbench package.
data("Ozone", package = "mlbench")

# Regression forest with V4 as the response and all other columns as
# predictors. na.action = na.omit drops the rows containing missing values
# before fitting; importance = TRUE requests the importance measures.
OzRFDefImp <- randomForest(
  V4 ~ .,
  data = Ozone,
  na.action = na.omit,
  importance = TRUE
)

# Unscaled importance of type 1 for the predictors.
varImpPlot(
  OzRFDefImp,
  type = 1,
  scale = FALSE,
  main = "Importance des variables"
)

4.5.3 Analyser des données génomiques

library(randomForest)

# vac18 data set from the mixOmics package: `genes` is used as the predictor
# matrix and `stimulation` as the class label (presumably one row per sample
# and one column per gene -- confirm against the mixOmics documentation).
data("vac18", package = "mixOmics")
geneExpr <- vac18$genes
stimu <- vac18$stimulation

# Classification forest trying about a third of the variables at each split.
# mtry is a count of variables, so pass an explicit whole number instead of
# the fractional value ncol(geneExpr) / 3.
vacRFDefImp <- randomForest(
  x = geneExpr,
  y = stimu,
  mtry = round(ncol(geneExpr) / 3),
  importance = TRUE
)

# Unscaled importance of type 1; the plot title is left at its default,
# which varImpPlot() derives from the name of the fitted object.
varImpPlot(vacRFDefImp, type = 1, scale = FALSE, cex = 0.8)

# Raw (unscaled) mean decrease in accuracy for every variable. The column is
# selected by name rather than by the position nlevels(stimu) + 1, so the
# code no longer depends on the layout of the importance matrix and fails
# loudly if the forest is not a classification forest.
vacImp <- vacRFDefImp$importance[, "MeanDecreaseAccuracy"]

# Importance values in decreasing order, drawn as a line to show how quickly
# they fall off across variables.
plot(
  sort(vacImp, decreasing = TRUE),
  type = "l",
  xlab = "Variables",
  ylab = "Importance des variables"
)