4 Importance des variables

4.1 Notions d’importance

# Fit a random forest with default settings on the spam training data.
# importance = TRUE asks randomForest to also compute the permutation-based
# importance measures needed by the plot below.
RFDefImp <- randomForest(
  type ~ .,
  data = spamApp,
  importance = TRUE
)

# Plot the raw (scale = FALSE) mean decrease in accuracy (type = 1).
# n.var is set to every column of spamApp but one, i.e. all predictors
# assuming `type` is the only non-predictor column -- TODO confirm.
varImpPlot(
  RFDefImp,
  type = 1,
  scale = FALSE,
  n.var = ncol(spamApp) - 1,
  cex = 0.8,
  main = "Importance des variables"
)

4.3 Diversité des arbres et importance des variables

# Bagging of stumps: every column but one is a split candidate (mtry), and
# each tree is limited to two terminal nodes (maxnodes = 2).
bagStumpImp <- randomForest(
  type ~ .,
  data = spamApp,
  mtry = ncol(spamApp) - 1,
  maxnodes = 2,
  importance = TRUE
)

# Show the 20 variables with the largest unscaled mean decrease in accuracy.
varImpPlot(
  bagStumpImp,
  type = 1,
  scale = FALSE,
  n.var = 20,
  cex = 0.8,
  main = "Importance des variables"
)

# Same two-leaf trees, but with the default mtry, so split candidates are
# drawn at random at each node.
RFStumpImp <- randomForest(
  type ~ .,
  data = spamApp,
  maxnodes = 2,
  importance = TRUE
)

# Same plot as above, for comparison with the bagged stumps.
varImpPlot(
  RFStumpImp,
  type = 1,
  scale = FALSE,
  n.var = 20,
  cex = 0.8,
  main = "Importance des variables"
)

4.5 Exemples

4.5.1 Une illustration par simulation en régression

library(mlbench)
# Simulate 500 observations from the Friedman-1 benchmark (mlbench) and
# gather the inputs and the response into a single data frame.
fried1Simu <- mlbench.friedman1(n = 500)
fried1Data <- data.frame(
  fried1Simu$x,
  y = fried1Simu$y
)

# Regression forest of y on all simulated inputs, keeping the importance
# measures.
fried1RFimp <- randomForest(
  y ~ .,
  data = fried1Data,
  importance = TRUE
)

# Unscaled permutation importance (type = 1) of each input.
varImpPlot(
  fried1RFimp,
  type = 1,
  scale = FALSE,
  main = "Importance des variables"
)

# Partial dependence of the forest prediction on the input named "X1".
partialPlot(
  fried1RFimp,
  fried1Data,
  x.var = "X1",
  main = "X1"
)

4.5.2 Prédire la concentration d’ozone

library("randomForest")
# Load the Ozone data set shipped with the mlbench package.
data("Ozone", package = "mlbench")

# Regression forest with V4 as the response and every other column as a
# predictor; rows containing missing values are dropped (na.omit) before
# fitting.
OzRFDefImp <- randomForest(
  V4 ~ .,
  data = Ozone,
  na.action = na.omit,
  importance = TRUE
)

# Unscaled permutation importance (type = 1) of the predictors.
varImpPlot(
  OzRFDefImp,
  type = 1,
  scale = FALSE,
  main = "Importance des variables"
)

4.5.3 Analyser des données génomiques

library(randomForest)
# Load the vac18 data set from mixOmics and pull out the gene-expression
# table (predictors) and the stimulation labels (response).
data("vac18", package = "mixOmics")
geneExpr <- vac18$genes
stimu <- vac18$stimulation

# Forest fitted through the x/y interface, with one third of the genes used
# as split candidates at each node (mtry).
vacRFDefImp <- randomForest(
  x = geneExpr,
  y = stimu,
  mtry = ncol(geneExpr) / 3,
  importance = TRUE
)

# Unscaled permutation importance (type = 1) of the top-ranked genes.
varImpPlot(
  vacRFDefImp,
  type = 1,
  scale = FALSE,
  cex = 0.8
)

# Overall mean decrease in accuracy: in the importance matrix this column
# comes right after the per-class columns (one per level of stimu).
vacImp <- vacRFDefImp$importance[, "MeanDecreaseAccuracy"]

# Importance values in decreasing order, drawn as a curve.
plot(
  sort(vacImp, decreasing = TRUE),
  type = "l",
  xlab = "Variables",
  ylab = "Importance des variables"
)