# 5 Sélection de variables

## 5.4 Le package `VSURF`

```{r vsurfLoad}
# Attach VSURF and load the `toys` dataset used in the chunks below.
library(VSURF)
data("toys")
```

```{r vsurfToysglob}
# Seed the RNG, then run VSURF() on toys$x / toys$y with mtry = 100.
set.seed(3101318)
vsurfToys <- VSURF(
  toys$x,
  toys$y,
  mtry = 100
)
```

```{r vsurfToysSumm}
# Summarise the VSURF() result.
summary(vsurfToys)
```

```{r vsurfToysPlot, fig.cap="Illustration of the results of VSURF applied on toys data"}
# Plot the VSURF() result with the default settings.
plot(vsurfToys)
```

```{r vsurfThres}
# Same seed and same mtry as the VSURF() call above, but only
# VSURF_thres() is run here.
set.seed(3101318)
vsurfThresToys <- VSURF_thres(
  toys$x,
  toys$y,
  mtry = 100
)
```

```{r vsurfThresVarsel}
# Inspect the `varselect.thres` component of the VSURF_thres() result.
vsurfThresToys$varselect.thres
```

```{r vsurfToysPlotThres, fig.cap="Zoom du graphique en haut à droite de la figure 5.1."}
# Plot only the "thres" step, with imp.mean switched off and the y-axis
# limited to [0, 2e-4].
plot(
  vsurfToys,
  step = "thres",
  imp.mean = FALSE,
  ylim = c(0, 2e-4)
)
```

```{r vsurfInterp}
# Run VSURF_interp(), passing the `varselect.thres` indices as `vars`.
vsurfInterpToys <- VSURF_interp(
  toys$x,
  toys$y,
  vars = vsurfThresToys$varselect.thres
)
```

```{r vsurfInterpRes}
# Inspect the `varselect.interp` component of the VSURF_interp() result.
vsurfInterpToys$varselect.interp
```

```{r vsurfPred}
# Run VSURF_pred(), feeding it the `err.interp` and `varselect.interp`
# components of the VSURF_interp() result.
vsurfPredToys <- VSURF_pred(
  toys$x,
  toys$y,
  err.interp = vsurfInterpToys$err.interp,
  varselect.interp = vsurfInterpToys$varselect.interp
)
```

```{r vsurfPredRes}
# Inspect the `varselect.pred` component of the VSURF_pred() result.
vsurfPredToys$varselect.pred
```

```{r vsurfSpamCode}
# `spamApp` is not created in this section; it must already exist in
# the session. The seed is set with the "L'Ecuyer-CMRG" generator before
# the parallel run on 3 cores.
# NOTE(review): clusterType = "FORK" presumably requires a platform that
# supports process forking (i.e. not Windows) -- confirm.
set.seed(923321, kind = "L'Ecuyer-CMRG")
vsurfSpam <- VSURF(
  type ~ .,
  spamApp,
  parallel = TRUE,
  ncores = 3,
  clusterType = "FORK"
)
```

```{r vsurfSpamRes, fig.cap="Résultats de `VSURF`, données `spam`."}
# Summary and plot, then the `spamApp` column names found at the
# `varselect.interp` and `varselect.pred` indices.
summary(vsurfSpam)
plot(vsurfSpam)
colnames(spamApp[vsurfSpam$varselect.interp])
colnames(spamApp[vsurfSpam$varselect.pred])
```

```{r vsurfSpamContinued}
# Inspect the `mean.jump` component of the spam result.
vsurfSpam$mean.jump
```

```{r vsurfSpamTuneCode}
# Run VSURF_pred() alone with nmj = 15, reusing `err.interp` and
# `varselect.interp` from `vsurfSpam`.
set.seed(945834)
vsurfSpamPred <- VSURF_pred(
  type ~ .,
  spamApp,
  nmj = 15,
  err.interp = vsurfSpam$err.interp,
  varselect.interp = vsurfSpam$varselect.interp
)
```

```{r vsurfSpamTuneRes}
# `spamApp` column names at the indices kept with nmj = 15.
colnames(spamApp[vsurfSpamPred$varselect.pred])
```

## 5.5 Réglage des paramètres pour la sélection

```{r vsurfStump}
# VSURF() on the toys data again, this time with the extra argument
# maxnodes = 2; then show the summary and both selections.
vsurfToysStump <- VSURF(
  toys$x,
  toys$y,
  mtry = 100,
  maxnodes = 2
)
summary(vsurfToysStump)
vsurfToysStump$varselect.interp
vsurfToysStump$varselect.pred
```

```{r vsurfThresTuned}
# Apply tune() to the VSURF_thres() result with nmin = 3.
vsurfThresToysTuned <- tune(
  vsurfThresToys,
  nmin = 3
)
vsurfThresToysTuned$varselect.thres
```

```{r vsurfInterTuned}
# Apply tune() to the VSURF_interp() result with nsd = 5.
vsurfInterpToysTuned <- tune(
  vsurfInterpToys,
  nsd = 5
)
vsurfInterpToysTuned$varselect.interp
```

```{r vsurfPredTuned}
# Same VSURF_pred() inputs as in chunk `vsurfPred`, with nmj = 3 added.
vsurfPredToysTuned <- VSURF_pred(
  toys$x,
  toys$y,
  err.interp = vsurfInterpToys$err.interp,
  varselect.interp = vsurfInterpToys$varselect.interp,
  nmj = 3
)
vsurfPredToysTuned$varselect.pred
```

## 5.6 Exemples

### 5.6.1 Prédire la concentration d’ozone

```{r vsurfOzLoad}
# Load the `Ozone` dataset shipped with the mlbench package.
library(VSURF)
data("Ozone", package = "mlbench")
```

```{r vsurfOzCode}
# V4 is the response, every other column a predictor; rows with missing
# values are dropped through na.action = na.omit.
set.seed(303601)
OzVSURF <- VSURF(
  V4 ~ .,
  data = Ozone,
  na.action = na.omit
)
```

```{r vsurfOzRes, fig.cap="Résultats de `VSURF`, données `Ozone`."}
# Summary, then the plot with var.names = TRUE.
summary(OzVSURF)
plot(OzVSURF, var.names = TRUE)
```

```{r vsurfOzVarThres}
# `number` holds 1..13 without 4 -- presumably the original `Ozone`
# column numbers of the predictors once V4 is set aside (confirm).
# Indexing it with a selection translates positions into those numbers.
number <- c(1:3, 5:13)
number[OzVSURF$varselect.thres]
```

```{r vsurfOzVarInterp}
# Same translation for the `varselect.interp` indices.
number[OzVSURF$varselect.interp]
```

```{r vsurfOzVarPred}
# Same translation for the `varselect.pred` indices.
number[OzVSURF$varselect.pred]
```

### 5.6.2 Analyser des données génomiques

```{r vsurfVac18Load}
# Load `vac18` from mixOmics and keep its `genes` and `stimulation`
# components, used below as x and y respectively.
library(VSURF)
data("vac18", package = "mixOmics")
geneExpr <- vac18$genes
stimu <- vac18$stimulation
```

```{r vsurfVac18seqCode}
# Non-parallel VSURF() run on the vac18 data.
set.seed(481933)
vacVSURF <- VSURF(
  x = geneExpr,
  y = stimu
)
```

```{r vsurfVac18seqRes, fig.cap="Graphiques illustrant les résultats de `VSURF()`, données `Vac18`."}
# Summary and plot of the non-parallel run.
summary(vacVSURF)
plot(vacVSURF)
```

```{r vsurfVac18probeSelPred}
# Column names of `geneExpr` at the `varselect.pred` indices.
probeSelPred <- colnames(geneExpr)[vacVSURF$varselect.pred]
probeSelPred
```

```{r vsurfVac18Code}
# Parallel VSURF() run on the same data: 3 cores, "FORK" cluster, seeded
# with the "L'Ecuyer-CMRG" generator.
set.seed(627408, kind = "L'Ecuyer-CMRG")
vacVSURFpara <- VSURF(
  x = geneExpr,
  y = stimu,
  parallel = TRUE,
  ncores = 3,
  clusterType = "FORK"
)
```

```{r vsurfVac18Res}
# Summary of the parallel run.
summary(vacVSURFpara)
```