J'ai deux data frames, df_1 et df_2. Je dois sélectionner certaines lignes de df_1 au hasard, puis fusionner les lignes restantes de df_1 (celles qui n'ont pas été sélectionnées) avec df_2. J'utilise ce code ...

0
Akib62 14 mars 2021 à 20:32

1 réponse

Meilleure réponse

Tirez votre échantillon à l'aide de numéros de ligne aléatoires, puis utilisez l'indexation négative (le signe `-` devant les indices) pour obtenir le complément, c'est-à-dire les lignes non sélectionnées :

# Example data: 20 rows with three factor columns (nodeA, nodeB, scr).
# Built inside local() so the helper and the level vectors do not leak into
# the calling environment; only df_1 is created.
df_1 <- local({
  # Rebuild a factor from integer codes and its ordered set of levels.
  from_codes <- function(codes, lv) factor(lv[codes], levels = lv)

  node_a_levels <- c(
    "ID00309", "ID00361", "ID00541", "ID00570", "ID00615",
    "ID00696", "ID00762", "ID01200", "ID05109"
  )
  node_b_levels <- c(
    "ID00361", "ID00541", "ID00570", "ID00615", "ID00696",
    "ID01200", "ID05109", "ID11641", "ID11691"
  )
  # scr holds its values as text levels (a factor, not a numeric column),
  # listed here in increasing order.
  scr_levels <- c(
    "1.85284606048794", "1.90444166064472", "1.90762235378507",
    "1.94364188077133", "1.95883206119256", "2.08440437841349",
    "2.26408172709962", "2.3223132020942", "2.46120775935034",
    "2.49647215035727", "2.50432367561777", "2.57541320006514",
    "2.65099330092281", "2.75209155741549", "2.93717640337986",
    "2.99596628688011", "3.21209741517806", "3.21997803385465",
    "3.48788394772132", "3.81389707587156"
  )

  node_a_codes <- c(
    4L, 2L, 1L, 1L, 1L, 4L, 1L, 9L, 3L, 4L,
    2L, 8L, 2L, 1L, 5L, 7L, 3L, 6L, 2L, 1L
  )
  node_b_codes <- c(
    8L, 3L, 3L, 1L, 2L, 7L, 9L, 8L, 8L, 6L,
    9L, 7L, 4L, 4L, 6L, 9L, 6L, 7L, 5L, 5L
  )

  data.frame(
    nodeA = from_codes(node_a_codes, node_a_levels),
    nodeB = from_codes(node_b_codes, node_b_levels),
    # Codes 20:1 put the largest scr value in row 1 and the smallest in row 20.
    scr = from_codes(20:1, scr_levels)
  )
})

# Fix the RNG seed so the same rows are drawn on every run.
set.seed(9999)
# Draw 10 distinct row numbers of df_1 (sample.int samples without
# replacement by default).
Selected <- sample.int(nrow(df_1), 10)
# Data frames are indexed as [row, column]; leaving the column slot empty
# keeps every column. Rows come back in the order they were drawn.
test_dataset1 <- df_1[Selected, ]
# Keep every row that was NOT drawn, in original order. A logical mask is
# used instead of df_1[-Selected, ] because negating an empty index selects
# zero rows rather than all rows.
train_part_1 <- df_1[!(seq_len(nrow(df_1)) %in% Selected), ]

# Show the sampled rows. The `#>` lines are the captured console output; row
# names are the original df_1 row numbers, in the order they were drawn.
test_dataset1
#>      nodeA   nodeB              scr
#> 6  ID00570 ID05109 2.93717640337986
#> 9  ID00541 ID11641 2.57541320006514
#> 19 ID00361 ID00696 1.90444166064472
#> 3  ID00309 ID00570 3.21997803385465
#> 10 ID00570 ID01200 2.50432367561777
#> 2  ID00361 ID00570 3.48788394772132
#> 20 ID00309 ID00696 1.85284606048794
#> 8  ID05109 ID11641 2.65099330092281
#> 12 ID01200 ID05109 2.46120775935034
#> 18 ID00696 ID05109 1.90762235378507
# Show the remaining rows (those not drawn); they keep their original order.
train_part_1
#>      nodeA   nodeB              scr
#> 1  ID00570 ID11641 3.81389707587156
#> 4  ID00309 ID00361 3.21209741517806
#> 5  ID00309 ID00541 2.99596628688011
#> 7  ID00309 ID11691 2.75209155741549
#> 11 ID00361 ID11691 2.49647215035727
#> 13 ID00361 ID00615  2.3223132020942
#> 14 ID00309 ID00615 2.26408172709962
#> 15 ID00615 ID01200 2.08440437841349
#> 16 ID00762 ID11691 1.95883206119256
#> 17 ID00541 ID01200 1.94364188077133

Créé le 14/03/2021 par le package reprex (v1.0.0)

0
Jan 14 mars 2021 à 17:45