J'ai un data.table comme celui-ci ci-dessous qui contient des groupes de chaînes d'ADN (colonne ref_seq) et une série de mutations (caractères dans la colonne alt) avec leur emplacement relatif le long des chaînes ({{ Colonne X2}}).

Parce que pour les mutations n , le nombre de chaînes résultant est de 2 ^ n , ce que je voudrais faire est de faire une fonction générale qui permet de générer toutes les combinaisons des alt caractères remplacés à la snp_position dans la chaîne ref_seq, tout en conservant (de préférence) cette structure data.frame

> dt
   seqnames hap_start  hap_end      snp alt snp_position numb_snps_per_hap
1:     chr1  19600274 19600443 19600324   G           50                 4
2:     chr1  19600274 19600443 19600378   C          104                 4
3:     chr1  19600274 19600443 19600389   A          115                 4
4:     chr1  19600274 19600443 19600396   C          122                 4
5:    chr10   5482730  5482899  5482790   C           60                 4
6:    chr10   5482730  5482899  5482830   A          100                 4
7:    chr10   5482730  5482899  5482839   A          109                 4
8:    chr10   5482730  5482899  5482843   A          113                 4
                                                                                                                                                                      ref_seq
1: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
2: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
3: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
4: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
5: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
6: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
7: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
8: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
> dput(dt)
structure(list(seqnames = c("chr1", "chr1", "chr1", "chr1", "chr10", 
"chr10", "chr10", "chr10"), hap_start = c(19600274, 19600274, 
19600274, 19600274, 5482730, 5482730, 5482730, 5482730), hap_end = c(19600443, 
19600443, 19600443, 19600443, 5482899, 5482899, 5482899, 5482899
), snp = c(19600324L, 19600378L, 19600389L, 19600396L, 5482790L, 
5482830L, 5482839L, 5482843L), alt = c("G", "C", "A", "C", "C", 
"A", "A", "A"), snp_position = c(50, 104, 115, 122, 60, 100, 
109, 113), numb_snps_per_hap = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L), ref_seq = c("CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC", 
"CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC", 
"CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC", 
"CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC", 
"TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT", 
"TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT", 
"TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT", 
"TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT"
)), row.names = c(NA, -8L), class = c("data.table", "data.frame"
), sorted = "seqnames", .internal.selfref = <pointer: 0x1d50f80>)

Sortie désirée

> dt_final
    seqnames hap_start  hap_end
 1:     chr1  19600274 19600443
 2:     chr1  19600274 19600443
 3:     chr1  19600274 19600443
 4:     chr1  19600274 19600443
 5:     chr1  19600274 19600443
 6:     chr1  19600274 19600443
 7:     chr1  19600274 19600443
 8:     chr1  19600274 19600443
 9:     chr1  19600274 19600443
10:     chr1  19600274 19600443
11:     chr1  19600274 19600443
12:     chr1  19600274 19600443
13:     chr1  19600274 19600443
14:     chr1  19600274 19600443
15:     chr1  19600274 19600443
16:     chr1  19600274 19600443
17:    chr10   5482730  5482899
18:    chr10   5482730  5482899
19:    chr10   5482730  5482899
20:    chr10   5482730  5482899
21:    chr10   5482730  5482899
22:    chr10   5482730  5482899
23:    chr10   5482730  5482899
24:    chr10   5482730  5482899
25:    chr10   5482730  5482899
26:    chr10   5482730  5482899
27:    chr10   5482730  5482899
28:    chr10   5482730  5482899
29:    chr10   5482730  5482899
30:    chr10   5482730  5482899
31:    chr10   5482730  5482899
32:    chr10   5482730  5482899
    seqnames hap_start  hap_end
                                                                                                                                                                      sequence
 1: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 2: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 3: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAAGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 4: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAGGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 5: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 6: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 7: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 8: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
 9: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
10: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAGGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
11: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAAGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
12: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAGGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
13: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACCGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
14: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATTCGGAGCCCCAAGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
15: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAGGCCGCCCAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
16: CCAGGGAGAGCGGGGCAGCGCACGGCTGGTGACAGGCTCAGCTCTGCACGGTGCAGAGGGAGGATCAGGGGCCACTGTTACCTCTGCAGTCGCTGCTGCCCATCCGGAGCCCCAAGCCGCCAAGGATGGTCTCGGACTGGCCGTCGCTGTACAGGAAGGCCGTGTCTATC
17: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
18: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
19: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAATGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
20: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAGTGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
21: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
22: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
23: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
24: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
25: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
26: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAGTGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
27: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAATGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
28: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAGTGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
29: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTGTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
30: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTTACTAGTGAATGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
31: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAGTGAATCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
32: TGCCCCTCTCTTGTTCATTTGCCACCACTACTATGTAAGGCATTGCCTCACTACTCTGTCTCTCATCCCTATTCATACCTGTAGTCCTTATCACCATTTAACTAGTGAATGAGTCTGTTTCAGAGTCCCATCCTGTGTTTCTGATCTCTAGGACCAATCTCAGTGTCCAT
                                                                                                                                                                      sequence

Savez-vous comment faire cela en R?

Merci

0
Davide 27 août 2020 à 11:55

2 réponses

Meilleure réponse

Voici une option utilisant des exemples de données d'entrée simplifiés. Beaucoup d'étapes pour entrer trop dans les détails, essayez de l'exécuter petit à petit pour comprendre chaque fonction.

Fondamentalement, je divise sur refseq , puis je divise refseq en lettres, puis je mets à jour les combinaisons de positions avec alt , puis je les recolle.

df <- data.frame(alt = c("A","B", "C", "D", "E"),
                 snp_position = c(2, 4, 1, 3, 5),
                 refseq = c("-----", "-----",
                            "=====", "=====", "====="),
                 stringsAsFactors = FALSE)

res <- stack(
  lapply(split(df, df$refseq), function(i){
    s <- strsplit(i$refseq[ 1 ], "")[[ 1 ]]
    pos <- i$snp_position
    alt <- i$alt
    unlist(
      lapply(seq(nrow(i)), function(j) 
        combn(pos, j,
              FUN = function(k){
                res <- s
                res[ k ] <- alt[ which(pos %in% k) ]
                paste(res, collapse = "")
              }))
    )
  }))
colnames(res) <- c("sequence", "refseq")

res
#      sequence refseq
#   1     -A---  -----
#   2     ---B-  -----
#   3     -A-B-  -----
#   4     C====  =====
#   5     ==D==  =====
#   6     ====E  =====
#   7     C=D==  =====
#   8     C===E  =====
#   9     ==D=E  =====
#   10    C=D=E  =====
0
zx8754 27 août 2020 à 12:34

Je n'ai utilisé que les deux premières lignes et les colonnes d'intérêt. J'ai également réduit la taille du refseq et utilisé snp_position = 2 pour une meilleure visualisation. Cependant, cela fonctionnera de la même manière avec vos données complètes dès que les noms de colonnes seront identiques.

Contribution:

df <- data.frame(alt=c("G","A"),snp_position=c(2,2),refseq=c("CCAGGGAGAGCGGGGCAGCGC",
                                                                "TCAGGGAGAGCGGGGCAGCGC"))

Une fonction:

  seq_mut <- function(refseq,snp_position,Mutated_allele){
  
  combinations <- c("A","T","C","G")
  WT_allele <- substr(refseq, snp_position, snp_position)
  combinations <- combinations[combinations!=WT_allele]
  
  refseq2 <- refseq
  substr(refseq2, snp_position, snp_position) <- Mutated_allele
  combinations <- combinations[combinations!=Mutated_allele]
  
  seq <- refseq2
  for(variant in combinations){
    refseq2 <- refseq
    substr(refseq2, snp_position, snp_position) <- variant
    seq <- paste0(seq,";",refseq2)
  }
  
  return(seq)
}

Avec dplyr:

library(dplyr)
df <- df %>% rowwise() %>% mutate(ref_seq_mut=seq_mut(refseq,snp_position,alt))

Tidyr:

library(tidyr)
df <- df %>% separate(ref_seq_mut,into=c("V1","V2","V3"))%>% 
  pivot_longer(cols=refseq:V3,values_to="refseq") %>% select(-name)

Production:

alt   snp_position refseq               
  <chr>        <dbl> <chr>                
1 G                2 CCAGGGAGAGCGGGGCAGCGC
2 G                2 CGAGGGAGAGCGGGGCAGCGC
3 G                2 CAAGGGAGAGCGGGGCAGCGC
4 G                2 CTAGGGAGAGCGGGGCAGCGC
5 A                2 TCAGGGAGAGCGGGGCAGCGC
6 A                2 TAAGGGAGAGCGGGGCAGCGC
7 A                2 TTAGGGAGAGCGGGGCAGCGC
8 A                2 TGAGGGAGAGCGGGGCAGCGC
1
Flo 27 août 2020 à 11:01