A ModRNAString
object allows RNA sequences with modified nucleotides
to be stored and manipulated.
ModRNAString(x = "", start = 1, nchar = NA)
the input as a character
.
the position in the character vector to use as start position in
the ModRNAString
object (default start = 1
).
the width of the character vector to use in the
ModRNAString
object (default nchar = NA
). The end position is
calculated as start + nchar - 1
.
a ModRNAString
object
The ModRNAString class contains the virtual ModString
class,
which is itself based on the XString
class. Therefore, functions for working with XString
classes are
inherited.
The alphabet of the ModRNAString class consists of the non-extended IUPAC
codes "A,G,C,U", the gap letter "-", the hard masking letter "+", the not
available letter "." and letters for individual modifications:
alphabet(ModRNAString())
.
Since the special characters are encoded differently depending on the OS and
encoding settings of the R session, it is not always possible to enter an RNA
sequence containing modified nucleotides via the R console. The most
convenient solution for this problem is to use the function
modifyNucleotides
and modify an existing RNAString or
ModRNAString object.
A ModRNAString
object can be converted into an RNAString
object
using the RNAString()
constructor. Modified nucleotides are
automatically converted into their base nucleotides.
If a modified RNA nucleotide you want to work with is not part of the alphabet, please let us know.
# Constructing a ModRNAString containing an m6A and a dihydrouridine
mr1 <- ModRNAString("AGCU`D")
mr1
#> 6-letter ModRNAString object
#> seq: AGCU`D
# the alphabet of the ModRNAString class
alphabet(mr1)
#> [1] "A" "C" "G" "U" "N" "-" "+" "." "œ" "ε" "ξ" "α" "\"" "K" "O"
#> [16] "]" "±" "Γ" "}" "/" "£" "ÿ" "≠" "≈" "*" "∞" "[" "ω" "∏" "%"
#> [31] "2" ":" "B" "#" "≤" "Z" "J" "Ä" "^" "ℑ" "σ" "Ð" "Þ" "X" "'"
#> [46] "κ" "δ" "†" "4" "τ" "¤" "b" "," "½" "¼" "¾" "Δ" "π" "∫" "∪"
#> [61] "r" "l" "~" "&" "≥" "℘" "f" "⊥" "$" ")" "!" "◊" "Ѷ" "°" "×"
#> [76] "Ç" "∅" "∝" "3" "∩" "1" "5" "F" "h" "≅" "S" "{" "?" "ρ" "T"
#> [91] "∃" "Ê" "¥" "Ω" "⇑" "∉" "φ" "7" "â" "γ" "æ" "∨" "|" "∠" "R"
#> [106] "L" "λ" "β" "μ" "ℵ" "M" "ν" "χ" "η" "ζ" "`" "⇓" "Ϩ" "≡" "Ϫ"
#> [121] "√" "Θ" "E" "=" "6" "¿" "(" "e" "D" "ς" "9" "⊄" "«" "⊆" "I"
#> [136] "⊇" "8" "y" "∑" "W" "P" "Q" "š" "Î" "÷" "H" "<" ";" "Ü" "V"
#> [151] "υ" "Y" "€"
# due to encoding issues the shortNames can also be used
shortName(mr1)
#> [1] "m1Am" "m1Gm" "m1Im"
#> [4] "m1acp3Y" "m1A" "m1G"
#> [7] "m1I" "m1Y" "m2,8A"
#> [10] "ges2U" "k2C" "m2A"
#> [13] "msms2i6A" "ms2ct6A" "ms2io6A"
#> [16] "ms2hn6A" "ms2i6A" "ms2m6A"
#> [19] "ms2t6A" "se2U" "s2Um"
#> [22] "s2C" "s2U" "Am"
#> [25] "Cm" "Gm" "Im"
#> [28] "Ym" "Um" "mcmo5Um"
#> [31] "Ar(p)" "Gr(p)" "m3Um"
#> [34] "acp3D" "acp3Y" "acp3U"
#> [37] "m3C" "m3Y" "m3U"
#> [40] "imG-14" "s4U" "m5Cm"
#> [43] "m5Um" "mchm5Um" "mchm5U"
#> [46] "inm5s2U" "inm5Um" "inm5U"
#> [49] "nm5ges2U" "nm5se2U" "nm5s2U"
#> [52] "nm5U" "nchm5U" "ncm5s2U"
#> [55] "ncm5Um" "ncm5U" "chm5U"
#> [58] "cm5s2U" "cmnm5ges2U" "cmnm5se2U"
#> [61] "cmnm5s2U" "cmnm5Um" "cmnm5U"
#> [64] "cm5U" "cnm5U" "f5Cm"
#> [67] "f5C" "ho5C" "hm5C"
#> [70] "ho5U" "mcm5s2U" "mcm5Um"
#> [73] "mcm5U" "mo5U" "m5s2U"
#> [76] "mnm5ges2U" "mnm5se2U" "mnm5s2U"
#> [79] "mnm5U" "m5C" "m5D"
#> [82] "m5U" "tm5s2U" "tm5U"
#> [85] "yW-86" "yW-72" "yW-58"
#> [88] "preQ1tRNA" "preQ0tRNA" "m7G"
#> [91] "m8A" "m2Gm" "m2,7Gm"
#> [94] "m2,7G" "m2,2Gm" "m2,2,7G"
#> [97] "m2,2G" "m2G" "m4Cm"
#> [100] "m4,4Cm" "m4,4C" "ac4Cm"
#> [103] "ac4C" "m4C" "m6Am"
#> [106] "m6,6Am" "m6,6A" "io6A"
#> [109] "ac6A" "f6A" "g6A"
#> [112] "hm6A" "hn6A" "i6A"
#> [115] "m6t6A" "m6A" "t6A"
#> [118] "C+" "G+" "ct6A"
#> [121] "D" "oQtRNA" "galQtRNA"
#> [124] "gluQtRNA" "ht6A" "OHyW"
#> [127] "I" "imG2" "manQtRNA"
#> [130] "OHyWy" "mimG" "o2yW"
#> [133] "Y" "QtRNA" "OHyWx"
#> [136] "Xm" "xX" "xA"
#> [139] "xC" "xG" "xU"
#> [142] "cmo5U" "mcmo5U" "yW"
#> [145] "imG" "CoA(pN)" "acCoA(pN)"
#> [148] "malonyl-CoA(pN)" "succinyl-CoA(pN)" "p(pN)"
#> [151] "(pN)" "NAD(pN)" "pp(pN)"
#> [154] "m7Gpp(pN)" "m2,7Gpp(pN)" "m2,2,7Gpp(pN)"
#> [157] "mpp(pN)" "Gpp(pN)"
# due to encoding issues the nomenclature can also be used
nomenclature(mr1)
#> [1] "01A" "01G" "019A" "1309U" "1A" "1G"
#> [7] "19A" "19U" "28A" "21U" "21C" "2A"
#> [13] "2361A" "2164A" "2160A" "2163A" "2161A" "621A"
#> [19] "2162A" "20U" "02U" "2C" "2U" "0A"
#> [25] "0C" "0G" "09A" "09U" "0U" "0503U"
#> [31] "00A" "00G" "03U" "308U" "309U" "30U"
#> [37] "3C" "39U" "3U" "4G" "74U" "05C"
#> [43] "05U" "0522U" "522U" "2583U" "0583U" "583U"
#> [49] "21510U" "20510U" "2510U" "510U" "531U" "253U"
#> [55] "053U" "53U" "520U" "2540U" "2151U" "2051U"
#> [61] "251U" "051U" "51U" "52U" "55U" "071C"
#> [67] "71C" "50C" "51C" "50U" "2521U" "0521U"
#> [73] "521U" "501U" "25U" "21511U" "20511U" "2511U"
#> [79] "511U" "5C" "58U" "5U" "254U" "54U"
#> [85] "47G" "347G" "348G" "101G" "100G" "7G"
#> [91] "8A" "02G" "027G" "27G" "022G" "227G"
#> [97] "22G" "2G" "04C" "044C" "44C" "042C"
#> [103] "42C" "4C" "06A" "066A" "66A" "60A"
#> [109] "64A" "67A" "65A" "68A" "63A" "61A"
#> [115] "662A" "6A" "62A" "20C" "103G" "69A"
#> [121] "8U" "102G" "104G" "105G" "2165A" "34830G"
#> [127] "9A" "42G" "106G" "3480G" "342G" "34832G"
#> [133] "9U" "10G" "3470G" "0X" "X" "?A"
#> [139] "?C" "?G" "?U" "502U" "503U" "3483G"
#> [145] "34G" "455N" "4155N" "4255N" "4355N" "552N"
#> [151] "N" "255N" "553N" "79553N" "279553N" "2279553N"
#> [157] "1553N" "9553N"
# convert to RNAString
r1 <- RNAString(mr1)
r1
#> 6-letter RNAString object
#> seq: AGCUAU