Extract/replace arbitrary substrings from/in a string or set of strings.
extractAt extracts multiple subsequences from XString object x, or from the individual sequences of XStringSet object x, at the ranges of positions specified through at.
replaceAt performs multiple subsequence replacements (a.k.a. substitutions) in XString object x, or in the individual sequences of XStringSet object x, at the ranges of positions specified through at.
extractAt(x, at)
replaceAt(x, at, value="")
x |
An XString or XStringSet object. |
at |
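Typically an IntegerRanges object if x is an XString object, and an IntegerRangesList object if x is an XStringSet object. Alternatively, the ranges can be specified with only 1 number
per range (its start position), in which case they are considered
to be empty ranges (a.k.a. zero-width ranges). So if at is a numeric vector, an IntegerList object, or a list of numeric vectors, each number in it is interpreted as the start position of a zero-width range; this is useful when using replaceAt to perform insertions. The following applies only if x is an XStringSet object: at is recycled to the length of x if necessary. If at is an IntegerRanges object (or a numeric vector), it is first turned into an IntegerRangesList object of length 1, which is then recycled to the length of x; this is useful for specifying the same ranges across all sequences in x. The effective shape of at is described by its length together with the lengths of its list elements after recycling.
As a special case, replaceAt accepts at and value to be both of length 0 (e.g. NULL), in which case it just returns x unmodified (no-op). |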
value |
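The replacement sequences. If x is an XString object, value is typically a character vector or an XStringSet object that is recycled to the length of at (if necessary). If x is an XStringSet object, value is typically a list of character vectors, a CharacterList object, or an XStringSetList object that is recycled (if necessary) to the effective shape of at. As a special case, replaceAt accepts at and value to be both of length 0 (e.g. NULL), in which case it just returns x unmodified (no-op). |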
For extractAt
: An XStringSet object of the same length as
at
if x
is an XString object.
An XStringSetList object of the same length as x
(and same effective shape as at
) if x
is an
XStringSet object.
For replaceAt
: An object of the same class as x
.
If x
is an XStringSet object, its length and names and
metadata columns are preserved.
Like subseq
(defined and documented in the
XVector package), extractAt
does not copy the sequence data!
extractAt is equivalent to extractList (defined and documented in the IRanges package) when x is an XString object and at an IntegerRanges object.
H. Pagès
The subseq
and subseq<-
functions in the XVector package for simpler forms of
subsequence extractions and replacements.
The extractList
and
unstrsplit
functions defined and
documented in the IRanges package.
The replaceLetterAt function for a DNA-specific single-letter replacement function useful for SNP injections.
The padAndClip
function for padding and clipping
strings.
The XString, XStringSet, and XStringSetList classes.
The IntegerRanges, IntegerRangesList, IntegerList, and CharacterList classes defined and documented in the IRanges package.
## ---------------------------------------------------------------------
## (A) ON AN XString OBJECT
## ---------------------------------------------------------------------

## The 13-letter subject used throughout this section.
x <- BString("abcdefghijklm")

## Extraction at 5 overlapping width-3 ranges, first unnamed then named.
at1 <- IRanges(5:1, width = 3)
extractAt(x, at1)
names(at1) <- LETTERS[22:26]
extractAt(x, at1)

## Named ranges, one of which (5-4) is empty; the order of the ranges
## in 'at' is free.
at2 <- IRanges(
  start = c(1, 5, 12),
  end = c(3, 4, 12),
  names = c("X", "Y", "Z")
)
extractAt(x, at2)
extractAt(x, rev(at2))

## Replacement: one replacement string per range.
value <- c("+", "-", "*")
replaceAt(x, at2, value = value)
replaceAt(x, rev(at2), value = rev(value))

## Several zero-width ranges sharing the same start position.
at3 <- IRanges(
  start = c(14, 1, 1, 1, 1, 11),
  end = c(13, 0, 10, 0, 0, 10)
)
value <- 1:6
replaceAt(x, at3, value = value)            # "24536klm1"
replaceAt(x, rev(at3), value = rev(value))  # "54236klm1"

## Deletions (the default 'value' is the empty string):
stopifnot(
  replaceAt(x, at2) == "defghijkm",
  replaceAt(x, rev(at2)) == "defghijkm"
)
stopifnot(
  replaceAt(x, at3) == "klm",
  replaceAt(x, rev(at3)) == "klm"
)

## Insertions (zero-width ranges, or just their start positions):
at4 <- IRanges(c(6, 10, 2, 5), width = 0)
stopifnot(
  replaceAt(x, at4, value = "-") == "a-bcd-e-fghi-jklm",
  replaceAt(x, start(at4), value = "-") == "a-bcd-e-fghi-jklm"
)
at5 <- c(5, 1, 6, 5)  # 2 insertions before position 5
replaceAt(x, at5, value = c("+", "-", "*", "/"))

## No-ops: replacing each range with what it already contains.
stopifnot(replaceAt(x, NULL, value = NULL) == x)
stopifnot(
  replaceAt(x, at2, value = extractAt(x, at2)) == x,
  replaceAt(x, at3, value = extractAt(x, at3)) == x,
  replaceAt(x, at4, value = extractAt(x, at4)) == x,
  replaceAt(x, at5, value = extractAt(x, at5)) == x
)

## The order of successive transformations matters:
##   T1: insert "+" before position 1 and 4
##   T2: insert "-" before position 3

## T1 followed by T2
x2a <- replaceAt(x, c(1, 4), value = "+")
x3a <- replaceAt(x2a, 3, value = "-")

## T2 followed by T1
x2b <- replaceAt(x, 3, value = "-")
x3b <- replaceAt(x2b, c(1, 4), value = "+")

## T1 and T2 simultaneously:
x3c <- replaceAt(x, c(1, 3, 4), value = c("+", "-", "+"))

## ==> 'x3a', 'x3b', and 'x3c' are all different!
## Append "**" to 'x3c':
replaceAt(x3c, length(x3c) + 1L, value = "**")

## ---------------------------------------------------------------------
## (B) ON AN XStringSet OBJECT
## ---------------------------------------------------------------------

x <- BStringSet(c(seq1 = "ABCD", seq2 = "abcdefghijk", seq3 = "XYZ"))

## The same ranges applied to every sequence in 'x'.
at6 <- IRanges(c(1, 3), width = 1)
extractAt(x, at = at6)
unstrsplit(extractAt(x, at = at6))

## One set of ranges per sequence.
at7 <- IRangesList(
  IRanges(start = c(2, 1), end = c(3, 0)),
  IRanges(start = c(7, 2, 12, 7), end = c(6, 5, 11, 8)),
  IRanges(start = 2, end = 2)
)

## Set inner names on 'at7'.
unlisted_at7 <- unlist(at7)
names(unlisted_at7) <- sprintf("rg%02d", seq_along(unlisted_at7))
at7 <- relist(unlisted_at7, at7)

extractAt(x, at7)     # same as 'as(mapply(extractAt, x, at7), "List")'
extractAt(x, at7[3])  # same as 'as(mapply(extractAt, x, at7[3]), "List")'

replaceAt(x, at7, value = extractAt(x, at7))  # no-op
replaceAt(x, at7)                             # deletions

## A mix of insertions (width 0) and replacements (width > 0).
at8 <- IRangesList(
  IRanges(1:5, width = 0),
  IRanges(c(6, 8, 10, 7, 2, 5), width = c(0, 2, 0, 0, 0, 0)),
  IRanges(c(1, 2, 1), width = c(0, 1, 0))
)
replaceAt(x, at8, value = "-")
value8 <- relist(paste0("[", seq_along(unlist(at8)), "]"), at8)
replaceAt(x, at8, value = value8)
replaceAt(x, at8, value = as(c("+", "-", "*"), "List"))

## Append "**" to all sequences:
replaceAt(x, as(width(x) + 1L, "List"), value = "**")

## ---------------------------------------------------------------------
## (C) ADVANCED EXAMPLES
## ---------------------------------------------------------------------

library(hgu95av2probe)
probes <- DNAStringSet(hgu95av2probe)

## Split the probes in 5-mer chunks:
at <- successiveIRanges(rep(5, 5))
extractAt(probes, at)

## Replace base 13 by its complement:
at <- IRanges(13, width = 1)
base13 <- extractAt(probes, at)
base13comp <- relist(complement(unlist(base13)), base13)
replaceAt(probes, at, value = base13comp)
## See ?xscat for a more efficient way to do this.
## Replace all the occurrences of a given pattern with another pattern:
midx <- vmatchPattern("VCGTT", probes, fixed = FALSE)
matches <- extractAt(probes, midx)
unlist(matches)
unique(unlist(matches))
probes2 <- replaceAt(probes, midx, value = "-++-")

## See strings with 2 or more substitutions:
probes2[elementNROWS(midx) >= 2]

## 2 sanity checks:
##   1. putting the matches back where they came from is a no-op;
##   2. the result agrees with a plain regex substitution.
stopifnot(all(replaceAt(probes, midx, value = matches) == probes))
probes2b <- gsub("[ACG]CGTT", "-++-", as.character(probes))
stopifnot(identical(as.character(probes2), probes2b))
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.