# 查找图案并过滤开始位置

``````library(stringr)
# Example input: a one-column data frame (`All`) holding four lowercase
# a/c/g/u sequence strings, one per row.
Sequence <- data.frame(
  All = c(
    "ggcgaagcagugcucccaguguuuuagagcuagaaauagcaaguuaaaauaaggcuaguccguuaucaacuugaaaaaguggcaccgagucggugcuu",
    "aggacaacucgcuccacggccguuuuagagcuagaaauagcaaguuaaaauaaggcuaguccguuaucaacuugaaaaaguggcaccgagucggugcuu",
    "cugaaauggcagcagaaacguuuuagagcuagaaauagcaaguuaaaauaaggcuaguccguuaucaacuugaaaaaguggcaccgagucggugcaacaaa",
    "ggucaaagaggaggagcucguuuuagagcuagaaauagcaaguuaaaauaaggcuaguccguuaucaacuugaaaaaguggcaccgagucggugcuu"
  )
)
# List the start/end positions of every "gaaa" match, one matrix per sequence.
# (`$` must not be backslash-escaped: `Sequence\$All` is a parse error in R.)
str_locate_all(Sequence$All, pattern = "gaaa")

[[1]]
start end
[1,]    33  36
[2,]    73  76

[[2]]
start end
[1,]    34  37
[2,]    74  77

[[3]]
start end
[1,]     3   6
[2,]    15  18
[3,]    32  35
[4,]    72  75

[[4]]
start end
[1,]    32  35
[2,]    72  75

``````

``````       start
1         33
2         34
3         32
4         32
``````

``````# For each sequence, record the first "gaaa" start position that lies in
# 30..34, or NA when no match starts inside that window.
Sequence$start <- vapply(
  str_locate_all(Sequence$All, pattern = "gaaa"),
  function(z) {
    starts <- z[, "start"]
    hit <- which(starts >= 30 & starts <= 34)
    # str_locate_all() yields integer matrices, so the NA must be integer too
    # for vapply()'s type check.
    if (length(hit) > 0) starts[hit[1L]] else NA_integer_
  },
  integer(1)
)
# Show only the new column, selected by name rather than by position.
Sequence[, "start", drop = FALSE]
#   start
# 1    33
# 2    34
# 3    32
# 4    32
``````

One `dplyr` and `purrr` solution could be:

``````# Row-bind the per-sequence match tables, keeping only matches whose start
# is in 30..34. `ID` records which sequence each row came from; a sequence
# with no match in that window contributes no rows.
map_dfr(
  .x = str_locate_all(Sequence$All, pattern = "gaaa"),
  ~ as.data.frame(.x) %>%
    filter(start %in% 30:34),
  .id = "ID"
)

ID start end
1  1    33  36
2  2    34  37
3  3    32  35
4  4    32  35
``````
Clare

Here is a way. It uses the output of the `str_locate_all` call in the question and filters it in an `lapply` loop.

``````# Locate every "gaaa" match, one start/end matrix per sequence.
found <- str_locate_all(Sequence$All, pattern = "gaaa")
# Reduce each matrix to a data frame of the start positions inside 30..34.
found <- lapply(found, function(x) {
  y <- x[, "start"]
  data.frame(start = y[y >= 30 & y <= 34])
})
# Stack the per-sequence data frames into a single result.
do.call(rbind, found)
#  start
#1    33
#2    34
#3    32
#4    32
``````