Queries the provided regions and produces M and Coverage matrices and their
genomic positions. Parallelized across files using threads from the
"iscream.threads"
option. The output of query_all
may be used to create
a BSseq object: do.call(BSseq, query_all(...))
.
Usage
query_all(
bedfiles,
regions,
aligner = "biscuit",
merged = TRUE,
sparse = FALSE,
prealloc = 10000,
nthreads = NULL
)
Arguments
- bedfiles
A vector of bedfile paths
- regions
A vector, data frame or GenomicRanges of genomic regions. See details.
- aligner
The aligner used to produce the BED files - one of "biscuit", "bismark", "bsbolt".
- merged
Whether the input strands have been merged/collapsed
- sparse
Whether to return M and coverage matrices as sparse matrices ("dgCMatrix"). Set this
TRUE
only for scWGBS data- prealloc
The number of rows to initialize the matrices with. If the number of methyltion loci are approximately known, this can reduce runtime as fewer resizes need to be made.
- nthreads
Set number of threads to use overriding the
"iscream.threads"
option. See?set_threads
for more information.
Value
A named list of
M and coverage matrices
a character vector of chromosomes and numeric vector of corresponding CpG base positions
a character vector of the input sample names
Details
The input regions may be string vector in the form "chr:start-end" or a GRanges object. If a data frame is provided, they must have "chr", "start", and "end" columns.
Examples
bedfiles <- system.file("extdata", package = "iscream") |>
list.files(pattern = "[a|b|c|d].bed.gz$", full.names = TRUE)
# examine the bedfiles
colnames <- c("chr", "start", "end", "beta", "coverage")
lapply(bedfiles, function(i) knitr::kable(read.table(i, col.names = colnames)))
#> [[1]]
#>
#>
#> |chr | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 | 1| 2| 1.0| 1|
#> |chr1 | 3| 4| 1.0| 1|
#> |chr1 | 5| 6| 0.0| 2|
#> |chr1 | 7| 8| 0.0| 1|
#> |chr1 | 9| 10| 0.5| 2|
#> |chr1 | 11| 12| 1.0| 2|
#> |chr1 | 13| 14| 1.0| 3|
#>
#> [[2]]
#>
#>
#> |chr | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 | 1| 2| 0| 2|
#> |chr1 | 5| 6| 1| 2|
#> |chr1 | 7| 8| 1| 1|
#> |chr1 | 11| 12| 0| 2|
#> |chr1 | 13| 14| 1| 1|
#>
#> [[3]]
#>
#>
#> |chr | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 | 3| 4| 1| 2|
#> |chr1 | 7| 8| 0| 2|
#> |chr1 | 9| 10| 1| 1|
#>
#> [[4]]
#>
#>
#> |chr | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 | 1| 2| 1.0| 1|
#> |chr1 | 3| 4| 1.0| 2|
#> |chr1 | 7| 8| 0.0| 1|
#> |chr1 | 9| 10| 0.5| 2|
#> |chr1 | 13| 14| 1.0| 1|
#>
# make a vector of regions
regions <- c("chr1:1-6", "chr1:7-10", "chr1:11-14")
query_all(bedfiles, regions)
#> [17:18:17.930494] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
#>
#> [17:18:17.930928] [iscream::query_all] [info] Creating metadata vectors
#> [17:18:17.930976] [iscream::query_all] [info] nrows 10000 - 9993 extra rows allocated with 0 resizes
#> [17:18:17.930980] [iscream::query_all] [info] Creating dense matrix
#> $M
#> a b c d
#> chr1:2 1 0 0 1
#> chr1:4 1 0 2 2
#> chr1:6 0 2 0 0
#> chr1:8 0 1 0 0
#> chr1:10 1 0 1 1
#> chr1:12 2 0 0 0
#> chr1:14 3 1 0 1
#>
#> $Cov
#> a b c d
#> chr1:2 1 2 0 1
#> chr1:4 1 0 2 2
#> chr1:6 2 2 0 0
#> chr1:8 1 1 2 1
#> chr1:10 2 0 1 2
#> chr1:12 2 2 0 0
#> chr1:14 3 1 0 1
#>
#> $pos
#> [1] 1 3 5 7 9 11 13
#>
#> $chr
#> [1] "chr1" "chr1" "chr1" "chr1" "chr1" "chr1" "chr1"
#>
#> $sampleNames
#> [1] "a" "b" "c" "d"
#>
# for BSseq object run
if (FALSE) { # \dontrun{
library(bsseq)
do.call(BSseq, query_all(bedfiles, regions))
} # }