This document follows the TileDB API usage examples. A shorter introductory vignette is also available.
We first show two basic examples, one for a dense and one for a sparse array, simply to create array data on disk that the later examples can refer to.
library(tiledb)
# All example arrays live below R's per-session temporary directory
tdir <- tempdir()
# URI of the basic dense example array
uridense <- file.path(tdir, "dense")
# URI of the dense array written with a fixed number (two) of values per cell
uridensefix <- file.path(tdir, "densefix")
# URI of the dense array written with a variable-length attribute
uridensevar <- file.path(tdir, "densevar")
# URI used further below when opening a dense array with an encryption key
uridensewkey <- file.path(tdir, "denseenc")
# Create an empty 4x4 dense array at `array_name`, unless an array is already
# stored there.
#
# The array has two INT32 dimensions, "rows" and "cols", each with domain
# [1,4], and a single INT32 attribute "a", so every (i,j) cell can hold one
# integer.
#
# Returns (invisibly) NULL when the array already exists, otherwise the value
# of tiledb_array_create().
create_array <- function(array_name) {
  if (tiledb_object_type(array_name) == "ARRAY") {
    # Nothing to do: report it and leave the existing array untouched.
    message("Array already exists.")
    invisible(NULL)
  } else {
    row_dim <- tiledb_dim("rows", c(1L, 4L), 4L, "INT32")
    col_dim <- tiledb_dim("cols", c(1L, 4L), 4L, "INT32")
    array_domain <- tiledb_domain(dims = c(row_dim, col_dim))
    cell_attr <- tiledb_attr("a", type = "INT32")
    array_schema <- tiledb_array_schema(array_domain, attrs = cell_attr)
    # Materialise the (still empty) array on disk.
    invisible(tiledb_array_create(array_name, array_schema))
  }
}
# Fill the 4x4 dense array at `array_name` with the integers 1..16.
#
# The values are laid out so that row i, column j holds (i - 1) * 4 + j,
# i.e. the first row is 1, 2, 3, 4 and the first column is 1, 5, 9, 13.
write_array <- function(array_name) {
  cell_values <- matrix(seq_len(16L), nrow = 4L, ncol = 4L, byrow = TRUE)
  # Open the array and write the whole matrix in one assignment.
  arr <- tiledb_array(uri = array_name)
  arr[] <- cell_values
}
# Create the dense example array on disk and fill it with data
create_array(uridense)
write_array(uridense)
# URI of the sparse example array
urisparse <- file.path(tdir, "sparse")
# Create an empty 4x4 sparse array at `array_name`, unless an array is already
# stored there. This definition replaces the dense variant defined earlier.
#
# Returns (invisibly) NULL when the array already exists, otherwise the value
# of tiledb_array_create().
create_array <- function(array_name) {
  # Check if the array already exists.
  if (tiledb_object_type(array_name) == "ARRAY") {
    message("Array already exists.")
    return(invisible(NULL))
  }
  # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
  dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
                                tiledb_dim("cols", c(1L, 4L), 4L, "INT32")))
  # The array will be sparse with a single attribute "a" so each (i,j) cell
  # can store an integer.
  schema <- tiledb_array_schema(dom, attrs = tiledb_attr("a", type = "INT32"),
                                sparse = TRUE)
  # Create the (empty) array on disk, and return the path invisibly.
  invisible(tiledb_array_create(array_name, schema))
}
# Write three cells into the sparse array at `array_name`:
# (1,1) = 1, (2,4) = 2 and (2,3) = 3.
write_array <- function(array_name) {
  row_idx <- c(1, 2, 2)
  col_idx <- c(1, 4, 3)
  cell_values <- c(1L, 2L, 3L)
  # Open the array and assign the values at the given coordinates.
  arr <- tiledb_array(uri = array_name)
  arr[row_idx, col_idx] <- cell_values
}
# Create the sparse example array on disk and write its three cells
create_array(urisparse)
write_array(urisparse)
# Close the array behind `arr` and open it again through the same pointer.
#
# arr: object whose `ptr` slot holds the array pointer
# txt: passed on unchanged as second argument of
#      libtiledb_array_open_with_ptr()
#      (presumably the query type such as "READ" or "WRITE" -- confirm)
#
# Returns (invisibly) the result of the re-open call.
close_and_reopen <- function(arr, txt) {
  array_ptr <- arr@ptr
  tiledb:::libtiledb_array_close(array_ptr)
  invisible(tiledb:::libtiledb_array_open_with_ptr(array_ptr, txt))
}
# Create an INT32 attribute named "attr"
attr <- tiledb_attr("attr", type = "INT32")
# Create an INT32 attribute named "a1" (this replaces the previous `attr`)
attr <- tiledb_attr("a1", type = "INT32")
# Access the number of values per cell via the generic or the direct function
cell_val_num(attr)
tiledb_attribute_get_cell_val_num(attr)
## Attribute value counts can be set via a generic method and a direct method
cell_val_num(attr) <- 3
tiledb_attribute_set_cell_val_num(attr, 3)
## set attribute to variable length, which is encoded as NA
## NOTE(review): `attr` is of type INT32 at this point, not a char attribute -- confirm intent
cell_val_num(attr) <- NA
tiledb_attribute_set_cell_val_num(attr, NA)
# ... create int attribute attr
attr <- tiledb_attr("a1", type = "INT32")
# set fill value to 42L
tiledb_attribute_set_fill_value(attr, 42L)
# ... create CHAR attribute attr and set it to three values per cell
# NOTE(review): a fixed count of 3 is set here; if a variable-sized attribute was intended the count would be NA -- confirm
attr <- tiledb_attr("attr", type = "CHAR")
tiledb_attribute_set_cell_val_num(attr, 3)
# set fill value to "..."
tiledb_attribute_set_fill_value(attr, "...")
# ... create domain dom
# NOTE(review): neither `dom` nor a filter list object named `filter_list` is created in this snippet; both are presumably set up beforehand -- confirm
attr1 <- tiledb_attr("attr1", "INT32", filter_list = filter_list)
attr2 <- tiledb_attr("attr2", "FLOAT64", filter_list = filter_list)
# Create a dense array schema from the domain and both attributes
schema <- tiledb_array_schema(dom, c(attr1, attr2), sparse = FALSE)
# Or, create a sparse array schema
# schema <- tiledb_array_schema(dom, c(attr1, attr2), sparse = TRUE)
# ... create domain dom
# ... create attributes attr1, attr2
# ... create filter lists fl1, fl2, similar to attributes
# fl1 holds the BIT_WIDTH_REDUCTION filter, fl2 the ZSTD filter
f1 <- tiledb_filter("BIT_WIDTH_REDUCTION")
f2 <- tiledb_filter("ZSTD")
fl1 <- tiledb_filter_list(c(f1))
fl2 <- tiledb_filter_list(c(f2))
# Create the schema setting the coordinates and offsets filter lists
schema <- tiledb_array_schema(dom, c(attr1, attr2),
coords_filter_list = fl1,
offsets_filter_list = fl2)
## Prepare a larger 5 x 5 array to embed the subarray into
tmp <- tempfile()
d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
dom <- tiledb_domain(c(d1, d2))
val <- tiledb_attr(name = "val", type = "INT32")
sch <- tiledb_array_schema(dom, c(val))
tiledb_array_create(tmp, sch)
# Fill all 25 cells with (random) integer values
dat <- matrix(as.integer(rnorm(25) * 100), 5, 5)
arr <- tiledb_array(tmp, return_as = "data.frame")
arr[] <- dat
# Prepare a 2x3 dense array
# Contrary to Python, R by default stores arrays in col-major order
data <- array(c(1L, 4L, 2L, 5L, 3L, 6L), dim = c(2, 3))
# Prepare the [1,2] x [2,4] subarray to write to
I <- c(1:2)
J <- c(2:4)
# Open the array and write the data to it. tiledb_array() is used here, as in
# all other examples, instead of the legacy tiledb_dense() accessor.
A <- tiledb_array(uri = tmp)
A[I, J] <- data
# Remove the temporary array again
unlink(tmp, recursive = TRUE)
# Low-level write to the dense example array through the package-internal
# (`tiledb:::`) libtiledb_* functions
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "WRITE")
## data: simple (integer sequence) of 1:16 times 10
vec <- 1:16 * 10L
# subarray bounds: rows 1 to 4 and cols 1 to 4
subarr <- c(1L,4L, 1L,4L)
# build the write query: subarray, column-major layout, buffer for attribute "a"
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)
# submit the query, then close the array
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)
# Re-create the sparse example array: remove the existing one first
tmp <- urisparse
unlink(tmp, recursive=TRUE)
# 5x5 domain with dimensions "d1" and "d2", and a single INT32 attribute "val"
d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
dom <- tiledb_domain(c(d1, d2))
val <- tiledb_attr("val", type = "INT32")
sch <- tiledb_array_schema(dom, val, sparse=TRUE)
tiledb_array_create(tmp, sch)
# Prepare some data: four values and the (I, J) coordinates they are written to
data <- c(3L, 4L, 1L, 2L)
I <- c(3, 4, 1, 2)
J <- c(3, 4, 2, 1)
# Open the array and write the data to it
A <- tiledb_array(uri = tmp)
A[I, J] <- data
# Start from a clean slate: remove any array left at this URI
if (dir.exists(uridensefix)) unlink(uridensefix, recursive=TRUE)
# 4x4 domain with dimensions "d1" and "d2"
d1 <- tiledb_dim("d1", domain = c(1L, 4L))
d2 <- tiledb_dim("d2", domain = c(1L, 4L))
dom <- tiledb_domain(c(d1, d2))
# 32 values, i.e. two for each of the 16 cells
vec <- 1:32 * 10L
# the attribute type is derived from the R type of `vec`
attr <- tiledb_attr("a", type = r_to_tiledb_type(vec))
## set to two values per cell
tiledb:::libtiledb_attribute_set_cell_val_num(attr@ptr, 2)
sch <- tiledb_array_schema(dom, attr)
tiledb_array_create(uridensefix, sch)
# low-level write of `vec` over the full 4x4 subarray in column-major layout
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensefix, "WRITE")
subarr <- c(1L,4L, 1L,4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)
# TODO: Higher-level R support
# Start from a clean slate: remove any array left at this URI
if (dir.exists(uridensevar)) unlink(uridensevar, recursive=TRUE)
## Define array
## The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
tiledb_dim("cols", c(1L, 4L), 4L, "INT32")))
attr <- tiledb_attr("a1", type = "CHAR")
## set to variable length
tiledb:::libtiledb_attribute_set_cell_val_num(attr@ptr, NA)
## now set the schema: dense, column-major cell and tile order, one attribute
ctx <- tiledb_ctx()
schptr <- tiledb:::libtiledb_array_schema_create(ctx@ptr, "DENSE")
tiledb:::libtiledb_array_schema_set_domain(schptr, dom@ptr)
tiledb:::libtiledb_array_schema_set_cell_order(schptr, "COL_MAJOR")
tiledb:::libtiledb_array_schema_set_tile_order(schptr, "COL_MAJOR")
tiledb:::libtiledb_array_schema_add_attribute(schptr, attr@ptr)
## Create the (empty) array on disk.
tiledb:::libtiledb_array_create(uridensevar, schptr)
## All 16 cell values concatenated into a single string; `offsets` holds the
## zero-based start position of each cell's value within that string
data <- "abbcccddeeefghhhijjjkklmnoop";
offsets <- c(0L, 1L, 3L, 6L, 8L, 11L, 12L, 13L, 16L, 17L, 20L, 22L, 23L, 24L, 25L, 27L)
## Low-level write: the offsets and data are wrapped in a var-char buffer
## which is then set on the query for attribute "a1"
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar, "WRITE")
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
bufptr <- tiledb:::libtiledb_query_buffer_var_char_create(offsets, data)
qryptr <- tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "a1", bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
tiledb:::libtiledb_array_close(arrptr)
# TODO: Higher-level R support
# 'at' uses Sys.time() from R in seconds, and shifts back 10 minutes
at <- Sys.time() - 10 * 60
# 'arr' is an already created array, could also be encrypted and carry key;
# it is re-opened for writing at the timestamp computed above
arr <- tiledb_array_open_at(arr, "WRITE", at)
# arr is now open for writing, any suitable content can be written the usual way
# continuing from previous example on dense variable length array
# (but this works of course with any array after a write)
# NOTE(review): `qry` and `idx` are not defined in this snippet; `qry` is presumably a submitted write query object -- confirm
# Number of fragments
numfrag <- tiledb_query_get_fragment_num(qry)
# URI of given fragment, with 0 <= idx < numfrag
uri <- tiledb_query_get_fragment_uri(qry, idx)
# Timestamp range of given fragment, with 0 <= idx < numfrag
tsrange <- tiledb_query_get_fragment_timestamp_range(qry, idx)
# get a schema directly from storage, uri holds a valid array URI
# ("<array_uri>" is a placeholder to be replaced)
uri <- "<array_uri>"
sch <- schema(uri)
# get an encrypted schema directly from storage, enc_key is the AES-256 key
# NOTE(review): `enc_key` is not defined in this snippet
sch <- schema(uri, enc_key)
# get a schema from an already opened array
# using a sparse array example, works the same for dense arrays
array_name <- urisparse
A <- tiledb_array(uri = array_name, is.sparse = TRUE)
sch <- schema(A)
# one can also open encrypted arrays with key for AES-256 encryption
# and all other options (for sparse arrays, data.frame objects...)
key <- "0123456789abcdeF0123456789abcdeF"
A <- tiledb_array(uri = array_name, encryption_key = key)
sch <- schema(A)
# Get array schema, this shows the sparse accessor
# and it is similar for dense arrays
A <- tiledb_array(uri = urisparse, is.sparse = TRUE)
# Note: the result is stored in a variable that shares its name with the
# schema() function; later function calls still resolve to the function
schema <- schema(A)
# Get array type
sparse <- is.sparse(schema)
# Get tile capacity
t_capacity <- capacity(schema)
# Get tile order
t_order <- tile_order(schema)
# Get cell order
c_order <- cell_order(schema)
# Get coordinates and offset filter list
reslist <- filter_list(schema)
# Get the array domain
dom <- domain(schema)
# Get all attributes as list
attrs <- attrs(schema)
# Check if given attribute exists ("attr" stands in for an attribute name)
has_attr <- has_attribute(schema, "attr")
# Get attribute from name
attr <- attrs(schema, "attr")
# Dump the array schema in ASCII format in the selected output
show(schema)
# ... get array schema
# ... get domain from schema
# Get the domain datatype (i.e., the datatype of all dimensions)
type <- datatype(dom)
# Get number of dimensions; tiledb_ndim() returns the count, whereas dim() on
# a domain returns the extent of each dimension
dim_num <- tiledb_ndim(dom)
# Get all dimensions
dims <- dimensions(dom)
# Get dimension by index (0 <= i < dim_num)
dim <- tiledb_domain_get_dimension_from_index(dom, 1)
# Get dimension by name ("dimname" stands in for a dimension name)
dim <- tiledb_domain_get_dimension_from_name(dom, "dimname")
# Check dimension for name
tiledb_domain_has_dimension(dom, "dimname")
# Dump the domain in ASCII format in the selected output
show(dom)
# ... get array schema
# ... get domain
# ... get dimension by index or name; `dim` holds that dimension object below
# Get dimension name
dim_name <- name(dim)
# Get dimension datatype
dim_type <- datatype(dim)
# Get dimension domain
domain <- domain(dim)
# Get tile extent
tile_extent <- tile(dim)
# Dump the dimension in ASCII format in the selected output
show(dim)
# ... get array schema
# ... get attribute by index or name; `attr` holds that attribute object below
# Get attribute name
attr_name <- name(attr)
# Get attribute datatype
attr_type <- datatype(attr)
# Get filter list
filter_list <- filter_list(attr)
# Check if attribute is variable-length
is_var <- tiledb_attribute_is_variable_sized(attr)
# Get number of values per cell
num <- ncells(attr)
# Get cell size for this attribute
sz <- tiledb_attribute_get_cell_size(attr)
# Get the fill value (for both fixed and variable sized attributes)
tiledb_attribute_get_fill_value(attr)
# Dump the attribute in ASCII format in the selected output
show(attr)
# dim holds a previously created or loaded dimension object
fltrlst <- filter_list(dim)
# or fltrlst <- filter_list(attr) for some attribute `attr`
# get number of filters
nb <- nfilters(fltrlst)
# get max chunk size
mxsz <- max_chunk_size(fltrlst)
# get filter by index from filter list (0 <= idx < num_filters)
# NOTE(review): `i` is not defined in this snippet; it stands in for the desired index
idx <- i
fltr <- fltrlst[idx]
# get option (that is filter-dependent) from filter
tiledb_filter_get_option(fltr, "COMPRESSION_LEVEL")
# set option (that is filter-dependent) for filter
tiledb_filter_set_option(fltr, "COMPRESSION_LEVEL", 9)
# get filter type
tiledb_filter_type(fltr)
# Open a dense array
A <- tiledb_array(uri = uridense)
# Or, open a sparse array
# A <- tiledb_sparse(uri = "<array-uri>", ctx=ctx)
# Slice only rows 1, 2 and cols 2, 3, 4
data <- A[1:2, 2:4]
# Display the slice
show(data)
# Low-level read of the dense example array through the package-internal
# (`tiledb:::`) libtiledb_* functions
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
## subarray of rows 1,2 and cols 2,3,4
subarr <- c(1L,2L, 2L,4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
# six elements: one per cell of the 2x3 subarray
v <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", v)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
print(v) # unformed array, no coordinates
res <- tiledb:::libtiledb_array_close(arrptr)
# Low-level read of the variable-length attribute "a1" over the full 4x4 domain
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar, "READ")
subarr <- c(1L,4L, 1L,4L)
# allocate a var-char result buffer for attribute "a1"
# NOTE(review): 16 and 100 are presumably the sizes reserved for offsets and data -- confirm
bufptr <- tiledb:::libtiledb_query_buffer_var_char_alloc(arrptr, subarr, "a1", 16, 100)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
qryptr <- tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "a1", bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
tiledb:::libtiledb_array_close(arrptr)
# extract the result from the buffer and print it without quotes
mat <- tiledb:::libtiledb_query_get_buffer_var_char(bufptr)
print(mat, quote=FALSE)
# example with one fixed- and one variable-sized dimension:
# "d1" is INT32 with domain [1,4], "d2" is ASCII with neither domain nor tile set
dom <- tiledb_domain(dims = c(tiledb_dim("d1", c(1L, 4L), 4L, "INT32"),
tiledb_dim("d2", NULL, NULL, "ASCII")))
# ... add attribute(s), write content, ...
# ... arr is the array opened
# retrieve non-empty domain for fixed-sized dimension
tiledb_array_get_non_empty_domain_from_index(arr, 1)
tiledb_array_get_non_empty_domain_from_name(arr, "d1")
# retrieve non-empty domain for variable-sized dimension
tiledb_array_get_non_empty_domain_from_index(arr, 2)
tiledb_array_get_non_empty_domain_from_name(arr, "d2")
# Open the array and read as a data.frame from it.
# NOTE(review): `encryption_key` is not assigned before this point in the visible code -- confirm it is set beforehand
A <- tiledb_array(uri = array_name, return_as = "data.frame",
encryption_key = encryption_key)
# Slice rows 1 and 2, and cols 2, 3 and 4
A[1:2, 2:4]
# timestamps for TileDB are milliseconds since epoch, we use
# an R Datetime object to pass the value
tstamp <- as.POSIXct(1577955845.678, origin="1970-01-01")
# open the array for reading at the timestamp
A <- tiledb_array_open_at(A, "READ", tstamp)
# create query, allocate result buffer, ...
# NOTE(review): `qry`, `idx` and `i` are placeholders that are not defined in this snippet
# add two query ranges on the first dimension
qry <- tiledb_query_add_range(qry, schema, "d1", 2L, 4L)
qry <- tiledb_query_add_range(qry, schema, "d1", 6L, 8L)
# add a query range on the second dimension, using variable size
qry <- tiledb_query_add_range(qry, schema, "d2", "caaa", "gzzz")
# number of ranges given index
num <- tiledb_query_get_range_num(qry, idx)
# range start, end and stride for range i (1 <= i <= num)
rng <- tiledb_query_get_range(qry, idx, i)
# range start and end for variable-sized dimension for range i (1 <= i <= num)
strrng <- tiledb_query_get_range_var(qry, idx, i)
# Reading with a result buffer that is too small for the whole result: the
# query is submitted repeatedly until its status is "COMPLETE"
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
# the full 4x4 subarray has 16 cells ...
subarr <- c(1L,4L, 1L,4L)
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
# ... but the buffer only has room for four values
vec <- integer(4) # reserve (insufficient) space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)
finished <- FALSE
while (!finished) {
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
# show the buffer content after each submit
print(vec)
# the loop ends only once the status equals "COMPLETE"
finished <- tiledb:::libtiledb_query_status(qryptr) == "COMPLETE"
}
res <- tiledb:::libtiledb_array_close(arrptr)
# this example performs time traveling via the lower-level API
# we use the R Datetime type; internally TileDB uses milliseconds since epoch
tstamp <- Sys.time() - 60*60 # one hour ago
ctx <- tiledb_ctx()
# open the dense example array for reading at the given timestamp
arrptr <- tiledb:::libtiledb_array_open_at(ctx@ptr, uridense, "READ", tstamp)
# subarray of rows 1,2 and cols 2,3,4
subarr <- c(1L,2L, 2L,4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
a <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", a)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)
# show the values read
a
# we can do the same with encrypted arrays
encryption_key <- "0123456789abcdeF0123456789abcdeF"
arrptr <- tiledb:::libtiledb_array_open_at_with_key(ctx@ptr, uridensewkey, "READ",
encryption_key, tstamp)
One can read by key:
Or one can retrieve all metadata at once:
# binary file to be written
uri <- tempfile(pattern = "tiledb_vfs", fileext = ".bin")
# open file for writing
fhbuf <- tiledb_vfs_open(uri, "WRITE")
# create a binary payload from a serialized R object
# (the raw vector returned by serialize() is converted to integers)
payload <- as.integer(serialize(list(dbl=153, string="abcde"), NULL))
# write it and close file
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)
# write again overwriting previous write
fhbuf <- tiledb_vfs_open(uri, "WRITE")
payload <- as.integer(serialize(list(dbl=153.1, string="abcdef"), NULL))
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)
# append to existing file
fhbuf <- tiledb_vfs_open(uri, "APPEND")
payload <- as.integer(serialize(c(string="ghijkl"), NULL))
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)
# Make sure directory 'dir_A' is present, creating it when missing
if (tiledb_vfs_is_dir("dir_A")) {
  cat("'dir_A' already exists\n")
} else {
  tiledb_vfs_create_dir("dir_A")
  cat("Created 'dir_A'\n")
}
# Make sure the (empty) file 'dir_A/file_A' is present, creating it when missing
if (tiledb_vfs_is_file("dir_A/file_A")) {
  cat("File 'dir_A/file_A' already existed\n")
} else {
  tiledb_vfs_touch("dir_A/file_A")
  cat("Created empty file 'dir_A/file_A'\n")
}
# Report the size of the file
cat("Size of file 'dir_A/file_A': ", tiledb_vfs_file_size("dir_A/file_A"), "\n")
# Rename the file (directories are moved in a similar way)
tiledb_vfs_move_file("dir_A/file_A", "dir_A/file_B")
# Tidy up: first the file, then the directory
tiledb_vfs_remove_file("dir_A/file_B")
tiledb_vfs_remove_dir("dir_A")
# Start collecting statistics
tiledb_stats_enable()
# ... create some query here
# (`A` is an array object opened earlier)
res <- A[1:4]
# Stop collecting statistics
tiledb_stats_disable()
# Show the statistics on the console
tiledb_stats_print()
# Save the statistics to a file
tiledb_stats_dump("my_file_name")
# You can also reset the stats as follows
tiledb_stats_reset()
# Demonstrate error handling: creating the same group twice makes the second
# call fail, and the handlers print the message of the condition raised.
# `result` holds the value of the last expression on success, or the value
# returned by the handler that ran.
result <- tryCatch({
  # Create a group. The code below creates a group `my_group`; (normally)
  # this will succeed.
  tiledb_group_create("/tmp/my_group")
  # Create the same group again. If we attempt to create the same group
  # `my_group` as shown below, TileDB will return an error.
  tiledb_group_create("/tmp/my_group")
}, warning = function(w) {
  # A condition object is a list and cannot be passed to cat() directly;
  # print its message text instead.
  cat(conditionMessage(w), "\n")
}, error = function(e) {
  cat(conditionMessage(e), "\n")
})