---
title: "Getting started with cppally"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Getting started with cppally}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache = FALSE
)
```

```{r, include = FALSE}
# Packages that must be installed for the chunks below to be evaluated.
# If any is missing, evaluation is switched off and knitting stops here
# instead of failing part-way through the vignette.
required <- c("bench", "brio", "callr", "cli", "decor", "desc", "glue", "purrr",
              "readr", "stringr", "utils", "vctrs", "withr")

if (!all(vapply(required, requireNamespace, logical(1), quietly = TRUE))) {
  knitr::opts_chunk$set(eval = FALSE)
  knitr::knit_exit()
}
```

Below we explore some of the capabilities of cppally, from its custom C++ scalar and vectors, to using templates and concepts.

**Note:** The classes `r_vec` and `r_vector` are aliases of one another and thus can be used interchangeably.

## Setup

Let's start by loading cppally

```{r}
library(cppally)
```

```{r, include=FALSE}
# Helpers to compile all examples in debug mode.
# These deliberately mask `cppally::cpp_source()` / `cppally::cpp_eval()`
# inside the vignette so every example is built with `debug = TRUE` unless
# stated otherwise.

# Compile `code` after prepending the standard preamble (cppally header +
# `using namespace cppally;`) so the displayed snippets don't have to repeat it.
#   ...   : forwarded to `cppally::cpp_source()`
#   code  : character vector of C++ source
#   debug : forwarded as-is (defaults to TRUE here)
#   env   : environment forwarded to `cppally::cpp_source()`
cpp_source <- function(..., code, debug = TRUE, env = parent.frame()) {
  preamble <- c("#include <cppally.hpp>", "using namespace cppally;")
  code <- paste(c(preamble, code), collapse = "\n")
  cppally::cpp_source(debug = debug, env = env, code = code, ...)
}

# Thin wrapper over `cppally::cpp_eval()` that defaults to `debug = TRUE`.
cpp_eval <- function(..., debug = TRUE, env = parent.frame()) {
  cppally::cpp_eval(debug = debug, env = env, ...)
}

# Helpers to source and display C++/R code

# Wrap `x` in a fenced markdown code block tagged with `language`.
chunk_impl <- function(x, language) {
  paste0("```", language, "\n", x, "\n```\n")
}

# Emit the fenced block with `cat()`; intended for chunks using
# `results = 'asis'`.
as_code_chunk <- function(x, language) {
  cat(chunk_impl(x, language))
}

# Shortcut for C++ snippets.
as_cpp_chunk <- function(x) {
  as_code_chunk(x, "cpp")
}

# Pre-register named single-line expressions so they can be referenced later.
#
#   exprs : named character vector of C++ expressions; every element must have
#           a non-empty, non-NA name because the names become R function names
#   env   : environment in which the wrappers are created
#   ...   : forwarded to cppally's internal `source_single_exprs()`
#
# For each expression a zero-argument R function of the same name is assigned
# into `env`. Returns `invisible()`.
#
# NOTE(review): this relies on `source_single_exprs()` defining `f1`, `f2`, ...
# in `env`, each returning a list with `is_void` and `result` elements - that
# is inferred from how they are used below; confirm against cppally internals.
register_single_exprs <- function(exprs, env = parent.frame(), ...) {
  expr_names <- names(exprs)
  # `names()` is NULL only when *no* element is named; partially named input
  # yields "" (or NA) entries, which must be rejected too because they cannot
  # be used as variable names by `list2env()`.
  if (is.null(expr_names) || anyNA(expr_names) || !all(nzchar(expr_names))) {
    stop("`exprs` must be named")
  }
  utils::getFromNamespace("source_single_exprs", "cppally")(
    exprs, env = env, ...
  )
  wrappers <- setNames(
    lapply(seq_along(exprs), \(i) {
      fn <- get(paste0("f", i), envir = env)
      function() {
        out <- fn()
        # void expressions have no value to show
        if (out[["is_void"]]) invisible() else out$result
      }
    }),
    expr_names
  )
  list2env(wrappers, envir = env)
  invisible()
}
```

```{r, include=FALSE}
# Compile necessary examples in one-go
# as it's faster when building the vignette

examples <- c(
  hello_world = '
[[cppally::register]]
void hello_world(){
  print("Hello World!");
}',
  lgl_ops = '
[[cppally::register]]
r_vec lgl_ops(){
  return make_vec(
    r_true || r_false, // true
    r_true && r_false, // false
    r_na || r_true, // true
    r_na && r_true, // NA
    r_na && r_false, // false
    r_na || r_na, // NA
    r_na && r_na // NA
  );
}
',
  bad_lgl_print = '
[[cppally::register]]
void bad_lgl_print(r_lgl condition){
  if (condition){
    print("true");
  } else {
    print("false");
  }
}
',
  good_lgl_print = '
[[cppally::register]]
void good_lgl_print(r_lgl condition){
  if (is_na(condition)){
    print("NA");
  } else if (condition){
    print("true");
  } else {
    print("false");
  }
}
',
  also_good_lgl_print = '
[[cppally::register]]
void also_good_lgl_print(r_lgl condition){
  if (condition.is_true()){
    print("true");
  } else {
    print("not true");
  }
}
',
  cppally_equality = '
[[cppally::register]]
void cppally_equality(){
  r_int x = na();
  r_int y = na();
  r_lgl x_equal_to_y = x == y;
  bool x_identical_to_y = identical(x, y);
  // NA so not printed
  if ( x_equal_to_y.is_true() ){
    print("x is equal to y\\n");
  }
  // NA so not printed
  if ( x_equal_to_y.is_false() ){
    print("x is not equal to y\\n");
  }
  // NA so printed
  if (is_na(x_equal_to_y)){
    print("`x == y` produces `NA`\\n");
  }
  // Both na() therefore they are identical to each other
  if (x_identical_to_y){
    print("x is identical to y\\n");
  }
}
',
  cppally_identical = '
template
[[cppally::register]]
bool cpp_identical(T x, U y){
  return identical(x, y);
}
',
  new_integer_vector = '
// Integer vector of size n
[[cppally::register]]
r_vec new_integer_vector(int n){
  r_vec int_vctr(n, /*fill = */ r_int(0));
  return int_vctr;
}
',
  # NOTE(review): the name "character" is used twice in the list below -
  # confirm whether the second entry was meant to carry a different name.
  all_vectors = '
[[cppally::register]]
r_vec all_vectors(){
  return make_vec(
    arg("logical") = r_vec(),
    arg("integer") = r_vec(),
    arg("integer64") = r_vec(), // Requires bit64
    arg("double") = r_vec(),
    arg("character") = r_vec(),
    arg("character") = r_vec(),
    arg("raw") = r_vec(),
    arg("date") = r_vec(),
    arg("date-time") = r_vec(),
    arg("list") = r_vec()
  );
}
',
  cppally_math = '
[[cppally::register]]
r_vector cppally_math(r_dbl x){
  return make_vec(
    arg("abs") = abs(x),
    arg("floor") = floor(x),
    arg("ceiling") = ceiling(x),
    arg("trunc") = trunc(x),
    arg("round") = round(x),
    arg("signif") = signif(x, 3),
    arg("sign") = sign(x),
    arg("min") = min(0, x),
    arg("max") = max(0, x),
    arg("sqrt") = sqrt(x),
    arg("pow") = pow(x, 2),
    arg("exp") = exp(x),
    arg("log") = log(x),
    arg("log_base") = log(x, 2),
    arg("log10") = log10(x)
  );
}',
  cpp_abs = '
template
[[cppally::register]]
T cpp_abs(T x){
  if (is_na(x)){
    return na();
  } else if (x < 0){
    return -x;
  } else {
    return x;
  }
}
',
  scalar_default = '
// Return the default constructor result of RScalar types
template
[[cppally::register]]
T scalar_default(T ptype){
  return T();
}
',
  double_to_int = '
[[cppally::register]]
r_int double_to_int(r_dbl x){
  return as(x);
}
',
  to_int_vec = '
[[cppally::register]]
r_vec to_int_vec(r_vec x){
  return as>(x);
}
',
  coercions = '
[[cppally::register]]
r_vec coercions(){
  r_dbl a(4.2);
  r_vec b = make_vec(2.5);
  return make_vec(
    as>(a),
    as(a),
    as(b),
    as(b)
  );
}
',
  to_from_cpp_vec = '
[[cppally::register]]
r_vector cpp_vectors_example(r_vector x){
  std::vector x_cpp = as>(x);
  x_cpp.push_back(r_int(42));
  return as>(x_cpp);
}
',
  str_concatenate = '
[[cppally::register]]
r_str str_concatenate(r_str x, r_str y, r_str sep){
  std::string left = std::string(x.cpp_str());
  std::string right = std::string(y.cpp_str());
  std::string middle = std::string(sep.cpp_str());
  std::string combined = left + middle + right;
  return r_str(combined.c_str());
}
',
  new_list = '
using list = r_vec;
[[cppally::register]]
list new_list(int n){
  return list(n);
}
',
  resize_all = '
[[cppally::register]]
r_vec resize_all(r_vec x, r_size_t n){
  r_size_t list_length = x.length();
  for (r_size_t i = 0; i < list_length; ++i){
    r_sexp_visit(x.view(i), [&](T vec) {
      x.set(i, vec.resize(n));
    });
  }
  return x;
}
',
  new_factor = '
[[cppally::register]]
r_factors new_factor(r_vec x){
  return r_factors(x);
}
',
  factor_codes = '
static_assert(!RVector);
[[cppally::register]]
r_vec factor_codes(r_factors x){
  return x.codes();
}
',
  find_empty_string = '
[[cppally::register]]
r_vector find_empty_string(r_vector x){
  return x.find(r_str(""));
}
',
  match_strs = '
[[cppally::register]]
r_vector match_strs(r_vector x, r_vector table){
  return match(x, table);
}
',
  cpp_in = '
[[cppally::register]]
r_vector cpp_in(r_vector x, r_vector table){
  return x IS_IN table;
}
',
  cpp_not_in = '
[[cppally::register]]
r_vector cpp_not_in(r_vector x, r_vector table){
  return !(x IS_IN table);
}
',
  cpp_subset = '
template
requires any
[[cppally::register]]
T cpp_subset(T x, r_vector y){
  return subset(x, y);
}
',
  cpp_negative_subset = '
template
requires any
[[cppally::register]]
T cpp_negative_subset(T x, r_vector y){
  return subset(x, y, /*invert=*/ true);
}
',
  setnames = '
[[cppally::register]]
r_vec set_list_names(r_vec x, r_vec names){
  x.set_names(names);
  return x;
}
',
  cpp_seq_len = '
[[cppally::register]]
r_vec cpp_seq_len(r_size_t n){
  return sequence(n, /* from = */ r_int(1), /* by = */ r_int(1));
}
',
  cpp_sequences = '
template
requires (any)
[[cppally::register]]
r_vec cpp_sequences(r_vec size, r_vec from, r_vec by){
  return pmap([](auto a, auto b, auto c){
    return as(sequence(a, b, c));
  }, size, from, by);
}
'
)

# Benchmarks need debug = FALSE
benchmark_examples <- c(
  cpp_n_unique = '
template
[[cppally::register]]
r_int cpp_n_unique(T x){
  return as(n_unique(x));
}
',
  primitive_sum = '
[[cppally::register]]
double primitive_sum(const r_vec& x){
  // r_vec::data_type always returns typename T
  using data_t = typename std::remove_cvref_t::data_type;
  using primitive_t = unwrap_t;
  primitive_t *p_x = x.data();
  r_size_t n = x.length();
  double sum = 0;
  OMP_SIMD_REDUCTION1(+:sum)
  for (r_size_t i = 0; i < n; ++i){
    sum += p_x[i];
  }
  return sum;
}
'
)

# Two separate compilation units: the regular examples in debug mode and the
# benchmarked ones without it.
cpp_source(code = paste(examples, collapse = "\n"), debug = TRUE)
cpp_source(code = paste(benchmark_examples, collapse = "\n"), debug = FALSE)

# Single-line expressions, pre-registered as R functions of the same name.
# Each can be invoked later as e.g. `r_true_val()` to get the evaluated result.
single_exprs <- c(
  r_true_val = 'r_true',
  r_false_val = 'r_false',
  r_na_val = 'r_na',
  r_na_check = 'is_na(na())',
  r_plus = 'r_int(0) + r_dbl(2.5)',
  r_plus_na = 'na() + r_dbl(2.5)',
  r_minus = 'r_int(0) - r_int(1)',
  r_minus_na = 'na() - r_int(1)',
  r_mult = 'r_int(2) * r_int(3)',
  r_mult_na = 'na() * r_int(3)',
  r_div = 'r_dbl(9) / 3',
  r_div_na = 'na() / 3',
  r_lt = 'r_int(1) < r_int(2)',
  r_lt_na = 'na() < r_int(2)',
  r_lte = 'r_dbl(2) <= r_dbl(2)',
  r_lte_na = 'na() <= r_dbl(2)',
  r_gt = 'r_int(3) > r_int(2)',
  r_gt_na = 'na() > r_int(2)',
  r_gte = 'r_dbl(2) >= r_dbl(3)',
  r_gte_na = 'na() >= r_dbl(3)',
  make_vec_dbl = 'make_vec(1, 1.5, 2, na())',
  make_vec_dbl_named = '
make_vec(
  arg("first") = 1,
  arg("second") = 1.5,
  arg("third") = 2,
  arg("last") = na()
)
',
  make_vec_sexp = 'make_vec(1, 2, 3)',
  r_str_hello = 'r_str("hello")',
  r_str_hello_c_str = 'r_str("hello").c_str()',
  r_sym_new = 'r_sym("new_symbol")',
  r_sym_from_str = 'r_sym(r_str("symbol_from_string"))',
  cached_str_demo = 'cached_str<"cached_string">()',
  cached_sym_demo = 'cached_sym<"cached_symbol">()',
  cpp_seq = 'seq(r_dbl(1), r_dbl(5), r_dbl(0.5))',
  cpp_sequence = 'sequence(5, /*from = */ r_int(0), /*by = */ r_int(-1))'
)

register_single_exprs(single_exprs, debug = TRUE)
```

## Registering C++ functions (to R)

To make a C++ function available to R we use the `[[cppally::register]]` tag.
``` cpp #include <cppally.hpp> using namespace cppally; [[cppally::register]] void hello_world(){ print("Hello World!"); } ``` After tagging our functions we want to make them available to R. To do that we have a few routes. ### Registering C++ functions outside of a package context After writing our hello world program in `src/foo.cpp` we can use `cpp_source()` to compile and register the function to R. ``` r cpp_source(file = "src/foo.cpp") ``` Now the function is available in R ```{r} hello_world() ``` Similarly we can use the helper `cpp_eval` to run simple expressions and return the result without needing to include cppally.hpp and register the function. ```{r} cpp_eval('print("Hello World Again!")') ``` **Note** - For the rest of the examples it is assumed that the following code is always included beforehand. ``` cpp #include <cppally.hpp> using namespace cppally; ``` ### Registering C++ functions inside a cppally-linked package Since cppally is header-only, we can include the headers directly into our own package. ### General steps to using cppally in a package 1. Create package (if you haven't already done so) using `usethis::create_tidy_package()` 2. Run `cppally::use_cppally()` 3. Run `cppally::document()` This will automatically add the necessary package content needed to start working with cppally. For continuous development, use `cppally::load_all()` to compile and register cppally tagged functions, including our hello world function. **Note:** We aim to integrate cppally registration into the `devtools` framework for ease-of-use. ## C++ types cppally offers a rich set of R types in C++ that are NA-aware. This means that common arithmetic and logical operations will account for `NA` in a similar fashion to R. ## Scalars ### logical scalar - `r_lgl` cppally's scalar version of `logical`, `r_lgl` can represent true, false or NA. 
```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(paste( single_exprs[["r_true_val"]], single_exprs[["r_false_val"]], single_exprs[["r_na_val"]], sep = "\n" )) ``` ```{r, echo=FALSE} r_true_val() r_false_val() r_na_val() ``` Logical operators work just like in R ```{r, echo=FALSE, comment="", results='asis'} as_cpp_chunk(examples[["lgl_ops"]]) ``` ```{r} lgl_ops() ``` **Using `r_lgl`** in if-statements For type-safety reasons `r_lgl` cannot be implicitly converted to `bool` except in if-statements where an error is thrown if the value is `NA`. **DON'T** do this: ```{r, echo=FALSE, comment="", results='asis'} as_cpp_chunk(examples[["bad_lgl_print"]]) ``` ```{r, error=TRUE} bad_lgl_print(TRUE) bad_lgl_print(FALSE) bad_lgl_print(NA) # Can't implicitly convert NA to bool ``` **DO** this: ```{r, echo=FALSE, comment="", results='asis'} as_cpp_chunk(examples[["good_lgl_print"]]) ``` ```{r} good_lgl_print(TRUE) good_lgl_print(FALSE) good_lgl_print(NA) # NA is handled explicitly so no issues ``` We can also use `r_lgl` members `is_true()` and `is_false()` which return `bool` and are equivalent to R's `isTRUE()` and `isFALSE()` ```{r, echo=FALSE, comment="", results='asis'} as_cpp_chunk(examples[["also_good_lgl_print"]]) ``` ```{r} also_good_lgl_print(TRUE) also_good_lgl_print(FALSE) also_good_lgl_print(NA) # Falls into 'not true' branch here as expected ``` **Important:** The `&&` and `||` operators for `r_lgl` do **NOT short-circuit** like they do for `bool`. Both sides of the expression are always evaluated. If you specifically require short-circuiting behaviour, use `is_true()` and `is_false()` before using `&&` and `||`. All cppally scalar types are implemented as structs that contain the underlying C/C++ types as well as other member functions. 
| cppally type | Description | Built on | |:--------------|-----------------------------|:-----------------------| | `r_lgl` | Scalar logical | `int` | | `r_int` | Scalar integer | `int` | | `r_int64` | Scalar 64-bit integer | `int64_t` | | `r_dbl` | Scalar double | `double` | | `r_str` | Scalar string | `r_sexp` | | `r_str_view` | Scalar string (view) | `SEXP` | | `r_cplx` | Scalar double complex | `std::complex<double>` | | `r_raw` | Scalar raw | `unsigned char` | | `r_sym` | Symbol | `SEXP` | | `r_date` [^1] | Scalar date | `r_dbl` | | `r_psxct` | Scalar date-time | `r_dbl` | | `r_sexp` | Generic R object (SEXP)[^2] | `SEXP` | [^1]: Unlike `r_str` which is composite and holds an `r_sexp` member, `r_date` and `r_psxct` instead inherit directly from `r_dbl`. This means that they can implicitly convert to `r_dbl`. [^2]: `r_sexp` represents a generic R object which can include cppally vectors. We will explain how to disambiguate `r_sexp` later which is most useful when working with lists and data frames. ### NA values Use `is_na()` to check that a value is NA and `na<T>()` to generate NA values. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_na_check"]]) ``` ```{r, echo=FALSE} r_na_check() ``` ### C++ NA values and their R C API equivalents | Type | Value | R C API Value | |--------------------------|-------------------------|----------------| | `r_lgl` | `na<r_lgl>()`/`r_na` | `NA_LOGICAL` | | `r_int` | `na<r_int>()` | `NA_INTEGER` | | `r_int64` | `na<r_int64>()` | Not applicable | | `r_dbl` | `na<r_dbl>()` | `NA_REAL` | | `r_str` | `na<r_str>()` | `NA_STRING` | | `r_cplx` | `na<r_cplx>()` | Not applicable | | `r_sym` | Not applicable | Not applicable | | `r_sexp`[^3] | `na<r_sexp>()`/`r_null` | `R_NilValue` | [^3]: Having an `NA` sentinel for `r_sexp` is very useful when writing templates involving vectors. For this reason the `NA` sentinel is `r_null`. This doesn't mean `is_na(r_null)` is true: it is intentionally false because `r_null` is not a scalar and therefore cannot be `NA`. As `r_null` represents the absence of a tangible R object, it can be thought of as a zero-length object and since all `NA` values are represented as length-1 vectors (in R), `is_na(r_null)` should not return true. ## Checking equality There are two ways to check for exact equality of cppally scalars - with the `==` operator or with `identical()`. The cppally `==` operator always returns `r_lgl` and `identical()` always returns `bool`, which is a particularly important distinction when dealing with `NA` values as the former can represent `NA` while the latter cannot. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cppally_equality"]]) ``` ```{r} cppally_equality() ``` `identical()` can not only compare scalars, but also vectors, lists, factors, and data frames. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cppally_identical"]]) ``` ```{r} cpp_identical(3L, 3L) cpp_identical(NA, NA) cpp_identical(3L, 3) # int != double cpp_identical(1:10, 1:10) cpp_identical(list(1, 2, 3), list(3, 2, 1)) cpp_identical(iris, iris) ``` ## Scalar operators cppally also defines arithmetic and relational comparison operators for its scalar types. Like the logical and equality operators seen earlier, they are all `NA`-aware. 
### Scalar arithmetic operators **Addition** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_plus"]]) ``` ```{r, echo=FALSE} r_plus() ``` **Subtraction** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_minus"]]) ``` ```{r, echo=FALSE} r_minus() ``` **Multiplication** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_mult"]]) ``` ```{r, echo=FALSE} r_mult() ``` **Division** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_div"]]) ``` ```{r, echo=FALSE} r_div() ``` **Addition with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_plus_na"]]) ``` ```{r, echo=FALSE} r_plus_na() ``` **Subtraction with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_minus_na"]]) ``` ```{r, echo=FALSE} r_minus_na() ``` **Multiplication with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_mult_na"]]) ``` ```{r, echo=FALSE} r_mult_na() ``` **Division with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_div_na"]]) ``` ```{r, echo=FALSE} r_div_na() ``` ### Scalar relational operators **Less than** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_lt"]]) ``` ```{r, echo=FALSE} r_lt() ``` **Less than or equal to** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_lte"]]) ``` ```{r, echo=FALSE} r_lte() ``` **Greater than** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_gt"]]) ``` ```{r, echo=FALSE} r_gt() ``` **Greater than or equal to** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_gte"]]) ``` ```{r, echo=FALSE} r_gte() ``` **Less than with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_lt_na"]]) ``` ```{r, echo=FALSE} r_lt_na() ``` **Less than or equal to with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_lte_na"]]) ``` ```{r, echo=FALSE} r_lte_na() ``` **Greater than with `NA`** ```{r, 
echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_gt_na"]]) ``` ```{r, echo=FALSE} r_gt_na() ``` **Greater than or equal to with `NA`** ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_gte_na"]]) ``` ```{r, echo=FALSE} r_gte_na() ``` Other defined operators not showcased: `++`, `--`, `+=`, `-=`, `*=`, `/=`, `%=`, `-`, `%`, `|`, `&`, `!` ## Vectors cppally vectors are templated and can be thought of as containers of scalar elements like `r_int`, `r_dbl`, etc. We can create vectors like so ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["new_integer_vector"]]) ``` ```{r} new_integer_vector(3) ``` ### inline vectors To create inline vectors, use `make_vec<>` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["make_vec_dbl"]]) ``` ```{r, echo=FALSE} make_vec_dbl() ``` We can add names on the fly with `arg()` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["make_vec_dbl_named"]]) ``` ```{r, echo=FALSE} make_vec_dbl_named() ``` In R a list is a generic vector, so cppally defines lists as `r_vec`, a vector of the generic type `r_sexp`. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["make_vec_sexp"]]) ``` ```{r, echo=FALSE} make_vec_sexp() ``` A list of all cppally vectors of length 0 ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["all_vectors"]]) ``` ```{r} all_vectors() ``` ## Scalar math There is a rich suite of math functions that accept cppally types. 
```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cppally_math"]]) ``` ```{r} cppally_math(2.5) cppally_math(NA) ``` ## Coercion To coerce from one scalar to another we can use `as` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["double_to_int"]]) ``` ```{r} double_to_int(pi) double_to_int(NA_real_) ``` We can also coerce from one vector type to another ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["to_int_vec"]]) ``` ```{r} to_int_vec(c(0, 1.5, NA)) ``` Since `as` is extremely flexible, we can also coerce from a scalar to a vector or vice versa ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["coercions"]]) ``` ```{r} coercions() ``` We can even coerce to and from C++ vectors ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["to_from_cpp_vec"]]) ``` ```{r} cpp_vectors_example(41L) ``` While coercing to a `std::vector` just to push back an element before coercing back might not be the most efficient, it does showcase how easy it is to work with cppally vectors and C++ vectors. 
## Strings cppally provides the useful string type `r_str` We can create R strings easily ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_str_hello"]]) ``` ```{r, echo=FALSE} r_str_hello() ``` To get a C or C++ string, use the members `c_str()` and `cpp_str()` respectively C string via `c_str()` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_str_hello_c_str"]]) ``` ```{r, echo=FALSE} r_str_hello_c_str() ``` C++ string_view via `cpp_str()` This can be converted into a std::string via its constructor ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["str_concatenate"]]) ``` ```{r} str_concatenate("hello", "how are you?", sep = ", ") ``` ## Symbols Symbols have class `r_sym` and can be created directly from a string literal ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_sym_new"]]) ``` ```{r, echo=FALSE} r_sym_new() ``` Or from a cppally string ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["r_sym_from_str"]]) ``` ```{r, echo=FALSE} r_sym_from_str() ``` ## Cached strings & symbols cppally provides an efficient caching strategy for constructing cppally strings/symbols from string literals `cached_str<>` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["cached_str_demo"]]) ``` ```{r, echo=FALSE} cached_str_demo() ``` This initialises the string once, caches it (to R's CHARSXP pool), and efficiently re-uses the cached string for each subsequent call. We can cache symbols in a similar way ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["cached_sym_demo"]]) ``` ```{r, echo=FALSE} cached_sym_demo() ``` ## Lists `r_sexp` is generally interpreted as an "element of a list" since lists are defined as `r_vec`, a vector that holds generic `r_sexp` elements. 
```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["new_list"]]) ``` ```{r} new_list(0) new_list(3) ``` The problem with a class like `r_sexp` is that it is by design generic and therefore difficult to work with in C++. To disambiguate the actual type we can use `r_sexp_visit()` via a C++ lambda. **Example:** using `r_sexp_visit()` to resize every vector to length n in-place ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["resize_all"]]) ``` ## Factors We can create a factor via `r_factors()` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["new_factor"]]) ``` ```{r} new_factor(letters) ``` In cppally, like R, factors are not vectors and therefore do not satisfy the RVector concept. To access the underlying integer codes vector, use the public `codes()` member function ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["factor_codes"]]) ``` ```{r} letter_fct <- new_factor(letters) letter_fct |> factor_codes() ``` ## Value matching Use `r_vec` member `find()` to find the **0-indexed** locations of a scalar value. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["find_empty_string"]]) ``` ```{r} x <- c("zero", "one", "two", "three", "four") find_empty_string(x) # Add empty strings x[c(1, 3)] <- "" find_empty_string(x) ``` To find the locations of multiple values (first match), use `match()`. It works like R's `base::match()` but is 0-indexed. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["match_strs"]]) ``` ```{r} letters vowels <- c("a", "e", "i", "o", "u") # cppally::match is 0-indexed match_strs(vowels, letters) ``` cppally provides the `IS_IN` infix operator, identical to R's `%in%` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_in"]]) ``` ```{r} cpp_in(c("a", "A", NA), letters) ``` To mimic R's new `%notin%` operator, simply use the logical negation operator. 
```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_not_in"]]) ``` **Technical note:** cppally internally negates the intermediate result of `x IS_IN table` in-place in this particular case because it satisfies specific properties of exclusivity which while not covered here, may be covered in a later vignette. This in-place negation is naturally efficient as it avoids allocating a new vector. ```{r} cpp_not_in(c("a", "A", NA), letters) ``` ## Subsetting Subsetting is 0-indexed in cppally, as is all other indexing. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_subset"]]) ``` Subsetting with **integer** indices ```{r} x <- 1:10 cpp_subset(x, 0L) # index 0 is 1st value cpp_subset(x, 9L) # index 9 is last value here cpp_subset(x, 10L) # index 10 is out-of-bounds so NA is returned cpp_subset(x, NA_integer_) # NA is returned with integer NA index ``` Subsetting with **logical** indices ```{r} cpp_subset(x, x > 5) cpp_subset(x, x > 100) # It differs to base subsetting (via `[`) cpp_subset(x, rep(NA, 10)) # cppally only returns values associated with TRUE x[rep(NA, 10)] # base R returns NA values when subsetting with NA logicals ``` base R performs negative subsetting by using negative numbers. This is not possible with our 0-indexed subset as we would have to also represent negative zero to subset everything except the first element at index 0. Instead cppally has an `invert` argument. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_negative_subset"]]) ``` ```{r} cpp_negative_subset(x, 0L) # Everything but 1st cpp_negative_subset(x, x > 5) # Everything except where x > 5 cpp_negative_subset(x, integer()) # Everything ``` Named subsetting is also supported. cppally internally hashes the vector names and performs hash lookups. For more info, see the 'Vector Names Hashing' vignette. 
```{r} names(x) <- LETTERS[seq_along(x)] cpp_subset(x, c("A", "J", "Z")) cpp_negative_subset(x, "A") ``` ## Concepts and Templates Concepts are one of the most powerful features of C++20. These allow users to write human-readable templates and constraints. When writing your own templates, it is necessary to place them in headers for cppally registration to work correctly. Let's practice by creating the `abs()` function in C++ using templates and the `RMathType` concept. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_abs"]]) ``` What's nice is that it works correctly for integers and doubles while simultaneously preserving their type ```{r} cpp_abs(-4.2) cpp_abs(-3L) class(cpp_abs(-4.2)) # Double preserved class(cpp_abs(-3L)) # Integer preserved ``` This type of programming is historically tricky within the R C API and typically necessitates a switch statement that switches on the object's type, handling each type separately. With our `abs()` template, the logic is correctly handled with one set of operations. ### How it works The top-line `template <RMathType T>` declares a template that encapsulates `T`, an `RMathType` - a concept that contains `r_lgl`, `r_int`, `r_int64` and `r_dbl`. If `x` is NA then we immediately also return NA via `na<T>()` which is a templated function that returns NA of the input type `T`. To correctly register templates, the `[[cppally::register]]` tag must always go directly above the function signature, below the `template` line. ``` cpp template <typename T> [[cppally::register]] // <--- Here T foo(T x){ return x; } ``` ### Templates without function arguments Explicit instantiation (from R) is unfortunately not possible and template types must be deduced from supplied arguments. ``` cpp template <typename T> [[cppally::register]] T foo(){ return T(); } ``` Here `foo()` will not be compiled because the function has no arguments that let the compiler automatically deduce what `T` is. In C++ you would always call this function with an explicit template argument, e.g. `foo<r_int>()`. Unfortunately we can't do that from R directly. 
You may get a cryptic compiler error like this ``` cpp error: no matching function for call to 'foo()' []() -> decltype(cpp_to_r(::foo())) { ``` along with an equally cryptic note ``` cpp note: couldn't deduce template parameter 'T' []() -> decltype(cpp_to_r(::foo())) { ``` Even though these kinds of templates can be written with cppally in C++, they cannot be exported to R. An obvious and somewhat ugly workaround is to include a prototype argument from which the template parameter can be deduced. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["scalar_default"]]) ``` ```{r} scalar_default(integer(1)) # Default is 0L scalar_default(numeric(1)) # Default is 0.0 scalar_default(character(1)) # Default is "" ``` Exporting variadic templates is also not supported. The best alternative is to use lists (`r_vec<r_sexp>`). In the above example we used the `RScalar` concept which includes all cppally scalar types (excluding `r_sexp`). For a list of all cppally concepts, please see the **Annex**. ## Attributes Attributes can be manipulated via functions defined in the `attr` namespace. **Example:** Adding names to a list ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["setnames"]]) ``` ```{r} set.seed(42) norm_samples <- lapply(1:5, \(x) rnorm(10, mean = x)) set_list_names(norm_samples, paste0("sample_", 1:5)) ``` More useful attribute helpers - `get_attrs()` - Returns a list of attributes (possibly `r_vec<r_sexp>(r_null)`) - `set_attrs()` - Sets attributes to ones specified. Note: replaces any current attributes - `clear_attrs()` - Removes all attributes - `set_attr()` - Set a single attribute - `get_attr()` - Get a single attribute - `inherits1()` - Does object inherit class? - `inherits_any()` - Does object inherit at least one of the specified classes? - `inherits_all()` - Does object inherit all of the specified classes? 
- `modify_attrs()` - Modifies current attributes but doesn't remove any existing ones ## Regular sequences There are two core functions for generating regular sequences - `sequence()` and `seq()`. `seq()` behaves exactly like the R equivalent `base::seq()`, and `sequence()` behaves like the R equivalent `base::sequence()`, with the exception that it accepts scalar arguments instead of vector ones. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["cpp_seq"]]) ``` ```{r, echo=FALSE} cpp_seq() ``` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(single_exprs[["cpp_sequence"]]) ``` ```{r, echo=FALSE} cpp_sequence() ``` We can also use `sequence()` to easily replicate `base::seq_len()` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_seq_len"]]) ``` ```{r} cpp_seq_len(5) ``` It is also straightforward to replicate `base::sequence()` with `pmap()` ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(examples[["cpp_sequences"]]) ``` ```{r} cpp_sequences(1:3, from = 0L, by = 1L) |> unlist() # Same as base R sequence(1:3, from = 0L, by = 1L) ``` ## Sugar functions cppally also offers many useful and high-performance common functions in cppally/sugar **Example:** `n_unique()` - fast calculation of number of unique values. ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(benchmark_examples[["cpp_n_unique"]]) ``` ```{r} library(bench) x <- sample(1:100, 10^5, replace = TRUE) mark( base_n_unique = length(unique(x)), cppally_n_unique = cpp_n_unique(x) ) ``` More useful sugar functions - `unique()` - Like R's `unique()` but with a `sort` argument to return sorted unique values - `order()` - Like base R's order but it internally uses a hybrid approach of ska sort, count sorting, quick sort, etc. - `make_groups()` - An advanced function that returns a struct containing group IDs and number of groups (i.e number of unique group IDs). 
The `groups` struct contains the following members: - `r_vec ids` - The cached group IDs - `int n_groups` - Number of unique groups - `bool ordered` - Do the group IDs specify a sorting order, or are they by order-of-first-appearance? - `bool sorted` - Are the group IDs sorted? (This can also be true for order-of-first-appearance IDs) - `r_vec start()` - Returns an r_vec(n_groups) vector of start locations of each unique group, signifying the location in the data at which each group initially appeared - `r_vec counts()` - Returns an r_vec(n_groups) vector of frequency counts of each unique group - `r_vec order()` - Returns an r_vec(ids.length()) order vector. This is a 0-indexed permutation vector that can be used to return sorted group IDs - `recycle()` - Recycles supplied vectors to common length - `r_vec::subset()` - Fast subsetting of vectors **Stats sugar functions** Some statistical summary functions that are all very highly optimised for speed - `sum()` - Sum of values - `mean()` - Average of values - `range()` - Min and max range of values - `var()` - Variance ## Annex ### Symbols in R-registered templates `r_sym` is unsupported in templates when it's part of a template argument but is supported when the argument is explicitly an `r_sym`. ``` cpp [[cppally::register]] r_str symbol_to_string(r_sym x){ return as(x); } ``` ``` r hello_world_symbol <- as.symbol("hello world!") hello_world_symbol `hello world!` symbol_to_string(hello_world_symbol) [1] "hello world!" 
``` ### All core cppally concepts - RIntegerType - Includes `r_lgl`, `r_int`, `r_int64` - RMathType - Includes `r_lgl`, `r_int`, `r_int64` and `r_dbl` - RStringType - Includes `r_str` and `r_str_view` - RScalar - Includes all cppally specific scalar types - RVal - Includes anything a cppally vector (`r_vec<>`) can contain: RScalar + `r_sexp` - RVector - Includes `r_vec<T>` where `T` is an RVal - RFactor - Factors - RDataFrame - Data frames - RComposite - Includes vectors, factors and data frames - RTimeType - Includes `r_date` and `r_psxct` - RNumericType - Numeric types, including RMathType and RTimeType - RSortableType - Includes RNumericType and RStringType (strings can also be sorted) - RAtomicVector - A vector that contains RScalar elements - CppallyType - Any R type defined by cppally, including RVal, RVector, RFactor, RDataFrame, RSymbol - CppType - Anything that is not a CppallyType - CastableToRScalar - Anything that can be constructed or cast into an RScalar (which also includes RScalar) Other useful type traits - `unwrap_t` - Returns the underlying unwrapped type - `as_r_scalar_t` - Returns the equivalent RScalar type - `as_r_composite_t` - Returns the equivalent RComposite type - `common_r_t` - Returns the common cppally type between 2 types ### Accessing the underlying types and values While it is generally recommended not to access the underlying objects, you can do so with `unwrap()` which returns the underlying C/C++ value. For example, `unwrap(r_int(5))` will return an `int` of value `5`. To access the underlying type, use `unwrap_t<>` which always aligns with `unwrap()`. The main reason for wanting to access underlying values would likely be optimisation and so `unwrap()` and `unwrap_t` allow this to be done consistently. **Example:** Summing a double vector using `r_vec::data()` member ```{r, echo=FALSE, results = 'asis'} as_cpp_chunk(benchmark_examples[["primitive_sum"]]) ``` ```{r} x <- rnorm(10^5) primitive_sum(x) ```