Title: | Date Utils |
---|---|
Description: | Utilities for mixed frequency data. In particular, use to aggregate and normalize tabular mixed frequency data, index dates to end of period, and seasonally adjust tabular data. |
Authors: | Seth Leonard [aut, cre], Jiancong Liu [ctb] |
Maintainer: | Seth Leonard <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.5 |
Built: | 2024-12-14 06:46:58 UTC |
Source: | CRAN |
Add NA values to the tail of a wide data.table to be filled by forecasting routines
add_forecast_dates( dt, horizon = 1, frq = c("month", "week", "quarter", "year"), date_name = "ref_date" )
add_forecast_dates( dt, horizon = 1, frq = c("month", "week", "quarter", "year"), date_name = "ref_date" )
dt |
data.table in wide format |
horizon |
number of periods to add at specified 'frq' |
frq |
frequency for aggregation, one of '"month"', '"week"', '"quarter"', or '"year"' |
date_name |
name of date column |
NA-filled data.table in wide format
add_forecast_dates(fred[series_name == "gdp constant prices"],frq="quarter")
add_forecast_dates(fred[series_name == "gdp constant prices"],frq="quarter")
Aggregate a data.table in long format to a specified frequency
agg_to_freq( dt_long, frq = c("month", "week", "quarter", "year"), date_name = "ref_date", id_name = "series_name", value_name = "value" )
agg_to_freq( dt_long, frq = c("month", "week", "quarter", "year"), date_name = "ref_date", id_name = "series_name", value_name = "value" )
dt_long |
data.table in long format |
frq |
frequency for aggregation, one of '"month"', '"week"', '"quarter"', or '"year"' |
date_name |
name of date column |
id_name |
name of id column |
value_name |
name of value column |
Aggregated data at specified frequency in long format
out <- agg_to_freq(fred[series_name == "gdp constant prices"], frq = "year")
out <- agg_to_freq(fred[series_name == "gdp constant prices"], frq = "year")
Aggregate a data.table to a specified frequency and return wide format data
agg_to_freq_wide( dt, date_name = "ref_date", frq = c("month", "week", "quarter", "year"), id_name = "series_name", value_name = "value", dt_is_wide = FALSE )
agg_to_freq_wide( dt, date_name = "ref_date", frq = c("month", "week", "quarter", "year"), id_name = "series_name", value_name = "value", dt_is_wide = FALSE )
dt |
data.table in long format |
date_name |
name of date column |
frq |
frequency for aggregation, one of '"month"', '"week"', '"quarter"', or '"year"' |
id_name |
name of id column |
value_name |
name of value column |
dt_is_wide |
T/F, is input data 'dt' in wide format |
Aggregated data at specified frequency in wide format
out <- agg_to_freq_wide(fred,frq="quarter")
out <- agg_to_freq_wide(fred,frq="quarter")
Return indexes of rows with only finite values
all_finite(Y)
all_finite(Y)
Y |
matrix like data object |
Indexes of rows with only finite values
X <- matrix(1,10,2) X[3,1] <- NA all_finite(X)
X <- matrix(1,10,2) X[3,1] <- NA all_finite(X)
Return a logical indicating if all elements are 'NA'
allNA(x)
allNA(x)
x |
data vector |
A logical variable indicating all elements are 'NA'
allNA(c(NA, NA, 1, NA)) ## FALSE
allNA(c(NA, NA, 1, NA)) ## FALSE
Return indexes of rows with at least one finite value
any_finite(Y)
any_finite(Y)
Y |
matrix like data object |
Indexes of rows with at least one finite value
X <- matrix(1,10,2) X[3,] <- NA any_finite(X)
X <- matrix(1,10,2) X[3,] <- NA any_finite(X)
Return a logical indicating whether data at given dates can be seasonally adjusted using seas()
can_seasonal(dates)
can_seasonal(dates)
dates |
dates |
A logical variable indicating whether data can be seasonally adjusted
can_seasonal(fred$ref_date[1:20]) ## TRUE
can_seasonal(fred$ref_date[1:20]) ## TRUE
Return 'Y' with each column as a list
col_to_list(Y)
col_to_list(Y)
Y |
matrix like data object |
Each column as a list
col_to_list(matrix(rnorm(20),10,2))
col_to_list(matrix(rnorm(20),10,2))
Put the transition matrix 'B' into companion form
comp_form(B)
comp_form(B)
B |
Transition matrix from a VAR model |
Companion matrix of the input matrix
comp_form(matrix(c(1:4), nrow = 2, byrow = TRUE)) ## matrix(c(4,-2,-3,1), nrow = 2, byrow = TRUE)
comp_form(matrix(c(1:4), nrow = 2, byrow = TRUE)) ## matrix(c(4,-2,-3,1), nrow = 2, byrow = TRUE)
Return the number of finite observations in 'x'
count_obs(x)
count_obs(x)
x |
data vector |
The Number of observations
count_obs(c(1,3,5,7,9,NA)) # 5
count_obs(c(1,3,5,7,9,NA)) # 5
Return the day of a Date value as an integer
day(date)
day(date)
date |
date value formatted as.Date() |
the day of the date (integer)
day(as.Date("2019-09-15")) ## 15
day(as.Date("2019-09-15")) ## 15
Wrapper for 'diff()' maintaining the same number of observations in 'x'
Diff(x, lag = 1)
Diff(x, lag = 1)
x |
data |
lag |
number of lags to use |
Differenced data
Diff(c(100,50,100,20,100,110))
Diff(c(100,50,100,20,100,110))
Return the date of the last day of the period (week, month, quarter, year). Weekly dates are indexed to Friday.
end_of_period(dates, period = c("month", "week", "quarter", "year"), shift = 0)
end_of_period(dates, period = c("month", "week", "quarter", "year"), shift = 0)
dates |
Date values formatted as.Date() |
period |
One of '"month"', '"week"', '"quarter"', or '"year"'. |
shift |
Integer, shift date forward (positive values) or backwards (negative values) by the number of periods. |
Last day of period in as.Date() format
end_of_period(as.Date("2019-09-15")) ## 2019-09-30
end_of_period(as.Date("2019-09-15")) ## 2019-09-30
Find the end of year for a vector of dates
end_of_year(dates)
end_of_year(dates)
dates |
Vector of dates formatted as.Date() |
The last day of the year for the dates
end_of_year(as.Date("2019-09-15")) ## 2019-12-31
end_of_year(as.Date("2019-09-15")) ## 2019-12-31
Extract character values from x excluding space and underscore
extract_basic_character(x)
extract_basic_character(x)
x |
object containing character (and other) values |
Character values without space and underscore
extract_basic_character(c("this_1one", "abc123")) ## c("thisone", "abc123")
extract_basic_character(c("this_1one", "abc123")) ## c("thisone", "abc123")
Extract character values from x including space and underscore
extract_character(x)
extract_character(x)
x |
object containing character values |
Character values from the object
extract_character(c("this_1one", "abc123")) ## c("this_one", "abc")
extract_character(c("this_1one", "abc123")) ## c("this_one", "abc")
Extract numeric values from x
extract_numeric(x)
extract_numeric(x)
x |
object containing numeric (and other) values |
Numeric values from the object
extract_numeric(c("7+5", "abc123")) ## c(75, 123)
extract_numeric(c("7+5", "abc123")) ## c(75, 123)
Fill missing observations forward using the last finite observation
fill_forward(x)
fill_forward(x)
x |
data potentially with missing observations |
x with missing obs filled by forward value
fill_forward(c(1,2,NA,NA,3,NA,5)) ## 1 2 2 2 3 3 5
fill_forward(c(1,2,NA,NA,3,NA,5)) ## 1 2 2 2 3 3 5
Return the first day of the month for each date in 'dates'
first_of_month(dates)
first_of_month(dates)
dates |
A sequence of dates in 'as.Date()' format |
First day of the month
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) first_of_month(dates)
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) first_of_month(dates)
Find the first date in the quarter for a vector of dates
first_of_quarter(dates)
first_of_quarter(dates)
dates |
Vector of dates formatted as.Date() |
The first day of the quarter for the dates
first_of_quarter(as.Date("2019-9-15")) ## 2019-07-01
first_of_quarter(as.Date("2019-9-15")) ## 2019-07-01
Return the date of the first day of the previous quarter
first_previous_quarter(date)
first_previous_quarter(date)
date |
date value formatted as.Date() |
The first day of the previous quarter of the date
first_previous_quarter(as.Date("2019-09-15")) ## 2019-04-01
first_previous_quarter(as.Date("2019-09-15")) ## 2019-04-01
Sample mixed frequency data from FRED
Seth Leonard [email protected]
Library of metadata for mixed frequency dataset 'fred'
Seth Leonard [email protected]
Guess the frequency of a data series based on the pattern of missing observations
get_data_frq(x = NULL, dates)
get_data_frq(x = NULL, dates)
x |
data, potentially with missing observations |
dates |
corresponding dates in 'as.Date()' format |
The frequency of the data
dates <- as.Date(c("2020-1-1", "2020-1-15", "2020-2-1", "2020-2-15", "2020-3-1", "2020-3-15", "2020-4-1")) get_data_frq(c(1,NA,2,NA,3,NA,4), dates) ## "month"
dates <- as.Date(c("2020-1-1", "2020-1-15", "2020-2-1", "2020-2-15", "2020-3-1", "2020-3-15", "2020-4-1")) get_data_frq(c(1,NA,2,NA,3,NA,4), dates) ## "month"
Retrieve object 'what' from 'lst'
get_from_list(lst, what)
get_from_list(lst, what)
lst |
list |
what |
object to retrieve (by name or index) |
Element of the list indicated
get_from_list(list("a" = "alpha", "b" = c(1,2,3)), "a") # "alpha"
get_from_list(list("a" = "alpha", "b" = c(1,2,3)), "a") # "alpha"
Find the Friday in a given week from a sequence of dates. Vectors should be in as.Date() format.
index_by_friday(dates)
index_by_friday(dates)
dates |
vector of dates |
The date of the Friday in the week of the given date
dates <- seq.Date(from = as.Date("2020-09-21"), by = "week", length.out = 10) fridays <- index_by_friday(dates) weekdays(fridays)
dates <- seq.Date(from = as.Date("2020-09-21"), by = "week", length.out = 10) fridays <- index_by_friday(dates) weekdays(fridays)
Find elements of 'this_in' in 'that', i.e. 'this_in %in% that'
is_in(that, this_in)
is_in(that, this_in)
that |
first object |
this_in |
second object |
Logical variables indicating whether the element exists in both objects
that <- seq.Date(from = as.Date("2020-09-15"), by = "day", length.out = 10) this_in <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) is_in(that, this_in)
that <- seq.Date(from = as.Date("2020-09-15"), by = "day", length.out = 10) this_in <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) is_in(that, this_in)
Return the latest date in each month for the values in 'dates'
last_in_month(dates)
last_in_month(dates)
dates |
A sequence of dates in 'as.Date()' format |
Last day of each month
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_month(dates)
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_month(dates)
Return the latest date in the quarter for the values in 'dates'
last_in_quarter(dates)
last_in_quarter(dates)
dates |
A sequence of dates in 'as.Date()' format |
Last day of the quarter
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_quarter(dates)
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_quarter(dates)
Return the latest date in each week for the values in 'dates'
last_in_week(dates)
last_in_week(dates)
dates |
A sequence of dates in 'as.Date()' format |
Last day of each week
dates <- seq.Date(from = as.Date("2020-09-21"), by = "day", length.out = 10) last_in_week(dates)
dates <- seq.Date(from = as.Date("2020-09-21"), by = "day", length.out = 10) last_in_week(dates)
Return the latest date in each year for the values in 'dates'
last_in_year(dates)
last_in_year(dates)
dates |
A sequence of dates in 'as.Date()' format |
Last day of the year
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_year(dates)
dates <- seq.Date(from = as.Date("2020-09-11"), by = "day", length.out = 10) last_in_year(dates)
Return the last finite observation of 'x'
last_obs(x)
last_obs(x)
x |
data potentially with non-finite values |
The last finite observation
last_obs(c(NA,1,2,3,NA,5,NA,7,NA,NA)) ## 7
last_obs(c(NA,1,2,3,NA,5,NA,7,NA,NA)) ## 7
Limit the number of characters in a string and remove special characters (will not drop numbers)
limit_character(x, limit = 100)
limit_character(x, limit = 100)
x |
object containing character values |
limit |
maximum number of characters to return |
Character values within the limit
limit_character("a%b+&cd!efghij",limit = 3) ## "abc"
limit_character("a%b+&cd!efghij",limit = 3) ## "abc"
Find the long run variance of a VAR using the transition equation 'A' and shocks to observations 'Q'
long_run_var(A, Q, m, p)
long_run_var(A, Q, m, p)
A |
Transition matrix from a VAR model in companion form |
Q |
Covariance of shocks |
m |
Number of series in the VAR |
p |
Number of lags in the VAR |
The variance matrix
long_run_var(comp_form(matrix(c(.2,.1,.1,.2,0,0,0,0), 2, 4)), matrix(c(1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0),4,4),2, 2)
long_run_var(comp_form(matrix(c(.2,.1,.1,.2,0,0,0,0), 2, 4)), matrix(c(1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0),4,4),2, 2)
Match index values of this to that
match_index(this, that)
match_index(this, that)
this |
first object |
that |
second object |
A list of indexes indicating the elements that are matched to each other
match_index(c(1,2,3),c(2,3,4)) ## $that_idx: 1 2; $this_idx: 2 3
match_index(c(1,2,3),c(2,3,4)) ## $that_idx: 1 2; $this_idx: 2 3
Find values in 'new_ts' that correspond to dates in 'old_ts'
match_ts_dates(old_ts, new_ts)
match_ts_dates(old_ts, new_ts)
old_ts |
timeseries data |
new_ts |
timeseries data |
Timeseries data in which 'new_ts' corresponds to 'old_ts'
old_ts <- ts(c(1,2,3,4), start=c(2020,1), end=c(2020,4), frequency=4) new_ts <- ts(c(5,6,3,4), start=c(2019,4), end=c(2020,3), frequency=4) match_ts_dates(old_ts, new_ts)
old_ts <- ts(c(1,2,3,4), start=c(2020,1), end=c(2020,4), frequency=4) new_ts <- ts(c(5,6,3,4), start=c(2019,4), end=c(2020,3), frequency=4) match_ts_dates(old_ts, new_ts)
Return the mean of 'x'. If no observations, return 'NA'. This is a workaround for the fact that in data.table, ':= mean(x, na.rm = TRUE)' will return 'NaN' where there are no observations
mean_na(x)
mean_na(x)
x |
data potentially with non-finite values |
Mean of the input
mean_na(c(1,2,3,7,9,NA)) ## 4.4
mean_na(c(1,2,3,7,9,NA)) ## 4.4
Get the number of days in a month given the year and month
month_days(year, month)
month_days(year, month)
year |
integer year value |
month |
integer month value |
The number of days in the month (integer)
month_days(2021,9) ## 30 month_days(2020,2) ## 29
month_days(2021,9) ## 30 month_days(2020,2) ## 29
Return the number of finite values in a column of Y
number_finite(Y)
number_finite(Y)
Y |
matrix like data object |
The number of finite values per column
X <- matrix(1,10,2) X[3,1] <- NA number_finite(X)
X <- matrix(1,10,2) X[3,1] <- NA number_finite(X)
Create dummy variables for unique numeric values in 'x'
numdum(x)
numdum(x)
x |
Numeric vector |
Dummy variables for each unique value in the data
numdum(c(3,3,5,3,4,3,5,4,4,5)) ## dummies for each of 3, 4, and 5
numdum(c(3,3,5,3,4,3,5,4,4,5)) ## dummies for each of 3, 4, and 5
Calculate the percent change in 'y' from one period to the next
pct_chng(y, lag = 1)
pct_chng(y, lag = 1)
y |
data |
lag |
number of periods for percent change |
The percentage change among the lag period
pct_chng(c(100,50,100,20,100,110))
pct_chng(c(100,50,100,20,100,110))
Return the percent of responses to categorical answers at a specified frequency
pct_response( dt, col_name = NULL, by = c("month", "quarter", "week"), date_name = "ref_date" )
pct_response( dt, col_name = NULL, by = c("month", "quarter", "week"), date_name = "ref_date" )
dt |
data table of responses |
col_name |
name of column containing responses |
by |
frequency of response aggregation, one of '"month"', '"quarter"', '"week"' |
date_name |
name of column containing dates |
The percent of responses at the frequency
dt <- data.frame("ref_date" = seq.Date(as.Date("2000-01-01"), length.out = 100, by = "week"), "response" = c(rep("yes", 20), rep("no",50),rep("yes",30))) out <- pct_response(dt, col_name = "response")
dt <- data.frame("ref_date" = seq.Date(as.Date("2000-01-01"), length.out = 100, by = "week"), "response" = c(rep("yes", 20), rep("no",50),rep("yes",30))) out <- pct_response(dt, col_name = "response")
Process data to ensure stationarity in long format for time series modeling
process( dt, lib, detrend = TRUE, center = TRUE, scale = TRUE, as_of = NULL, date_name = "ref_date", id_name = "series_name", value_name = "value", pub_date_name = NULL, ignore_numeric_names = TRUE, silent = FALSE )
process( dt, lib, detrend = TRUE, center = TRUE, scale = TRUE, as_of = NULL, date_name = "ref_date", id_name = "series_name", value_name = "value", pub_date_name = NULL, ignore_numeric_names = TRUE, silent = FALSE )
dt |
Data in long format. |
lib |
Library with instructions regarding how to process data; see details. |
detrend |
T/F should data be detrended (see details)? |
center |
T/F should data be centered (i.e. de-meaned)? |
scale |
T/F should data be scaled (i.e. variance 1)? |
as_of |
"As of" date at which to censor observations for backtesting. This requires that 'pub_date_name' is specified. |
date_name |
Name of date column in the data. |
id_name |
Name of ID column in the data. |
value_name |
Name of value column in the data |
pub_date_name |
Name of publication date column in the data; required if 'as_of' specified. |
ignore_numeric_names |
T/F ignore numeric values in matching series names in 'dt' to series names in 'lib'. This is required for data aggregated using 'process_MF()', as lags of LHS and RHS data are tagged 0 for contemporaneous data, 1 for one lag, 2 for 2 lags, etc. Ignoring these tags ensures processing from 'lib' is correctly identified. |
silent |
T/F, suppress warnings? |
Process data can be used to transform data to ensure stationarity and to censor data for backtesting. Directions for processing each file come from the data.table 'lib'. This table must include the columns 'series_name', 'take_logs', and 'take_diffs'. Unique series may also be identified by a combination of 'country' and 'series_name'. Optional columns include 'needs_SA' for series that need seasonal adjustment, 'detrend' for removing low frequency trends (nowcasting only; 'detrend' should not be used for long horizon forecasts), 'center' to de-mean the data, and 'scale' to scale the data. If the argument to 'process()' of 'detrend', 'center', or 'scale' is 'FALSE', the operation will not be performed. If 'TRUE', the function will check for the column of the same name in 'lib'. If the column exists, T/F entries from this column are used to determine which series to transform. If the column does not exist, all series will be transformed.
data.table of processed values in long format.
dt <- process(fred, fredlib) LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dtQ <- process_MF(LHS, RHS) dt_processed <- process(dtQ, fredlib)
dt <- process(fred, fredlib) LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dtQ <- process_MF(LHS, RHS) dt_processed <- process(dtQ, fredlib)
Process mixed frequency data for nowcasting applications by identifying the missing observations in the contemporaneous data and replicating this pattern of missing observations in the historical data prior to aggregation. This allows the incorporation of all available information into the model while still using uniform frequency models to actually generate predictions, and can thus be applied to a wide array of econometrics and machine learning applications.
process_MF( LHS, RHS, LHS_lags = 1, RHS_lags = 1, as_of = NULL, frq = c("auto", "week", "month", "quarter", "year"), date_name = "ref_date", id_name = "series_name", value_name = "value", pub_date_name = "pub_date", return_dt = TRUE )
process_MF( LHS, RHS, LHS_lags = 1, RHS_lags = 1, as_of = NULL, frq = c("auto", "week", "month", "quarter", "year"), date_name = "ref_date", id_name = "series_name", value_name = "value", pub_date_name = "pub_date", return_dt = TRUE )
LHS |
Left hand side data in long format. May include multiple LHS variables, but LHS variables MUST have the same frequency. |
RHS |
Right hand side data in long format at any frequency. |
LHS_lags |
Number of lags of LHS variables to include in output. |
RHS_lags |
Number of lags of RHS variables to include in output (may be 0, indicating contemporaneous values only). |
as_of |
Backtesting the model "as of" this date; requires that 'pub_date' is specified in the data |
frq |
Frequency of LHS data, one of 'week', 'month', 'quarter', 'year'. If not specified, the function will attempt to automatically identify the frequency. |
date_name |
Name of date column in data. |
id_name |
Name of ID column in the data. |
value_name |
Name of value column in the data. |
pub_date_name |
Name of publication date in the data. |
return_dt |
T/F, should the function return a 'data.table'? If FALSE the function will return matrix data. |
Right hand side data will always include observations contemporaneous with LHS data. Use 'RHS_lags' to add lags of RHS data to the output, and 'LHS_lags' to add lags of LHS data to the output. By default the function will return data in long format designed to be used with the 'dateutils' function 'process()'. Specifying 'return_dt = FALSE' will return LHS variables in the matrix 'Y', RHS variables in the matrix 'X', and corresponding dates (by index) in the date vector 'dates'.
data.table in long format (unless 'return_dt = FALSE'). Variables ending in '0' are contemporaneous, ending in '1' are at one lag, '2' at two lags, etc.
LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dt <- process_MF(LHS, RHS)
LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dt <- process_MF(LHS, RHS)
Process data in wide format for time series modeling
process_wide( dt_wide, lib, detrend = TRUE, center = TRUE, scale = TRUE, date_name = "ref_date", ignore_numeric_names = TRUE, silent = FALSE )
process_wide( dt_wide, lib, detrend = TRUE, center = TRUE, scale = TRUE, date_name = "ref_date", ignore_numeric_names = TRUE, silent = FALSE )
dt_wide |
Data in wide format. |
lib |
Library with instructions regarding how to process data; see details. |
detrend |
T/F should data be detrended (see details)? |
center |
T/F should data be centered (i.e. de-meaned)? |
scale |
T/F should data be scaled (i.e. variance 1)? |
date_name |
Name of date column in the data. |
ignore_numeric_names |
T/F ignore numeric values in matching series names in 'dt' to series names in 'lib'. This is required for data aggregated using 'process_MF()', as lags of LHS and RHS data are tagged 0 for contemporaneous data, 1 for one lag, 2 for 2 lags, etc. Ignoring these tags ensures processing from 'lib' is correctly identified. |
silent |
T/F, suppress warnings? |
'process_wide()' can be used to transform wide data to ensure stationarity. Censoring by pub_date requires long format. Directions for processing each file come from the data.table 'lib'. This table must include the columns 'series_name', 'take_logs', and 'take_diffs'. Unique series may also be identified by a combination of 'country' and 'series_name'. Optional columns include 'needs_SA' for series that need seasonal adjustment, 'detrend' for removing low frequency trends (nowcasting only; 'detrend' should not be used for long horizon forecasts), 'center' to de-mean the data, and 'scale' to scale the data. If the argument to 'process_wide()' of 'detrend', 'center', or 'scale' is 'FALSE', the operation will not be performed. If 'TRUE', the function will check for the column of the same name in 'lib'. If the column exists, T/F entries from this column are used to determine which series to transform. If the column does not exist, all series will be transformed.
data.table of processed data
LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dtQ <- process_MF(LHS, RHS) dt_wide <- data.table::dcast(dtQ, ref_date ~ series_name, value.var = "value") dt_processed <- process_wide(dt_wide, fredlib)
LHS <- fred[series_name == "gdp constant prices"] RHS <- fred[series_name != "gdp constant prices"] dtQ <- process_MF(LHS, RHS) dt_wide <- data.table::dcast(dtQ, ref_date ~ series_name, value.var = "value") dt_processed <- process_wide(dt_wide, fredlib)
Find the rolling maximum in 'x' with span 'n'
rollmax(x, n)
rollmax(x, n)
x |
Numeric vector |
n |
Integer span |
The maximum value of 'x' with span 'n'
rollmax(c(1,2,3), 2) ## c(2,3,3)
rollmax(c(1,2,3), 2) ## c(2,3,3)
Take the rolling mean of 'x' over 'n' elements
rollmean(x, n)
rollmean(x, n)
x |
data vector |
n |
span of rolling mean |
Rolling mean of the input
rollmean(c(1,2,3),2) ## NA, 1.5, 2.5
rollmean(c(1,2,3),2) ## NA, 1.5, 2.5
Find the rolling minimum in 'x' with span 'n'
rollmin(x, n)
rollmin(x, n)
x |
Numeric vector |
n |
Integer span |
The minimum value of 'x' with span 'n'
rollmin(c(1,2,3),2) ## c(1,1,2)
rollmin(c(1,2,3),2) ## c(1,1,2)
Return 'Y' with each row as a list
row_to_list(Y)
row_to_list(Y)
Y |
matrix like data object |
Each row as a list
row_to_list(matrix(rnorm(20),10,2))
row_to_list(matrix(rnorm(20),10,2))
Seasonally adjust monthly or quarterly data using X-13 SEATS via seas()
run_sa(x, dates, x11 = FALSE, transfunc = c("none", "auto", "log"))
run_sa(x, dates, x11 = FALSE, transfunc = c("none", "auto", "log"))
x |
data |
dates |
dates corresponding to data 'x' |
x11 |
T/F, use x11 as opposed to X-13 SEATS |
transfunc |
Data transformation, one of 'none' for no transformation, 'auto' for automatic detection, or 'log' for log transformation |
A list with 'adj_fact' containing seasonal factors and 'sa_final' containing seasonally adjusted data.
x <- fred[series_name == "gdp constant prices", value] dates <- fred[series_name == "gdp constant prices", ref_date ] run_sa(x, dates, transfunc = "log")
x <- fred[series_name == "gdp constant prices", value] dates <- fred[series_name == "gdp constant prices", ref_date ] run_sa(x, dates, transfunc = "log")
Return the standard deviation of 'x'. If no observations, return 'NA'. This is a workaround for the fact that in data.table, ':= sd(x, na.rm = TRUE)' will return 'NaN' where there are no observations
sd_na(x)
sd_na(x)
x |
data potentially with non-finite values |
Standard deviation of the input
sd_na(c(1,2,3,NA)) ## 1
sd_na(c(1,2,3,NA)) ## 1
Seasonally adjust multiple monthly or quarterly series in long format using X-13 SEATS via seas()
seas_df_long( df, sa_names, x11 = FALSE, transfunc = "none", series_names = "series_name", value_var = "value", date_var = "ref_date" )
seas_df_long( df, sa_names, x11 = FALSE, transfunc = "none", series_names = "series_name", value_var = "value", date_var = "ref_date" )
df |
long format dataframe |
sa_names |
names of series to seasonally adjust |
x11 |
T/F, use x11 as opposed to X-13 SEATS |
transfunc |
Data transformation, one of 'none' for no transformation, 'auto' for automatic detection, or 'log' for log transformation |
series_names |
name of column containing series names |
value_var |
name of column containing values |
date_var |
name of column containing dates |
A list with data.frames 'sa_factors' containing seasonal factors and 'values_sa' containing seasonally adjusted data.
seas_df_long(fred[series_name == "gdp constant prices"], sa_names="value")
seas_df_long(fred[series_name == "gdp constant prices"], sa_names="value")
Seasonally adjust multiple monthly or quarterly series in wide format using X-13 SEATS via seas()
seas_df_wide(df, sa_cols, x11 = FALSE, transfunc = "none")
seas_df_wide(df, sa_cols, x11 = FALSE, transfunc = "none")
df |
wide format dataframe |
sa_cols |
names or column indexes of series to seasonally adjust |
x11 |
T/F, use x11 as opposed to X-13 SEATS |
transfunc |
Data transformation, one of 'none' for no transformation, 'auto' for automatic detection, or 'log' for log transformation |
A list with data.frames 'sa_factors' containing seasonal factors and 'values_sa' containing seasonally adjusted data.
seas_df_wide(fred[series_name == "gdp constant prices"], sa_cols="value")
seas_df_wide(fred[series_name == "gdp constant prices"], sa_cols="value")
Spline fill missing observations from the first observation to the last, leaving NA observations in the head and tail
spline_fill(x)
spline_fill(x)
x |
data with missing observations |
data with interpolated missing observations, except at head and tail, which remain NA
spline_fill(c(NA,1,2,3,NA,5)) ## NA 1 2 3 4 5
spline_fill(c(NA,1,2,3,NA,5)) ## NA 1 2 3 4 5
Spline fill missing observations, designed for filling low frequency trend estimates
spline_fill_trend(x)
spline_fill_trend(x)
x |
data with missing observations |
data with interpolated missing observations
spline_fill_trend(c(1,2,3,NA,5)) ## 1 2 3 4 5
spline_fill_trend(c(1,2,3,NA,5)) ## 1 2 3 4 5
Stack time series observations in VAR format over series for p lags
stack_obs(Dat, p)
stack_obs(Dat, p)
Dat |
Data in a format convertible to a matrix |
p |
number of lags, integer value |
stacked time series obs with p lags
mat <- matrix(rnorm(100),50,2) Z <- stack_obs(mat, 2) ## stack the dataset `mat` with two lags ## Note: one "lag" will just return the original dataset.
mat <- matrix(rnorm(100),50,2) Z <- stack_obs(mat, 2) ## stack the dataset `mat` with two lags ## Note: one "lag" will just return the original dataset.
Return the sum of 'x'. If no observations, return 'NA'. This is a workaround for the fact that in data.table, ':= sum(x, na.rm = TRUE)' will return '0' where there are no observations
sum_na(x)
sum_na(x)
x |
data potentially with non-finite values |
Sum of the input
sum_na(c(1,2,3,NA)) # 6
sum_na(c(1,2,3,NA)) # 6
transform data in 'x' corresponding to dates in 'dates' to ts() format
to_ts(x, dates)
to_ts(x, dates)
x |
data |
dates |
dates |
data in ts() format
x <- c(1,2,3,4) dates <- as.Date(c("2020-1-1","2020-2-1","2020-3-1","2020-4-1")) to_ts(x, dates)
x <- c(1,2,3,4) dates <- as.Date(c("2020-1-1","2020-2-1","2020-3-1","2020-4-1")) to_ts(x, dates)
Return the total number of responses to categorical answers at a specified frequency
total_response( dt, col_name = NULL, by = c("month", "quarter", "week"), date_name = "ref_date" )
total_response( dt, col_name = NULL, by = c("month", "quarter", "week"), date_name = "ref_date" )
dt |
data table of responses |
col_name |
name of column containing responses |
by |
frequency of response aggregation, one of '"month"', '"quarter"', '"week"' |
date_name |
name of column containing dates |
The number of responses at the frequency
dt <- data.frame("ref_date" = seq.Date(as.Date("2000-01-01"), length.out = 100, by = "week"), "response" = c(rep("yes", 20), rep("no",50),rep("yes",30))) out <- total_response(dt, col_name = "response")
dt <- data.frame("ref_date" = seq.Date(as.Date("2000-01-01"), length.out = 100, by = "week"), "response" = c(rep("yes", 20), rep("no",50),rep("yes",30))) out <- total_response(dt, col_name = "response")
Estimate low frequency trends via loess regression and remove them. If the function errors, return x (i.e. no trend)
try_detrend(x, outlier_rm = TRUE, span = 0.6)
try_detrend(x, outlier_rm = TRUE, span = 0.6)
x |
data |
outlier_rm |
T/F, remove outliers to estimate trends? |
span |
span for the loess regression |
Data with trends removed
try_detrend(c(1,3,6,7,9,11,14,15,17,18))
try_detrend(c(1,3,6,7,9,11,14,15,17,18))
Seasonally adjust monthly or quarterly data using X-13 SEATS via seas()
try_sa(x, dates, x11 = FALSE, transfunc = "none", series_name = NULL)
try_sa(x, dates, x11 = FALSE, transfunc = "none", series_name = NULL)
x |
data |
dates |
dates corresponding to data 'x' |
x11 |
T/F, use x11 as opposed to X-13 SEATS |
transfunc |
Data transformation, one of 'none' for no transformation, 'auto' for automatic detection, or 'log' for log transformation |
series_name |
Include series name to print out if failure (for lapply() applications) |
A list with 'adj_fact' containing seasonal factors and 'sa_final' containing seasonally adjusted data. If seasonal adjustment failed 'adj_fact' will contain zeros and 'sa_final' will contain the original data.
x <- fred[series_name == "gdp constant prices", value] dates <- fred[series_name == "gdp constant prices", ref_date ] try_sa(x, dates, transfunc = "log")
x <- fred[series_name == "gdp constant prices", value] dates <- fred[series_name == "gdp constant prices", ref_date ] try_sa(x, dates, transfunc = "log")
Estimate low frequency trends via loess regression. If the function errors, return zeros (i.e. no trend)
try_trend(x, outlier_rm = TRUE, span = 0.6)
try_trend(x, outlier_rm = TRUE, span = 0.6)
x |
data |
outlier_rm |
T/F, remove outliers to estimate trends? |
span |
span for the loess regression |
Estimated trend in the data
try_trend(c(1,3,6,7,9,11,14,15,17,18))
try_trend(c(1,3,6,7,9,11,14,15,17,18))
Transform monthly or quarterly ts() data to a dataframe
ts_to_df(x, end_period = TRUE)
ts_to_df(x, end_period = TRUE)
x |
ts() format data which is either monthly or quarterly |
end_period |
T/F, for monthly or quarterly data, should dates be indexed to the end of the period? |
Data in dataframe format
x <- ts(c(1,2,3,4), start=c(2020,1), end=c(2020,4), frequency=4) ts_to_df(x)
x <- ts(c(1,2,3,4), start=c(2020,1), end=c(2020,4), frequency=4) ts_to_df(x)