Creates a set of rules
Usage
ruleset(..., data_columns = NULL, meta = NULL, data_name = NULL)
# S3 method for class 'ruleset'
print(x, n = 3, ...)
Arguments
- ...
a list of rules
- data_columns
optional list of schema declarations created with the internal
data_column() helper.
- meta
optional metadata list for v1 YAML workflows.
- data_name
optional name of the primary dataset when
check_data() receives a named list of datasets.
- x
a ruleset to print
- n
the maximum number of rules to print
Examples
r1 <- rule(mpg > 10)
r2 <- rule(mpg < 20)
rs <- ruleset(r1, r2)
rs
#> <Verification Ruleset with 2 elements>
#> [1] 'Rule for: mpg' matching `mpg > 10` (allow_na: FALSE)
#> [2] 'Rule for: mpg' matching `mpg < 20` (allow_na: FALSE)
rs <- ruleset(
rule(cyl %in% c(4, 6, 8)),
rule(is.numeric(disp))
)
rs
#> <Verification Ruleset with 2 elements>
#> [1] 'Rule for: cyl' matching `cyl %in% c(4, 6, 8)` (allow_na: FALSE)
#> [2] 'Rule for: disp' matching `is.numeric(disp)` (allow_na: FALSE)
# combine row, schema, and relational checks
orders <- data.frame(order_id = 1:4, customer_id = c(10, 11, 99, NA), amount = c(10, 20, -5, 30))
customers <- data.frame(customer_id = c(10, 11, 12))
rs2 <- ruleset(
rule(amount >= 0, name = "amount must be non-negative"),
reference_rule(
local_col = "customer_id",
ref_dataset = "customers",
ref_col = "customer_id",
allow_na = TRUE
),
data_columns = list(
data_column("order_id", type = "int", optional = FALSE),
data_column("customer_id", type = "int", optional = FALSE),
data_column("amount", type = "double", optional = FALSE)
),
data_name = "orders"
)
check_data(list(orders = orders, customers = customers), rs2)
#> check_type name
#> <char> <char>
#> 1: schema Schema: column 'order_id' exists
#> 2: schema Schema: column 'order_id' has type 'int'
#> 3: schema Schema: column 'customer_id' exists
#> 4: schema Schema: column 'customer_id' has type 'int'
#> 5: schema Schema: column 'amount' exists
#> 6: schema Schema: column 'amount' has type 'double'
#> 7: row_rule amount must be non-negative
#> 8: reference_rule Reference rule: customer_id in customers$customer_id
#> expr allow_na negate tests pass fail
#> <char> <lgcl> <lgcl> <int> <int> <int>
#> 1: column_exists('order_id') FALSE FALSE 1 1 0
#> 2: column_type('order_id') == 'int' FALSE FALSE 1 1 0
#> 3: column_exists('customer_id') FALSE FALSE 1 1 0
#> 4: column_type('customer_id') == 'int' FALSE FALSE 1 0 1
#> 5: column_exists('amount') FALSE FALSE 1 1 0
#> 6: column_type('amount') == 'double' FALSE FALSE 1 1 0
#> 7: amount >= 0 FALSE FALSE 4 3 1
#> 8: customer_id %in% customers$customer_id TRUE FALSE 4 3 1
#> warn error
#> <char> <char>
#> 1:
#> 2:
#> 3:
#> 4: Column 'customer_id' does not match declared type 'int'.
#> 5:
#> 6:
#> 7:
#> 8:
#> time
#> <difftime>
#> 1: 0.000000e+00 secs
#> 2: 0.000000e+00 secs
#> 3: 0.000000e+00 secs
#> 4: 0.000000e+00 secs
#> 5: 0.000000e+00 secs
#> 6: 0.000000e+00 secs
#> 7: 4.100800e-04 secs
#> 8: 5.459785e-05 secs