Skip to content

Commit b738178

Browse files
GH-33390: [R] Field-level metadata (#49631)
### Rationale for this change

- Unlike Python, `field()` in R does not support field-level metadata.

### What changes are included in this PR?

- New active bindings on `Field`: `$HasMetadata`, `$metadata`
- New methods on `Field`: `$WithMetadata()`, `$RemoveMetadata()`
- New `check_metadata` parameter in `Field$Equals()`, defaulting to `FALSE`
- Tests for the above

### Are these changes tested?

- Yes, unit tests have been added; the functionality was also tested locally in R.

### Are there any user-facing changes?

- Yes, `metadata=` now works without throwing an error.
- No breaking changes: the `metadata` parameter was already part of the signature (it previously raised an error), and the parameter order was not changed in the implementation.

* GitHub Issue: #33390

Lead-authored-by: Max Romagnoli <massimiliano.romagnoli02@gmail.com>
Co-authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Nic Crane <thisisnic@gmail.com>
1 parent 4ed0916 commit b738178

File tree

5 files changed

+196
-15
lines changed

5 files changed

+196
-15
lines changed

r/R/arrowExports.R

Lines changed: 18 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/R/field.R

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,18 @@
2626
#' @section Methods:
2727
#'
2828
#' - `f$ToString()`: convert to a string
29-
#' - `f$Equals(other)`: test for equality. More naturally called as `f == other`
29+
#' - `f$Equals(other, check_metadata = FALSE)`: test for equality.
30+
#' More naturally called as `f == other`
31+
#' - `f$WithMetadata(metadata)`: returns a new `Field` with the key-value
32+
#' `metadata` set. Note that all list elements in `metadata` will be coerced
33+
#' to `character`.
34+
#' - `f$RemoveMetadata()`: returns a new `Field` without metadata.
35+
#'
36+
#' @section Active bindings:
37+
#'
38+
#' - `$HasMetadata`: logical: does this `Field` have extra metadata?
39+
#' - `$metadata`: returns the key-value metadata as a named list, or `NULL`
40+
#' if no metadata is set.
3041
#'
3142
#' @name Field
3243
#' @rdname Field-class
@@ -38,8 +49,15 @@ Field <- R6Class(
3849
ToString = function() {
3950
prettier_dictionary_type(Field__ToString(self))
4051
},
41-
Equals = function(other, ...) {
42-
inherits(other, "Field") && Field__Equals(self, other)
52+
Equals = function(other, check_metadata = FALSE, ...) {
53+
inherits(other, "Field") && Field__Equals(self, other, isTRUE(check_metadata))
54+
},
55+
WithMetadata = function(metadata = NULL) {
56+
metadata <- prepare_key_value_metadata(metadata)
57+
Field__WithMetadata(self, metadata)
58+
},
59+
RemoveMetadata = function() {
60+
Field__RemoveMetadata(self)
4361
},
4462
export_to_c = function(ptr) ExportField(self, ptr)
4563
),
@@ -52,14 +70,27 @@ Field <- R6Class(
5270
},
5371
type = function() {
5472
Field__type(self)
73+
},
74+
HasMetadata = function() {
75+
Field__HasMetadata(self)
76+
},
77+
metadata = function() {
78+
if (self$HasMetadata) {
79+
as.list(Field__metadata(self))
80+
} else {
81+
NULL
82+
}
5583
}
5684
)
5785
)
58-
Field$create <- function(name, type, metadata, nullable = TRUE) {
86+
Field$create <- function(name, type, metadata = NULL, nullable = TRUE) {
5987
assert_that(inherits(name, "character"), length(name) == 1L)
6088
type <- as_type(type, name)
61-
assert_that(missing(metadata), msg = "metadata= is currently ignored")
62-
Field__initialize(enc2utf8(name), type, nullable)
89+
f <- Field__initialize(enc2utf8(name), type, nullable)
90+
if (!is.null(metadata)) {
91+
f <- f$WithMetadata(metadata)
92+
}
93+
f
6394
}
6495
#' @include arrowExports.R
6596
Field$import_from_c <- ImportField
@@ -68,11 +99,13 @@ Field$import_from_c <- ImportField
6899
#'
69100
#' @param name field name
70101
#' @param type logical type, instance of [DataType]
71-
#' @param metadata currently ignored
102+
#' @param metadata a named character vector or list to attach as field metadata.
103+
#' All values will be coerced to `character`.
72104
#' @param nullable TRUE if field is nullable
73105
#'
74106
#' @examples
75107
#' field("x", int32())
108+
#' field("x", int32(), metadata = list(key = "value"))
76109
#' @rdname Field
77110
#' @seealso [Field]
78111
#' @export

r/src/arrowExports.cpp

Lines changed: 42 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/field.cpp

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "./arrow_types.h"
1919

2020
#include <arrow/type.h>
21+
#include <arrow/util/key_value_metadata.h>
2122

2223
// [[arrow::export]]
2324
std::shared_ptr<arrow::Field> Field__initialize(
@@ -38,8 +39,46 @@ std::string Field__name(const std::shared_ptr<arrow::Field>& field) {
3839

3940
// [[arrow::export]]
4041
bool Field__Equals(const std::shared_ptr<arrow::Field>& field,
41-
const std::shared_ptr<arrow::Field>& other) {
42-
return field->Equals(other);
42+
const std::shared_ptr<arrow::Field>& other, bool check_metadata) {
43+
return field->Equals(other, check_metadata);
44+
}
45+
46+
// [[arrow::export]]
47+
bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field) {
48+
return field->HasMetadata();
49+
}
50+
51+
// [[arrow::export]]
52+
cpp11::writable::list Field__metadata(const std::shared_ptr<arrow::Field>& field) {
53+
auto meta = field->metadata();
54+
int64_t n = 0;
55+
if (field->HasMetadata()) {
56+
n = meta->size();
57+
}
58+
cpp11::writable::list out(n);
59+
std::vector<std::string> names_out(n);
60+
for (int i = 0; i < n; i++) {
61+
out[i] = cpp11::as_sexp(meta->value(i));
62+
names_out[i] = meta->key(i);
63+
}
64+
out.names() = names_out;
65+
return out;
66+
}
67+
68+
// [[arrow::export]]
69+
std::shared_ptr<arrow::Field> Field__WithMetadata(
70+
const std::shared_ptr<arrow::Field>& field, cpp11::strings metadata) {
71+
auto values = cpp11::as_cpp<std::vector<std::string>>(metadata);
72+
auto names = cpp11::as_cpp<std::vector<std::string>>(metadata.attr("names"));
73+
auto kv =
74+
std::make_shared<arrow::KeyValueMetadata>(std::move(names), std::move(values));
75+
return field->WithMetadata(std::move(kv));
76+
}
77+
78+
// [[arrow::export]]
79+
std::shared_ptr<arrow::Field> Field__RemoveMetadata(
80+
const std::shared_ptr<arrow::Field>& field) {
81+
return field->RemoveMetadata();
4382
}
4483

4584
// [[arrow::export]]

r/tests/testthat/test-field.R

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,58 @@ test_that("Field to C-interface", {
6363
# must clean up the pointer or we leak
6464
delete_arrow_schema(ptr)
6565
})
66+
67+
test_that("Field metadata", {
68+
x <- field("x", int32())
69+
expect_false(x$HasMetadata)
70+
expect_null(x$metadata)
71+
72+
x_meta <- field("x", int32(), metadata = list(key = "value"))
73+
expect_true(x_meta$HasMetadata)
74+
expect_identical(x_meta$metadata, list(key = "value"))
75+
76+
x_meta2 <- x$WithMetadata(list(key = "value"))
77+
expect_true(x_meta2$HasMetadata)
78+
expect_identical(x_meta2$metadata, list(key = "value"))
79+
80+
x_no_meta <- x_meta$RemoveMetadata()
81+
expect_false(x_no_meta$HasMetadata)
82+
expect_null(x_no_meta$metadata)
83+
})
84+
85+
test_that("Field$Equals with check_metadata", {
86+
x <- field("x", int32())
87+
x_meta <- field("x", int32(), metadata = list(key = "value"))
88+
89+
expect_true(x$Equals(x_meta))
90+
expect_false(x$Equals(x_meta, check_metadata = TRUE))
91+
expect_true(x == x_meta)
92+
})
93+
94+
test_that("Field WithMetadata(NULL) removes metadata", {
95+
x <- field("x", int32(), metadata = list(key = "value"))
96+
x_empty <- x$WithMetadata(NULL)
97+
expect_false(x_empty$HasMetadata)
98+
expect_null(x_empty$metadata)
99+
})
100+
101+
test_that("Field metadata IPC roundtrip", {
102+
x <- field("x", int32(), metadata = list(key = "value"))
103+
tab <- Table$create(x = 1L, schema = schema(x))
104+
bytes <- write_to_raw(tab)
105+
roundtripped <- read_ipc_stream(bytes, as_data_frame = FALSE)
106+
expect_true(roundtripped$schema$GetFieldByName("x")$Equals(x, check_metadata = TRUE))
107+
})
108+
109+
test_that("Field metadata with duplicate keys", {
110+
x <- field("x", int32(), metadata = list(a = "1", a = "2"))
111+
expect_true(x$HasMetadata)
112+
expect_length(x$metadata, 2)
113+
expect_equal(x$metadata, list(a = "1", a = "2"))
114+
})
115+
116+
test_that("Field metadata on nested struct child fields", {
117+
inner <- field("y", int32(), metadata = list(inner_key = "inner_value"))
118+
outer <- field("outer", struct__(list(inner)))
119+
expect_true(outer$type$GetFieldByName("y")$Equals(inner, check_metadata = TRUE))
120+
})

0 commit comments

Comments
 (0)