Skip to content

Commit 6120334

Browse files
committed
queries and compressed files passing
1 parent df7e711 commit 6120334

File tree

13 files changed

+1546
-1190
lines changed

13 files changed

+1546
-1190
lines changed

R/RcppExports.R

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
.deserialize_json <- function(json, query = NULL, empty_array = NULL, empty_object = NULL, single_null = NULL, error_ok = FALSE, on_error = NULL, simplify_to = 0L, type_policy = 0L, int64_r_type = 0L) {
5-
.Call(`_RcppSimdJson_deserialize`, json, query, empty_array, empty_object, single_null, error_ok, on_error, simplify_to, type_policy, int64_r_type)
4+
.deserialize_json <- function(json, query = NULL, empty_array = NULL, empty_object = NULL, single_null = NULL, parse_error_ok = FALSE, on_parse_error = NULL, query_error_ok = FALSE, on_query_error = NULL, simplify_to = 0L, type_policy = 0L, int64_r_type = 0L) {
5+
.Call(`_RcppSimdJson_deserialize`, json, query, empty_array, empty_object, single_null, parse_error_ok, on_parse_error, query_error_ok, on_query_error, simplify_to, type_policy, int64_r_type)
66
}
77

8-
.load_json <- function(json, query = NULL, empty_array = NULL, empty_object = NULL, single_null = NULL, error_ok = FALSE, on_error = NULL, simplify_to = 0L, type_policy = 0L, int64_r_type = 0L) {
9-
.Call(`_RcppSimdJson_load`, json, query, empty_array, empty_object, single_null, error_ok, on_error, simplify_to, type_policy, int64_r_type)
8+
.load_json <- function(json, query = NULL, empty_array = NULL, empty_object = NULL, single_null = NULL, parse_error_ok = FALSE, on_parse_error = NULL, query_error_ok = FALSE, on_query_error = NULL, simplify_to = 0L, type_policy = 0L, int64_r_type = 0L) {
9+
.Call(`_RcppSimdJson_load`, json, query, empty_array, empty_object, single_null, parse_error_ok, on_parse_error, query_error_ok, on_query_error, simplify_to, type_policy, int64_r_type)
10+
}
11+
12+
.is_valid_json_arg <- function(json) {
13+
.Call(`_RcppSimdJson_is_valid_json_arg`, json)
14+
}
15+
16+
.is_valid_query_arg <- function(query) {
17+
.Call(`_RcppSimdJson_is_valid_query_arg`, query)
18+
}
19+
20+
.diagnose_input <- function(x) {
21+
.Call(`_RcppSimdJson_diagnose_input`, x)
1022
}
1123

1224
.exceptions_enabled <- function() {

R/fload.R

Lines changed: 136 additions & 183 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,22 @@
1-
.file_extension <- function(x) {
2-
base_name <- basename(x)
3-
4-
captures <- regexpr("(?<!^|[.]|/)[.]([^.]+)$", base_name, perl = TRUE)
5-
out <- rep(NA_character_, length(x))
6-
out[!is.na(x) & captures > 0L] <- substring(
7-
base_name[!is.na(x) & captures > 0L],
8-
captures[!is.na(x) & captures > 0L]
9-
)
10-
11-
out
12-
}
13-
14-
15-
.url_prefix <- function(x) {
16-
vapply(x, function(.x) {
17-
if (is.na(.x)) return(NA_character_)
18-
if (substring(.x, 1L, 8L) == "https://") {
19-
"https://"
20-
} else if ((prefix <- substring(.x, 1L, 7L)) %in% c("http://", "ftps://", "file://")) {
21-
prefix
22-
} else if (substring(.x, 1L, 6L) == "ftp://") {
23-
"ftp://"
24-
} else {
25-
NA_character_
26-
}
27-
}, character(1L), USE.NAMES = FALSE)
28-
}
29-
30-
31-
.diagnose_input <- function(x) {
32-
init <- list(
33-
input = x,
34-
url_prefix = .url_prefix(x),
35-
file_ext = .file_extension(x)
36-
)
37-
38-
# init$compressed <- tolower(init$file_ext) %in% c(".gz", ".bz", ".bz2", ".xz", ".zip")
39-
# if (any(init$compressed)) {
40-
# stop(
41-
# "Compressed files are not yet supported. The following files are affected:",
42-
# sprintf("\n\t- %s", x[init$compressed]),
43-
# call. = FALSE
44-
# )
45-
# }
46-
47-
init$type <- ifelse(
48-
!is.na(init$url_prefix), "url",
49-
ifelse(!is.na(init$file_ext), "file", NA_character_)
50-
)
51-
52-
structure(init, class = "data.frame", row.names = seq_along(x))
53-
}
54-
55-
561
#' @rdname fparse
572
#'
583
#' @order 2
594
#'
5+
#'
6+
#' @param verbose Whether to display status messages.
7+
#' \code{TRUE} or \code{FALSE}, default: \code{FALSE}
8+
#'
9+
#' @param temp_dir Directory path to use for any temporary files.
10+
#' \code{character(1L)}, default: \code{tempdir()}
11+
#'
12+
#' @param keep_temp_files Whether to remove any temporary files created by
13+
#' \code{fload()} from \code{temp_dir}.
14+
#' \code{TRUE} or \code{FALSE}, default: \code{TRUE}
15+
#'
16+
#' @param compressed_download Whether to request server-side compression on
17+
#' the downloaded document, default: \code{FALSE}
18+
#'
19+
#'
6020
#' @examples
6121
#' # load JSON files ===========================================================
6222
#' single_file <- system.file("jsonexamples/small/demo.json", package = "RcppSimdJson")
@@ -68,9 +28,10 @@
6828
#' )
6929
#' fload(multiple_files)
7030
#'
71-
#' # load remote JSON ==========================================================
31+
#'
7232
#' \dontrun{
7333
#'
34+
#' # load remote JSON ==========================================================
7435
#' a_url <- "https://api.github.com/users/lemire"
7536
#' fload(a_url)
7637
#'
@@ -81,6 +42,10 @@
8142
#' "https://api.github.com/users/dcooley"
8243
#' )
8344
#' fload(multiple_urls, query = "name", verbose = TRUE)
45+
#'
46+
#' # download compressed (faster) JSON =========================================
47+
#' fload(multiple_urls, query = "name", verbose = TRUE,
48+
#' compressed_download = TRUE)
8449
#' }
8550
#'
8651
#' @export
@@ -89,143 +54,131 @@ fload <- function(json,
8954
empty_array = NULL,
9055
empty_object = NULL,
9156
single_null = NULL,
92-
error_ok = FALSE,
93-
on_error = NULL,
57+
parse_error_ok = FALSE,
58+
on_parse_error = NULL,
59+
query_error_ok = FALSE,
60+
on_query_error = NULL,
9461
max_simplify_lvl = c("data_frame", "matrix", "vector", "list"),
9562
type_policy = c("anything_goes", "numbers", "strict"),
9663
int64_policy = c("double", "string", "integer64"),
9764
verbose = FALSE,
9865
temp_dir = tempdir(),
99-
keep_temp_files = FALSE) {
100-
# validate arguments =========================================================
101-
if (!is.character(json) || length(json) == 0L) {
102-
stop("`json=` must be a non-empty `character`.")
103-
}
104-
if (all(is.na(json))) {
105-
if (length(json) == 1L) return(json) else return(as.list(json))
106-
}
107-
108-
# if (!is.null(query) && !()) {
109-
# query <- ""
110-
# } else if (!.is_scalar_chr(query)) {
111-
# stop("`query=` must be a single, non-`NA` `character`.")
112-
# }
66+
keep_temp_files = FALSE,
67+
compressed_download = FALSE) {
68+
# validate arguments =======================================================
69+
if (!.is_valid_json_arg(json)) {
70+
stop("`json=` must be a non-empty character vector, raw vector, or a list containing raw vectors.")
71+
}
72+
if (!.is_valid_query_arg(query)) {
73+
stop("`query=` must be `NULL`, a non-empty character vector, or a list containing non-empty character vectors.")
74+
}
11375

114-
if (!.is_scalar_lgl(error_ok)) {
115-
stop("`error_ok=` must be either `TRUE` or `FALSE`.")
116-
}
76+
if (!.is_scalar_lgl(parse_error_ok)) {
77+
stop("`parse_error_ok=` must be either `TRUE` or `FALSE`.")
78+
}
79+
if (!.is_scalar_lgl(query_error_ok)) {
80+
stop("`query_error_ok=` must be either `TRUE` or `FALSE`.")
81+
}
82+
if (!.is_scalar_lgl((verbose))) {
83+
stop("`verbose=` must be either `TRUE` or `FALSE`.")
84+
}
85+
if (!.is_scalar_lgl(keep_temp_files)) {
86+
stop("`keep_temp_files=` must be either `TRUE` or `FALSE`.")
87+
}
88+
if (!.is_scalar_lgl(compressed_download)) {
89+
stop("`compressed_download=` must be either `TRUE` or `FALSE`.")
90+
}
11791

118-
if (!.is_scalar_lgl((verbose))) {
119-
stop("`verbose=` must be either `TRUE` or `FALSE`.")
120-
}
121-
if (!.is_scalar_lgl(keep_temp_files)) {
122-
stop("`keep_temp_files=` must be either `TRUE` or `FALSE`.")
123-
}
124-
if (!length(temp_dir <- Sys.glob(temp_dir))) {
125-
stop("`temp_dir=` does not exist.")
126-
}
127-
# prep options ===============================================================
128-
# max_simplify_lvl -----------------------------------------------------------
129-
if (is.character(max_simplify_lvl)) {
130-
max_simplify_lvl <- switch(
131-
match.arg(max_simplify_lvl, c("data_frame", "matrix", "vector", "list")),
132-
data_frame = 0L,
133-
matrix = 1L,
134-
vector = 2L,
135-
list = 3L,
136-
stop("Unknown `max_simplify_lvl=`.")
137-
)
138-
} else if (is.numeric(max_simplify_lvl)) {
139-
stopifnot(max_simplify_lvl %in% 0:3)
140-
} else {
141-
stop("`max_simplify_lvl=` must be of type `character` or `numeric`.")
142-
}
143-
# type_policy ----------------------------------------------------------------
144-
if (is.character(type_policy)) {
145-
type_policy <- switch(
146-
match.arg(type_policy, c("anything_goes", "numbers", "strict")),
147-
anything_goes = 0L,
148-
numbers = 1L,
149-
strict = 2L,
150-
stop("Unknown `type_policy=`.")
151-
)
152-
} else if (is.numeric(type_policy)) {
153-
stopifnot(max_simplify_lvl %in% 0:2)
154-
} else {
155-
stop("`type_policy=` must be of type `character` or `numeric`.")
156-
}
157-
# int64_policy ---------------------------------------------------------------
158-
if (is.character(int64_policy)) {
159-
int64_policy <- switch(
160-
match.arg(int64_policy, c("double", "string", "integer64")),
161-
double = 0L,
162-
string = 1L,
163-
integer64 = 2L,
164-
stop("Unknown `int64_policy=`.")
165-
)
166-
} else if (is.numeric(int64_policy)) {
167-
stopifnot(int64_policy %in% 0:2)
168-
} else {
169-
stop("`int64_policy` must be of type `character` or `numeric`.")
170-
}
92+
if (!length(temp_dir <- Sys.glob(temp_dir))) {
93+
stop("`temp_dir=` does not exist.")
94+
}
95+
# prep options =============================================================
96+
# max_simplify_lvl ---------------------------------------------------------
97+
if (is.character(max_simplify_lvl)) {
98+
max_simplify_lvl <- switch(
99+
match.arg(
100+
max_simplify_lvl,
101+
c("data_frame", "matrix", "vector", "list")
102+
),
103+
data_frame = 0L,
104+
matrix = 1L,
105+
vector = 2L,
106+
list = 3L,
107+
stop("Unknown `max_simplify_lvl=`.")
108+
)
109+
} else if (is.numeric(max_simplify_lvl)) {
110+
stopifnot(max_simplify_lvl %in% 0:3)
111+
} else {
112+
stop("`max_simplify_lvl=` must be of type `character` or `numeric`.")
113+
}
114+
# type_policy --------------------------------------------------------------
115+
if (is.character(type_policy)) {
116+
type_policy <- switch(
117+
match.arg(type_policy, c(
118+
"anything_goes", "numbers", "strict"
119+
)),
120+
anything_goes = 0L,
121+
numbers = 1L,
122+
strict = 2L,
123+
stop("Unknown `type_policy=`.")
124+
)
125+
} else if (is.numeric(type_policy)) {
126+
stopifnot(max_simplify_lvl %in% 0:2)
127+
} else {
128+
stop("`type_policy=` must be of type `character` or `numeric`.")
129+
}
130+
# int64_policy -------------------------------------------------------------
131+
if (is.character(int64_policy)) {
132+
int64_policy <- switch(
133+
match.arg(int64_policy, c("double", "string", "integer64")),
134+
double = 0L,
135+
string = 1L,
136+
integer64 = 2L,
137+
stop("Unknown `int64_policy=`.")
138+
)
139+
} else if (is.numeric(int64_policy)) {
140+
stopifnot(int64_policy %in% 0:2)
141+
} else {
142+
stop("`int64_policy` must be of type `character` or `numeric`.")
143+
}
171144

172-
if (int64_policy == 2L && !requireNamespace("bit64", quietly = TRUE)) {
173-
# nocov start
174-
stop('`int64_policy="integer64", but the {bit64} package is not installed.')
175-
# nocov end
176-
}
177-
# files or URLs? =============================================================
178-
diagnosis <- .diagnose_input(json)
179-
# URLs -----------------------------------------------------------------------
180-
if (any(diagnosis$type == "url", na.rm = TRUE)) {
181-
for (i in seq_along(json)) {
182-
if (is.na(diagnosis$type[[i]]) || diagnosis$type[[i]] != "url") {
183-
next
184-
}
145+
if (int64_policy == 2L &&
146+
!requireNamespace("bit64", quietly = TRUE)) {
147+
# nocov start
148+
stop('`int64_policy="integer64", but the {bit64} package is not installed.')
149+
# nocov end
150+
}
185151

186-
temp_file <- tempfile(fileext = diagnosis$file_ext[[i]], tmpdir = temp_dir)
152+
diagnosis <- .prep_input(json,
153+
temp_dir = temp_dir,
154+
compressed_download = compressed_download,
155+
verbose = verbose)
156+
if (!keep_temp_files) {
157+
on.exit(unlink(diagnosis$input[diagnosis$is_from_url]), add = TRUE)
158+
}
187159

188-
switch(
189-
diagnosis$url_prefix[[i]],
190-
"https://" = ,
191-
"ftps://" = ,
192-
"http://" = ,
193-
"ftp://" = download.file(diagnosis$input[[i]], destfile = temp_file, method = getOption("download.file.method", default = "auto"), quiet = !verbose),
194-
"file://" = download.file(diagnosis$input[[i]], destfile = temp_file, method = "internal", quiet = !verbose),
195-
stop("Unknown URL prefix") # nocov
196-
)
160+
input <- diagnosis$input
197161

198-
diagnosis$input[[i]] <- temp_file
199-
diagnosis$type[[i]] <- "file"
200-
if (!keep_temp_files) {
201-
on.exit(unlink(diagnosis$input[[i]]), add = TRUE)
202-
}
162+
# prep names ===============================================================
163+
if (length(names(json))) {
164+
names(input) <- names(json)
165+
} else if (!anyNA(base_names <- basename(json))) {
166+
names(input) <- base_names
203167
}
204-
}
205-
# file -----------------------------------------------------------------------
206-
if (length(missing_files <- diagnosis$input[!is.na(diagnosis$input) & !file.exists(diagnosis$input)])) {
207-
stop("The following files don't exist:",
208-
sprintf("\n\t- %s", missing_files))
209-
}
210-
input <- rep(NA_character_, length(json))
211-
input[!is.na(diagnosis$input)] <- Sys.glob(diagnosis$input[!is.na(diagnosis$input)])
212-
# prep names =================================================================
213-
if (length(names(json))) {
214-
names(input) <- names(json)
215-
} else {
216-
names(input) <- basename(json)
217-
}
218-
# load =======================================================================
219-
.load_json(
220-
json = input,
221-
query = query,
222-
empty_array = empty_array,
223-
empty_object = empty_object,
224-
error_ok = error_ok,
225-
on_error = on_error,
226-
single_null = single_null,
227-
simplify_to = max_simplify_lvl,
228-
type_policy = type_policy,
229-
int64_r_type = int64_policy
230-
)
168+
169+
# load =====================================================================
170+
.load_json(
171+
json = input,
172+
query = query,
173+
empty_array = empty_array,
174+
empty_object = empty_object,
175+
parse_error_ok = parse_error_ok,
176+
on_parse_error = on_parse_error,
177+
query_error_ok = query_error_ok,
178+
on_query_error = on_query_error,
179+
single_null = single_null,
180+
simplify_to = max_simplify_lvl,
181+
type_policy = type_policy,
182+
int64_r_type = int64_policy
183+
)
231184
}

0 commit comments

Comments
 (0)