1
- .file_extension <- function (x ) {
2
- base_name <- basename(x )
3
-
4
- captures <- regexpr(" (?<!^|[.]|/)[.]([^.]+)$" , base_name , perl = TRUE )
5
- out <- rep(NA_character_ , length(x ))
6
- out [! is.na(x ) & captures > 0L ] <- substring(
7
- base_name [! is.na(x ) & captures > 0L ],
8
- captures [! is.na(x ) & captures > 0L ]
9
- )
10
-
11
- out
12
- }
13
-
14
-
15
- .url_prefix <- function (x ) {
16
- vapply(x , function (.x ) {
17
- if (is.na(.x )) return (NA_character_ )
18
- if (substring(.x , 1L , 8L ) == " https://" ) {
19
- " https://"
20
- } else if ((prefix <- substring(.x , 1L , 7L )) %in% c(" http://" , " ftps://" , " file://" )) {
21
- prefix
22
- } else if (substring(.x , 1L , 6L ) == " ftp://" ) {
23
- " ftp://"
24
- } else {
25
- NA_character_
26
- }
27
- }, character (1L ), USE.NAMES = FALSE )
28
- }
29
-
30
-
31
- .diagnose_input <- function (x ) {
32
- init <- list (
33
- input = x ,
34
- url_prefix = .url_prefix(x ),
35
- file_ext = .file_extension(x )
36
- )
37
-
38
- # init$compressed <- tolower(init$file_ext) %in% c(".gz", ".bz", ".bz2", ".xz", ".zip")
39
- # if (any(init$compressed)) {
40
- # stop(
41
- # "Compressed files are not yet supported. The following files are affected:",
42
- # sprintf("\n\t- %s", x[init$compressed]),
43
- # call. = FALSE
44
- # )
45
- # }
46
-
47
- init $ type <- ifelse(
48
- ! is.na(init $ url_prefix ), " url" ,
49
- ifelse(! is.na(init $ file_ext ), " file" , NA_character_ )
50
- )
51
-
52
- structure(init , class = " data.frame" , row.names = seq_along(x ))
53
- }
54
-
55
-
56
1
# ' @rdname fparse
57
2
# '
58
3
# ' @order 2
59
4
# '
5
+ # '
6
+ # ' @param verbose Whether to display status messages.
7
+ # ' \code{TRUE} or \code{FALSE}, default: \code{FALSE}
8
+ # '
9
+ # ' @param temp_dir Directory path to use for any temporary files.
10
+ # ' \code{character(1L)}, default: \code{tempdir()}
11
+ # '
12
+ # ' @param keep_temp_files Whether to remove any temporary files created by
13
+ # ' \code{fload()} from \code{temp_dir}.
14
+ # ' \code{TRUE} or \code{FALSE}, default: \code{TRUE}
15
+ # '
16
+ # ' @param compressed_download Whether to request server-side compression on
17
+ # ' the downloaded document, default: \code{FALSE}
18
+ # '
19
+ # '
60
20
# ' @examples
61
21
# ' # load JSON files ===========================================================
62
22
# ' single_file <- system.file("jsonexamples/small/demo.json", package = "RcppSimdJson")
68
28
# ' )
69
29
# ' fload(multiple_files)
70
30
# '
71
- # ' # load remote JSON ==========================================================
31
+ # '
72
32
# ' \dontrun{
73
33
# '
34
+ # ' # load remote JSON ==========================================================
74
35
# ' a_url <- "https://api.github.com/users/lemire"
75
36
# ' fload(a_url)
76
37
# '
81
42
# ' "https://api.github.com/users/dcooley"
82
43
# ' )
83
44
# ' fload(multiple_urls, query = "name", verbose = TRUE)
45
+ # '
46
+ # ' # download compressed (faster) JSON =========================================
47
+ # ' fload(multiple_urls, query = "name", verbose = TRUE,
48
+ # ' compressed_download = TRUE)
84
49
# ' }
85
50
# '
86
51
# ' @export
@@ -89,143 +54,131 @@ fload <- function(json,
89
54
empty_array = NULL ,
90
55
empty_object = NULL ,
91
56
single_null = NULL ,
92
- error_ok = FALSE ,
93
- on_error = NULL ,
57
+ parse_error_ok = FALSE ,
58
+ on_parse_error = NULL ,
59
+ query_error_ok = FALSE ,
60
+ on_query_error = NULL ,
94
61
max_simplify_lvl = c(" data_frame" , " matrix" , " vector" , " list" ),
95
62
type_policy = c(" anything_goes" , " numbers" , " strict" ),
96
63
int64_policy = c(" double" , " string" , " integer64" ),
97
64
verbose = FALSE ,
98
65
temp_dir = tempdir(),
99
- keep_temp_files = FALSE ) {
100
- # validate arguments =========================================================
101
- if (! is.character(json ) || length(json ) == 0L ) {
102
- stop(" `json=` must be a non-empty `character`." )
103
- }
104
- if (all(is.na(json ))) {
105
- if (length(json ) == 1L ) return (json ) else return (as.list(json ))
106
- }
107
-
108
- # if (!is.null(query) && !()) {
109
- # query <- ""
110
- # } else if (!.is_scalar_chr(query)) {
111
- # stop("`query=` must be a single, non-`NA` `character`.")
112
- # }
66
+ keep_temp_files = FALSE ,
67
+ compressed_download = FALSE ) {
68
+ # validate arguments =======================================================
69
+ if (! .is_valid_json_arg(json )) {
70
+ stop(" `json=` must be a non-empty character vector, raw vector, or a list containing raw vectors." )
71
+ }
72
+ if (! .is_valid_query_arg(query )) {
73
+ stop(" `query=` must be `NULL`, a non-empty character vector, or a list containing non-empty character vectors." )
74
+ }
113
75
114
- if (! .is_scalar_lgl(error_ok )) {
115
- stop(" `error_ok=` must be either `TRUE` or `FALSE`." )
116
- }
76
+ if (! .is_scalar_lgl(parse_error_ok )) {
77
+ stop(" `parse_error_ok=` must be either `TRUE` or `FALSE`." )
78
+ }
79
+ if (! .is_scalar_lgl(query_error_ok )) {
80
+ stop(" `query_error_ok=` must be either `TRUE` or `FALSE`." )
81
+ }
82
+ if (! .is_scalar_lgl((verbose ))) {
83
+ stop(" `verbose=` must be either `TRUE` or `FALSE`." )
84
+ }
85
+ if (! .is_scalar_lgl(keep_temp_files )) {
86
+ stop(" `keep_temp_files=` must be either `TRUE` or `FALSE`." )
87
+ }
88
+ if (! .is_scalar_lgl(compressed_download )) {
89
+ stop(" `compressed_download=` must be either `TRUE` or `FALSE`." )
90
+ }
117
91
118
- if (! .is_scalar_lgl((verbose ))) {
119
- stop(" `verbose=` must be either `TRUE` or `FALSE`." )
120
- }
121
- if (! .is_scalar_lgl(keep_temp_files )) {
122
- stop(" `keep_temp_files=` must be either `TRUE` or `FALSE`." )
123
- }
124
- if (! length(temp_dir <- Sys.glob(temp_dir ))) {
125
- stop(" `temp_dir=` does not exist." )
126
- }
127
- # prep options ===============================================================
128
- # max_simplify_lvl -----------------------------------------------------------
129
- if (is.character(max_simplify_lvl )) {
130
- max_simplify_lvl <- switch (
131
- match.arg(max_simplify_lvl , c(" data_frame" , " matrix" , " vector" , " list" )),
132
- data_frame = 0L ,
133
- matrix = 1L ,
134
- vector = 2L ,
135
- list = 3L ,
136
- stop(" Unknown `max_simplify_lvl=`." )
137
- )
138
- } else if (is.numeric(max_simplify_lvl )) {
139
- stopifnot(max_simplify_lvl %in% 0 : 3 )
140
- } else {
141
- stop(" `max_simplify_lvl=` must be of type `character` or `numeric`." )
142
- }
143
- # type_policy ----------------------------------------------------------------
144
- if (is.character(type_policy )) {
145
- type_policy <- switch (
146
- match.arg(type_policy , c(" anything_goes" , " numbers" , " strict" )),
147
- anything_goes = 0L ,
148
- numbers = 1L ,
149
- strict = 2L ,
150
- stop(" Unknown `type_policy=`." )
151
- )
152
- } else if (is.numeric(type_policy )) {
153
- stopifnot(max_simplify_lvl %in% 0 : 2 )
154
- } else {
155
- stop(" `type_policy=` must be of type `character` or `numeric`." )
156
- }
157
- # int64_policy ---------------------------------------------------------------
158
- if (is.character(int64_policy )) {
159
- int64_policy <- switch (
160
- match.arg(int64_policy , c(" double" , " string" , " integer64" )),
161
- double = 0L ,
162
- string = 1L ,
163
- integer64 = 2L ,
164
- stop(" Unknown `int64_policy=`." )
165
- )
166
- } else if (is.numeric(int64_policy )) {
167
- stopifnot(int64_policy %in% 0 : 2 )
168
- } else {
169
- stop(" `int64_policy` must be of type `character` or `numeric`." )
170
- }
92
+ if (! length(temp_dir <- Sys.glob(temp_dir ))) {
93
+ stop(" `temp_dir=` does not exist." )
94
+ }
95
+ # prep options =============================================================
96
+ # max_simplify_lvl ---------------------------------------------------------
97
+ if (is.character(max_simplify_lvl )) {
98
+ max_simplify_lvl <- switch (
99
+ match.arg(
100
+ max_simplify_lvl ,
101
+ c(" data_frame" , " matrix" , " vector" , " list" )
102
+ ),
103
+ data_frame = 0L ,
104
+ matrix = 1L ,
105
+ vector = 2L ,
106
+ list = 3L ,
107
+ stop(" Unknown `max_simplify_lvl=`." )
108
+ )
109
+ } else if (is.numeric(max_simplify_lvl )) {
110
+ stopifnot(max_simplify_lvl %in% 0 : 3 )
111
+ } else {
112
+ stop(" `max_simplify_lvl=` must be of type `character` or `numeric`." )
113
+ }
114
+ # type_policy --------------------------------------------------------------
115
+ if (is.character(type_policy )) {
116
+ type_policy <- switch (
117
+ match.arg(type_policy , c(
118
+ " anything_goes" , " numbers" , " strict"
119
+ )),
120
+ anything_goes = 0L ,
121
+ numbers = 1L ,
122
+ strict = 2L ,
123
+ stop(" Unknown `type_policy=`." )
124
+ )
125
+ } else if (is.numeric(type_policy )) {
126
+ stopifnot(max_simplify_lvl %in% 0 : 2 )
127
+ } else {
128
+ stop(" `type_policy=` must be of type `character` or `numeric`." )
129
+ }
130
+ # int64_policy -------------------------------------------------------------
131
+ if (is.character(int64_policy )) {
132
+ int64_policy <- switch (
133
+ match.arg(int64_policy , c(" double" , " string" , " integer64" )),
134
+ double = 0L ,
135
+ string = 1L ,
136
+ integer64 = 2L ,
137
+ stop(" Unknown `int64_policy=`." )
138
+ )
139
+ } else if (is.numeric(int64_policy )) {
140
+ stopifnot(int64_policy %in% 0 : 2 )
141
+ } else {
142
+ stop(" `int64_policy` must be of type `character` or `numeric`." )
143
+ }
171
144
172
- if (int64_policy == 2L && ! requireNamespace(" bit64" , quietly = TRUE )) {
173
- # nocov start
174
- stop(' `int64_policy="integer64", but the {bit64} package is not installed.' )
175
- # nocov end
176
- }
177
- # files or URLs? =============================================================
178
- diagnosis <- .diagnose_input(json )
179
- # URLs -----------------------------------------------------------------------
180
- if (any(diagnosis $ type == " url" , na.rm = TRUE )) {
181
- for (i in seq_along(json )) {
182
- if (is.na(diagnosis $ type [[i ]]) || diagnosis $ type [[i ]] != " url" ) {
183
- next
184
- }
145
+ if (int64_policy == 2L &&
146
+ ! requireNamespace(" bit64" , quietly = TRUE )) {
147
+ # nocov start
148
+ stop(' `int64_policy="integer64", but the {bit64} package is not installed.' )
149
+ # nocov end
150
+ }
185
151
186
- temp_file <- tempfile(fileext = diagnosis $ file_ext [[i ]], tmpdir = temp_dir )
152
+ diagnosis <- .prep_input(json ,
153
+ temp_dir = temp_dir ,
154
+ compressed_download = compressed_download ,
155
+ verbose = verbose )
156
+ if (! keep_temp_files ) {
157
+ on.exit(unlink(diagnosis $ input [diagnosis $ is_from_url ]), add = TRUE )
158
+ }
187
159
188
- switch (
189
- diagnosis $ url_prefix [[i ]],
190
- " https://" = ,
191
- " ftps://" = ,
192
- " http://" = ,
193
- " ftp://" = download.file(diagnosis $ input [[i ]], destfile = temp_file , method = getOption(" download.file.method" , default = " auto" ), quiet = ! verbose ),
194
- " file://" = download.file(diagnosis $ input [[i ]], destfile = temp_file , method = " internal" , quiet = ! verbose ),
195
- stop(" Unknown URL prefix" ) # nocov
196
- )
160
+ input <- diagnosis $ input
197
161
198
- diagnosis $ input [[ i ]] <- temp_file
199
- diagnosis $ type [[ i ]] <- " file "
200
- if ( ! keep_temp_files ) {
201
- on.exit(unlink( diagnosis $ input [[ i ]]), add = TRUE )
202
- }
162
+ # prep names ===============================================================
163
+ if (length(names( json ))) {
164
+ names( input ) <- names( json )
165
+ } else if ( ! anyNA( base_names <- basename( json ))) {
166
+ names( input ) <- base_names
203
167
}
204
- }
205
- # file -----------------------------------------------------------------------
206
- if (length(missing_files <- diagnosis $ input [! is.na(diagnosis $ input ) & ! file.exists(diagnosis $ input )])) {
207
- stop(" The following files don't exist:" ,
208
- sprintf(" \n\t - %s" , missing_files ))
209
- }
210
- input <- rep(NA_character_ , length(json ))
211
- input [! is.na(diagnosis $ input )] <- Sys.glob(diagnosis $ input [! is.na(diagnosis $ input )])
212
- # prep names =================================================================
213
- if (length(names(json ))) {
214
- names(input ) <- names(json )
215
- } else {
216
- names(input ) <- basename(json )
217
- }
218
- # load =======================================================================
219
- .load_json(
220
- json = input ,
221
- query = query ,
222
- empty_array = empty_array ,
223
- empty_object = empty_object ,
224
- error_ok = error_ok ,
225
- on_error = on_error ,
226
- single_null = single_null ,
227
- simplify_to = max_simplify_lvl ,
228
- type_policy = type_policy ,
229
- int64_r_type = int64_policy
230
- )
168
+
169
+ # load =====================================================================
170
+ .load_json(
171
+ json = input ,
172
+ query = query ,
173
+ empty_array = empty_array ,
174
+ empty_object = empty_object ,
175
+ parse_error_ok = parse_error_ok ,
176
+ on_parse_error = on_parse_error ,
177
+ query_error_ok = query_error_ok ,
178
+ on_query_error = on_query_error ,
179
+ single_null = single_null ,
180
+ simplify_to = max_simplify_lvl ,
181
+ type_policy = type_policy ,
182
+ int64_r_type = int64_policy
183
+ )
231
184
}
0 commit comments