Skip to content

Commit 2b5bf82

Browse files
committed
cleaning up structure and vestigial junk
1 parent dac90d8 commit 2b5bf82

File tree

10 files changed

+208
-230
lines changed

10 files changed

+208
-230
lines changed

R/RcppExports.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
.Call(`_RcppSimdJson_load`, json, query, empty_array, empty_object, single_null, parse_error_ok, on_parse_error, query_error_ok, on_query_error, simplify_to, type_policy, int64_r_type)
1010
}
1111

12+
.exceptions_enabled <- function() {
13+
.Call(`_RcppSimdJson_exceptions_enabled`)
14+
}
15+
1216
.is_valid_json_arg <- function(json) {
1317
.Call(`_RcppSimdJson_is_valid_json_arg`, json)
1418
}
@@ -21,10 +25,6 @@
2125
.Call(`_RcppSimdJson_diagnose_input`, x)
2226
}
2327

24-
.exceptions_enabled <- function() {
25-
.Call(`_RcppSimdJson_exceptions_enabled`)
26-
}
27-
2828
.check_int64 <- function() {
2929
.Call(`_RcppSimdJson_check_int64`)
3030
}

R/fparse.R

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
#' }
2222
#' }
2323
#'
24-
#' @param query If not \code{NULL}, a string used as a JSON Pointer to identify a
25-
#' specific element within \code{json}.
26-
#' \code{character(1L)}, default: \code{NULL}
24+
#' @param query If not \code{NULL}, JSON Pointer(s) used to identify and extract
25+
#' specific elements within \code{json}. See Details and Examples.
26+
#' \code{NULL}, \code{character()}, or \code{list()} of \code{character()}. default: \code{NULL}
2727
#'
2828
#' @param empty_array Any R object to return for empty JSON arrays.
2929
#' default: \code{NULL}
@@ -70,7 +70,7 @@
7070
#' \itemize{
7171
#' \item \code{"double"} or \code{0L}: big integers become \code{double}s
7272
#' \item \code{"string"} or \code{1L}: big integers become \code{character}s
73-
#' \item \code{"integer64"} or \code{2L}: big integers \code{bit64::integer64}s
73+
#' \item \code{"integer64"} or \code{2L}: big integers become \code{bit64::integer64}s
7474
#' }
7575
#'
7676
#'
@@ -92,6 +92,25 @@
9292
#' names each returned element using the file's \code{basename()}.
9393
#' }
9494
#'
95+
#' \item \code{query}'s goal is to minimize the amount of data that must be
96+
#' materialized as R objects (the main performance bottleneck) as well as
97+
#' facilitate any post-parse processing.
98+
#' \itemize{
99+
#' \item To maximize flexibility, there are two approaches to consider when designing \code{query} arguments.
100+
#' \itemize{
101+
#' \item \code{character} vectors are interpreted as containing queries that
102+
#' are meant to be applied to all elements of \code{json=}.
103+
#' \itemize{
104+
#' \item If \code{json=} contains 3 strings and \code{query=} contains
105+
#' 3 strings, the returned object will be a list of 3 elements (1 for each element
106+
#' of \code{json=}), which themselves each contain 3 lists (1 for each element
107+
#' of \code{query=}).
108+
#' }
109+
#' \item \code{list}s of \code{character} vectors are interpreted as containing
110+
#' queries meant to be applied to \code{json} in a zip-like fashion.
111+
#' }
112+
#' }
113+
#'
95114
#' }
96115
#'
97116
#' @author Brendan Knapp
@@ -295,11 +314,6 @@ fparse <- function(json,
295314
# nocov end
296315
}
297316

298-
# prep names ===============================================================
299-
# if (length(names(json)) && !length(names(query))) {
300-
# names(query) <- names(json)
301-
# }
302-
303317
# deserialize ==============================================================
304318
.deserialize_json(
305319
json = json,

inst/include/RcppSimdJson/deserialize.hpp

Lines changed: 4 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -336,20 +336,19 @@ inline simdjson::simdjson_result<simdjson::dom::element> parse(simdjson::dom::pa
336336
const json_T& json) {
337337

338338
if constexpr (utils::resembles_vec_raw<json_T>()) {
339-
/* if `json` is a raw (unsigned char) vector, we (seem to be fine) cheating */
339+
/* if `json` is a raw (unsigned char) vector, we can cheat */
340340
return parser.parse(
341341
std::string_view(reinterpret_cast<const char*>(&(json[0])), std::size(json)));
342-
// return parser.parse(std::string(std::cbegin(json), std::cend(json))); /* the safe way */
343342
}
344343

345344
if constexpr (utils::resembles_vec_chr<json_T>()) {
346-
/* if `json` is a character vector, we're only parsing the first one */
345+
/* if `json` is a character vector, we're only parsing the first element */
347346
return parse<decltype(json[0]), is_file>(parser, json[0]);
348347
}
349348

350349
if constexpr (utils::resembles_r_string<json_T>()) {
351350
if constexpr (is_file) {
352-
if (const auto file_type = utils::get_memDecompress_type(std::string(json))) {
351+
if (const auto file_type = utils::get_memDecompress_type(std::string_view(json))) {
353352
return parse<Rcpp::RawVector, IS_NOT_FILE>(
354353
parser,
355354
utils::decompress(std::string(json), Rcpp::String((*file_type).data())));
@@ -455,17 +454,6 @@ template <typename json_T,
455454
inline SEXP no_query(const json_T& json,
456455
SEXP on_parse_error,
457456
const rcppsimdjson::deserialize::Parse_Opts& parse_opts) {
458-
#ifdef RCPPSIMDJSON_DEBUG
459-
utils::debug_msg<json_T>("no_query",
460-
is_file,
461-
is_single_json,
462-
false,
463-
parse_error_ok,
464-
query_error_ok,
465-
on_parse_error,
466-
R_NilValue);
467-
#endif
468-
469457
simdjson::dom::parser parser;
470458

471459
if constexpr (is_single_json) {
@@ -498,17 +486,6 @@ inline SEXP flat_query(const json_T& json,
498486
SEXP on_parse_error,
499487
SEXP on_query_error,
500488
const rcppsimdjson::deserialize::Parse_Opts& parse_opts) {
501-
#ifdef RCPPSIMDJSON_DEBUG
502-
utils::debug_msg<json_T>("flat_query",
503-
is_file,
504-
is_single_json,
505-
is_single_query,
506-
parse_error_ok,
507-
query_error_ok,
508-
on_parse_error,
509-
on_query_error);
510-
#endif
511-
512489
simdjson::dom::parser parser;
513490

514491
if constexpr (is_single_json) {
@@ -601,24 +578,7 @@ inline SEXP nested_query(const json_T& json,
601578
SEXP on_parse_error,
602579
SEXP on_query_error,
603580
const rcppsimdjson::deserialize::Parse_Opts& parse_opts) {
604-
#ifdef RCPPSIMDJSON_DEBUG
605-
utils::debug_msg<json_T>("nested_query",
606-
is_file,
607-
is_single_json,
608-
is_single_query,
609-
parse_error_ok,
610-
query_error_ok,
611-
on_parse_error,
612-
on_query_error);
613-
#endif
614-
615-
R_xlen_t n;
616-
if constexpr (is_single_json) {
617-
n = std::size(query);
618-
} else {
619-
n = std::size(json);
620-
}
621-
581+
const R_xlen_t n = std::size(json); /* query already checked to be the same size */
622582
Rcpp::List out(n);
623583
simdjson::dom::parser parser;
624584

inst/include/RcppSimdJson/utils.hpp

Lines changed: 19 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,8 @@
33

44

55
#include <Rcpp.h>
6-
#include <algorithm> // std::all_of
7-
#include <fstream>
8-
// #include <optional>
9-
// #include <string_view>
6+
#include <algorithm> /* std::all_of */
7+
#include <fstream> /* std::ifstream */
108

119
namespace rcppsimdjson {
1210
namespace utils {
@@ -119,19 +117,18 @@ inline SEXP resolve_int64(const std::vector<uint64_t>& x) {
119117
}
120118

121119

122-
template <typename file_path_T>
123-
inline std::optional<std::string_view> get_memDecompress_type(const file_path_T& file_path) {
124-
std::string test;
120+
inline std::optional<std::string_view> get_memDecompress_type(const std::string_view& file_path) {
125121
if (const auto dot = std::string_view(file_path).rfind('.'); dot != std::string_view::npos) {
126-
const auto ext = file_path.substr(dot + 1);
127-
if (ext == "gz") {
128-
return "gzip";
129-
}
130-
if (ext == "xz") {
131-
return "xz";
132-
}
133-
if (ext == "bz" || ext == "bz2") {
134-
return "bzip2";
122+
if (const auto ext = file_path.substr(dot + 1); std::size(ext) >= 2) {
123+
if (ext == "gz") {
124+
return "gzip";
125+
}
126+
if (ext == "xz") {
127+
return "xz";
128+
}
129+
if (ext == "bz" || ext == "bz2") {
130+
return "bzip2";
131+
}
135132
}
136133
}
137134
return std::nullopt;
@@ -182,59 +179,23 @@ template <typename file_path_T>
182179
inline Rcpp::RawVector decompress(const file_path_T& file_path, const Rcpp::String& file_type) {
183180
std::ifstream stream(file_path, std::ios::binary | std::ios::ate);
184181
if (!stream) {
185-
Rcpp::stop("");
182+
Rcpp::stop("There's a problem with this file:\n\t-%s", file_path);
186183
}
187184

188185
const auto end = stream.tellg();
189186
stream.seekg(0, std::ios::beg);
190187
const std::size_t n(end - stream.tellg());
191-
if (n == 0) {
192-
Rcpp::stop("");
188+
if (n == 0) { /* avoid undefined behavior */
189+
return Rcpp::RawVector(0);
193190
}
194191

195-
// std::vector<Rbyte> buffer(n);
196-
// stream.read(reinterpret_cast<char*>(buffer.data()), n);
197192
Rcpp::RawVector buffer(n);
198193
stream.read(reinterpret_cast<char*>(&(buffer[0])), n);
199194

200195
return Rcpp::Function("memDecompress")(buffer, file_type, false);
201196
}
202197

203198

204-
template <typename json_T>
205-
constexpr void debug_msg(const std::string_view f_name,
206-
const bool is_file,
207-
const bool single_json,
208-
const bool single_query,
209-
const bool parse_error_ok,
210-
const bool query_error_ok,
211-
SEXP on_parse_error,
212-
SEXP on_query_error) {
213-
Rcpp::Rcout << f_name << std::endl << std::endl;
214-
215-
if constexpr (std::is_same_v<json_T, Rcpp::CharacterVector>) {
216-
Rcpp::Rcout << "json_T: Rcpp::CharacterVector" << std::endl;
217-
}
218-
if constexpr (std::is_same_v<json_T, Rcpp::RawVector>) {
219-
Rcpp::Rcout << "json_T: Rcpp::RawVector" << std::endl;
220-
}
221-
if constexpr (std::is_same_v<json_T, Rcpp::ListOf<Rcpp::RawVector>>) {
222-
Rcpp::Rcout << "json_T: Rcpp::ListOf<Rcpp::RawVector>" << std::endl;
223-
}
224-
Rcpp::Rcout << (is_file ? "is_file" : "!is_file") << std::endl;
225-
Rcpp::Rcout << (single_json ? "single_json" : "!single_json") << std::endl;
226-
Rcpp::Rcout << (single_query ? "single_query" : "!single_query") << std::endl;
227-
Rcpp::Rcout << (parse_error_ok ? "parse_error_ok" : "!parse_error_ok") << std::endl;
228-
Rcpp::Rcout << (query_error_ok ? "query_error_ok" : "!query_error_ok") << std::endl;
229-
Rcpp::Rcout << std::endl << "on_parse_error: " << std::endl;
230-
Rcpp::print(on_parse_error);
231-
Rcpp::Rcout << std::endl << "on_query_error: " << std::endl;
232-
Rcpp::print(on_query_error);
233-
234-
Rcpp::Rcout << std::endl << std::endl;
235-
}
236-
237-
238199
inline constexpr std::optional<std::string_view> get_url_prefix(const std::string_view& str) {
239200
if (std::size(str) > 8) {
240201
if (const auto prefix = std::string_view(str).substr(0, 8); prefix == "https://") {
@@ -253,14 +214,10 @@ inline constexpr std::optional<std::string_view> get_url_prefix(const std::strin
253214
inline constexpr std::optional<std::string_view> get_file_ext(const std::string_view& str) {
254215
if (const auto dot = str.rfind('.'); dot != std::string_view::npos) {
255216
if (const auto out = str.substr(dot); /* `with_dot ? dot : dot + 1` */
256-
/* if the file path is a URL without an extension, we need to ensure that that we don't
257-
* extract everything after the domain by checking for '/' or '\\' (Windows),
258-
*/
259-
#ifdef _WIN32 /* both 32-bit and 64-bit Windows */
260-
out.find('\\') == std::string_view::npos) {
261-
#else
217+
/* if the file path is a URL without an extension, we need to ensure that we don't
218+
* extract everything after the domain by checking for '/'
219+
*/
262220
out.find("/") == std::string_view::npos) {
263-
#endif
264221
return str.substr(dot);
265222
}
266223
}

inst/tinytest/test_compressed_files.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
if (RcppSimdJson:::.unsupportedArchitecture()) exit_file("Unsupported chipset")
22

3-
my_temp_dir <- paste0(tempdir(), "/rcppsimdjson-temp")
3+
my_temp_dir <- paste0(tempdir(), "/rcppsimdjson-compressed-temp")
44
dir.create(my_temp_dir)
55

66
.read_compress_write_load <- function(file_path) {
@@ -10,7 +10,7 @@ dir.create(my_temp_dir)
1010
init <- readBin(file_path, n = file.size(file_path), what = "raw")
1111

1212
mapply(function(type, ext) {
13-
target_path <- paste0(basename(file_path), ".", ext)
13+
target_path <- paste0(my_temp_dir, "/", basename(file_path), ".", ext)
1414
writeBin(memCompress(init, type = type), target_path)
1515
fload(target_path)
1616
}, types, exts, USE.NAMES = FALSE, SIMPLIFY = FALSE)

inst/tinytest/test_query.R

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,39 @@
11
if (RcppSimdJson:::.unsupportedArchitecture()) exit_file("Unsupported chipset")
22

3+
# parse errors =================================================================
4+
expect_error(fparse("junk", query = ""))
5+
expect_error(fparse("junk", query = c("", "")))
6+
expect_error(fparse(c("junk", "junk"), query = list("", "")))
7+
expect_identical(
8+
fparse("junk", query = "", parse_error_ok = TRUE),
9+
NULL
10+
)
11+
expect_identical(
12+
fparse("junk", query = c("", ""), parse_error_ok = TRUE),
13+
NULL
14+
)
15+
expect_identical(
16+
fparse(c("junk", "junk"), query = list("", ""), parse_error_ok = TRUE),
17+
list(NULL, NULL)
18+
)
319

4-
# pkgbuild::clean_dll(); devtools::document(); devtools::load_all(); library(tinytest)
20+
# query errors =================================================================
21+
expect_error(fparse("null", query = "junk"))
22+
expect_error(fparse("null", query = c("junk", "junk")))
23+
expect_error(fparse(c("null", "null"), query = list("junk", "junk")))
24+
expect_identical(
25+
fparse("null", query = "junk", query_error_ok = TRUE),
26+
NULL
27+
)
28+
expect_identical(
29+
fparse("null", query = "junk", query_error_ok = TRUE),
30+
NULL
31+
)
32+
expect_identical(
33+
fparse(c("null", "null"),
34+
query = list(c("junk", "junk"), c("junk", "junk")), query_error_ok = TRUE),
35+
list(list(NULL, NULL), list(NULL, NULL))
36+
)
537

638
# single json ==================================================================
739
js <- c(single_json = '[{"a":[[1,2],[3,4]]},{"b":[[5,6],[7,8]]}]')
@@ -25,6 +57,7 @@ expect_identical(
2557
fparse(js, list(na = NA_character_)),
2658
list(na = list(NA))
2759
)
60+
2861
#* flat query ------------------------------------------------------------------
2962
#** single query ---------------------------------------------------------------
3063
expect_identical(
@@ -102,6 +135,7 @@ expect_identical(
102135
)
103136

104137
# multi json ===================================================================
138+
105139
js <- c(A = '{"a":[[1,2],[3,4]]}', B = '{"a":[[5,6],[7,8]]}')
106140
#* flat query ------------------------------------------------------------------
107141
#** single query ---------------------------------------------------------------
@@ -148,3 +182,4 @@ expect_identical(
148182
fparse(js, query = q),
149183
list(a = list(a1 = 1:2, a2 = 3:4), b = list(b1 = 5:6, b2 = 7:8))
150184
)
185+

0 commit comments

Comments
 (0)