Skip to content

Commit 05dffb9

Browse files
authored
Merge pull request #5869 from neighbourhoodie/faster-norm-fields
Faster selector execution by pre-parsing fields
2 parents 219ed26 + 66703ea commit 05dffb9

7 files changed

Lines changed: 72 additions & 33 deletions

File tree

src/mango/src/mango_cursor.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ explain(#cursor{} = Cursor) ->
358358
{dbname, DbName},
359359
{index, JSON},
360360
{partitioned, Partitioned},
361-
{selector, Selector},
361+
{selector, mango_util:join_keys(Selector)},
362362
{opts, {Opts}},
363363
{limit, Limit},
364364
{skip, Skip},

src/mango/src/mango_cursor_view.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1016,7 +1016,7 @@ required_fields_disjoint_fields_test() ->
10161016
?assertEqual([<<"field1">>, <<"field2">>, <<"field3">>], required_fields(Cursor1)),
10171017
Fields2 = [<<"field1">>, <<"field2">>],
10181018
Selector2 = to_selector(#{<<"field3">> => undefined, <<"field4">> => undefined}),
1019-
Cursor2 = #cursor{fields = Fields2, selector = to_selector(Selector2)},
1019+
Cursor2 = #cursor{fields = Fields2, selector = Selector2},
10201020
?assertEqual(
10211021
[<<"field1">>, <<"field2">>, <<"field3">>, <<"field4">>], required_fields(Cursor2)
10221022
).

src/mango/src/mango_idx.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ add(DDoc, Idx) ->
154154
{ok, NewDDoc1} = Mod:add(DDoc, Idx),
155155
NewDDoc2 = set_ddoc_partitioned(NewDDoc1, Idx),
156156
% Round trip through JSON for normalization
157-
Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc2#doc.body)),
157+
Body = ?JSON_DECODE(?JSON_ENCODE(mango_util:join_keys(NewDDoc2#doc.body))),
158158
{ok, NewDDoc2#doc{body = Body}}.
159159

160160
remove(DDoc, Idx) ->

src/mango/src/mango_idx_view.erl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ opts() ->
233233
make_view(Idx) ->
234234
View =
235235
{[
236-
{<<"map">>, Idx#idx.def},
236+
{<<"map">>, mango_util:join_keys(Idx#idx.def)},
237237
{<<"reduce">>, <<"_count">>},
238238
{<<"options">>, {Idx#idx.opts}}
239239
]},
@@ -271,23 +271,26 @@ validate_ddoc(VProps) ->
271271
% the equivalent of a multi-query. But that's for another
272272
% day.
273273

274+
indexable_fields(Selector) ->
275+
[mango_util:join_field(F) || F <- indexable_paths(Selector)].
276+
274277
% We can see through '$and' trivially
275-
indexable_fields({[{<<"$and">>, Args}]}) ->
276-
lists:usort(lists:flatten([indexable_fields(A) || A <- Args]));
278+
indexable_paths({[{<<"$and">>, Args}]}) ->
279+
lists:usort(lists:flatmap(fun(A) -> indexable_paths(A) end, Args));
277280
% So far we can't see through any other operator
278-
indexable_fields({[{<<"$", _/binary>>, _}]}) ->
281+
indexable_paths({[{<<"$", _/binary>>, _}]}) ->
279282
[];
280283
% If we have a field with a terminator that is locatable
281284
% using an index then the field is a possible index
282-
indexable_fields({[{Field, Cond}]}) ->
285+
indexable_paths({[{Field, Cond}]}) ->
283286
case indexable(Cond) of
284287
true ->
285288
[Field];
286289
false ->
287290
[]
288291
end;
289292
% An empty selector
290-
indexable_fields({[]}) ->
293+
indexable_paths({[]}) ->
291294
[].
292295

293296
% Check if a condition is indexable. The logical
@@ -320,8 +323,8 @@ indexable({[{<<"$", _/binary>>, _}]}) ->
320323

321324
% For each field, return {Field, Range}
322325
field_ranges(Selector) ->
323-
Fields = indexable_fields(Selector),
324-
field_ranges(Selector, Fields).
326+
Fields = indexable_paths(Selector),
327+
[{mango_util:join_field(F), R} || {F, R} <- field_ranges(Selector, Fields)].
325328

326329
field_ranges(Selector, Fields) ->
327330
field_ranges(Selector, Fields, []).

src/mango/src/mango_selector.erl

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ normalize(Selector) ->
3838
],
3939
{NProps} = lists:foldl(fun(Step, Sel) -> Step(Sel) end, Selector, Steps),
4040
FieldNames = [Name || {Name, _} <- NProps],
41-
case lists:member(<<>>, FieldNames) of
41+
case lists:member([], FieldNames) of
4242
true ->
4343
?MANGO_ERROR({invalid_selector, missing_field_name});
4444
false ->
@@ -210,7 +210,7 @@ norm_ops(Value) ->
210210
norm_fields({[]}) ->
211211
{[]};
212212
norm_fields(Selector) ->
213-
norm_fields(Selector, <<>>).
213+
norm_fields(Selector, []).
214214

215215
% Operators where we can push the field names further
216216
% down the operator tree
@@ -237,7 +237,7 @@ norm_fields({[{<<"$keyMapMatch">>, Arg}]}, Path) ->
237237
% $default field. This also asserts that the $default
238238
% field is at the root as well as that it only has
239239
% a $text operator applied.
240-
norm_fields({[{<<"$default">>, {[{<<"$text">>, _Arg}]}}]} = Sel, <<>>) ->
240+
norm_fields({[{<<"$default">>, {[{<<"$text">>, _Arg}]}}]} = Sel, []) ->
241241
Sel;
242242
norm_fields({[{<<"$default">>, _}]} = Selector, _) ->
243243
?MANGO_ERROR({bad_field, Selector});
@@ -249,12 +249,11 @@ norm_fields({[{<<"$", _/binary>>, _}]} = Cond, Path) ->
249249
% We've found a field name. Append it to the path
250250
% and skip this node as we unroll the stack as
251251
% the full path will be further down the branch.
252-
norm_fields({[{Field, Cond}]}, <<>>) ->
253-
% Don't include the '.' for the first element of
254-
% the path.
255-
norm_fields(Cond, Field);
256-
norm_fields({[{Field, Cond}]}, Path) ->
257-
norm_fields(Cond, <<Path/binary, ".", Field/binary>>);
252+
norm_fields({[{Field, Cond}]}, Path) when is_binary(Field) ->
253+
{ok, F} = mango_util:parse_field(Field),
254+
norm_fields({[{F, Cond}]}, Path);
255+
norm_fields({[{Field, Cond}]}, Path) when is_list(Field) ->
256+
norm_fields(Cond, Path ++ Field);
258257
% An empty selector
259258
norm_fields({[]}, Path) ->
260259
{Path, {[]}};
@@ -575,7 +574,14 @@ match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) ->
575574
% match against.
576575

577576
has_required_fields(Selector, RequiredFields) ->
578-
Remainder = has_required_fields_int(Selector, RequiredFields),
577+
Paths = lists:map(
578+
fun(Field) ->
579+
{ok, Path} = mango_util:parse_field(Field),
580+
Path
581+
end,
582+
RequiredFields
583+
),
584+
Remainder = has_required_fields_int(Selector, Paths),
579585
Remainder == [].
580586

581587
% Empty selector
@@ -634,6 +640,9 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) ->
634640
end.
635641

636642
% Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}}
643+
is_constant_field(Selector, Field) when not is_list(Field) ->
644+
{ok, Path} = mango_util:parse_field(Field),
645+
is_constant_field(Selector, Path);
637646
is_constant_field({[]}, _Field) ->
638647
false;
639648
is_constant_field(Selector, Field) when not is_list(Selector) ->
@@ -653,7 +662,7 @@ is_constant_field([{[{_UnMatched, _}]} | Rest], Field) ->
653662
fields({[{<<"$", _/binary>>, Args}]}) when is_list(Args) ->
654663
lists:flatmap(fun fields/1, Args);
655664
fields({[{Field, _Cond}]}) ->
656-
[Field];
665+
[mango_util:join_field(Field)];
657666
fields({[]}) ->
658667
[].
659668

@@ -1516,7 +1525,7 @@ match_object_test() ->
15161525

15171526
% an inner empty object selector matches only empty objects
15181527
SelEmptyField = normalize({[{<<"x">>, {[]}}]}),
1519-
?assertEqual({[{<<"x">>, {[{<<"$eq">>, {[]}}]}}]}, SelEmptyField),
1528+
?assertEqual({[{[<<"x">>], {[{<<"$eq">>, {[]}}]}}]}, SelEmptyField),
15201529
?assertEqual(false, match_int(SelEmptyField, Doc1)),
15211530
?assertEqual(true, match_int(SelEmptyField, Doc2)),
15221531
?assertEqual(false, match_int(SelEmptyField, Doc3)),
@@ -1525,7 +1534,7 @@ match_object_test() ->
15251534

15261535
% negated empty object selector matches a value which is present and is not the empty object
15271536
SelNotEmptyField = normalize({[{<<"$not">>, {[{<<"x">>, {[]}}]}}]}),
1528-
?assertEqual({[{<<"x">>, {[{<<"$ne">>, {[]}}]}}]}, SelNotEmptyField),
1537+
?assertEqual({[{[<<"x">>], {[{<<"$ne">>, {[]}}]}}]}, SelNotEmptyField),
15291538
?assertEqual(false, match_int(SelNotEmptyField, Doc1)),
15301539
?assertEqual(false, match_int(SelNotEmptyField, Doc2)),
15311540
?assertEqual(true, match_int(SelNotEmptyField, Doc3)),
@@ -1534,7 +1543,7 @@ match_object_test() ->
15341543

15351544
% inner object selectors with fields match objects with at least those fields
15361545
Sel1Field = normalize({[{<<"x">>, {[{<<"a">>, 1}]}}]}),
1537-
?assertEqual({[{<<"x.a">>, {[{<<"$eq">>, 1}]}}]}, Sel1Field),
1546+
?assertEqual({[{[<<"x">>, <<"a">>], {[{<<"$eq">>, 1}]}}]}, Sel1Field),
15381547
?assertEqual(false, match_int(Sel1Field, Doc1)),
15391548
?assertEqual(false, match_int(Sel1Field, Doc2)),
15401549
?assertEqual(true, match_int(Sel1Field, Doc3)),
@@ -1546,8 +1555,8 @@ match_object_test() ->
15461555
?assertEqual(
15471556
{[
15481557
{<<"$and">>, [
1549-
{[{<<"x.a">>, {[{<<"$eq">>, 1}]}}]},
1550-
{[{<<"x.b">>, {[{<<"$eq">>, 2}]}}]}
1558+
{[{[<<"x">>, <<"a">>], {[{<<"$eq">>, 1}]}}]},
1559+
{[{[<<"x">>, <<"b">>], {[{<<"$eq">>, 2}]}}]}
15511560
]}
15521561
]},
15531562
Sel2Field
@@ -1560,7 +1569,7 @@ match_object_test() ->
15601569

15611570
% check shorthand syntax
15621571
SelShort = normalize({[{<<"x.b">>, 2}]}),
1563-
?assertEqual({[{<<"x.b">>, {[{<<"$eq">>, 2}]}}]}, SelShort),
1572+
?assertEqual({[{[<<"x">>, <<"b">>], {[{<<"$eq">>, 2}]}}]}, SelShort),
15641573
?assertEqual(false, match_int(SelShort, Doc1)),
15651574
?assertEqual(false, match_int(SelShort, Doc2)),
15661575
?assertEqual(false, match_int(SelShort, Doc3)),

src/mango/src/mango_selector_text.erl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,10 @@ convert(_Path, {[{<<"$", _/binary>> = Op, _}]}) ->
177177
convert(Path, {[{Field0, Cond}]}) ->
178178
{ok, PP0} =
179179
case Field0 of
180-
<<>> ->
181-
{ok, []};
182-
_ ->
183-
mango_util:parse_field(Field0)
180+
F when is_binary(F) ->
181+
mango_util:parse_field(F);
182+
F when is_list(F) ->
183+
{ok, F}
184184
end,
185185
% Later on, we perform a lucene_escape_user call on the
186186
% final Path, which calls parse_field again. Calling the function
@@ -394,7 +394,7 @@ append_sort_type(RawSortField, Selector) ->
394394
end.
395395

396396
get_sort_type(Field, Selector) ->
397-
Types = get_sort_types(Field, Selector, []),
397+
Types = get_sort_types(Field, mango_util:join_keys(Selector), []),
398398
case lists:usort(Types) of
399399
[str] -> <<"_3astring<string>">>;
400400
[num] -> <<"_3anumber<number>">>;

src/mango/src/mango_util.erl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
join/2,
4242

4343
parse_field/1,
44+
join_field/1,
45+
join_keys/1,
4446

4547
cached_re/2
4648
]).
@@ -370,6 +372,31 @@ parse_field_slow(Field) ->
370372
),
371373
{ok, Path}.
372374

375+
join_keys({Sel}) when is_list(Sel) ->
376+
Pairs = [{join_field(K), join_keys(V)} || {K, V} <- Sel],
377+
{Pairs};
378+
join_keys(Sel) when is_list(Sel) ->
379+
[join_keys(S) || S <- Sel];
380+
join_keys(Sel) ->
381+
Sel.
382+
383+
join_field(Field) when is_list(Field) ->
384+
Parts = [field_to_binary(F) || F <- Field],
385+
binary_join(Parts, <<".">>);
386+
join_field(Field) ->
387+
Field.
388+
389+
% binary:join/2 is not available on all Erlang versions we support; it was
390+
% added in 28.0. For now, we use this function in its place, cf.
391+
% https://www.erlang.org/doc/apps/stdlib/binary.html#join/2
392+
binary_join(Binaries, Separator) when is_list(Binaries), is_binary(Separator) ->
393+
iolist_to_binary(lists:join(Separator, Binaries)).
394+
395+
field_to_binary(Field) when is_list(Field) ->
396+
list_to_binary(Field);
397+
field_to_binary(Field) when is_binary(Field) ->
398+
Field.
399+
373400
check_non_empty(Field, Parts) ->
374401
case lists:member(<<>>, Parts) of
375402
true ->

0 commit comments

Comments
 (0)