
Commit

Merge pull request #47 from ferd/small-bugfixes
Major bug fixing for s3 file listing
ferd authored Oct 22, 2024
2 parents 42b020a + 85bfc72 commit f702dd9
Showing 4 changed files with 121 additions and 19 deletions.
6 changes: 3 additions & 3 deletions apps/revault/src/revault_dirmon_event.erl
@@ -89,6 +89,9 @@ initial_sync(tracker_manual, Name, _Dir, _Ignore, _Time) ->
     %% Send in a fake ref, which prevents any timer from ever running
     {Set, make_ref()};
 initial_sync(tracker, Name, _Dir, _Ignore, _Time) ->
+    %% Fake a message to rescan asap on start
+    Ref = make_ref(),
+    self() ! {timeout, Ref, poll},
     %% Normal stateful mode, where we load files and scan ASAP to avoid
     %% getting into modes where long delays mean we are unresponsive
     %% to filesystem changes long after boot.
@@ -97,7 +100,4 @@ initial_sync(tracker, Name, _Dir, _Ignore, _Time) ->
                %% ignore deletes from the history, they wouldn't
                %% come up in a file scan
                Hash =/= deleted]),
-    %% Fake a message to rescan asap on start
-    Ref = make_ref(),
-    self() ! {timeout, Ref, poll},
     {Set, Ref}.
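
The relocated block relies on a standard OTP pattern: erlang:start_timer/3 delivers a {timeout, TimerRef, Msg} message to the owning process, so posting a hand-crafted {timeout, Ref, poll} with a fresh reference makes the first scan fire immediately, as if a poll timer had just expired. Below is a minimal, self-contained sketch of that pattern under assumed names (poll_sketch, do_poll/0) that are not part of this commit:

-module(poll_sketch).
-behaviour(gen_server).
-export([start_link/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2]).

start_link(IntervalMs) ->
    gen_server:start_link(?MODULE, IntervalMs, []).

init(IntervalMs) ->
    %% Fake an expired timer so the first poll runs as soon as we enter the loop.
    Ref = make_ref(),
    self() ! {timeout, Ref, poll},
    {ok, #{interval => IntervalMs, timer => Ref}}.

handle_info({timeout, Ref, poll}, State = #{interval := IntervalMs, timer := Ref}) ->
    do_poll(),
    %% Real timers deliver {timeout, TimerRef, poll} too, so the fake startup
    %% message and every scheduled one are handled by this same clause.
    NewRef = erlang:start_timer(IntervalMs, self(), poll),
    {noreply, State#{timer := NewRef}};
handle_info(_Msg, State) ->
    {noreply, State}.

handle_call(_Req, _From, State) -> {reply, ok, State}.
handle_cast(_Msg, State) -> {noreply, State}.

do_poll() ->
    %% Placeholder for the actual directory scan.
    ok.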
15 changes: 12 additions & 3 deletions apps/revault/src/revault_s3.erl
@@ -381,11 +381,11 @@ list_all_files(Dir, Continuation) ->
     Res = aws_s3:list_objects_v2(client(), bucket(),
                                  BaseOpts#{<<"prefix">> => <<Dir/binary, "/">>}, #{}),
     case Res of
-        {ok, Map=#{<<"ListBucketResult">> := #{<<"Contents">> := Contents}}, _} ->
+        {ok, #{<<"ListBucketResult">> := Map=#{<<"Contents">> := Contents}}, _} ->
             Files = if is_list(Contents) ->
-                           [fetch_content(C) || C <- Contents];
+                           [fetch_content(C) || C <- Contents, is_valid_content(C)];
                        is_map(Contents) ->
-                           [fetch_content(Contents)]
+                           [fetch_content(Contents) || is_valid_content(Contents)]
                     end,
             case Map of
                 #{<<"IsTruncated">> := <<"true">>, <<"NextContinuationToken">> := Next} ->
@@ -400,6 +400,15 @@ list_all_files(Dir, Continuation) ->
 fetch_content(#{<<"Key">> := Path, <<"LastModified">> := Stamp}) ->
     {Path, Stamp}.
 
+is_valid_content(#{<<"Key">> := Path}) ->
+    %% Ignore directory objects. These may have been created by API or through
+    %% the console UI.
+    %% S3 returns valid objects for directories, but with a different content
+    %% type. They also happen to end in a /, which is cheaper to check than
+    %% doing a HEAD request on each entry via `is_regular/1`. ReVault assumes
+    %% slashes aren't valid in file paths, the way they aren't in linux or osx.
+    binary:last(Path) =/= $/.
+
 %% @private standard options the AWS client expects to lengthen how long
 %% we have to upload parts or files.
 upload_opts() ->
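
To make the intent of the new guard concrete, here is a small, self-contained sketch (hypothetical module name, not part of the commit) of how directory placeholder keys get filtered out of a ListObjectsV2-style Contents value. It also shows the generator-less comprehension used in the diff, [X || Bool], which evaluates to [X] when Bool is true and [] otherwise.

-module(s3_listing_sketch).
-export([keep_files/1]).

%% Contents may be a list of maps (several objects) or a single map (one object),
%% mirroring how the XML-decoded S3 response is shaped.
keep_files(Contents) when is_list(Contents) ->
    [fetch_content(C) || C <- Contents, is_valid_content(C)];
keep_files(Contents) when is_map(Contents) ->
    [fetch_content(Contents) || is_valid_content(Contents)].

fetch_content(#{<<"Key">> := Path, <<"LastModified">> := Stamp}) ->
    {Path, Stamp}.

is_valid_content(#{<<"Key">> := Path}) ->
    %% Directory markers end in a slash; regular file keys don't.
    binary:last(Path) =/= $/.

%% Example:
%% keep_files([#{<<"Key">> => <<"dir/">>,  <<"LastModified">> => <<"t0">>},
%%             #{<<"Key">> => <<"dir/a">>, <<"LastModified">> => <<"t1">>}])
%% returns [{<<"dir/a">>, <<"t1">>}].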
31 changes: 22 additions & 9 deletions apps/revault/test/revault_s3_SUITE.erl
@@ -513,36 +513,49 @@ expect_consult(Bin) ->
 expect_list_objects_v2() ->
     meck:expect(aws_s3, list_objects_v2,
       fun(_Cli, _Bucket, M=#{<<"prefix">> := _}, _) when not is_map_key(<<"continuation-token">>, M) ->
+             %% start with a fake subdirectory, as if the server was created
+             %% by hand.
+             {ok,
+              #{<<"ListBucketResult">> =>
+                #{<<"KeyCount">> => <<"1">>,
+                  <<"IsTruncated">> => <<"true">>,
+                  <<"NextContinuationToken">> => <<"0">>,
+                  <<"Contents">> => [
+                     #{<<"Key">> => <<"dir/">>, <<"LastModified">> => <<"123">>}
+                  ]}},
+              {200, [], make_ref()}};
+         (_Cli, _Bucket, #{<<"prefix">> := _,
+                           <<"continuation-token">> := <<"0">>}, _) ->
              {ok,
              #{<<"ListBucketResult">> =>
                #{<<"KeyCount">> => <<"1">>,
+                  <<"IsTruncated">> => <<"true">>,
+                  <<"NextContinuationToken">> => <<"a">>,
                  <<"Contents">> => [
                     #{<<"Key">> => <<"dir/a">>, <<"LastModified">> => <<"123">>}
-                 ]},
-               <<"IsTruncated">> => <<"true">>,
-               <<"NextContinuationToken">> => <<"a">>},
+                 ]}},
              {200, [], make_ref()}};
          (_Cli, _Bucket, #{<<"prefix">> := _,
                            <<"continuation-token">> := <<"a">>}, _) ->
              {ok,
              #{<<"ListBucketResult">> =>
                #{<<"KeyCount">> => <<"1">>,
+                  <<"IsTruncated">> => <<"true">>,
+                  <<"NextContinuationToken">> => <<"b">>,
                  <<"Contents">> => #{<<"Key">> => <<"dir/b">>,
-                                     <<"LastModified">> => <<"123">>}},
-               <<"IsTruncated">> => <<"true">>,
-               <<"NextContinuationToken">> => <<"b">>},
+                                     <<"LastModified">> => <<"123">>}}},
              {200, [], make_ref()}};
          (_Cli, _Bucket, #{<<"prefix">> := _,
                            <<"continuation-token">> := <<"b">>}, _) ->
              {ok,
              #{<<"ListBucketResult">> =>
                #{<<"KeyCount">> => <<"2">>,
+                  <<"IsTruncated">> => <<"true">>,
+                  <<"NextContinuationToken">> => <<"d">>,
                  <<"Contents">> => [
                     #{<<"Key">> => <<"dir/c">>, <<"LastModified">> => <<"123">>},
                     #{<<"Key">> => <<"dir/d">>, <<"LastModified">> => <<"123">>}
-                 ]},
-               <<"IsTruncated">> => <<"true">>,
-               <<"NextContinuationToken">> => <<"d">>},
+                 ]}},
              {200, [], make_ref()}};
          (_Cli, _Bucket, #{<<"prefix">> := _,
                            <<"continuation-token">> := <<"d">>}, _) ->
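
The reshuffled mock data mirrors what the production fix above depends on: IsTruncated and NextContinuationToken sit inside ListBucketResult, next to Contents, rather than at the top level of the decoded response. A hedged sketch (hypothetical module, not from the commit) of the pagination decision that shape implies:

-module(pagination_sketch).
-export([next_token/1]).

%% Given a decoded {ok, Body, Http} result from aws_s3:list_objects_v2/4,
%% decide whether another page must be requested with a continuation token.
next_token({ok, #{<<"ListBucketResult">> := Result}, _Http}) ->
    case Result of
        #{<<"IsTruncated">> := <<"true">>,
          <<"NextContinuationToken">> := Next} ->
            {more, Next};
        _ ->
            done
    end.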
88 changes: 84 additions & 4 deletions apps/revault/test/s3_integration_SUITE.erl
@@ -9,10 +9,10 @@ all() -> [{group, api},

 groups() ->
     [{api, [sequence], [list_objects_empty, crud_object, rename_raw, pagination,
-                        multipart_upload, get_object_range]},
+                        multipart_upload, get_object_range, folder_object]},
      {abstraction, [sequence], [read_write_delete, hasht, copy, rename,
-                                find_hashes, consult, is_regular,
-                                multipart, read_range]},
+                                find_hashes, find_hashes_subdir, consult,
+                                is_regular, multipart, read_range]},
      {cache, [sequence], [find_hashes_cached]}
     ].
 
@@ -123,7 +123,6 @@ crud_object(Config) ->
     ?assertMatch({ok, #{}, _Http},
                  aws_s3:delete_object(Client, Bucket, Key, #{})),
     ok.
-
 rename_raw() ->
     [{doc, "Simulate a rename operation."}].
 rename_raw(Config) ->
@@ -323,6 +322,43 @@ get_object_range(Config) ->
                  aws_s3:delete_object(Client, Bucket, Key, #{})),
     ok.
 
+folder_object() ->
+    [{doc, "folder objects can be created, read, and deleted."}].
+folder_object(Config) ->
+    Client = ?config(aws_client, Config),
+    Bucket = ?config(bucket, Config),
+    Dir = ?config(bucket_dir, Config),
+    DirKey = <<Dir/binary, "/">>,
+    Key = filename:join([Dir, "crud_object"]),
+    Body = <<"v1">>,
+    Chk = hash(Body),
+    %% Start an empty directory object
+    ?assertMatch({ok, _, _},
+                 aws_s3:put_object(Client, Bucket, DirKey,
+                                   #{<<"ContentType">> => <<"application/x-directory; charset=UTF-8">>})),
+    %% Create, with SHA256
+    ?assertMatch({ok, #{<<"ChecksumSHA256">> := Chk}, _Http},
+                 aws_s3:put_object(Client, Bucket, Key,
+                                   #{<<"Body">> => Body,
+                                     <<"ChecksumAlgorithm">> => <<"SHA256">>,
+                                     <<"ChecksumSHA256">> => Chk})),
+    %% Head
+    ?assertMatch({ok, #{<<"ContentType">> := <<"application/x-directory; charset=UTF-8">>,
+                        <<"ContentLength">> := <<"0">>}, _Http},
+                 aws_s3:get_object(Client, Bucket, DirKey)),
+    %% Delete
+    ?assertMatch({ok, #{}, _Http},
+                 aws_s3:delete_object(Client, Bucket, DirKey, #{})),
+    %% The sub-element still exists and must be deleted alone
+    ?assertMatch({ok, #{<<"ChecksumSHA256">> := Chk,
+                        <<"ContentLength">> := <<"2">>}, _Http},
+                 aws_s3:head_object(Client, Bucket, Key,
+                                    #{<<"ChecksumMode">> => <<"ENABLED">>})),
+    ?assertMatch({ok, #{}, _Http},
+                 aws_s3:delete_object(Client, Bucket, Key, #{})),
+    ok.
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%
 %%% ABSTRACTION GROUP %%%
 %%%%%%%%%%%%%%%%%%%%%%%%%
@@ -400,6 +436,50 @@ find_hashes(Config) ->
     ?assertEqual(ok, revault_s3:delete(PathC)),
     ok.
 
+find_hashes_subdir() ->
+    [{doc, "exercise the hash-fetching functionality when a subdirectory "
+           "object was created."}].
+find_hashes_subdir(Config) ->
+    Dir = ?config(bucket_dir, Config),
+    DirA = <<Dir/binary, "/">>,
+    SubDirA = <<(filename:join([Dir, "subdir"]))/binary, "/">>,
+    PathA = filename:join([Dir, "subdir", "hash.txt"]),
+    PathB = filename:join([Dir, "hash.txt"]),
+    PathC = filename:join([Dir, "hash.ext"]),
+    %% Directory objects can't be created with revault, only manually, so
+    %% break the abstraction here.
+    ?assertMatch({ok, _, _},
+                 aws_s3:put_object(
+                   revault_s3_serv:get_client(),
+                   application:get_env(revault, bucket, undefined),
+                   DirA,
+                   #{<<"ContentType">> => <<"application/x-directory; charset=UTF-8">>}
+                 )),
+    ?assertMatch({ok, _, _},
+                 aws_s3:put_object(
+                   revault_s3_serv:get_client(),
+                   application:get_env(revault, bucket, undefined),
+                   SubDirA,
+                   #{<<"ContentType">> => <<"application/x-directory; charset=UTF-8">>}
+                 )),
+
+    ?assertEqual(ok, revault_s3:write_file(PathA, <<"a">>)),
+    ?assertEqual(ok, revault_s3:write_file(PathB, <<"b">>)),
+    ?assertEqual(ok, revault_s3:write_file(PathC, <<"c">>)),
+    ?assertEqual([{<<"subdir/hash.txt">>, revault_file:hash_bin(<<"a">>)},
+                  {<<"hash.txt">>, revault_file:hash_bin(<<"b">>)},
+                  {<<"hash.ext">>, revault_file:hash_bin(<<"c">>)}],
+                 lists:reverse(lists:sort(
+                   revault_s3:find_hashes_uncached(Dir, fun(_) -> true end)
+                 ))),
+    ?assertEqual(ok, revault_s3:delete(PathA)),
+    ?assertEqual(ok, revault_s3:delete(PathB)),
+    ?assertEqual(ok, revault_s3:delete(PathC)),
+    %% Directories can however be deleted like any object
+    ?assertEqual(ok, revault_s3:delete(DirA)),
+    ?assertEqual(ok, revault_s3:delete(SubDirA)),
+    ok.
+
 consult() ->
     [{doc, "do a basic check on file consulting"}].
 consult(Config) ->
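
The two new integration tests pin down the behaviour that motivated the listing fix: a directory marker is a zero-length object with the "application/x-directory; charset=UTF-8" content type, it exists independently of the keys "under" it, and it can be created and deleted like any other object. Below is a small, hypothetical helper (not part of the suite) that checks a key for that shape; that head_object surfaces ContentType the same way get_object does is an assumption here.

-module(dir_marker_sketch).
-export([is_directory_marker/3]).

%% Returns true when the object at Key looks like a console-created directory
%% marker: zero bytes long, with the x-directory content type.
is_directory_marker(Client, Bucket, Key) ->
    case aws_s3:head_object(Client, Bucket, Key, #{}) of
        {ok, #{<<"ContentType">> := <<"application/x-directory", _/binary>>,
               <<"ContentLength">> := <<"0">>}, _Http} ->
            true;
        {ok, _, _} ->
            false;
        _Other ->
            false
    end.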
