From 8ea7b87eed7eeefcb155b663517013308009cdf3 Mon Sep 17 00:00:00 2001 From: Jan Weidner Date: Thu, 11 Jan 2018 15:50:58 +0100 Subject: [PATCH 1/2] unrolled transform! --- src/core.jl | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++- test/perf.jl | 16 ++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 test/perf.jl diff --git a/src/core.jl b/src/core.jl index f141497..35810d7 100644 --- a/src/core.jl +++ b/src/core.jl @@ -22,8 +22,9 @@ function conclude_round(a,b,c,d,f,g,pbuf,i) a,b,c,d end +transform!(ctx::MD5_CTX) = transform_unrolled!(ctx) -function transform!(context::MD5_CTX) +function transform_baseline!(context::MD5_CTX) pbuf = buffer_pointer(context) a,b,c,d = context.state @@ -50,6 +51,56 @@ function transform!(context::MD5_CTX) @inbounds context.state .+= [a,b,c,d] end +@generated function transform_unrolled!(context::MD5_CTX) + ret = quote + pbuf = buffer_pointer(context) + end + ex = quote + A = context.state[1] + B = context.state[2] + C = context.state[3] + D = context.state[4] + end + push!(ret.args, ex) + for i in 0:63 + if 0 ≤ i ≤ 15 + ex = :(F = (B & C) | ((~B) & D)) + g = i + elseif 16 ≤ i ≤ 31 + ex = :(F = (D & B) | ((~D) & C)) + g = 5i + 1 + elseif 32 ≤ i ≤ 47 + ex = :(F = B ⊻ C ⊻ D) + g = 3i + 5 + elseif 48 ≤ i ≤ 63 + ex = :(F = C ⊻ (B | (~D))) + g = 7i + end + push!(ret.args, ex) + g = (g % 16) + 1 + ex = quote + temp = D + D = C + C = B + inner = A + F + $(kk[i+1]) + unsafe_load(pbuf, $g) + rot_inner = lrot($(ss[i+1]), inner, 32) + B = B + rot_inner + A = temp + end + push!(ret.args, ex) + end + + ex = quote + context.state[1] += A + context.state[2] += B + context.state[3] += C + context.state[4] += D + end + push!(ret.args, ex) + quote + @inbounds $ret + end +end function digest!(context::T) where {T<:MD5_CTX} pad_remainder!(context) diff --git a/test/perf.jl b/test/perf.jl new file mode 100644 index 0000000..c12f398 --- /dev/null +++ b/test/perf.jl @@ -0,0 +1,16 @@ +import MD5 +using BenchmarkTools + +data = randstring(10^4) + +if Pkg.installed("Nettle") != nothing +import Nettle +@show Nettle.hexdigest("md5", data) +display(@benchmark Nettle.hexdigest("md5", data)) + println() +end + +@show bytes2hex(MD5.md5(data)) +display(@benchmark MD5.md5(data)) +println() + From c310e5bd6b3c379c4546fd72a028e08e16a1f3fa Mon Sep 17 00:00:00 2001 From: Jan Weidner Date: Thu, 11 Jan 2018 16:51:56 +0100 Subject: [PATCH 2/2] cleanup --- LICENSE.md | 2 +- src/core.jl | 73 ++++++++--------------------------------------------- 2 files changed, 11 insertions(+), 64 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index 24d9bc6..c0e8d44 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ The MD5.jl package is licensed under the MIT "Expat" License: -> Copyright (c) 2018: Lyndon White. +> Copyright (c) 2018: Lyndon White and Jan Weidner. > Copyright (c) 2014: Elliot Saba. (SHA.jl) > Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm, Copyright (C) 1991-2 > diff --git a/src/core.jl b/src/core.jl index 35810d7..c9af954 100644 --- a/src/core.jl +++ b/src/core.jl @@ -1,65 +1,12 @@ -# Nonlinear functions, in order to encourage inlining, these sadly are not an array of lambdas -f_round0(b,c,d) = SHA.Round0(b,c,d) #UInt32((b & c) | (~b & d)) -f_round1(b,c,d) = UInt32((b & d) | (c & ~d)) -f_round2(b,c,d) = SHA.Round1And3(b,c,d) # return UInt32(b ⊻ c ⊻ d) #xors -f_round3(b,c,d) = UInt32(c ⊻ (b | ~d)) - -g_round0(i) = i -g_round1(i) = (5i+1) % 16 -g_round2(i) = (3i + 5) % 16 -g_round3(i) = 7i % 16 - - -function conclude_round(a,b,c,d,f,g,pbuf,i) - @inbounds s = ss[i+1] - @inbounds k = kk[i+1] - @inbounds m = unsafe_load(pbuf, g+1) - f = f + a + k + m - a = d - d = c - c = b - b = b + lrot(s,f, 32) - a,b,c,d -end - -transform!(ctx::MD5_CTX) = transform_unrolled!(ctx) - -function transform_baseline!(context::MD5_CTX) - pbuf = buffer_pointer(context) - a,b,c,d = context.state - - for i in 0:15 - f = f_round0(b,c,d) - g = g_round0(i) - a,b,c,d = conclude_round(a, b, c, d, f, g, pbuf, i) - end - for i in 16:31 - f = f_round1(b,c,d) - g = g_round1(i) - a,b,c,d = conclude_round(a, b, c, d, f, g, pbuf, i) - end - for i in 32:47 - f = f_round2(b,c,d) - g = g_round2(i) - a,b,c,d = conclude_round(a, b, c, d, f, g, pbuf, i) - end - for i in 48:63 - f = f_round3(b,c,d) - g = g_round3(i) - a,b,c,d = conclude_round(a, b, c, d, f, g, pbuf, i) - end - @inbounds context.state .+= [a,b,c,d] -end - -@generated function transform_unrolled!(context::MD5_CTX) +@generated function transform!(context::MD5_CTX) ret = quote pbuf = buffer_pointer(context) end ex = quote - A = context.state[1] - B = context.state[2] - C = context.state[3] - D = context.state[4] + @inbounds A = context.state[1] + @inbounds B = context.state[2] + @inbounds C = context.state[3] + @inbounds D = context.state[4] end push!(ret.args, ex) for i in 0:63 @@ -91,14 +38,14 @@ end end ex = quote - context.state[1] += A - context.state[2] += B - context.state[3] += C - context.state[4] += D + @inbounds context.state[1] += A + @inbounds context.state[2] += B + @inbounds context.state[3] += C + @inbounds context.state[4] += D end push!(ret.args, ex) quote - @inbounds $ret + $ret end end