Skip to content

Commit

Permalink
Use String#append_bytes if available
Browse files Browse the repository at this point in the history
This is just a proof of concept / demo; there are some unknowns
about how codegen is supposed to know whether it's OK to use newly introduced
methods.

I also hacked the benchmark to load two versions of protoboeuf so
I can compare them side by side. That's probably not how we want it,
but it gives a much clearer picture of the speedup.

```
/opt/rubies/head/bin/ruby --yjit -I lib:bench/lib bench/benchmark.rb
total encoded size: 5038040 bytes
=== encode ===
ruby 3.4.0dev (2024-08-26T08:40:45Z string-append-bytes 28a1b94c15) +YJIT [arm64-darwin23]
Warming up --------------------------------------
        upstream/jit    13.000 i/100ms
      protoboeuf/jit     5.000 i/100ms
     pboeuf-edge/jit     5.000 i/100ms
Calculating -------------------------------------
        upstream/jit    126.321 (± 4.7%) i/s -    637.000 in   5.053302s
      protoboeuf/jit     51.886 (± 3.9%) i/s -    260.000 in   5.017310s
     pboeuf-edge/jit     58.609 (± 3.4%) i/s -    295.000 in   5.041832s

Comparison:
        upstream/jit:      126.3 i/s
     pboeuf-edge/jit:       58.6 i/s - 2.16x  slower
      protoboeuf/jit:       51.9 i/s - 2.43x  slower
```
  • Loading branch information
byroot committed Aug 26, 2024
1 parent 81cbb65 commit e2aeb72
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 33 deletions.
17 changes: 15 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ rb_files = proto_files.pathmap("#{BASE_DIR}/test/fixtures/%n_pb.rb")

BENCHMARK_UPSTREAM_PB = "bench/lib/upstream/benchmark_pb.rb"
BENCHMARK_PROTOBOEUF_PB = "bench/lib/protoboeuf/benchmark_pb.rb"
BENCHMARK_PROTOBOEUF_EDGE_PB = "bench/lib/protoboeuf-edge/benchmark_pb.rb"

well_known_types = Rake::FileList[File.join(BASE_DIR, "lib/protoboeuf/protobuf/*.proto")]

Expand Down Expand Up @@ -73,7 +74,19 @@ file BENCHMARK_PROTOBOEUF_PB => ["bench/fixtures/benchmark.proto"] + codegen_rb_
unit.file.each { |f| f.package = "proto_boeuf" }
gen = ProtoBoeuf::CodeGen.new(unit)

File.binwrite(t.name, gen.to_ruby)
File.binwrite(t.name, gen.to_ruby(nil, { append_bytes: false }))
end

# File task that generates the "edge" benchmark fixture from benchmark.proto.
# The messages are placed in the "proto_boeuf_edge" package and the code
# generator is invoked with the `append_bytes: true` option.
file BENCHMARK_PROTOBOEUF_EDGE_PB => ["bench/fixtures/benchmark.proto"] + codegen_rb_files do |task|
  mkdir_p "bench/lib/protoboeuf-edge"
  codegen_rb_files.each { |path| require_relative path }

  parsed = ProtoBoeuf.parse_file(task.source)
  # Rename the package on every parsed file before generating code.
  parsed.file.each { |proto_file| proto_file.package = "proto_boeuf_edge" }
  ruby_source = ProtoBoeuf::CodeGen.new(parsed).to_ruby(nil, { append_bytes: true })

  File.binwrite(task.name, ruby_source)
end

Rake::TestTask.new do |t|
Expand All @@ -88,7 +101,7 @@ task gen_proto: rb_files
task test: [:gen_proto, :well_known_types]
task default: :test

task bench: [BENCHMARK_UPSTREAM_PB, BENCHMARK_PROTOBOEUF_PB] do
task bench: [BENCHMARK_UPSTREAM_PB, BENCHMARK_PROTOBOEUF_PB, BENCHMARK_PROTOBOEUF_EDGE_PB] do
rm_rf "bench/tmp"
mkdir_p "bench/tmp"

Expand Down
16 changes: 13 additions & 3 deletions bench/benchmark.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require "protoboeuf/parser"
require "protoboeuf/benchmark_pb"
require "protoboeuf-edge/benchmark_pb"
require "upstream/benchmark_pb"
require "benchmark/ips"

Expand Down Expand Up @@ -32,7 +33,7 @@ def gen_fake_field_val(type_map, field)
rand < 0.5
when :TYPE_STRING
# TODO: better random strings with variable lengths
"foobar" + "_foo" * rand(0..8)
"foobar" + "_foo" * rand(0..8)
when :TYPE_UINT64, :TYPE_INT32, :TYPE_SINT32, :TYPE_UINT32, :TYPE_INT64,
:TYPE_SINT64, :TYPE_FIXED64, :TYPE_FIXED32, :TYPE_SFIXED32,
:TYPE_SFIXED64, :TYPE_ENUM
Expand Down Expand Up @@ -152,6 +153,7 @@ def gen_walk_fn(type_def)
# Decode the messages using protoboeuf so we can re-encode them for the encoding benchmark
# We do this because ProtoBoeuf can't directly encode Google's protobuf message classes
decoded_msgs_proto = encoded_bins.map { |bin| ProtoBoeuf::ParkingLot.decode(bin) }
edge_decoded_msgs_proto = encoded_bins.map { |bin| ProtoBoeufEdge::ParkingLot.decode(bin) }

version = RubyVM::YJIT.enabled? ? "/jit" : "/interp"

Expand All @@ -174,10 +176,18 @@ def gen_walk_fn(type_def)
end

puts "=== encode ==="
before_gc = GC.count
Benchmark.ips do |x|
x.report("upstream#{version}") { fake_msgs.each { |msg| Upstream::ParkingLot.encode(msg) } }
x.report("protoboeuf#{version}") { decoded_msgs_proto.each { |msg| ProtoBoeuf::ParkingLot.encode(msg) } }
# Call String#clear to appease the GC. Each iteration generates ~5MiB of strings, and GC triggers
# roughly every ~30MiB malloced, so clearing these strings reduces GC triggers and therefore variance.
# On my machine, adding these clears reduces GC triggers from 445 to 248.

x.report("upstream#{version}") { fake_msgs.each { |msg| Upstream::ParkingLot.encode(msg).clear } }
x.report("protoboeuf#{version}") { decoded_msgs_proto.each { |msg| ProtoBoeuf::ParkingLot.encode(msg).clear } }
x.report("pboeuf-edge#{version}") { edge_decoded_msgs_proto.each { |msg| ProtoBoeufEdge::ParkingLot.encode(msg).clear } }

x.save!(File.join(ENV["BENCH_HOLD"], "encode.bench")) if ENV["BENCH_HOLD"]
x.compare!(order: :baseline)
end

puts "Encode GC count: #{GC.count - before_gc}"
78 changes: 50 additions & 28 deletions lib/protoboeuf/codegen.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@ class EnumCompiler
include TypeHelper

class << self
def result(enum, generate_types:)
new(enum, generate_types:).result
def result(enum, generate_types:, options: {})
new(enum, generate_types:, options:).result
end
end

attr_reader :enum

def initialize(enum, generate_types:)
def initialize(enum, generate_types:, options: {})
@enum = enum
@generate_types = generate_types
@options = options
end

def result
Expand Down Expand Up @@ -59,15 +60,15 @@ class MessageCompiler
include TypeHelper

class << self
def result(message, toplevel_enums, generate_types:, requires:, syntax:)
new(message, toplevel_enums, generate_types:, requires:, syntax:).result
def result(message, toplevel_enums, generate_types:, requires:, syntax:, options: {})
new(message, toplevel_enums, generate_types:, requires:, syntax:, options:).result
end
end

attr_reader :message, :fields, :oneof_fields, :syntax
attr_reader :optional_fields, :enum_field_types

def initialize(message, toplevel_enums, generate_types:, requires:, syntax:)
def initialize(message, toplevel_enums, generate_types:, requires:, syntax:, options:)
@message = message
@optional_field_bit_lut = []
@fields = @message.field
Expand All @@ -76,6 +77,7 @@ def initialize(message, toplevel_enums, generate_types:, requires:, syntax:)
@generate_types = generate_types
@has_submessage = false
@syntax = syntax
@options = options

@required_fields = []
@optional_fields = []
Expand Down Expand Up @@ -250,17 +252,6 @@ def encode_bool(field, value_expr, tagged)
RUBY
end

def encode_bytes(field, value_expr, tagged)
# Empty bytes is default value, so encodes nothing
<<~RUBY
val = #{value_expr}
if((bs = val.bytesize) > 0)
#{encode_tag_and_length(field, tagged, "bs")}
buff.concat(val.b)
end
RUBY
end

def encode_map(field, value_expr, tagged)
map_type = self.map_type(field)

Expand Down Expand Up @@ -357,13 +348,44 @@ def encode_repeated(field, value_expr, tagged)

def encode_string(field, value_expr, tagged)
# Empty string is default value, so encodes nothing
<<~RUBY
val = #{value_expr}
if((len = val.bytesize) > 0)
#{encode_tag_and_length(field, tagged, "len")}
buff << (val.ascii_only? ? val : val.b)
end
RUBY
if String.method_defined?(:append_bytes) && @options[:append_bytes] != false
<<~RUBY
val = #{value_expr}
if((len = val.bytesize) > 0)
#{encode_tag_and_length(field, tagged, "len")}
buff.append_bytes(val)
end
RUBY
else
<<~RUBY
val = #{value_expr}
if((len = val.bytesize) > 0)
#{encode_tag_and_length(field, tagged, "len")}
buff << (val.ascii_only? ? val : val.b)
end
RUBY
end
end

# Returns the Ruby source snippet that encodes a bytes field.
#
# The snippet assigns `value_expr` to `val` and, only when `val` is non-empty
# (empty bytes is the default value, so it encodes nothing), emits the tag and
# length via `encode_tag_and_length` followed by the raw bytes appended to `buff`.
#
# `buff.append_bytes(val)` is emitted when the Ruby running the code generator
# defines String#append_bytes and `@options[:append_bytes]` is not `false`;
# otherwise `buff.concat(val.b)` is emitted.
def encode_bytes(field, value_expr, tagged)
  append_stmt =
    if String.method_defined?(:append_bytes) && @options[:append_bytes] != false
      "buff.append_bytes(val)"
    else
      "buff.concat(val.b)"
    end

  # Heredoc lines are kept at one uniform indentation so the emitted text is unchanged.
  <<~RUBY
    val = #{value_expr}
    if((bs = val.bytesize) > 0)
    #{encode_tag_and_length(field, tagged, "bs")}
    #{append_stmt}
    end
  RUBY
end

def encode_message(field, value_expr, tagged)
Expand Down Expand Up @@ -586,7 +608,7 @@ def enums

def constants
message.nested_type.reject { |x| x.options&.map_entry }.map do |x|
self.class.new(x, enum_field_types, generate_types:, requires:, syntax:).result
self.class.new(x, enum_field_types, generate_types:, requires:, syntax:, options: @options).result
end.join("\n")
end

Expand Down Expand Up @@ -1621,7 +1643,7 @@ def initialize(ast, generate_types: false)
@generate_types = generate_types
end

def to_ruby(this_file = nil)
def to_ruby(this_file = nil, options = {})
requires = Set.new
@ast.file.each do |file|
modules = resolve_modules(file)
Expand All @@ -1632,9 +1654,9 @@ def to_ruby(this_file = nil)
head += "\n"

toplevel_enums = file.enum_type.group_by(&:name)
body = file.enum_type.map { |enum| EnumCompiler.result(enum, generate_types:) }.join + "\n"
body = file.enum_type.map { |enum| EnumCompiler.result(enum, generate_types:, options:) }.join + "\n"
body += file.message_type.map do |message|
MessageCompiler.result(message, toplevel_enums, generate_types:, requires:, syntax: file.syntax)
MessageCompiler.result(message, toplevel_enums, generate_types:, requires:, syntax: file.syntax, options:)
end.join

head += requires.reject { |r| r == this_file }.map { |r| "require #{r.dump}" }.join("\n") + "\n\n"
Expand Down

0 comments on commit e2aeb72

Please sign in to comment.