Skip to content

Commit

Permalink
Add mirrored control bytes to enable seamless wraparound in probing
Browse files Browse the repository at this point in the history
- Introduced mirrored control bytes at the end of the control byte array to allow
  seamless wraparound during group probing.
- This mirrors the first 16 bytes (GroupSize) of the control array at the end,
  enabling efficient SIMD-based probing across boundaries without additional
  boundary checks or wraparound logic.
- This design improvement allows continuous group probing by treating the control
  byte array as circular, thereby enhancing performance, reducing branching, and
  simplifying the probe sequence.
  • Loading branch information
Wsm2110 committed Oct 29, 2024
1 parent 50c48a9 commit c2046ff
Showing 1 changed file with 33 additions and 18 deletions.
51 changes: 33 additions & 18 deletions src/DenseMap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ public IEnumerable<TValue> Values
}
}

// Width, in control bytes, of one probe group. SetCtrl uses it to compute the index of the
// mirrored copy of a control byte; the value matches the 16 extra trailing bytes that
// _controlBytes is allocated with.
// NOTE(review): presumably this must stay equal to Vector128<sbyte>.Count (16) — confirm before changing.
private int _groupWidth = 16;

#endregion

#region Fields
Expand Down Expand Up @@ -228,10 +230,10 @@ public DenseMap(uint length, double loadFactor)
_maxLookupsBeforeResize = (uint)(_length * _loadFactor);
_comparer = EqualityComparer<TKey>.Default;
_shift = (byte)(_shift - BitOperations.Log2(_length));
_entries = new Entry[_length + 16];

_controlBytes = new sbyte[_length + 16];
_entries = new Entry[_length];

// Calculate max tombstones before rehash based on the dynamic weight factor
_maxTombstoneBeforeRehash = _length * _baseThreshold;

Array.Fill(_controlBytes, _emptyBucket);
Expand Down Expand Up @@ -334,7 +336,7 @@ public void Emplace(TKey key, TValue value)
entry.Value = value;
// Update the control byte at position `i` in `_controlBytes` to `h2`, marking it as occupied.
// The control byte typically indicates the status of the slot (occupied, empty, or tombstone).
Find(_controlBytes, i) = h2;
SetCtrl(i, h2);
// Increment the total count of entries in the hash table, reflecting the new insertion.
Count++;
// Return immediately to indicate the insertion is complete.
Expand Down Expand Up @@ -379,7 +381,7 @@ public bool Get(TKey key, out TValue value)
var target = Vector128.Create(h2);
// This operation ensures that `index` is in the range [0, capacity - 1] by using only the lower bits of `hashcode`,
// which helps in efficient and quick indexing.
uint index = hashcode & _lengthMinusOne;
uint index = hashcode & _lengthMinusOne;
// Initialize a variable to keep track of the distance to jump when probing the map.
uint jumpDistance = 0;

Expand All @@ -392,16 +394,13 @@ public bool Get(TKey key, out TValue value)
// Compare the target vector (hashed key) with the loaded source vector to find matches.
// `ExtractMostSignificantBits()` returns a mask where each bit set indicates a match.
var mask = Vector128.Equals(target, source).ExtractMostSignificantBits();

// Process any matches indicated by the mask.
while (mask != 0)
{
// Get the position of the first set bit in the mask (indicating a match).
var bitPos = BitOperations.TrailingZeroCount(mask);

// Retrieve the entry corresponding to the matched bit position within the map's entries.
var entry = Find(_entries, index + Unsafe.As<int, byte>(ref bitPos));

// Check if the entry's key matches the specified key using the equality comparer.
if (_comparer.Equals(entry.Key, key))
{
Expand Down Expand Up @@ -514,7 +513,7 @@ public ref TValue GetValueRefOrAddDefault(TKey key)
// Set the key for the located entry to the specified `key`.
entry.Key = key;
// Set the control byte for the entry at position `i` to `h2` to mark it as occupied.
Find(_controlBytes, i) = h2;
SetCtrl(i, h2);
// Increment the total count of entries in the hash table.
Count++;

Expand Down Expand Up @@ -613,7 +612,6 @@ public bool Update(TKey key, TValue value)
}
}


/// <summary>
/// Removes a key and value from the map.
/// Example:
Expand Down Expand Up @@ -683,11 +681,11 @@ public bool Remove(TKey key)

if (emptyMask > 0)
{
Find(_controlBytes, i) = _emptyBucket;
SetCtrl(i, _emptyBucket);
}
else
{
Find(_controlBytes, i) = _tombstone;
SetCtrl(i, _tombstone);
_tombstoneCounter++;
}

Expand Down Expand Up @@ -745,7 +743,7 @@ public bool Contains(TKey key)
var target = Vector128.Create(h2);
// This operation ensures that `index` is in the range [0, capacity - 1] by using only the lower bits of `hashcode`,
// which helps in efficient and quick indexing.
uint index = hashcode & _lengthMinusOne;
uint index = hashcode & _lengthMinusOne;
// Initialize `jumpDistance` to control the distance between probes, starting at zero.
uint jumpDistance = 0;

Expand All @@ -755,17 +753,14 @@ public bool Contains(TKey key)
// Load a vector from `_controlBytes` at the calculated index.
// `_controlBytes` holds metadata about each slot in the map.
var source = Vector128.LoadUnsafe(ref Find(_controlBytes, index));

// Compare `source` with `target`, and `ExtractMostSignificantBits` returns a bitmask
// where each set bit indicates a position in `source` that matches `target`.
var mask = Vector128.Equals(source, target).ExtractMostSignificantBits();

// Process each match indicated by the bits set in `mask`.
while (mask != 0)
{
// Get the position of the first set bit in `mask`, indicating a potential key match.
var bitPos = BitOperations.TrailingZeroCount(mask);

// Check if the entry at this position has a key that matches the specified key.
// Use `_comparer` to ensure accurate key comparison.
if (_comparer.Equals(Find(_entries, index + Unsafe.As<int, uint>(ref bitPos)).Key, key))
Expand Down Expand Up @@ -892,9 +887,9 @@ private void Resize()
var oldEntries = _entries;
var oldMetadata = _controlBytes;

var size = Unsafe.As<uint, int>(ref _length) + 16;
var size = Unsafe.As<uint, int>(ref _length);

_controlBytes = GC.AllocateArray<sbyte>(size);
_controlBytes = GC.AllocateUninitializedArray<sbyte>(size + 16);
_entries = GC.AllocateArray<Entry>(size);

_controlBytes.AsSpan().Fill(_emptyBucket);
Expand All @@ -921,7 +916,7 @@ private void Resize()
var bitPos = BitOperations.TrailingZeroCount(mask);
index += Unsafe.As<int, uint>(ref bitPos);

Find(_controlBytes, index) = h2;
SetCtrl(i, h2);
Find(_entries, index) = entry;
break;
}
Expand Down Expand Up @@ -971,6 +966,26 @@ internal static uint ResetLowestSetBit(uint value)
return value & (value - 1);
}

/// <summary>
/// Stores <paramref name="ctrl"/> as the control byte of slot <paramref name="index"/> and
/// writes the same value to that slot's mirrored position in <c>_controlBytes</c>.
/// </summary>
/// <remarks>
/// <para>
/// <c>_controlBytes</c> is allocated with <c>_groupWidth</c> (16) bytes more than the table
/// has slots. Those trailing bytes are kept as a copy of the first 16 control bytes so that a
/// 16-byte group load starting near the end of the table reads valid data that behaves as if
/// the array wrapped around to its beginning, without an explicit boundary check.
/// </para>
/// <para>
/// Example for a 64-slot table (80 control bytes): a group load starting at slot 60 reads
/// bytes 60-75; bytes 64-75 mirror slots 0-11.
/// </para>
/// <para>
/// The mirror index is <c>((index - groupWidth) &amp; _lengthMinusOne) + groupWidth</c>:
/// for <c>index &lt; groupWidth</c> the subtraction wraps and the result lands in the
/// mirrored tail; for every other index the result equals <paramref name="index"/>, so the
/// second store is a harmless repeat of the first. This assumes <c>_lengthMinusOne</c> is a
/// power-of-two-minus-one mask at least as large as <c>groupWidth - 1</c> — TODO confirm for
/// tables smaller than one group.
/// </para>
/// </remarks>
/// <param name="index">Slot index of the control byte to write.</param>
/// <param name="ctrl">Control byte value to store (an h2 tag, empty marker or tombstone).</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void SetCtrl(uint index, sbyte ctrl)
{
    uint groupWidth = (uint)_groupWidth;

    // Computed in unsigned 32-bit arithmetic; the wraparound of (index - groupWidth) for small
    // indices is intentional, hence the explicit unchecked context. This replaces a
    // reinterpretation of a 64-bit temporary as a 32-bit value, which only yields the low
    // half on little-endian hardware.
    uint mirrorIndex = unchecked(((index - groupWidth) & _lengthMinusOne) + groupWidth);

    Find(_controlBytes, index) = ctrl;
    Find(_controlBytes, mirrorIndex) = ctrl;
}

#endregion

[StructLayout(LayoutKind.Sequential)]
Expand Down

0 comments on commit c2046ff

Please sign in to comment.