Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add benchmark for tfidf #239

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"-k",
"inner_bf16"
],
"justMyCode": true
"justMyCode": false
}
]
}
55 changes: 43 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 17 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,27 @@ include = ["/rust/**", "/c/**", "/include/**", "/build.rs"]
name = "simsimd"
path = "rust/lib.rs"

[[bin]]
name = "spdot"
path = "scripts/spdot.rs"

[build-dependencies]
cc = "1.0.83"

[[bench]]
name = "sqeuclidean"
harness = false
path = "scripts/bench_sqeuclidean.rs"

[[bench]]
name = "cosine"
name = "tfidf"
harness = false
path = "scripts/bench_cosine.rs"
path = "scripts/bench_tfidf.rs"

[[bench]]
name = "sqeuclidean"
name = "spdot"
harness = false
path = "scripts/bench_sqeuclidean.rs"
path = "scripts/bench_spdot.rs"

[profile.bench]
opt-level = 3 # Corresponds to -O3
Expand All @@ -45,5 +53,9 @@ rpath = false # On some systems, setting this to false can help with optimiz

[dev-dependencies]
criterion = { version = "0.5.1" }
rand = { version = "0.8.5" }
half = { version = "2.4.0" }

[dependencies]
regex = "1.11.1"
rand = { version = "0.8.5" }

1 change: 1 addition & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ fn main() {

println!("cargo:rerun-if-changed=include/simsimd/dot.h");
println!("cargo:rerun-if-changed=include/simsimd/spatial.h");
println!("cargo:rerun-if-changed=include/simsimd/sparse.h");
println!("cargo:rerun-if-changed=include/simsimd/probability.h");
println!("cargo:rerun-if-changed=include/simsimd/binary.h");
println!("cargo:rerun-if-changed=include/simsimd/types.h");
Expand Down
41 changes: 41 additions & 0 deletions c/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,45 @@ extern "C" {
metric(a, b, a_length, b_length, result); \
}

/**
 * @brief Defines the dynamically-dispatched entry point `simsimd_<name>_<extension>` for a sparse kernel.
 *
 * The generated function takes two `simsimd_<type>_t` arrays, their two lengths, and an output pointer.
 * While the function-local `static` pointer is still zero, it asks `simsimd_find_kernel_punned` for a kernel
 * matching `simsimd_metric_<name>_k` and `simsimd_datatype_<extension>_k` under `simsimd_capabilities()`.
 * If the lookup leaves the pointer at zero, the bit pattern `0x7FF0000000000001` is stored into `*result`
 * and the function returns without calling any kernel; otherwise the cached kernel is invoked.
 *
 * NOTE(review): the bit pattern reads as a NaN only if `simsimd_distance_t` is a 64-bit IEEE-754 double - confirm.
 * NOTE(review): the lines just above this macro end another macro with the same call shape; if that one is
 * also named `SIMSIMD_DECLARATION_SPARSE`, keep both replacement lists token-identical or drop one of them.
 */
#define SIMSIMD_DECLARATION_SPARSE(name, extension, type) \
SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \
simsimd_size_t a_length, simsimd_size_t b_length, \
simsimd_distance_t *result) { \
static simsimd_metric_sparse_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
simsimd_capabilities(), simsimd_cap_any_k, \
(simsimd_kernel_punned_t *)(&metric), &used_capability); \
if (!metric) { \
*(simsimd_u64_t *)result = 0x7FF0000000000001ull; \
return; \
} \
} \
metric(a, b, a_length, b_length, result); \
}

/**
 * @brief Defines the dynamically-dispatched entry point `simsimd_<name>_<extension>` for a weighted sparse kernel.
 *
 * The generated function takes two `simsimd_<type>_t` arrays (`a`, `b`), two `simsimd_<extension>_t` arrays
 * (`a_weights`, `b_weights`), the two lengths, and an output pointer. The kernel is looked up through
 * `simsimd_find_kernel_punned` with `simsimd_metric_<name>_k` and `simsimd_datatype_<extension>_k`, and the
 * resulting pointer is kept in a function-local `static`. When the lookup yields no kernel, the bit pattern
 * `0x7FF0000000000001` is stored into `*result` and nothing is called.
 *
 * NOTE(review): the bit pattern reads as a NaN only if `simsimd_distance_t` is a 64-bit IEEE-754 double - confirm.
 */
#define SIMSIMD_DECLARATION_SPARSE_DOT(name, extension, type) \
    SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \
                                                      simsimd_##extension##_t const *a_weights, \
                                                      simsimd_##extension##_t const *b_weights, \
                                                      simsimd_size_t a_length, simsimd_size_t b_length, \
                                                      simsimd_distance_t *result) { \
        static simsimd_metric_sparse_weight_punned_t kernel = 0; \
        if (!kernel) { \
            simsimd_capability_t resolved_capability; \
            simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
                                       simsimd_capabilities(), simsimd_cap_any_k, \
                                       (simsimd_kernel_punned_t *)(&kernel), &resolved_capability); \
        } \
        if (kernel) { \
            kernel(a, b, a_weights, b_weights, a_length, b_length, result); \
            return; \
        } \
        *(simsimd_u64_t *)result = 0x7FF0000000000001ull; \
    }


#define SIMSIMD_DECLARATION_CURVED(name, extension, type) \
SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \
simsimd_##type##_t const *c, simsimd_size_t n, \
Expand Down Expand Up @@ -195,6 +234,8 @@ SIMSIMD_DECLARATION_DENSE(js, f64, f64)
// Sparse sets
SIMSIMD_DECLARATION_SPARSE(intersect, u16, u16)
SIMSIMD_DECLARATION_SPARSE(intersect, u32, u32)
// Sparse dot products: `u16` index arrays (third argument) with `f32` weight arrays (second argument).
// NOTE(review): this expands to the symbol `simsimd_spdot_weights_f32`, whereas the compile-time
// dispatch in `simsimd.h` is named `simsimd_spdot_weights_u16` - confirm which name callers should link against.
SIMSIMD_DECLARATION_SPARSE_DOT(spdot_weights, f32, u16)

// Curved spaces
SIMSIMD_DECLARATION_CURVED(bilinear, f64, f64)
Expand Down
12 changes: 12 additions & 0 deletions include/simsimd/simsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ typedef void (*simsimd_metric_sparse_punned_t)(void const *a, void const *b,
simsimd_size_t a_length, simsimd_size_t b_length, //
simsimd_distance_t *d);


/**
 * @brief Type-punned function pointer for sparse kernels that take two extra weight arrays.
 *
 * All four array arguments are passed as untyped `void const *`; the concrete element types are
 * decided by the kernel the pointer is bound to.
 *
 * @param a          First input array.
 * @param b          Second input array.
 * @param a_weights  Array passed alongside `a` (presumably one weight per entry of `a` - confirm).
 * @param b_weights  Array passed alongside `b` (presumably one weight per entry of `b` - confirm).
 * @param a_length   Length associated with `a`.
 * @param b_length   Length associated with `b`.
 * @param d          Output location for the result.
 */
typedef void (*simsimd_metric_sparse_weight_punned_t)(void const *a, void const *b, //
void const *a_weights, void const *b_weights, //
simsimd_size_t a_length, simsimd_size_t b_length, //
simsimd_distance_t *d);

/**
* @brief Type-punned function pointer for curved vector spaces and similarity measures.
*
Expand Down Expand Up @@ -616,6 +622,8 @@ SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32(simsimd_capability_t v, si
case simsimd_metric_kl_k: *m = (m_t)&simsimd_kl_f32_neon, *c = simsimd_cap_neon_k; return;
case simsimd_metric_fma_k: *m = (m_t)&simsimd_fma_f32_neon, *c = simsimd_cap_neon_k; return;
case simsimd_metric_wsum_k: *m = (m_t)&simsimd_wsum_f32_neon, *c = simsimd_cap_neon_k; return;
case simsimd_metric_spdot_weights_k: *m = (m_t)&simsimd_spdot_weights_u16_f32_neon, *c = simsimd_cap_serial_k; return;

default: break;
}
#endif
Expand Down Expand Up @@ -658,6 +666,7 @@ SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32(simsimd_capability_t v, si
case simsimd_metric_mahalanobis_k: *m = (m_t)&simsimd_mahalanobis_f32_serial, *c = simsimd_cap_serial_k; return;
case simsimd_metric_fma_k: *m = (m_t)&simsimd_fma_f32_serial, *c = simsimd_cap_serial_k; return;
case simsimd_metric_wsum_k: *m = (m_t)&simsimd_wsum_f32_serial, *c = simsimd_cap_serial_k; return;
case simsimd_metric_spdot_weights_k: *m = (m_t)&simsimd_spdot_weights_u16_serial, *c = simsimd_cap_serial_k; return;
default: break;
}
}
Expand Down Expand Up @@ -1122,6 +1131,7 @@ SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u16(simsimd_capability_t v, si
#endif
if (v & simsimd_cap_serial_k) switch (k) {
case simsimd_metric_intersect_k: *m = (m_t)&simsimd_intersect_u16_serial, *c = simsimd_cap_serial_k; return;
case simsimd_metric_spdot_weights_k: *m = (m_t)&simsimd_spdot_weights_u16_serial, *c = simsimd_cap_serial_k; return;
default: break;
}
}
Expand Down Expand Up @@ -2087,6 +2097,8 @@ SIMSIMD_PUBLIC void simsimd_spdot_weights_u16(simsimd_u16_t const *a, simsimd_u1
simsimd_spdot_weights_u16_sve2(a, b, a_weights, b_weights, a_length, b_length, d);
#elif SIMSIMD_TARGET_TURIN
simsimd_spdot_weights_u16_turin(a, b, a_weights, b_weights, a_length, b_length, d);
// #elif SIMSIMD_TARGET_NEON
// simsimd_spdot_weights_u16_neon(a, b, a_weights, b_weights, a_length, b_length, d);
#else
simsimd_spdot_weights_u16_serial(a, b, a_weights, b_weights, a_length, b_length, d);
#endif
Expand Down
Loading