-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc++] Optimize std::getline #121346
Open
philnik777
wants to merge
1
commit into
llvm:main
Choose a base branch
from
philnik777:optimize_getline_string
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
[libc++] Optimize std::getline #121346
+78
−26
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Contributor
philnik777
commented
Dec 30, 2024
``` ----------------------------------------------- Benchmark old new ----------------------------------------------- BM_getline_string 318 ns 32.4 ns ```
philnik777
force-pushed
the
optimize_getline_string
branch
from
December 31, 2024 09:39
9e60122
to
60ab33d
Compare
llvmbot
added
the
libc++
libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.
label
Dec 31, 2024
@llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) Changes
Full diff: https://github.com/llvm/llvm-project/pull/121346.diff 4 Files Affected:
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..05881722672dda 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,8 @@ Improvements and New Features
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+- The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/istream b/libcxx/include/istream
index 4b177c41cc325e..76ddab04bbdcdd 100644
--- a/libcxx/include/istream
+++ b/libcxx/include/istream
@@ -1265,41 +1265,49 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
ios_base::iostate __state = ios_base::goodbit;
typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
- if (__sen) {
+ if (!__sen)
+ return __is;
# if _LIBCPP_HAS_EXCEPTIONS
- try {
+ try {
# endif
- __str.clear();
- streamsize __extr = 0;
- while (true) {
- typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
- if (_Traits::eq_int_type(__i, _Traits::eof())) {
- __state |= ios_base::eofbit;
- break;
- }
- ++__extr;
- _CharT __ch = _Traits::to_char_type(__i);
- if (_Traits::eq(__ch, __dlm))
- break;
- __str.push_back(__ch);
- if (__str.size() == __str.max_size()) {
+ __str.clear();
+
+ auto& __buffer = *__is.rdbuf();
+
+ auto __next = __buffer.sgetc();
+ for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) {
+ const auto* __first = __buffer.gptr();
+ const auto* __last = __buffer.egptr();
+ const auto* __match = _Traits::find(__first, __last - __first, __dlm);
+ if (__match) {
+ if (auto __cap = __str.max_size() - __str.size(); __cap <= static_cast<size_t>(__match - __first)) {
+ __str.append(__first, __cap);
+ __buffer.__gbump_ptrdiff(__cap);
__state |= ios_base::failbit;
break;
}
+ __str.append(__first, __match);
+ __buffer.__gbump_ptrdiff(__match - __first + 1);
+ break;
}
- if (__extr == 0)
- __state |= ios_base::failbit;
+
+ __str.append(__first, __last);
+ __buffer.__gbump_ptrdiff(__last - __first);
+ }
+
+ if (_Traits::eq_int_type(__next, _Traits::eof()))
+ __state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit);
+
# if _LIBCPP_HAS_EXCEPTIONS
- } catch (...) {
- __state |= ios_base::badbit;
- __is.__setstate_nothrow(__state);
- if (__is.exceptions() & ios_base::badbit) {
- throw;
- }
+ } catch (...) {
+ __state |= ios_base::badbit;
+ __is.__setstate_nothrow(__state);
+ if (__is.exceptions() & ios_base::badbit) {
+ throw;
}
-# endif
- __is.setstate(__state);
}
+# endif
+ __is.setstate(__state);
return __is;
}
diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf
index 7f02a9b3314110..a3e1cf489d0efa 100644
--- a/libcxx/include/streambuf
+++ b/libcxx/include/streambuf
@@ -241,6 +241,9 @@ protected:
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }
+ // gbump takes an int, so it might not be able to represent the offset we want to add.
+ _LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }
+
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
@@ -297,6 +300,10 @@ private:
char_type* __bout_;
char_type* __nout_;
char_type* __eout_;
+
+ template <class _CharT2, class _Traits2, class _Allocator>
+ _LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
+ getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
};
template <class _CharT, class _Traits>
diff --git a/libcxx/test/benchmarks/streams/getline.bench.cpp b/libcxx/test/benchmarks/streams/getline.bench.cpp
new file mode 100644
index 00000000000000..6a2215fe061167
--- /dev/null
+++ b/libcxx/test/benchmarks/streams/getline.bench.cpp
@@ -0,0 +1,35 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <istream>
+#include <sstream>
+
+#include <benchmark/benchmark.h>
+
+void BM_getline_string(benchmark::State& state) {
+ std::istringstream iss;
+
+ std::string str;
+ str.reserve(128);
+ iss.str("A long string to let getline do some more work, making sure that longer strings are parsed fast enough");
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(iss);
+
+ std::getline(iss, str);
+ benchmark::DoNotOptimize(str);
+ iss.seekg(0);
+ }
+}
+
+BENCHMARK(BM_getline_string);
+
+BENCHMARK_MAIN();
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.