From 74148b3e55adb1e6eb7935d05ab7317f41b8f4b0 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Mon, 26 Aug 2024 19:41:44 -0400 Subject: [PATCH 1/9] Clean up and flesh out AMO section of overview list Add atomic fetch/swap to the list, make it clear CAS is conditional, and unify some language ("a PE" -> "the PE", "returns with" -> "returns", "that symmetric data" -> "the symmetric data") --- content/programming_model_overview.tex | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index a76c99de2..534058feb 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -81,9 +81,12 @@ \item \textbf{\acfp{AMO}} \begin{enumerate} - \item \OPR{Swap}: The \ac{PE} initiating the swap gets the old value of a - symmetric data object from a remote \ac{PE} and copies a new value to - that symmetric data object on the remote \ac{PE}. + \item \OPR{Fetch}: The \ac{PE} initiating the fetch returns the value of the + symmetric data object on the remote \ac{PE}. + \item \OPR{Set}: The \ac{PE} initiating the set copies a new value to the + symmetric data object on the remote \ac{PE}. + \item \OPR{Swap}: The \ac{PE} initiating the swap copies a new value to the + symmetric data object on the remote \ac{PE} and returns the old value. \item \OPR{Increment}: The \ac{PE} initiating the increment adds 1 to the symmetric data object on the remote \ac{PE}. \item \OPR{Add}: The \ac{PE} initiating the add specifies the value to be added @@ -91,14 +94,14 @@ \item \OPR{Bitwise Operations}: The \ac{PE} initiating the bitwise operation specifies the operand value to the bitwise operation to be performed on the symmetric data object on the remote \ac{PE}. 
- \item \OPR{Compare and Swap}: The \ac{PE} initiating the swap gets the old value - of the symmetric data object based on a value to be compared and copies a - new value to the symmetric data object on the remote \ac{PE}. - \item \OPR{Fetch and Increment}: The \ac{PE} initiating the increment adds 1 to - the symmetric data object on the remote \ac{PE} and returns with the old + \item \OPR{Compare and Swap}: The \ac{PE} initiating the compare and swap + conditionally copies a new value to the symmetric data object on the + remote \ac{PE} and returns the old value. + \item \OPR{Fetch and Increment}: The \ac{PE} initiating the increment adds 1 + to the symmetric data object on the remote \ac{PE} and returns the old value. \item \OPR{Fetch and Add}: The \ac{PE} initiating the add specifies the value to - be added to the symmetric data object on the remote \ac{PE} and returns with + be added to the symmetric data object on the remote \ac{PE} and returns the old value. \item \OPR{Fetch and Bitwise Operations}: The \ac{PE} initiating the bitwise operation specifies the operand value to the bitwise operation to be From 6d0655bd4bd8ac9419dc4f4b1bf000caea7bb1c8 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 07:56:58 -0400 Subject: [PATCH 2/9] Clean up and add new routines to collective section of the overview list Add scan, avoid active set language, don't say broadcast avoids copying to self since the teams based version does do that. 
--- content/programming_model_overview.tex | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 534058feb..6c8cf8a69 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -138,7 +138,7 @@ \begin{enumerate} \item \OPR{Broadcast}: The \VAR{root} \ac{PE} specifies a symmetric data object to be copied to a symmetric data object on one or more remote - \acp{PE} (not including itself). + \acp{PE}. \item \OPR{Collection}: All \acp{PE} participating in the routine get the result of concatenated symmetric objects contributed by each of the \acp{PE} in another symmetric data object. @@ -146,8 +146,11 @@ of an associative binary routine over elements of the specified symmetric data object on another symmetric data object. \item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange - a fixed amount of contiguous or strided data with all other \acp{PE} - in the active set. + a fixed amount of contiguous or strided data with all other participating + \acp{PE}. + \item \OPR{Scan}: All \acp{PE} participating in the routine perform an + inclusive or exclusive prefix sum over elements of the specified + symmetric data object. \end{enumerate} \item \textbf{Mutual Exclusion} From 17b6eeec3fb90f34c9607ae5cc56375940064f2f Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 08:27:38 -0400 Subject: [PATCH 3/9] Note allocation routines are collective in overview list This makes the collective nature more explicit to match language in the allocation section. 
--- content/programming_model_overview.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 6c8cf8a69..1fc4d5087 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -43,11 +43,11 @@ \item \textbf{Symmetric Data Object Management} \begin{enumerate} - \item \OPR{Allocation}: All executing \acp{PE} must participate in the + \item \OPR{Allocation}: All executing \acp{PE} must collectively participate in the allocation of a symmetric data object with identical arguments. - \item \OPR{Deallocation}: All executing \acp{PE} must participate in the + \item \OPR{Deallocation}: All executing \acp{PE} must collectively participate in the deallocation of the same symmetric data object with identical arguments. - \item \OPR{Reallocation}: All executing \acp{PE} must participate in the + \item \OPR{Reallocation}: All executing \acp{PE} must collectively participate in the reallocation of the same symmetric data object with identical arguments. \end{enumerate} From 68b4caf620021ceff3a2b4a52fb2fd18f44a2957 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 08:27:59 -0400 Subject: [PATCH 4/9] Clean up and add new routines to the signaling section of the overview list Update put signal to match the language of a regular put more closely and add signal set/add/fetch. 
--- content/programming_model_overview.tex | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 1fc4d5087..d95d17786 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -111,9 +111,16 @@ \item \textbf{Signaling Operations} \begin{enumerate} - \item \OPR{Signaling Put}: The \source{} data is copied to the symmetric - object on the remote \ac{PE} and a flag on the remote \ac{PE} is subsequently - updated to signal completion. + \item \OPR{Put Signal}: The local \ac{PE} specifies the \source{} data object + to be copied to the symmetric data object on the remote \ac{PE} and + another symmetric data object on the remote \ac{PE} is subsequently + updated to signal completion. + \item \OPR{Signal Add}: The local \ac{PE} specifies a value to be added to + the symmetric data object on the remote \ac{PE}. + \item \OPR{Signal Set}: The local \ac{PE} specifies a value to be copied to + the symmetric data object on the remote \ac{PE}. + \item \OPR{Signal Fetch}: The local \ac{PE} returns the value of a local data + object. \end{enumerate} \item \textbf{Synchronization and Ordering} From 83d8bd609d7557b257071045f9c92354353710b8 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 09:20:35 -0400 Subject: [PATCH 5/9] Add sessions to overview list --- content/programming_model_overview.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index d95d17786..6d53ece1e 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -123,6 +123,13 @@ object. 
\end{enumerate} +\item \textbf{Session Management} +\begin{enumerate} + \item \OPR{Sessions}: Sessions are a mechanism for the application to inform + the implementation about an upcoming sequence of operations that exhibit + a pattern that may be suitable for runtime optimization. +\end{enumerate} + \item \textbf{Synchronization and Ordering} \begin{enumerate} \item \OPR{Fence}: The \ac{PE} calling fence ensures ordering of From b2e630041080fb42ad42f281cf1667fe13c11054 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 10:45:05 -0400 Subject: [PATCH 6/9] Add shmem_team_ptr to memory model text about getting local pointer --- content/memory_model.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/memory_model.tex b/content/memory_model.tex index 20f46b37a..58cf97771 100644 --- a/content/memory_model.tex +++ b/content/memory_model.tex @@ -71,7 +71,7 @@ \subsection{Pointers to Symmetric Objects}\label{subsec:pointers_to_symmetric_ob The ``mem'' interfaces (e.g., \FUNC{shmem\_putmem}) have no alignment requirements. -The \FUNC{shmem\_ptr} routine allows the programmer to query a {\em local +The \FUNC{shmem\_ptr} and \FUNC{shmem\_team\_ptr} routines allow the application to query a {\em local address} to a remotely accessible data object at a specified \ac{PE}. The resulting pointer is valid for direct memory access; however, providing this address as an argument of an \openshmem routine that requires a symmetric From 91658e897238a1cf16dd506921cb24e9b9566c30 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 10:49:35 -0400 Subject: [PATCH 7/9] Update atomic example 3 to avoid deprecated active-set collective Switch example 3 from `shmem_int_sum_to_all` to `shmem_int_sum_reduce` since the former is deprecated. 
--- example_code/amo_scenario_3.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/example_code/amo_scenario_3.c b/example_code/amo_scenario_3.c index 93586779c..2091b09f8 100644 --- a/example_code/amo_scenario_3.c +++ b/example_code/amo_scenario_3.c @@ -1,19 +1,14 @@ #include <shmem.h> int main(void) { - static long psync[SHMEM_REDUCE_SYNC_SIZE]; - static int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; static int x = 0, y = 0; - for (int i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - shmem_init(); shmem_int_atomic_inc(&x, (shmem_my_pe() + 1) % shmem_n_pes()); /* Undefined behavior: The following reduction operation performs accesses to * symmetric variable 'x' that are concurrent with previously issued atomic * increment operations on the same variable. */ - shmem_int_sum_to_all(&y, &x, 1, 0, 0, shmem_n_pes(), pwrk, psync); + shmem_int_sum_reduce(SHMEM_TEAM_WORLD, &y, &x, 1); shmem_finalize(); return 0; From 84271957051b3334186fdf712126bec729c9f86c Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 27 Aug 2024 11:57:00 -0400 Subject: [PATCH 8/9] Add session constants to the library constant table Add `SHMEM_CTX_SESSION_TOTAL_OPS` and `SHMEM_CTX_SESSION_BATCH` to the library constant table (text based on `SHMEM_TEAM_NUM_CONTEXTS` and `SHMEM_CTX_NOSTORE` respectively since they have similar uses) --- content/library_constants.tex | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/content/library_constants.tex b/content/library_constants.tex index 0a0194de2..b0edb9cf1 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -84,6 +84,19 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. \tabularnewline \hline %% +\LibConstDecl{SHMEM\_CTX\_SESSION\_TOTAL\_OPS} & +The bitwise flag which specifies that a session start routine should use the +\VAR{total\_ops} member of the provided \CTYPE{shmem\_ctx\_session\_config\_t} +configuration parameter as a hint. 
See Section~\ref{subsec:shmem_ctx_session_config_t} +for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CTX\_SESSION\_BATCH} & +The session start option which specifies that operations in the given session +are latency tolerant and may be candidates for batching. See +Section~\ref{subsec:shmem_ctx_session_start} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_SIGNAL\_SET} & An integer constant expression corresponding to the signal update set operation. See Section~\ref{subsec:shmem_put_signal} and From 8ef4cb03deb0e5e8894bcb2c768f8e8f6bbdcade Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 4 Sep 2024 10:31:53 -0400 Subject: [PATCH 9/9] Add initial list of 1.6 contributors --- content/coverpage.tex | 54 ++++++++++++++++++++++++++++++++++++++++++- utils/defs.tex | 4 ++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/content/coverpage.tex b/content/coverpage.tex index a5b3df318..e26a54cac 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -47,8 +47,60 @@ \section*{Sponsored by} \end{itemize} \section*{Authors and Collaborators} -This document is a collaborative effort consisting of several releases of \openshmem versions 1.0 through 1.5. This section lists the authors and contributors in reverse chronological order, starting with \openshmem 1.5. +This document is a collaborative effort consisting of several releases of \openshmem versions 1.0 through 1.6. This section lists the authors and contributors in reverse chronological order, starting with \openshmem 1.6. +\subsection*{\openshmem 1.6} +\begin{multicols}{2} +\begin{itemize} +\setlength\itemsep{0.1em} +\item Ferrol Aderholdt, NVIDIA +\item Muhammad Awad, \ac{AMD} +\item Matthew Baker, \ac{ORNL} +\item Swen Boehm, \ac{ORNL} +\item Aurelien Bouteiller, \ac{UTK} +\item Mark Brown, Intel +\item Bob Cernohous, \ac{HPE} +\item James Dinan\footnotemark[1], NVIDIA +\item Megan Grodowitz, Arm Inc. 
+\item Max Grossman, Georgia Tech +\item Yanfei Guo, \ac{ANL} +\item Khaled Hamidouche, NVIDIA +\item Jeff Hammond, NVIDIA +\item Akihiro Hayashi, Georgia Tech +\item Oscar Hernandez, \ac{ORNL} +\item Kieran Holland, Intel +\item Robert Kierski, \ac{HPE} +\item Bryant Lam, \ac{DoD} +\item Akhil Langer, NVIDIA +\item Tiffany M. Mintz, \ac{ORNL} +\item Bryan Morgan, Intel +\item William Okuno\footnotemark[2], \ac{HPE} +\item David Ozog\footnotemark[5], Intel +\item Nicholas Park, \ac{DoD} +\item Wendy Poole, \ac{LANL} +\item Steve Poole\footnotemark[6], \ac{OSSS} +\item Swaroop Pophale, \ac{ORNL} +\item Sreeram Potluri, NVIDIA +\item Brandon Potter\footnotemark[4], \ac{AMD} +\item Howard Pritchard, \ac{LANL} +\item Md. Wasi-ur- Rahman\footnotemark[11], Intel +\item Naveen Ravichandrasekaran\footnotemark[9], \ac{HPE} +\item Michael Raymond, \ac{HPE} +\item Elliot Ronaghan\footnotemark[8], \ac{HPE} +\item James Ross, \ac{ARL} +\item Pavel Shamis, NVIDIA +\item Sameer Shende, \ac{UO} +\item Danielle Sikich, \ac{HPE} +\item Brian Smith, Cornelis Networks +\item Lawrence Stewart\footnotemark[7], Intel +\item Zach Tiffany, NVIDIA +\item Manjunath Gorentla Venkata\footnotemark[10], NVIDIA +\item Kevin Waters\footnotemark[3], \ac{DoD} +\item Aaron Welch, \ac{ORNL} +\item Nathan Wichmann, \ac{HPE} +\item Jeffrey Young, Georgia Tech +\end{itemize} +\end{multicols} \subsection*{\openshmem 1.5} \begin{multicols}{2} diff --git a/utils/defs.tex b/utils/defs.tex index 771ba8a7b..0a298f439 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -105,7 +105,7 @@ \acro{API}{\emph{Application Programming Interface}} \acro{MPI}{\emph{Message Passing Interface}} \acro{SPMD}{\emph{Single Program Multiple Data}} -\acro{ANL}{Argonne National Labratory} +\acro{ANL}{Argonne National Laboratory} \acro{ARL}{Army Research Laboratory} \acro{AMD}{Advanced Micro Devices} \acro{MPMD}{\emph{Multiple Program Multiple Data}} @@ -120,7 +120,7 @@ \acro{SGI}{Silicon Graphics International} \acro{DoD}{U.S. 
Department of Defense} \acro{SBU}{Stonybrook University} -\acro{UTK}{University of Tenneesee at Knoxville} +\acro{UTK}{University of Tennessee at Knoxville} \acro{HPE}{Hewlett Packard Enterprise} \end{acronym}