From 59ea0e330a4ea57910687d8edbb50e86eb7dec93 Mon Sep 17 00:00:00 2001 From: Lawrence Stewart Date: Fri, 27 Sep 2024 13:21:51 -0400 Subject: [PATCH 01/18] add missing const specifiers --- content/shmem_test_all_vector.tex | 4 ++-- content/shmem_test_any_vector.tex | 4 ++-- content/shmem_test_some_vector.tex | 4 ++-- content/shmem_wait_until_all_vector.tex | 4 ++-- content/shmem_wait_until_any_vector.tex | 4 ++-- content/shmem_wait_until_some_vector.tex | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index 429e43660..9d5fa66ff 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -7,14 +7,14 @@ \begin{C11synopsis} int @\FuncDecl{shmem\_test\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, - TYPE *cmp_values); + const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. \begin{Csynopsis} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, - TYPE *cmp_values); + const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex index 8e3db96a3..1bc6a0f0d 100644 --- a/content/shmem_test_any_vector.tex +++ b/content/shmem_test_any_vector.tex @@ -7,14 +7,14 @@ \begin{C11synopsis} size_t @\FuncDecl{shmem\_test\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, - TYPE *cmp_values); + const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. 
\begin{Csynopsis} size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, - int cmp, TYPE *cmp_values); + int cmp, const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex index 333302b0b..850041262 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -7,14 +7,14 @@ \begin{C11synopsis} size_t @\FuncDecl{shmem\_test\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, - int cmp, TYPE *cmp_values); + int cmp, const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. \begin{Csynopsis} size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, - const int *status, int cmp, TYPE *cmp_values); + const int *status, int cmp, const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index a3abdf9cb..c39aa91a2 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -7,13 +7,13 @@ \begin{C11synopsis} void @\FuncDecl{shmem\_wait\_until\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, - TYPE *cmp_values); + const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index 09bcc5c77..416702545 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -7,14 +7,14 @@ \begin{C11synopsis} size_t @\FuncDecl{shmem\_wait\_until\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, - TYPE *cmp_values); + const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. \begin{Csynopsis} size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, - int cmp, TYPE *cmp_values); + int cmp, const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index e3a414fb9..34abd5621 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -7,14 +7,14 @@ \begin{C11synopsis} size_t @\FuncDecl{shmem\_wait\_until\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, - const int *status, int cmp, TYPE *cmp_values); + const int *status, int cmp, const TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. 
\begin{Csynopsis} size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, - const int *status, int cmp, TYPE *cmp_values); + const int *status, int cmp, const TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. From 9647db53a0e4a4d6b50bc0d2dfdcf594be6bd861 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Fri, 27 Sep 2024 16:19:49 -0400 Subject: [PATCH 02/18] Minor edits to OpenSHMEM 1.6 RC1 Signed-off-by: James Dinan --- content/atomics_intro.tex | 7 ------- content/backmatter.tex | 22 +++++++++++----------- content/context_intro.tex | 4 ++-- content/coverpage.tex | 7 ++++--- content/interoperability.tex | 4 ++-- content/library_handles.tex | 4 ++-- content/p2p_sync_intro.tex | 2 +- content/profiling_interface.tex | 2 +- content/shmem_alltoall.tex | 8 ++++---- content/shmem_atomic_fetch_and.tex | 2 +- content/shmem_atomic_fetch_or.tex | 2 +- content/shmem_atomic_fetch_or_nbi.tex | 4 ++-- content/shmem_atomic_fetch_xor.tex | 2 +- content/shmem_atomic_fetch_xor_nbi.tex | 4 ++-- content/shmem_broadcast.tex | 6 +++--- content/shmem_collect.tex | 6 +++--- content/shmem_ctx_session_config_t.tex | 2 +- content/shmem_ctx_session_start.tex | 2 +- content/shmem_fence.tex | 4 ++-- content/shmem_g.tex | 1 - content/shmem_init.tex | 8 +++----- content/shmem_quiet.tex | 4 ++-- content/shmem_reductions.tex | 10 +++++----- content/shmem_scan.tex | 2 +- content/shmem_sync.tex | 8 ++++---- content/shmem_team_config_t.tex | 2 +- content/shmem_team_split_strided.tex | 6 +++--- content/shmem_wait_until_any_vector.tex | 2 +- content/signaling.tex | 2 +- content/teams_intro.tex | 4 ++-- example_code/shmem_reduce_example.c | 2 +- 31 files changed, 68 insertions(+), 77 deletions(-) diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index 7664d6354..fda22c435 100644 --- a/content/atomics_intro.tex +++ 
b/content/atomics_intro.tex @@ -130,10 +130,3 @@ \label{bitamotypes} \end{center} \end{table} -] - - - - - - diff --git a/content/backmatter.tex b/content/backmatter.tex index bc7013ff3..50a895ea6 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -50,7 +50,7 @@ \section*{Incorporating OpenSHMEM into Programs}\label{sec:writing_programs} The \KEYWORD{static} keyword makes the \VAR{dest} array symmetric on all \acp{PE}. Each \ac{PE} is able to transfer data to a remote \dest{} array by simply -specifying to an OpenSHMEM routine such as \hyperref[subsec:shmem_put]{\FUNC{shmem\_put}} +specifying to an \openshmem routine such as \hyperref[subsec:shmem_put]{\FUNC{shmem\_put}} the local address of the symmetric data object that will receive the data. This local address resolution aids programmability because the address of the \dest{} need not be exchanged with the active side (\ac{PE} \CONST{0}) prior to @@ -151,8 +151,8 @@ \chapter{Undefined Behavior in OpenSHMEM}\label{sec:undefined} \tabularnewline \hline Use of non-symmetric variables & Some routines require remotely accessible -variables to perform their function. For example, an \openshmem libray may detect a \PUT{} to a non-symmetric variable -and choose to abort the program. +variables to perform their function. For example, an \openshmem library may detect a \PUT{} to a non-symmetric variable +and choose to abort the program. However, another implementation may choose to continue execution with or without a warning. \tabularnewline \hline @@ -844,11 +844,11 @@ \section{Version 1.5} \ChangelogRef{p2psynctypes, dep:p2p_sync_types}% % \item Clarified that point-to-point synchronization routines preserve the - atomicity of OpenSHMEM \acp{AMO}. + atomicity of \openshmem \acp{AMO}. 
\ChangelogRef{subsec:amo_guarantees}% % \item Clarified that symmetric variables used as \VAR{ivar} arguments to - point-to-point synchronization routines must be updated using OpenSHMEM + point-to-point synchronization routines must be updated using \openshmem \acp{AMO}. \ChangelogRef{subsec:p2p_intro}% % @@ -1182,7 +1182,7 @@ \section{Version 1.1} extensions, clarifications to completion semantics and \ac{API} descriptions in agreement with the \ac{SGI} SHMEM specification, -and general readabilty and usability improvements to the document structure. +and general readability and usability improvements to the document structure. The following list describes the specific changes in \openshmem[1.1]: \begin{enumerate} @@ -1300,13 +1300,13 @@ \chapter{Errata}\label{sec:errata} Errors or ambiguities in the \openshmem specification may be discovered after publication. -Errata, or corrections, are included in the the sections below indicating the -version of the OpenSHMEM specification that required the correction or +Errata, or corrections, are included in the sections below indicating the +version of the \openshmem specification that required the correction or clarification. These corrections have been applied to all subsequent versions of the specification and this section serves as a historical record of the changes -made to assist users and implementers with applying the necessary corrections. -Errata that result in a change to the specifciation are also included in +made to assist users and implementors with applying the necessary corrections. +Errata that result in a change to the specification are also included in Annex~\ref{sec:changelog}. For an implementation to comply with a particular version of \openshmem, it must account for all errata associated with that version as indicated below. @@ -1328,7 +1328,7 @@ \section{Version 1.5} \FUNC{shmem\_test\_all\_vector} routines return 1 when the test set is empty (\ref{changelog:v1.6}.\ref{changelog:test_all}). 
\item Clarified that \FUNC{shmem\_team\_split\_strided} and - \FUNC{shmem\_team\_split\_2d} return nonzero when the parent team is + \FUNC{shmem\_team\_split\_2d} return a nonzero value when the parent team is \LibConstRef{SHMEM\_TEAM\_INVALID} (\ref{changelog:v1.6}.\ref{changelog:split_strided_2d}). \item Corrected the \VAR{level} argument's recommended value in API notes for diff --git a/content/context_intro.tex b/content/context_intro.tex index e363c56da..a88473d85 100644 --- a/content/context_intro.tex +++ b/content/context_intro.tex @@ -41,12 +41,12 @@ explicitly or implicitly, are performed. All point-to-point routines that operate on this context will do so with respect to the team-relative \ac{PE} numbering of the associated team. -If the PE number passed to such a routine is invalid, being negative or greater +If the \ac{PE} number passed to such a routine is invalid, being negative or greater than or equal to the size of the \openshmem team, then the behavior is undefined. By default, contexts are {\em shareable} and, when it is allowed by the threading model provided by the \openshmem library, they can be used concurrently by -multiple threads within the PE where they were created. +multiple threads within the \ac{PE} where they were created. % The following options can be supplied during context creation to restrict this usage model and enable performance optimizations. 
When using a given diff --git a/content/coverpage.tex b/content/coverpage.tex index e26a54cac..fe740877a 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -29,7 +29,7 @@ \section*{Development by} \begin{itemize} \item For a current list of contributors and collaborators please see\\ \url{http://www.openshmem.org/site/Contributors/} -\item For a current list of OpenSHMEM implementations and tools, please see\\ +\item For a current list of \openshmem implementations and tools, please see\\ \url{http://openshmem.org/site/Links#impl/} \end{itemize} @@ -72,6 +72,7 @@ \subsection*{\openshmem 1.6} \item Robert Kierski, \ac{HPE} \item Bryant Lam, \ac{DoD} \item Akhil Langer, NVIDIA +\item Jens Manser, \ac{DoD} \item Tiffany M. Mintz, \ac{ORNL} \item Bryan Morgan, Intel \item William Okuno\footnotemark[2], \ac{HPE} @@ -158,8 +159,8 @@ \subsection*{\openshmem 1.5} \subsection*{\openshmem 1.4} -OpenSHMEM 1.4 is dedicated to the memory of David Charles Knaak. David was a -highly involved colleague and contributor to the entire OpenSHMEM project. He +\openshmem 1.4 is dedicated to the memory of David Charles Knaak. David was a +highly involved colleague and contributor to the entire \openshmem project. He will be missed. \begin{multicols}{2} diff --git a/content/interoperability.tex b/content/interoperability.tex index 7347ee870..9db82aeda 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -1,6 +1,6 @@ \chapter{Interoperability with Other Programming Models}\label{sec:interoperability} -OpenSHMEM routines may be used in conjunction with the routines of other +\openshmem routines may be used in conjunction with the routines of other communication libraries or parallel languages in the same program. This section describes the interoperability with other programming models, including clarification of undefined behaviors caused by mixed use of different models, @@ -35,7 +35,7 @@ \subsection{Initialization} call to \FUNC{MPI\_Finalize}. 
\parimpnotes{ - Portable implementations of OpenSHMEM and \ac{MPI} must ensure that the initialization + Portable implementations of \openshmem and \ac{MPI} must ensure that the initialization calls can be made in an arbitrary order within a program; the same rule also applies to the finalization calls. A software runtime that utilizes a shared communication resource for \openshmem and \ac{MPI} communication may maintain an diff --git a/content/library_handles.tex b/content/library_handles.tex index 6040a61e2..1d74d3a14 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -24,9 +24,9 @@ \LibHandleDecl{SHMEM\_TEAM\_SHARED} & Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of \acp{PE} that share a memory domain. \LibHandleRef{SHMEM\_TEAM\_SHARED} refers to -the team of all PEs that would mutually return a non-null address from a +the team of all \acp{PE} that would mutually return a non-null address from a call to \FUNC{shmem\_ptr} for all symmetric heap objects. That is, -\FUNC{shmem\_ptr} must return a non-null pointer to the local PE for all +\FUNC{shmem\_ptr} must return a non-null pointer to the local \ac{PE} for all symmetric heap objects on all target \acp{PE} in the team. This means that symmetric heap objects on each \ac{PE} are directly load/store accessible by all \acp{PE} in the team. diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 366f1a815..fae9e68dc 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -16,7 +16,7 @@ integer types defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.1.1 and \Cstd[11]~\S7.20.1.1. When the \Cstd translation environment does not provide exact-width integer types with \HEADER{stdint.h}, an -\openshmem implemementation is not required to provide support for these types. +\openshmem implementation is not required to provide support for these types. 
The \FUNC{shmem\_test\_any} and \FUNC{shmem\_wait\_until\_any} routines require the \CONST{SIZE\_MAX} macro defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.3 and \Cstd[11]~\S7.20.3. diff --git a/content/profiling_interface.tex b/content/profiling_interface.tex index 9fec48758..b6526310d 100644 --- a/content/profiling_interface.tex +++ b/content/profiling_interface.tex @@ -6,7 +6,7 @@ unreasonable to expect that the authors and developers of profiling tools for \openshmem will have access to the source code that implements \openshmem on any particular machine. It is, therefore, -necessary to provide a mechanism by which the implementers of such +necessary to provide a mechanism by which the implementors of such tools can collect whatever performance information they wish \emph{without} access to the underlying implementation. diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index f271de112..9b642ddfe 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -110,10 +110,10 @@ array is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for - the local PE: + the local \ac{PE}: \begin{itemize} \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. + data has been copied out of the \VAR{source} data object. \end{itemize} \begin{DeprecateBlock} @@ -145,10 +145,10 @@ Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for - the local PE: + the local \ac{PE}: \begin{itemize} \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. + data has been copied out of the \VAR{source} data object. \item For active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. 
\end{itemize} diff --git a/content/shmem_atomic_fetch_and.tex b/content/shmem_atomic_fetch_and.tex index 675449f27..ec07088ff 100644 --- a/content/shmem_atomic_fetch_and.tex +++ b/content/shmem_atomic_fetch_and.tex @@ -38,7 +38,7 @@ } \apireturnvalues{ - The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + The value pointed to by \VAR{dest} on \ac{PE} \VAR{pe} immediately before the operation is performed. } diff --git a/content/shmem_atomic_fetch_or.tex b/content/shmem_atomic_fetch_or.tex index 0eb922bd5..612d0bab8 100644 --- a/content/shmem_atomic_fetch_or.tex +++ b/content/shmem_atomic_fetch_or.tex @@ -38,7 +38,7 @@ } \apireturnvalues{ - The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + The value pointed to by \VAR{dest} on \ac{PE} \VAR{pe} immediately before the operation is performed. } diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index d62fcd3ab..5d64eb1ec 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -37,11 +37,11 @@ \apidescription{ The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an atomic fetching bitwise OR on the remotely accessible data object pointed - by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + by \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise OR on - \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetched the prior + \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value} and fetched the prior contents of \VAR{dest} into the \VAR{fetch} local data object. 
} diff --git a/content/shmem_atomic_fetch_xor.tex b/content/shmem_atomic_fetch_xor.tex index fd563cb10..8dda47961 100644 --- a/content/shmem_atomic_fetch_xor.tex +++ b/content/shmem_atomic_fetch_xor.tex @@ -39,7 +39,7 @@ } \apireturnvalues{ - The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + The value pointed to by \VAR{dest} on \ac{PE} \VAR{pe} immediately before the operation is performed. } diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index f69739c34..6cbdcb8a7 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -37,11 +37,11 @@ \apidescription{ The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an atomic fetching bitwise XOR on the remotely accessible data object pointed - by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + by \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise XOR on - \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetched the prior + \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value} and fetched the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index ec3d4aa47..b008ac2e3 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -45,7 +45,7 @@ respectively. } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the calling PEs, from which the data is copied.} + the calling \acp{PE}, from which the data is copied.} \begin{DeprecateBlock} @@ -114,7 +114,7 @@ For active-set-based broadcasts: \begin{itemize} - \item The \VAR{dest} object is updated on all PEs other than the root PE. 
+ \item The \VAR{dest} object is updated on all \acp{PE} other than the root \ac{PE}. \item All \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet must participate in the operation. @@ -144,7 +144,7 @@ Upon return from an active-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item If the current PE is not the root PE, the \dest{} data object is updated. + \item If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object is updated. \item The \source{} data object may be safely reused. \item The values in the \VAR{pSync} array are restored to the original values. diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 921a7dd9d..8d973e7b7 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -66,7 +66,7 @@ \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} data items from the \source{} array into the - \dest{} array, over an \openshmem team in processor number order. + \dest{} array, over an \openshmem team in \ac{PE} number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: @@ -104,7 +104,7 @@ operation to concatenate \VAR{nelems} data items from the \source{} array into the \dest{} array, over an \openshmem active set - in processor number order. The resultant \dest{} array contains the contribution from + in \ac{PE} number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the @@ -151,7 +151,7 @@ with no performance degradation when \VAR{nelems} is a non-power-of-two value. \end{DeprecateBlock} The collective routines that operate on teams containing a - non-power-of-two of PEs do so with some performance degradation. 
They operate + non-power-of-two of \acp{PE} do so with some performance degradation. They operate with no performance degradation when \VAR{nelems} is a non-power-of-two value. } diff --git a/content/shmem_ctx_session_config_t.tex b/content/shmem_ctx_session_config_t.tex index 11adff1ff..9ec12f423 100644 --- a/content/shmem_ctx_session_config_t.tex +++ b/content/shmem_ctx_session_config_t.tex @@ -69,7 +69,7 @@ routines within a session whenever possible, because the library must impose strict completions to comply with ordering semantics. However, hints provided by \FUNC{shmem\_ctx\_session\_config\_t} do not imply - the occurence of any completion or memory ordering operations. + the occurrence of any completion or memory ordering operations. The requirements on buffers provided to \openshmem routines that are \textit{in-use} (as described in Section \ref{subsec:invoking_openshmem_operations}) apply regardless of any diff --git a/content/shmem_ctx_session_start.tex b/content/shmem_ctx_session_start.tex index 7c771d240..cd4c00958 100644 --- a/content/shmem_ctx_session_start.tex +++ b/content/shmem_ctx_session_start.tex @@ -106,7 +106,7 @@ \FUNC{shmem\_ctx\_session\_start} is non-collective, so there is no implied synchronization. Blocking puts must be sufficiently small to benefit from batching, and the - exact threshold for this benefit depends on the \openshmem implemenation + exact threshold for this benefit depends on the \openshmem implementation and/or the application. } diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index a470a4776..54b4f2f38 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -39,8 +39,8 @@ \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does not guarantee completion of delivery. \FUNC{shmem\_fence} also does not have an effect on the ordering between memory - accesses issued by the target PE. 
\FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, - \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee + accesses issued by the target \ac{PE}. \FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, + \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target \ac{PE} to guarantee ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} diff --git a/content/shmem_g.tex b/content/shmem_g.tex index 8f1b91fc6..c2cb573d9 100644 --- a/content/shmem_g.tex +++ b/content/shmem_g.tex @@ -22,7 +22,6 @@ the default context.} \apiargument{IN}{source}{Symmetric address of the source data object. The type of \source{} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{pe}{The number of the remote \ac{PE} on which \VAR{source} resides.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE} on which \VAR{source} resides relative to the team associated with the given \VAR{ctx} when provided, or the default context otherwise.} diff --git a/content/shmem_init.tex b/content/shmem_init.tex index 4929ecaf0..8d240b510 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -16,9 +16,9 @@ \apidescription{ \FUNC{shmem\_init} allocates and initializes resources used by the \openshmem library. It is a collective operation that all \acp{PE} must call before any - other \openshmem routine may be called, except \FUNC{shmem\_query\_initialized} - which checks the current initialized state of the library. In the - \openshmem program which it initialized, each call to \FUNC{shmem\_init} must + other \openshmem routine may be called, except \FUNC{shmem\_query\_initialized} + which checks the current initialized state of the library. In the + \openshmem program which it initialized, each call to \FUNC{shmem\_init} must be matched with a corresponding call to \FUNC{shmem\_finalize}. 
The \FUNC{shmem\_init} and \FUNC{shmem\_init\_thread} initialization @@ -42,8 +42,6 @@ users are encouraged to use \FUNC{shmem\_init}. An important difference between \FUNC{shmem\_init} and \FUNC{start\_pes} is that every call to \FUNC{shmem\_init} within a program must be matched with a call to \FUNC{shmem\_finalize}. - In the case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the - first one results in a no-op. } \end{DeprecateBlock} diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index ce335085e..bf9aa106a 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -63,9 +63,9 @@ operations before a thread calls \FUNC{shmem\_quiet}. \FUNC{shmem\_quiet} does not have an effect on the ordering between memory - accesses issued by the target PE. \FUNC{shmem\_wait\_until}, + accesses issued by the target \ac{PE}. \FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines - can be called by the target PE to guarantee ordering of its memory accesses. + can be called by the target \ac{PE} to guarantee ordering of its memory accesses. } \begin{apiexamples} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index e99a12f6c..cd9e12955 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -252,7 +252,7 @@ \subsubsubsection{PROD} contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t.} + arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type \CTYPE{size\_t}.} \begin{DeprecateBlock} \apiargument{IN}{nreduce}{In active-set based \ac{API} calls, @@ -283,7 +283,7 @@ \subsubsubsection{PROD} provides one element for each reduction. 
The results of the reductions are placed in the \dest{} array on all \acp{PE} participating in the reduction. - The same \source{} and \dest{} arrays must be passed by all PEs that + The same \source{} and \dest{} arrays must be passed by all \acp{PE} that participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses corresponding to buffers that @@ -323,7 +323,7 @@ \subsubsubsection{PROD} provides one element for each reduction. The results of the reductions are placed in the \dest{} array on all \acp{PE} participating in the reduction. - The same \source{} and \dest{} arrays must be passed by all PEs that + The same \source{} and \dest{} arrays must be passed by all \acp{PE} that participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses corresponding to buffers that @@ -379,11 +379,11 @@ \subsubsubsection{PROD} \begin{apiexamples} \apicexample - {In the following \Cstd[11] example, each \ac{PE} intializes an array of + {In the following \Cstd[11] example, each \ac{PE} initializes an array of random integers with values between $0$ and $npes-1$, inclusively. An OR reduction then tracks the array indices where maximal values occur (maximal values equal $npes - 1$), and a SUM reduction counts the total number of - maximal values across all PEs. + maximal values across all \acp{PE}. } {./example_code/shmem_reduce_example.c} {} diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index b50cdb681..9cd7b2017 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -76,7 +76,7 @@ \end{equation*} - The same \source{} and \dest{} arrays must be passed by all PEs that + The same \source{} and \dest{} arrays must be passed by all \acp{PE} that participate in the collective. 
The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 91a2ce61c..18c6b6e2f 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,10 +1,10 @@ \apisummary{ Registers the arrival of a \ac{PE} at a synchronization point. - This routine does not return until all other \acp{PE} in a given OpenSHMEM team - arrive at this synchronization point. + This routine does not return until all other \acp{PE} in a given \openshmem team + arrive at this synchronization point. \begin{DeprecateBlock} Registers the arrival of a \ac{PE} at a synchronization point. - This routine does not return until all other \acp{PE} in a given OpenSHMEM active set arrive at this synchronization point. + This routine does not return until all other \acp{PE} in a given \openshmem active set arrive at this synchronization point. \end{DeprecateBlock} } @@ -95,7 +95,7 @@ \apinotes{ The \FUNC{shmem\_sync} routine can be used to portably ensure that memory access operations observe remote updates in the order enforced by the - initiator \acp{PE}, provided that the initiator PE ensures completion of remote + initiator \acp{PE}, provided that the initiator \ac{PE} ensures completion of remote updates with a call to \FUNC{shmem\_quiet} prior to the call to the \FUNC{shmem\_sync} routine. } diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index dd2ad01b0..97af84658 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -27,7 +27,7 @@ creation of a team configured with \VAR{num\_contexts} of $N$ means that the implementation will make a best effort to reserve enough resources to support $N$ contexts created from the team - in existance at any given time. It is not a guarantee that $N$ + in existence at any given time. It is not a guarantee that $N$ calls to \FUNC{shmem\_team\_create\_ctx} will succeed.
See Section~\ref{sec:ctx} for more on communication contexts and Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index a211c17e6..a8781816c 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -51,10 +51,10 @@ where $\mathbb{Z}$ is the set of natural numbers ($0, 1, \dots$), $N$ is the number of \acp{PE} in the parent team, $size$ is a positive number indicating the number of \acp{PE} in the new team, and $stride$ is an integer. -The index $i$ specifies the number of the given PE in the new team. -When $stride$ is greater than zero, PEs in the new team remain in the same +The index $i$ specifies the number of the given \ac{PE} in the new team. +When $stride$ is greater than zero, \acp{PE} in the new team remain in the same relative order as in the parent team. -When $stride$ is less than zero, PEs in the new team are in \textit{reverse} +When $stride$ is less than zero, \acp{PE} in the new team are in \textit{reverse} relative order with respect to the parent team. If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, then the $size$ argument passed must be 1, or the behavior is undefined. 
diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index 30ebd077e..032dd3849 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -74,7 +74,7 @@ \apicexample {The following \Cstd[11] example demonstrates the use of \FUNC{shmem\_wait\_until\_any\_vector} to wait on values that differ - between even PEs and odd PEs.} + between even \acp{PE} and odd \acp{PE}.} {./example_code/shmem_wait_until_any_vector.c} {} \end{apiexamples} diff --git a/content/signaling.tex b/content/signaling.tex index bd04940b4..39fef486f 100644 --- a/content/signaling.tex +++ b/content/signaling.tex @@ -1,4 +1,4 @@ -This section specifies the OpenSHMEM support for \OPR{put-with-signal}, +This section specifies the \openshmem support for \OPR{put-with-signal}, nonblocking \OPR{put-with-signal}, and \OPR{signal-\{add, fetch, set\}} routines. The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently diff --git a/content/teams_intro.tex b/content/teams_intro.tex index cca6d01b7..1b597846b 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -46,7 +46,7 @@ \subsubsection*{Team Handles} \subsubsection*{Thread Safety} -When it is allowed by the threading model provided by the OpenSHMEM +When it is allowed by the threading model provided by the \openshmem library, a team may be used concurrently in non-collective operations (e.g., \FUNC{shmem\_team\_my\_pe}) by multiple threads within the \ac{PE} where it was created. @@ -93,7 +93,7 @@ \subsubsection*{Team Creation} team-based collectives specified in Section~\ref{subsec:coll}. In particular, in multithreaded executions, threads at a given \ac{PE} must not perform simultaneous split operations on the same parent team. 
-Team creation operations are matched across participating PEs based +Team creation operations are matched across participating \acp{PE} based on the order in which they are performed. Thus, team creation events must also occur in the same order on all \acp{PE} in the parent team. diff --git a/example_code/shmem_reduce_example.c b/example_code/shmem_reduce_example.c index 9e025c9d7..c04439a03 100644 --- a/example_code/shmem_reduce_example.c +++ b/example_code/shmem_reduce_example.c @@ -35,7 +35,7 @@ int main(void) { if (mype == 0) { printf("Found %d maximal random numbers across all PEs.\n", maximal_values_total); - printf("A maximal number occured (at least once) at the following indices:\n"); + printf("A maximal number occurred (at least once) at the following indices:\n"); for (int i = 0; i < NELEMS; i++) { if (value_is_maximal_all[i] == 1) { printf("%d ", i); From 42d687e609199144d79d28473236af2d27069151 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Fri, 27 Sep 2024 18:01:59 -0400 Subject: [PATCH 03/18] Update workflow to use upload-artifact@v4 Signed-off-by: James Dinan --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 48d819e4f..e92c1f6b5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: - name: Rename PDF run: mv main_spec.pdf openshmem-draft-${{ github.event.pull_request.head.sha }}.pdf - name: Upload PDF artifact - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: openshmem-draft-${{ github.event.pull_request.head.sha }} path: openshmem-draft-${{ github.event.pull_request.head.sha }}.pdf From 7360ca1cdc149d29522e1c805e76846c5621d58a Mon Sep 17 00:00:00 2001 From: "Rahman, Md" Date: Wed, 2 Oct 2024 21:27:02 -0500 Subject: [PATCH 04/18] Unresolved issues of team_ptr and multi-init --- content/shmem_team_ptr.tex | 3 +++ content/threads_intro.tex | 8 ++++---- 2 files 
changed, 7 insertions(+), 4 deletions(-) diff --git a/content/shmem_team_ptr.tex b/content/shmem_team_ptr.tex index 81deaa92d..8e8c23cad 100644 --- a/content/shmem_team_ptr.tex +++ b/content/shmem_team_ptr.tex @@ -37,6 +37,9 @@ when it can be accessed using memory loads and stores. Otherwise, a null pointer is returned. + If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_WORLD}, then + the behavior is identical to that of \FUNC{shmem\_ptr} with the same \VAR{dest} + and \VAR{pe} arguments. If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then a null pointer is returned. If \VAR{team} is otherwise invalid, the behavior is undefined. diff --git a/content/threads_intro.tex b/content/threads_intro.tex index 3aa329c6b..babe6577d 100644 --- a/content/threads_intro.tex +++ b/content/threads_intro.tex @@ -29,10 +29,10 @@ \begin{enumerate} \item -In the \CONST{SHMEM\_THREAD\_FUNNELED}, \CONST{SHMEM\_THREAD\_SERIALIZED}, and -\CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, the \FUNC{shmem\_init\_thread} and -\FUNC{shmem\_finalize} calls must be invoked by the same thread. - +In the \CONST{SHMEM\_THREAD\_FUNNELED} and \CONST{SHMEM\_THREAD\_SERIALIZED} +thread levels, all invocations of \FUNC{shmem\_init\_thread} and +\FUNC{shmem\_finalize} must be made by the same thread. + \item Any \openshmem operation initiated by a thread is considered an action of the \ac{PE} as a whole.
The symmetric heap and symmetric variables scope are not From 1266292432a4b76957dc623b1da837722c8f777b Mon Sep 17 00:00:00 2001 From: David Ozog Date: Mon, 30 Sep 2024 14:36:39 -0400 Subject: [PATCH 05/18] collectives: inconsistency in call preconditions --- content/shmem_alltoall.tex | 2 +- content/shmem_broadcast.tex | 2 +- content/shmem_collect.tex | 2 +- content/shmem_reductions.tex | 2 +- content/shmem_scan.tex | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index f271de112..e53461c12 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -98,7 +98,7 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following + Before the local \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index ec3d4aa47..d0b0ddb59 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -85,7 +85,7 @@ the team. \end{itemize} - Before any \ac{PE} calls a broadcast routine, the following conditions + Before the local \ac{PE} calls a broadcast routine, the following conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 921a7dd9d..d53f5d620 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -88,7 +88,7 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. 
- Before any \ac{PE} calls a collect routine, the following conditions must + Before the local \ac{PE} calls a collect routine, the following conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index e99a12f6c..adf318c5f 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -295,7 +295,7 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a reduction routine, the following conditions + Before the local \ac{PE} calls a reduction routine, the following conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index b50cdb681..2d8a1f72b 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -90,7 +90,7 @@ \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. 
- Before any \ac{PE} calls a scan routine, the following conditions must be + Before the local \ac{PE} calls a scan routine, the following conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept From ea4a09603bb29dd1b476b8b327d70486695ffcd3 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 27 Sep 2024 10:04:43 -0400 Subject: [PATCH 06/18] collectives intro: add Scan to team-based list --- content/collective_intro.tex | 1 + 1 file changed, 1 insertion(+) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 4996b1784..c472a6f46 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -58,6 +58,7 @@ \subsubsection*{Team-based collectives} \item \FUNC{shmem\_[\FuncParam{TYPENAME}\_]collect[mem]} \item \FUNC{shmem\_[\FuncParam{TYPENAME}\_]fcollect[mem]} \item \FUNC{shmem\_[\FuncParam{TYPENAME}\_]\{and, or, xor, max, min, sum, prod\}\_reduce} +\item \FUNC{shmem\_[\FuncParam{TYPENAME}\_]sum\_\{in, ex\}scan} \end{itemize} In addition, all team creation functions are collective operations. In addition to the ordering From 090019859eb45b1927fe2b227d047381202dda25 Mon Sep 17 00:00:00 2001 From: wokuno <38401861+wokuno@users.noreply.github.com> Date: Thu, 3 Oct 2024 12:02:12 -0500 Subject: [PATCH 07/18] Teams: Fix split strided wrap-around example --- content/shmem_team_split_strided.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index a211c17e6..27ef95b9e 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -67,9 +67,9 @@ $stride$). That is, \textit{wrap-around} with respect to the parent team's \ac{PE} values is not permitted. 
-For example, the list of \acp{PE} in the parent team should not start at a high -number and then continue to include \acp{PE} in the lower end of the parent -team's \ac{PE} range. +For example, given a parent team with a size of 8 \acp{PE}, a call to +\FUNC{shmem\_team\_split\_strided} with the following arguments would +be invalid: $start$ equal to 3, $stride$ equal to 3, and $size$ equal to 3. This routine must be called by all \acp{PE} in the parent team. All \acp{PE} must provide the same values for the \ac{PE} triplet. From aa875fdec824537e5138e6eee2618f82c26ed3e0 Mon Sep 17 00:00:00 2001 From: Md Rahman Date: Thu, 3 Oct 2024 12:42:45 -0500 Subject: [PATCH 08/18] Update content/threads_intro.tex Co-authored-by: James Dinan --- content/threads_intro.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/threads_intro.tex b/content/threads_intro.tex index babe6577d..2407c6698 100644 --- a/content/threads_intro.tex +++ b/content/threads_intro.tex @@ -29,10 +29,10 @@ \begin{enumerate} \item -In the \CONST{SHMEM\_THREAD\_FUNNELED} and \CONST{SHMEM\_THREAD\_SERIALIZED} -thread levels, all invocations of \FUNC{shmem\_init\_thread} and -\FUNC{shmem\_finalize} must be made by the same thread. - +In the \CONST{SHMEM\_THREAD\_FUNNELED}, \CONST{SHMEM\_THREAD\_SERIALIZED}, and +\CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, the \FUNC{shmem\_finalize} +call must be invoked by the same thread that called \FUNC{shmem\_init\_thread}. + \item Any \openshmem operation initiated by a thread is considered an action of the \ac{PE} as a whole. 
The symmetric heap and symmetric variables scope are not From 18ab41886ff4c0a3fe9fa19fb5b20d2577ac5cc6 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 9 Oct 2024 08:41:38 -0400 Subject: [PATCH 09/18] reductions: add note about FP associativity --- content/programming_model_overview.tex | 2 +- content/shmem_reductions.tex | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 6d53ece1e..8305607bf 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -157,7 +157,7 @@ of concatenated symmetric objects contributed by each of the \acp{PE} in another symmetric data object. \item \OPR{Reduction}: All \acp{PE} participating in the routine get the result - of an associative binary routine over elements of the specified symmetric + of a binary operation over elements of the specified symmetric data object on another symmetric data object. \item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange a fixed amount of contiguous or strided data with all other participating diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index e99a12f6c..917e18f3f 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -373,7 +373,20 @@ \subsubsubsection{PROD} } \apireturnvalues{ - Zero on successful local completion. Nonzero otherwise. + Zero on successful local completion. Nonzero otherwise. +} + +\apinotes{ + The binary operations performed by \openshmem reductions are intended to be + associative and commutative. + However, floating point arithmetic is not associative or commutative due to + the inherent inaccuracies of floating-point representations caused by + rounding errors, finite precision, and the order of the applied binary + operations. 
+ This can lead to variations in the result of \openshmem arithmetic + reduction operations on floating-point datatypes, including NaN values. + A future version of the \openshmem specification may clarify the behavior + of reductions on floating point datatypes. } \begin{apiexamples} From c299db116a82e64d503ea7f5fe96646686beb93d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 9 Oct 2024 09:20:13 -0400 Subject: [PATCH 10/18] reductions: revert small whitespace change --- content/shmem_reductions.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 917e18f3f..3a8af194d 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -373,7 +373,7 @@ \subsubsubsection{PROD} } \apireturnvalues{ - Zero on successful local completion. Nonzero otherwise. + Zero on successful local completion. Nonzero otherwise. } \apinotes{ From 198335a4aba724d65c80aff61f6af86762da9972 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 9 Oct 2024 09:21:16 -0400 Subject: [PATCH 11/18] reductions: add small whitespace change --- content/shmem_reductions.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index e99a12f6c..0272f30dd 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -373,7 +373,7 @@ \subsubsubsection{PROD} } \apireturnvalues{ - Zero on successful local completion. Nonzero otherwise. + Zero on successful local completion. Nonzero otherwise. 
} \begin{apiexamples} From 11c76e0812ee26e20f07c66d6437e36a615c4294 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 9 Oct 2024 10:52:01 -0400 Subject: [PATCH 12/18] Update content/shmem_reductions.tex Co-authored-by: James Dinan --- content/shmem_reductions.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 3a8af194d..a4f15e29b 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -381,8 +381,7 @@ \subsubsubsection{PROD} associative and commutative. However, floating point arithmetic is not associative or commutative due to the inherent inaccuracies of floating-point representations caused by - rounding errors, finite precision, and the order of the applied binary - operations. + rounding errors and finite precision. This can lead to variations in the result of \openshmem arithmetic reduction operations on floating-point datatypes, including NaN values. A future version of the \openshmem specification may clarify the behavior From d7a4aa58556b8b91ed62034f14b87d447364b07e Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 9 Oct 2024 10:59:00 -0400 Subject: [PATCH 13/18] reductions: revert back to associative binary ops --- content/programming_model_overview.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 8305607bf..6d53ece1e 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -157,7 +157,7 @@ of concatenated symmetric objects contributed by each of the \acp{PE} in another symmetric data object. \item \OPR{Reduction}: All \acp{PE} participating in the routine get the result - of a binary operation over elements of the specified symmetric + of an associative binary routine over elements of the specified symmetric data object on another symmetric data object. 
\item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange a fixed amount of contiguous or strided data with all other participating From 9852e626676a1cc6239780ba977c0279251eeb94 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 9 Oct 2024 14:14:25 -0400 Subject: [PATCH 14/18] Add Matthew Davis to contributors --- content/coverpage.tex | 1 + 1 file changed, 1 insertion(+) diff --git a/content/coverpage.tex b/content/coverpage.tex index e26a54cac..09d319e17 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -60,6 +60,7 @@ \subsection*{\openshmem 1.6} \item Aurelien Bouteiller, \ac{UTK} \item Mark Brown, Intel \item Bob Cernohous, \ac{HPE} +\item Matthew Davis, Georgia Tech \item James Dinan\footnotemark[1], NVIDIA \item Megan Grodowitz, Arm Inc. \item Max Grossman, Georgia Tech From 9be6e038f12a9a65c9349e47b39ef2c9f7f2dc28 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 9 Oct 2024 15:35:01 -0400 Subject: [PATCH 15/18] Add shmem_team_ptr to SHMEM_TEAM_SHARED description --- content/library_handles.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/library_handles.tex b/content/library_handles.tex index 6040a61e2..8c6faa3e7 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -25,8 +25,8 @@ Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of \acp{PE} that share a memory domain. \LibHandleRef{SHMEM\_TEAM\_SHARED} refers to the team of all PEs that would mutually return a non-null address from a -call to \FUNC{shmem\_ptr} for all symmetric heap objects. That is, -\FUNC{shmem\_ptr} must return a non-null pointer to the local PE for all +call to \FUNC{shmem\_ptr} or \FUNC{shmem\_team\_ptr} for all symmetric heap objects. That is, +\FUNC{shmem\_ptr} and \FUNC{shmem\_team\_ptr} must return a non-null pointer to the local PE for all symmetric heap objects on all target \acp{PE} in the team. 
This means that symmetric heap objects on each \ac{PE} are directly load/store accessible by all \acp{PE} in the team. From 4873b2535a444c9e30a96cd0cd968733b1ad7745 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 9 Oct 2024 16:12:15 -0400 Subject: [PATCH 16/18] Add definition of remote PE The term "remote PE" is used widely in the spec even in cases where the PE can be node-local or even the same PE. In many cases "target PE" might be a better term, but given the widespread use and time constraints for the 1.6 spec we've opted to just define the term to include all remotely accessible PEs. --- content/programming_model_overview.tex | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 6d53ece1e..e30ef7920 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -2,7 +2,10 @@ mechanisms to share information among \openshmem processes, or \acp{PE}, and private data objects that are accessible by only the \ac{PE} itself. The \ac{API} allows communication and synchronization operations on both private (local to -the \ac{PE} initiating the operation) and remotely accessible data objects. The key +the \ac{PE} initiating the operation) and remotely accessible data objects. A +remote \ac{PE} is defined as any \ac{PE} whose remotely accessible data objects +are accessible to the \ac{PE} initiating an operation and may include the +initiating \ac{PE}. The key feature of \openshmem is that data transfer operations are \emph{one-sided} in nature.
This means that a local \ac{PE} executing a data transfer routine does not require the participation of the remote \ac{PE} From 080fb03204265fff4b55753d3b99ca45f876aead Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 10 Oct 2024 16:32:57 -0400 Subject: [PATCH 17/18] backmatter: add const to cmp_values as v1.5 erratum --- content/backmatter.tex | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index bc7013ff3..7ce2833aa 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -749,6 +749,21 @@ \section{Version 1.6} additional arguments. \label{changelog:pcontrol} \ChangelogRef{subsec:shmem_pcontrol} % +\item Added a \texttt{const} qualifier to the \VAR{cmp\_values} argument in the + following point-to-point synchronization routines: + \FUNC{shmem\_wait\_until\_all\_vector}, + \FUNC{shmem\_wait\_until\_any\_vector}, + \FUNC{shmem\_wait\_until\_some\_vector}, + \FUNC{shmem\_test\_all\_vector}, \FUNC{shmem\_test\_any\_vector}, and + \FUNC{shmem\_test\_some\_vector}. \label{changelog:p2p_vector_const} +\ChangelogRef{ + subsec:shmem_wait_until_all_vector, + subsec:shmem_wait_until_any_vector, + subsec:shmem_wait_until_some_vector, + subsec:shmem_test_all_vector, + subsec:shmem_test_any_vector, + subsec:shmem_test_some_vector}% +% \end{enumerate} @@ -1335,6 +1350,14 @@ \section{Version 1.5} \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable profiling with profile library defined effects and additional arguments (\ref{changelog:v1.6}.\ref{changelog:pcontrol}). 
+ \item Added a \texttt{const} qualifier to the \VAR{cmp\_values} argument in the + following point-to-point synchronization routines: + \FUNC{shmem\_wait\_until\_all\_vector}, + \FUNC{shmem\_wait\_until\_any\_vector}, + \FUNC{shmem\_wait\_until\_some\_vector}, + \FUNC{shmem\_test\_all\_vector}, \FUNC{shmem\_test\_any\_vector}, and + \FUNC{shmem\_test\_some\_vector} + (\ref{changelog:v1.6}.\ref{changelog:p2p_vector_const}). \end{enumerate} %end of setlength command that was started in frontmatter.tex From 9a4656a5be45bc9a1693b39571d81a1ea6ba406c Mon Sep 17 00:00:00 2001 From: James Dinan Date: Fri, 1 Nov 2024 14:09:46 -0400 Subject: [PATCH 18/18] Add missing contributor --- content/coverpage.tex | 1 + 1 file changed, 1 insertion(+) diff --git a/content/coverpage.tex b/content/coverpage.tex index b48f6b1f1..885913c41 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -62,6 +62,7 @@ \subsection*{\openshmem 1.6} \item Bob Cernohous, \ac{HPE} \item Matthew Davis, Georgia Tech \item James Dinan\footnotemark[1], NVIDIA +\item Youssef Elmougy, Georgia Tech \item Megan Grodowitz, Arm Inc. \item Max Grossman, Georgia Tech \item Yanfei Guo, \ac{ANL}